diff options
author | jamborm <jamborm@138bc75d-0d04-0410-961f-82ee72b054a4> | 2016-12-14 22:30:41 +0000 |
---|---|---|
committer | jamborm <jamborm@138bc75d-0d04-0410-961f-82ee72b054a4> | 2016-12-14 22:30:41 +0000 |
commit | 4954efd47c136a844a8096db13dea7ca33165bd7 (patch) | |
tree | 21a84ad4210bfa6213a06a75a3311f6c824b1b65 | |
parent | 84960375bfca8b816d6c5021a7977072c88647ee (diff) | |
download | gcc-4954efd47c136a844a8096db13dea7ca33165bd7.tar.gz |
Split omp-low into multiple files
2016-12-14 Martin Jambor <mjambor@suse.cz>
* omp-general.h: New file.
* omp-general.c: New file.
* omp-expand.h: Likewise.
* omp-expand.c: Likewise.
* omp-offload.h: Likewise.
* omp-offload.c: Likewise.
* omp-grid.c: Likewise.
* omp-grid.h: Likewise.
* omp-low.h: Include omp-general.h and omp-grid.h. Removed includes
of params.h, symbol-summary.h, lto-section-names.h, cilk.h, tree-eh.h,
ipa-prop.h, tree-cfgcleanup.h, cfgloop.h, except.h, expr.h, stmt.h,
varasm.h, calls.h, explow.h, dojump.h, flags.h, tree-into-ssa.h,
tree-cfg.h, cfganal.h, alias.h, emit-rtl.h, optabs.h, expmed.h,
alloc-pool.h, cfghooks.h, rtl.h and memmodel.h.
(omp_find_combined_for): Declare.
(find_omp_clause): Renamed to omp_find_clause and moved to
omp-general.h.
(free_omp_regions): Renamed to omp_free_regions and moved to
omp-expand.h.
(replace_oacc_fn_attrib): Renamed to oacc_replace_fn_attrib and moved
to omp-general.h.
(set_oacc_fn_attrib): Renamed to oacc_set_fn_attrib and moved to
omp-general.h.
(build_oacc_routine_dims): Renamed to oacc_build_routine_dims and
moved to omp-general.h.
(get_oacc_fn_attrib): Renamed to oacc_get_fn_attrib and moved to
omp-general.h.
(oacc_fn_attrib_kernels_p): Moved to omp-general.h.
(get_oacc_fn_dim_size): Renamed to oacc_get_fn_dim_size and moved to
omp-general.c.
(omp_expand_local): Moved to omp-expand.h.
(make_gimple_omp_edges): Renamed to omp_make_gimple_edges and moved to
omp-expand.h.
(omp_finish_file): Moved to omp-offload.h.
(default_goacc_validate_dims): Renamed to
oacc_default_goacc_validate_dims and moved to omp-offload.h.
(offload_funcs, offload_vars): Moved to omp-offload.h.
* omp-low.c: Include omp-general.h, omp-offload.h and omp-grid.h.
(omp_region): Moved to omp-expand.c.
(omp_for_data_loop): Moved to omp-general.h.
(omp_for_data): Likewise.
(oacc_loop): Moved to omp-offload.c.
(oacc_loop_flags): Moved to omp-general.h.
(offload_funcs, offload_vars): Moved to omp-offload.c.
(root_omp_region): Moved to omp-expand.c.
(omp_any_child_fn_dumped): Likewise.
(find_omp_clause): Renamed to omp_find_clause and moved to
omp-general.c.
(is_combined_parallel): Moved to omp-expand.c.
(is_reference): Renamed to omp_is_reference and moved to
omp-general.c.
(adjust_for_condition): Renamed to omp_adjust_for_condition and moved
to omp-general.c.
(get_omp_for_step_from_incr): Renamed to omp_get_for_step_from_incr
and moved to omp-general.c.
(extract_omp_for_data): Renamed to omp_extract_for_data and moved to
omp-general.c.
(workshare_safe_to_combine_p): Moved to omp-expand.c.
(omp_adjust_chunk_size): Likewise.
(get_ws_args_for): Likewise.
(get_base_type): Removed.
(dump_omp_region): Moved to omp-expand.c.
(debug_omp_region): Likewise.
(debug_all_omp_regions): Likewise.
(new_omp_region): Likewise.
(free_omp_region_1): Likewise.
(free_omp_regions): Renamed to omp_free_regions and moved to
omp-expand.c.
(find_combined_for): Renamed to omp_find_combined_for, made global.
(build_omp_barrier): Renamed to omp_build_barrier and moved to
omp-general.c.
(omp_max_vf): Moved to omp-general.c.
(omp_max_simt_vf): Likewise.
(gimple_build_cond_empty): Moved to omp-expand.c.
(parallel_needs_hsa_kernel_p): Likewise.
(expand_omp_build_assign): Moved declaration to omp-expand.c.
(expand_parallel_call): Moved to omp-expand.c.
(expand_cilk_for_call): Likewise.
(expand_task_call): Likewise.
(vec2chain): Likewise.
(remove_exit_barrier): Likewise.
(remove_exit_barriers): Likewise.
(optimize_omp_library_calls): Likewise.
(expand_omp_regimplify_p): Likewise.
(expand_omp_build_assign): Likewise.
(expand_omp_taskreg): Likewise.
(oacc_collapse): Likewise.
(expand_oacc_collapse_init): Likewise.
(expand_oacc_collapse_vars): Likewise.
(expand_omp_for_init_counts): Likewise.
(expand_omp_for_init_vars): Likewise.
(extract_omp_for_update_vars): Likewise.
(expand_omp_ordered_source): Likewise.
(expand_omp_ordered_sink): Likewise.
(expand_omp_ordered_source_sink): Likewise.
(expand_omp_for_ordered_loops): Likewise.
(expand_omp_for_generic): Likewise.
(expand_omp_for_static_nochunk): Likewise.
(find_phi_with_arg_on_edge): Likewise.
(expand_omp_for_static_chunk): Likewise.
(expand_cilk_for): Likewise.
(expand_omp_simd): Likewise.
(expand_omp_taskloop_for_outer): Likewise.
(expand_omp_taskloop_for_inner): Likewise.
(expand_oacc_for): Likewise.
(expand_omp_for): Likewise.
(expand_omp_sections): Likewise.
(expand_omp_single): Likewise.
(expand_omp_synch): Likewise.
(expand_omp_atomic_load): Likewise.
(expand_omp_atomic_store): Likewise.
(expand_omp_atomic_fetch_op): Likewise.
(expand_omp_atomic_pipeline): Likewise.
(expand_omp_atomic_mutex): Likewise.
(expand_omp_atomic): Likewise.
(oacc_launch_pack): Moved to omp-general.c, made public.
(OACC_FN_ATTRIB): Likewise.
(replace_oacc_fn_attrib): Renamed to oacc_replace_fn_attrib and moved
to omp-general.c.
(set_oacc_fn_attrib): Renamed to oacc_set_fn_attrib and moved to
omp-general.c.
(build_oacc_routine_dims): Renamed to oacc_build_routine_dims and
moved to omp-general.c.
(get_oacc_fn_attrib): Renamed to oacc_get_fn_attrib and moved to
omp-general.c.
(oacc_fn_attrib_kernels_p): Moved to omp-general.c.
(oacc_fn_attrib_level): Moved to omp-offload.c.
(get_oacc_fn_dim_size): Renamed to oacc_get_fn_dim_size and moved to
omp-general.c.
(get_oacc_ifn_dim_arg): Renamed to oacc_get_ifn_dim_arg and moved to
omp-general.c.
(mark_loops_in_oacc_kernels_region): Moved to omp-expand.c.
(grid_launch_attributes_trees): Likewise.
(grid_attr_trees): Likewise.
(grid_create_kernel_launch_attr_types): Likewise.
(grid_insert_store_range_dim): Likewise.
(grid_get_kernel_launch_attributes): Likewise.
(get_target_argument_identifier_1): Likewise.
(get_target_argument_identifier): Likewise.
(get_target_argument_value): Likewise.
(push_target_argument_according_to_value): Likewise.
(get_target_arguments): Likewise.
(expand_omp_target): Likewise.
(grid_expand_omp_for_loop): Moved to omp-grid.c.
(grid_arg_decl_map): Likewise.
(grid_remap_kernel_arg_accesses): Likewise.
(grid_expand_target_grid_body): Likewise.
(expand_omp): Renamed to omp_expand and moved to omp-expand.c.
(build_omp_regions_1): Moved to omp-expand.c.
(build_omp_regions_root): Likewise.
(omp_expand_local): Likewise.
(build_omp_regions): Likewise.
(execute_expand_omp): Likewise.
(pass_data_expand_omp): Likewise.
(pass_expand_omp): Likewise.
(make_pass_expand_omp): Likewise.
(pass_data_expand_omp_ssa): Likewise.
(pass_expand_omp_ssa): Likewise.
(make_pass_expand_omp_ssa): Likewise.
(grid_lastprivate_predicate): Renamed to
omp_grid_lastprivate_predicate and moved to omp-grid.c, made public.
(grid_prop): Moved to omp-grid.c.
(GRID_MISSED_MSG_PREFIX): Likewise.
(grid_safe_assignment_p): Likewise.
(grid_seq_only_contains_local_assignments): Likewise.
(grid_find_single_omp_among_assignments_1): Likewise.
(grid_find_single_omp_among_assignments): Likewise.
(grid_find_ungridifiable_statement): Likewise.
(grid_parallel_clauses_gridifiable): Likewise.
(grid_inner_loop_gridifiable_p): Likewise.
(grid_dist_follows_simple_pattern): Likewise.
(grid_gfor_follows_tiling_pattern): Likewise.
(grid_call_permissible_in_distribute_p): Likewise.
(grid_handle_call_in_distribute): Likewise.
(grid_dist_follows_tiling_pattern): Likewise.
(grid_target_follows_gridifiable_pattern): Likewise.
(grid_remap_prebody_decls): Likewise.
(grid_var_segment): Likewise.
(grid_mark_variable_segment): Likewise.
(grid_copy_leading_local_assignments): Likewise.
(grid_process_grid_body): Likewise.
(grid_eliminate_combined_simd_part): Likewise.
(grid_mark_tiling_loops): Likewise.
(grid_mark_tiling_parallels_and_loops): Likewise.
(grid_process_kernel_body_copy): Likewise.
(grid_attempt_target_gridification): Likewise.
(grid_gridify_all_targets_stmt): Likewise.
(grid_gridify_all_targets): Renamed to omp_grid_gridify_all_targets
and moved to omp-grid.c, made public.
(make_gimple_omp_edges): Renamed to omp_make_gimple_edges and moved to
omp-expand.c.
(add_decls_addresses_to_decl_constructor): Moved to omp-offload.c.
(omp_finish_file): Likewise.
(oacc_thread_numbers): Likewise.
(oacc_xform_loop): Likewise.
(oacc_default_dims, oacc_min_dims): Likewise.
(oacc_parse_default_dims): Likewise.
(oacc_validate_dims): Likewise.
(new_oacc_loop_raw): Likewise.
(new_oacc_loop_outer): Likewise.
(new_oacc_loop): Likewise.
(new_oacc_loop_routine): Likewise.
(finish_oacc_loop): Likewise.
(free_oacc_loop): Likewise.
(dump_oacc_loop_part): Likewise.
(dump_oacc_loop): Likewise.
(debug_oacc_loop): Likewise.
(oacc_loop_discover_walk): Likewise.
(oacc_loop_sibling_nreverse): Likewise.
(oacc_loop_discovery): Likewise.
(oacc_loop_xform_head_tail): Likewise.
(oacc_loop_xform_loop): Likewise.
(oacc_loop_process): Likewise.
(oacc_loop_fixed_partitions): Likewise.
(oacc_loop_auto_partitions): Likewise.
(oacc_loop_partition): Likewise.
(default_goacc_fork_join): Likewise.
(default_goacc_reduction): Likewise.
(execute_oacc_device_lower): Likewise.
(default_goacc_validate_dims): Likewise.
(default_goacc_dim_limit): Likewise.
(pass_data_oacc_device_lower): Likewise.
(pass_oacc_device_lower): Likewise.
(make_pass_oacc_device_lower): Likewise.
(execute_omp_device_lower): Likewise.
(pass_data_omp_device_lower): Likewise.
(pass_omp_device_lower): Likewise.
(make_pass_omp_device_lower): Likewise.
(pass_data_omp_target_link): Likewise.
(pass_omp_target_link): Likewise.
(find_link_var_op): Likewise.
(pass_omp_target_link::execute): Likewise.
(make_pass_omp_target_link): Likewise.
* Makefile.in (OBJS): Added omp-offload.o, omp-expand.o, omp-general.o
and omp-grid.o.
(GTFILES): Added omp-offload.h, omp-offload.c and omp-expand.c, removed
omp-low.h.
* gimple-fold.c: Include omp-general.h instead of omp-low.h.
(fold_internal_goacc_dim): Adjusted calls to
get_oacc_ifn_dim_arg and get_oacc_fn_dim_size to use their new names.
* gimplify.c: Include omp-low.h.
(omp_notice_variable): Adjust the call to get_oacc_fn_attrib to use
its new name.
(gimplify_omp_task): Adjusted calls to find_omp_clause to use its new
name.
(gimplify_omp_for): Likewise.
* lto-cgraph.c: Include omp-offload.h instead of omp-low.h.
* toplev.c: Include omp-offload.h instead of omp-low.h.
* tree-cfg.c: Include omp-general.h instead of omp-low.h. Also
include omp-expand.h.
(make_edges_bb): Adjusted the call to make_gimple_omp_edges to use its
new name.
(make_edges): Adjust the call to free_omp_regions to use its new name.
* tree-parloops.c: Include omp-general.h.
(create_parallel_loop): Adjusted the call to set_oacc_fn_attrib to use
its new name.
(parallelize_loops): Adjusted the call to get_oacc_fn_attrib to use
its new name.
* tree-ssa-loop.c: Include omp-general.h instead of omp-low.h.
(gate_oacc_kernels): Adjusted the call to get_oacc_fn_attrib to use
its new name.
* tree-vrp.c: Include omp-general.h instead of omp-low.h.
(extract_range_basic): Adjusted calls to get_oacc_ifn_dim_arg and
get_oacc_fn_dim_size to use their new names.
* varpool.c: Include omp-offload.h instead of omp-low.h.
* gengtype.c (open_base_files): Replace omp-low.h with omp-offload.h in
ifiles.
* config/nvptx/nvptx.c: Include omp-general.h.
(nvptx_expand_call): Adjusted the call to get_oacc_fn_attrib to use
its new name.
(nvptx_reorg): Likewise.
(nvptx_record_offload_symbol): Likewise.
gcc/c-family:
* c-omp.c: Include omp-general.h instead of omp-low.h.
(c_finish_oacc_wait): Adjusted call to find_omp_clause to use its new
name.
gcc/c/
* c-parser.c: Include omp-general.h and omp-offload.h instead of
omp-low.h.
(c_finish_oacc_routine): Adjusted call to
get_oacc_fn_attrib, build_oacc_routine_dims and replace_oacc_fn_attrib
to use their new names.
(c_parser_oacc_enter_exit_data): Adjusted call to find_omp_clause to
use its new name.
(c_parser_oacc_update): Likewise.
(c_parser_omp_simd): Likewise.
(c_parser_omp_target_update): Likewise.
* c-typeck.c: Include omp-general.h instead of omp-low.h.
(c_finish_omp_cancel): Adjusted call to find_omp_clause to use its new
name.
(c_finish_omp_cancellation_point): Likewise.
* gimple-parser.c: Do not include omp-low.h
gcc/cp/
* parser.c: Include omp-general.h and omp-offload.h instead of
omp-low.h.
(cp_parser_omp_simd): Adjusted calls to find_omp_clause to use its new
name.
(cp_parser_omp_target_update): Likewise.
(cp_parser_oacc_declare): Likewise.
(cp_parser_oacc_enter_exit_data): Likewise.
(cp_parser_oacc_update): Likewise.
(cp_finalize_oacc_routine): Adjusted call to get_oacc_fn_attrib,
build_oacc_routine_dims and replace_oacc_fn_attrib to use their new
names.
* semantics.c: Include omp-general.h instead of omp-low.h.
(finish_omp_for): Adjusted calls to find_omp_clause to use its new
name.
(finish_omp_cancel): Likewise.
(finish_omp_cancellation_point): Likewise.
fortran/
* trans-openmp.c: Include omp-general.h.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@243673 138bc75d-0d04-0410-961f-82ee72b054a4
34 files changed, 12662 insertions, 12080 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 7919ad8d272..0afefdbd76c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,278 @@ +2016-12-14 Martin Jambor <mjambor@suse.cz> + + * omp-general.h: New file. + * omp-general.c: New file. + * omp-expand.h: Likewise. + * omp-expand.c: Likewise. + * omp-offload.h: Likewise. + * omp-offload.c: Likewise. + * omp-grid.c: Likewise. + * omp-grid.c: Likewise. + * omp-low.h: Include omp-general.h and omp-grid.h. Removed includes + of params.h, symbol-summary.h, lto-section-names.h, cilk.h, tree-eh.h, + ipa-prop.h, tree-cfgcleanup.h, cfgloop.h, except.h, expr.h, stmt.h, + varasm.h, calls.h, explow.h, dojump.h, flags.h, tree-into-ssa.h, + tree-cfg.h, cfganal.h, alias.h, emit-rtl.h, optabs.h, expmed.h, + alloc-pool.h, cfghooks.h, rtl.h and memmodel.h. + (omp_find_combined_for): Declare. + (find_omp_clause): Renamed to omp_find_clause and moved to + omp-general.h. + (free_omp_regions): Renamed to omp_free_regions and moved to + omp-expand.h. + (replace_oacc_fn_attrib): Renamed to oacc_replace_fn_attrib and moved + to omp-general.h. + (set_oacc_fn_attrib): Renamed to oacc_set_fn_attrib and moved to + omp-general.h. + (build_oacc_routine_dims): Renamed to oacc_build_routine_dims and + moved to omp-general.h. + (get_oacc_fn_attrib): Renamed to oacc_get_fn_attrib and moved to + omp-general.h. + (oacc_fn_attrib_kernels_p): Moved to omp-general.h. + (get_oacc_fn_dim_size): Renamed to oacc_get_fn_dim_size and moved to + omp-general.c. + (omp_expand_local): Moved to omp-expand.h. + (make_gimple_omp_edges): Renamed to omp_make_gimple_edges and moved to + omp-expand.h. + (omp_finish_file): Moved to omp-offload.h. + (default_goacc_validate_dims): Renamed to + oacc_default_goacc_validate_dims and moved to omp-offload.h. + (offload_funcs, offload_vars): Moved to omp-offload.h. + * omp-low.c: Include omp-general.h, omp-offload.h and omp-grid.h. + (omp_region): Moved to omp-expand.c. + (omp_for_data_loop): Moved to omp-general.h. 
+ (omp_for_data): Likewise. + (oacc_loop): Moved to omp-offload.c. + (oacc_loop_flags): Moved to omp-general.h. + (offload_funcs, offload_vars): Moved to omp-offload.c. + (root_omp_region): Moved to omp-expand.c. + (omp_any_child_fn_dumped): Likewise. + (find_omp_clause): Renamed to omp_find_clause and moved to + omp-general.c. + (is_combined_parallel): Moved to omp-expand.c. + (is_reference): Renamed to omp_is_reference and and moved to + omp-general.c. + (adjust_for_condition): Renamed to omp_adjust_for_condition and moved + to omp-general.c. + (get_omp_for_step_from_incr): Renamed to omp_get_for_step_from_incr + and moved to omp-general.c. + (extract_omp_for_data): Renamed to omp_extract_for_data and moved to + omp-general.c. + (workshare_safe_to_combine_p): Moved to omp-expand.c. + (omp_adjust_chunk_size): Likewise. + (get_ws_args_for): Likewise. + (get_base_type): Removed. + (dump_omp_region): Moved to omp-expand.c. + (debug_omp_region): Likewise. + (debug_all_omp_regions): Likewise. + (new_omp_region): Likewise. + (free_omp_region_1): Likewise. + (free_omp_regions): Renamed to omp_free_regions and moved to + omp-expand.c. + (find_combined_for): Renamed to omp_find_combined_for, made global. + (build_omp_barrier): Renamed to omp_build_barrier and moved to + omp-general.c. + (omp_max_vf): Moved to omp-general.c. + (omp_max_simt_vf): Likewise. + (gimple_build_cond_empty): Moved to omp-expand.c. + (parallel_needs_hsa_kernel_p): Likewise. + (expand_omp_build_assign): Moved declaration to omp-expand.c. + (expand_parallel_call): Moved to omp-expand.c. + (expand_cilk_for_call): Likewise. + (expand_task_call): Likewise. + (vec2chain): Likewise. + (remove_exit_barrier): Likewise. + (remove_exit_barriers): Likewise. + (optimize_omp_library_calls): Likewise. + (expand_omp_regimplify_p): Likewise. + (expand_omp_build_assign): Likewise. + (expand_omp_taskreg): Likewise. + (oacc_collapse): Likewise. + (expand_oacc_collapse_init): Likewise. 
+ (expand_oacc_collapse_vars): Likewise. + (expand_omp_for_init_counts): Likewise. + (expand_omp_for_init_vars): Likewise. + (extract_omp_for_update_vars): Likewise. + (expand_omp_ordered_source): Likewise. + (expand_omp_ordered_sink): Likewise. + (expand_omp_ordered_source_sink): Likewise. + (expand_omp_for_ordered_loops): Likewise. + (expand_omp_for_generic): Likewise. + (expand_omp_for_static_nochunk): Likewise. + (find_phi_with_arg_on_edge): Likewise. + (expand_omp_for_static_chunk): Likewise. + (expand_cilk_for): Likewise. + (expand_omp_simd): Likewise. + (expand_omp_taskloop_for_outer): Likewise. + (expand_omp_taskloop_for_inner): Likewise. + (expand_oacc_for): Likewise. + (expand_omp_for): Likewise. + (expand_omp_sections): Likewise. + (expand_omp_single): Likewise. + (expand_omp_synch): Likewise. + (expand_omp_atomic_load): Likewise. + (expand_omp_atomic_store): Likewise. + (expand_omp_atomic_fetch_op): Likewise. + (expand_omp_atomic_pipeline): Likewise. + (expand_omp_atomic_mutex): Likewise. + (expand_omp_atomic): Likewise. + (oacc_launch_pack): and moved to omp-general.c, made public. + (OACC_FN_ATTRIB): Likewise. + (replace_oacc_fn_attrib): Renamed to oacc_replace_fn_attrib and moved + to omp-general.c. + (set_oacc_fn_attrib): Renamed to oacc_set_fn_attrib and moved to + omp-general.c. + (build_oacc_routine_dims): Renamed to oacc_build_routine_dims and + moved to omp-general.c. + (get_oacc_fn_attrib): Renamed to oacc_get_fn_attrib and moved to + omp-general.c. + (oacc_fn_attrib_kernels_p): Moved to omp-general.c. + (oacc_fn_attrib_level): Moved to omp-offload.c. + (get_oacc_fn_dim_size): Renamed to oacc_get_fn_dim_size and moved to + omp-general.c. + (get_oacc_ifn_dim_arg): Renamed to oacc_get_ifn_dim_arg and moved to + omp-general.c. + (mark_loops_in_oacc_kernels_region): Moved to omp-expand.c. + (grid_launch_attributes_trees): Likewise. + (grid_attr_trees): Likewise. + (grid_create_kernel_launch_attr_types): Likewise. 
+ (grid_insert_store_range_dim): Likewise. + (grid_get_kernel_launch_attributes): Likewise. + (get_target_argument_identifier_1): Likewise. + (get_target_argument_identifier): Likewise. + (get_target_argument_value): Likewise. + (push_target_argument_according_to_value): Likewise. + (get_target_arguments): Likewise. + (expand_omp_target): Likewise. + (grid_expand_omp_for_loop): Moved to omp-grid.c. + (grid_arg_decl_map): Likewise. + (grid_remap_kernel_arg_accesses): Likewise. + (grid_expand_target_grid_body): Likewise. + (expand_omp): Renamed to omp_expand and moved to omp-expand.c. + (build_omp_regions_1): Moved to omp-expand.c. + (build_omp_regions_root): Likewise. + (omp_expand_local): Likewise. + (build_omp_regions): Likewise. + (execute_expand_omp): Likewise. + (pass_data_expand_omp): Likewise. + (pass_expand_omp): Likewise. + (make_pass_expand_omp): Likewise. + (pass_data_expand_omp_ssa): Likewise. + (pass_expand_omp_ssa): Likewise. + (make_pass_expand_omp_ssa): Likewise. + (grid_lastprivate_predicate): Renamed to + omp_grid_lastprivate_predicate and moved to omp-grid.c, made public. + (grid_prop): Moved to omp-grid.c. + (GRID_MISSED_MSG_PREFIX): Likewise. + (grid_safe_assignment_p): Likewise. + (grid_seq_only_contains_local_assignments): Likewise. + (grid_find_single_omp_among_assignments_1): Likewise. + (grid_find_single_omp_among_assignments): Likewise. + (grid_find_ungridifiable_statement): Likewise. + (grid_parallel_clauses_gridifiable): Likewise. + (grid_inner_loop_gridifiable_p): Likewise. + (grid_dist_follows_simple_pattern): Likewise. + (grid_gfor_follows_tiling_pattern): Likewise. + (grid_call_permissible_in_distribute_p): Likewise. + (grid_handle_call_in_distribute): Likewise. + (grid_dist_follows_tiling_pattern): Likewise. + (grid_target_follows_gridifiable_pattern): Likewise. + (grid_remap_prebody_decls): Likewise. + (grid_var_segment): Likewise. + (grid_mark_variable_segment): Likewise. + (grid_copy_leading_local_assignments): Likewise. 
+ (grid_process_grid_body): Likewise. + (grid_eliminate_combined_simd_part): Likewise. + (grid_mark_tiling_loops): Likewise. + (grid_mark_tiling_parallels_and_loops): Likewise. + (grid_process_kernel_body_copy): Likewise. + (grid_attempt_target_gridification): Likewise. + (grid_gridify_all_targets_stmt): Likewise. + (grid_gridify_all_targets): Renamed to omp_grid_gridify_all_targets + and moved to omp-grid.c, made public. + (make_gimple_omp_edges): Renamed to omp_make_gimple_edges and moved to + omp-expand.c. + (add_decls_addresses_to_decl_constructor): Moved to omp-offload.c. + (omp_finish_file): Likewise. + (oacc_thread_numbers): Likewise. + (oacc_xform_loop): Likewise. + (oacc_default_dims, oacc_min_dims): Likewise. + (oacc_parse_default_dims): Likewise. + (oacc_validate_dims): Likewise. + (new_oacc_loop_raw): Likewise. + (new_oacc_loop_outer): Likewise. + (new_oacc_loop): Likewise. + (new_oacc_loop_routine): Likewise. + (finish_oacc_loop): Likewise. + (free_oacc_loop): Likewise. + (dump_oacc_loop_part): Likewise. + (dump_oacc_loop): Likewise. + (debug_oacc_loop): Likewise. + (oacc_loop_discover_walk): Likewise. + (oacc_loop_sibling_nreverse): Likewise. + (oacc_loop_discovery): Likewise. + (oacc_loop_xform_head_tail): Likewise. + (oacc_loop_xform_loop): Likewise. + (oacc_loop_process): Likewise. + (oacc_loop_fixed_partitions): Likewise. + (oacc_loop_auto_partitions): Likewise. + (oacc_loop_partition): Likewise. + (default_goacc_fork_join): Likewise. + (default_goacc_reduction): Likewise. + (execute_oacc_device_lower): Likewise. + (default_goacc_validate_dims): Likewise. + (default_goacc_dim_limit): Likewise. + (pass_data_oacc_device_lower): Likewise. + (pass_oacc_device_lower): Likewise. + (make_pass_oacc_device_lower): Likewise. + (execute_omp_device_lower): Likewise. + (pass_data_omp_device_lower): Likewise. + (pass_omp_device_lower): Likewise. + (make_pass_omp_device_lower): Likewise. + (pass_data_omp_target_link): Likewise. 
+ (pass_omp_target_link): Likewise. + (find_link_var_op): Likewise. + (pass_omp_target_link::execute): Likewise. + (make_pass_omp_target_link): Likewise. + * Makefile.in (OBJS): Added omp-offload.o, omp-expand.o, omp-general.o + and omp-grid.o. + (GTFILES): Added omp-offload.h, omp-offload.c and omp-expand.c, removed + omp-low.h. + * gimple-fold.c: Include omp-general.h instead of omp-low.h. + (fold_internal_goacc_dim): Adjusted calls to + get_oacc_ifn_dim_arg and get_oacc_fn_dim_size to use their new names. + * gimplify.c: Include omp-low.h. + (omp_notice_variable): Adjust the call to get_oacc_fn_attrib to use + its new name. + (gimplify_omp_task): Adjusted calls to find_omp_clause to use its new + name. + (gimplify_omp_for): Likewise. + * lto-cgraph.c: Include omp-offload.h instead of omp-low.h. + * toplev.c: Include omp-offload.h instead of omp-low.h. + * tree-cfg.c: Include omp-general.h instead of omp-low.h. Also + include omp-expand.h. + (make_edges_bb): Adjusted the call to make_gimple_omp_edges to use its + new name. + (make_edges): Adjust the call to free_omp_regions to use its new name. + * tree-parloops.c: Include omp-general.h. + (create_parallel_loop): Adjusted the call to set_oacc_fn_attrib to use + its new name. + (parallelize_loops): Adjusted the call to get_oacc_fn_attrib to use + its new name. + * tree-ssa-loop.c: Include omp-general.h instead of omp-low.h. + (gate_oacc_kernels): Adjusted the call to get_oacc_fn_attrib to use + its new name. + * tree-vrp.c: Include omp-general.h instead of omp-low.h. + (extract_range_basic): Adjusted calls to get_oacc_ifn_dim_arg and + get_oacc_fn_dim_size to use their new names. + * varpool.c: Include omp-offload.h instead of omp-low.h. + * gengtype.c (open_base_files): Replace omp-low.h with omp-offload.h in + ifiles. + * config/nvptx/nvptx.c: Include omp-general.c. + (nvptx_expand_call): Adjusted the call to get_oacc_fn_attrib to use + its new name. + (nvptx_reorg): Likewise. 
+ (nvptx_record_offload_symbol): Likewise. + 2016-12-14 Martin Sebor <msebor@redhat.com> PR middle-end/78786 diff --git a/gcc/Makefile.in b/gcc/Makefile.in index c7b1eaf5d62..f2c7cd017d4 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1399,6 +1399,10 @@ OBJS = \ mode-switching.o \ modulo-sched.o \ multiple_target.o \ + omp-offload.o \ + omp-expand.o \ + omp-general.o \ + omp-grid.o \ omp-low.o \ omp-simd-clone.o \ optabs.o \ @@ -2479,8 +2483,10 @@ GTFILES = $(CPP_ID_DATA_H) $(srcdir)/input.h $(srcdir)/coretypes.h \ $(srcdir)/tree-scalar-evolution.c \ $(srcdir)/tree-ssa-operands.h \ $(srcdir)/tree-profile.c $(srcdir)/tree-nested.c \ + $(srcdir)/omp-offload.h \ + $(srcdir)/omp-offload.c \ + $(srcdir)/omp-expand.c \ $(srcdir)/omp-low.c \ - $(srcdir)/omp-low.h \ $(srcdir)/targhooks.c $(out_file) $(srcdir)/passes.c $(srcdir)/cgraphunit.c \ $(srcdir)/cgraphclones.c \ $(srcdir)/tree-phinodes.c \ diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog index 39a3582289f..d8bd2c383f9 100644 --- a/gcc/c-family/ChangeLog +++ b/gcc/c-family/ChangeLog @@ -1,3 +1,9 @@ +2016-12-14 Martin Jambor <mjambor@suse.cz> + + * c-omp.c: Include omp-general.h instead of omp-low.h. + (c_finish_oacc_wait): Adjusted call to find_omp_clause to use its new + name. + 2016-12-14 Martin Sebor <msebor@redhat.com> PR c/17308 diff --git a/gcc/c-family/c-omp.c b/gcc/c-family/c-omp.c index 5ccb62eea50..2b5ad04eb5f 100644 --- a/gcc/c-family/c-omp.c +++ b/gcc/c-family/c-omp.c @@ -28,7 +28,7 @@ along with GCC; see the file COPYING3. 
If not see #include "c-common.h" #include "gimple-expr.h" #include "c-pragma.h" -#include "omp-low.h" +#include "omp-general.h" #include "gomp-constants.h" @@ -45,7 +45,7 @@ c_finish_oacc_wait (location_t loc, tree parms, tree clauses) vec_alloc (args, nparms + 2); stmt = builtin_decl_explicit (BUILT_IN_GOACC_WAIT); - if (find_omp_clause (clauses, OMP_CLAUSE_ASYNC)) + if (omp_find_clause (clauses, OMP_CLAUSE_ASYNC)) t = OMP_CLAUSE_ASYNC_EXPR (clauses); else t = build_int_cst (integer_type_node, GOMP_ASYNC_SYNC); diff --git a/gcc/c/ChangeLog b/gcc/c/ChangeLog index f3626e28205..d10faa08c7f 100644 --- a/gcc/c/ChangeLog +++ b/gcc/c/ChangeLog @@ -1,3 +1,21 @@ +2016-12-14 Martin Jambor <mjambor@suse.cz> + + * c-parser.c: Include omp-general.h and omp-offload.h instead of + omp-low.h. + (c_finish_oacc_routine): Adjusted call to + get_oacc_fn_attrib, build_oacc_routine_dims and replace_oacc_fn_attrib + to use their new names. + (c_parser_oacc_enter_exit_data): Adjusted call to find_omp_clause to + use its new name. + (c_parser_oacc_update): Likewise. + (c_parser_omp_simd): Likewise. + (c_parser_omp_target_update): Likewise. + * c-typeck.c: Include omp-general.h instead of omp-low.h. + (c_finish_omp_cancel): Adjusted call to find_omp_clause to use its new + name. + (c_finish_omp_cancellation_point): Likewise. + * gimple-parser.c: Do not include omp-low.h + 2016-12-02 Cesar Philippidis <cesar@codesourcery.com> James Norris <jnorris@codesourcery.com> diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c index f7bf9c4d9bd..a775b6064f6 100644 --- a/gcc/c/c-parser.c +++ b/gcc/c/c-parser.c @@ -52,7 +52,8 @@ along with GCC; see the file COPYING3. 
If not see #include "c-lang.h" #include "c-family/c-objc.h" #include "plugin.h" -#include "omp-low.h" +#include "omp-general.h" +#include "omp-offload.h" #include "builtins.h" #include "gomp-constants.h" #include "c-family/c-indentation.h" @@ -13922,7 +13923,7 @@ c_parser_oacc_enter_exit_data (c_parser *parser, bool enter) clauses = c_parser_oacc_all_clauses (parser, OACC_EXIT_DATA_CLAUSE_MASK, "#pragma acc exit data"); - if (find_omp_clause (clauses, OMP_CLAUSE_MAP) == NULL_TREE) + if (omp_find_clause (clauses, OMP_CLAUSE_MAP) == NULL_TREE) { error_at (loc, enter ? "%<#pragma acc enter data%> has no data movement clause" @@ -14241,7 +14242,7 @@ c_finish_oacc_routine (struct oacc_routine_data *data, tree fndecl, return; } - if (get_oacc_fn_attrib (fndecl)) + if (oacc_get_fn_attrib (fndecl)) { error_at (data->loc, "%<#pragma acc routine%> already applied to %qD", fndecl); @@ -14259,8 +14260,8 @@ c_finish_oacc_routine (struct oacc_routine_data *data, tree fndecl, } /* Process the routine's dimension clauses. */ - tree dims = build_oacc_routine_dims (data->clauses); - replace_oacc_fn_attrib (fndecl, dims); + tree dims = oacc_build_routine_dims (data->clauses); + oacc_replace_fn_attrib (fndecl, dims); /* Add an "omp declare target" attribute. 
*/ DECL_ATTRIBUTES (fndecl) @@ -14292,7 +14293,7 @@ c_parser_oacc_update (c_parser *parser) tree clauses = c_parser_oacc_all_clauses (parser, OACC_UPDATE_CLAUSE_MASK, "#pragma acc update"); - if (find_omp_clause (clauses, OMP_CLAUSE_MAP) == NULL_TREE) + if (omp_find_clause (clauses, OMP_CLAUSE_MAP) == NULL_TREE) { error_at (loc, "%<#pragma acc update%> must contain at least one " @@ -15263,7 +15264,7 @@ c_parser_omp_simd (location_t loc, c_parser *parser, { omp_split_clauses (loc, OMP_SIMD, mask, clauses, cclauses); clauses = cclauses[C_OMP_CLAUSE_SPLIT_SIMD]; - tree c = find_omp_clause (cclauses[C_OMP_CLAUSE_SPLIT_FOR], + tree c = omp_find_clause (cclauses[C_OMP_CLAUSE_SPLIT_FOR], OMP_CLAUSE_ORDERED); if (c && OMP_CLAUSE_ORDERED_EXPR (c)) { @@ -16107,8 +16108,8 @@ c_parser_omp_target_update (location_t loc, c_parser *parser, tree clauses = c_parser_omp_all_clauses (parser, OMP_TARGET_UPDATE_CLAUSE_MASK, "#pragma omp target update"); - if (find_omp_clause (clauses, OMP_CLAUSE_TO) == NULL_TREE - && find_omp_clause (clauses, OMP_CLAUSE_FROM) == NULL_TREE) + if (omp_find_clause (clauses, OMP_CLAUSE_TO) == NULL_TREE + && omp_find_clause (clauses, OMP_CLAUSE_FROM) == NULL_TREE) { error_at (loc, "%<#pragma omp target update%> must contain at least one " diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c index f0917ed788c..c134280325d 100644 --- a/gcc/c/c-typeck.c +++ b/gcc/c/c-typeck.c @@ -43,7 +43,7 @@ along with GCC; see the file COPYING3. 
If not see #include "tree-iterator.h" #include "gimplify.h" #include "tree-inline.h" -#include "omp-low.h" +#include "omp-general.h" #include "c-family/c-objc.h" #include "c-family/c-ubsan.h" #include "cilk.h" @@ -12012,13 +12012,13 @@ c_finish_omp_cancel (location_t loc, tree clauses) { tree fn = builtin_decl_explicit (BUILT_IN_GOMP_CANCEL); int mask = 0; - if (find_omp_clause (clauses, OMP_CLAUSE_PARALLEL)) + if (omp_find_clause (clauses, OMP_CLAUSE_PARALLEL)) mask = 1; - else if (find_omp_clause (clauses, OMP_CLAUSE_FOR)) + else if (omp_find_clause (clauses, OMP_CLAUSE_FOR)) mask = 2; - else if (find_omp_clause (clauses, OMP_CLAUSE_SECTIONS)) + else if (omp_find_clause (clauses, OMP_CLAUSE_SECTIONS)) mask = 4; - else if (find_omp_clause (clauses, OMP_CLAUSE_TASKGROUP)) + else if (omp_find_clause (clauses, OMP_CLAUSE_TASKGROUP)) mask = 8; else { @@ -12027,7 +12027,7 @@ c_finish_omp_cancel (location_t loc, tree clauses) "clauses"); return; } - tree ifc = find_omp_clause (clauses, OMP_CLAUSE_IF); + tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF); if (ifc != NULL_TREE) { tree type = TREE_TYPE (OMP_CLAUSE_IF_EXPR (ifc)); @@ -12051,13 +12051,13 @@ c_finish_omp_cancellation_point (location_t loc, tree clauses) { tree fn = builtin_decl_explicit (BUILT_IN_GOMP_CANCELLATION_POINT); int mask = 0; - if (find_omp_clause (clauses, OMP_CLAUSE_PARALLEL)) + if (omp_find_clause (clauses, OMP_CLAUSE_PARALLEL)) mask = 1; - else if (find_omp_clause (clauses, OMP_CLAUSE_FOR)) + else if (omp_find_clause (clauses, OMP_CLAUSE_FOR)) mask = 2; - else if (find_omp_clause (clauses, OMP_CLAUSE_SECTIONS)) + else if (omp_find_clause (clauses, OMP_CLAUSE_SECTIONS)) mask = 4; - else if (find_omp_clause (clauses, OMP_CLAUSE_TASKGROUP)) + else if (omp_find_clause (clauses, OMP_CLAUSE_TASKGROUP)) mask = 8; else { diff --git a/gcc/c/gimple-parser.c b/gcc/c/gimple-parser.c index 9b6af13257f..ddecaec5128 100644 --- a/gcc/c/gimple-parser.c +++ b/gcc/c/gimple-parser.c @@ -34,7 +34,6 @@ along with 
GCC; see the file COPYING3. If not see #include "c-lang.h" #include "c-family/c-objc.h" #include "plugin.h" -#include "omp-low.h" #include "builtins.h" #include "gomp-constants.h" #include "c-family/c-indentation.h" diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index 405a91b2604..17fe5518a86 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -55,6 +55,7 @@ #include "gimple.h" #include "stor-layout.h" #include "builtins.h" +#include "omp-general.h" #include "omp-low.h" #include "gomp-constants.h" #include "dumpfile.h" @@ -1389,7 +1390,7 @@ nvptx_expand_call (rtx retval, rtx address) if (DECL_STATIC_CHAIN (decl)) cfun->machine->has_chain = true; - tree attr = get_oacc_fn_attrib (decl); + tree attr = oacc_get_fn_attrib (decl); if (attr) { tree dims = TREE_VALUE (attr); @@ -4090,7 +4091,7 @@ nvptx_reorg (void) /* Determine launch dimensions of the function. If it is not an offloaded function (i.e. this is a regular compiler), the function has no neutering. */ - tree attr = get_oacc_fn_attrib (current_function_decl); + tree attr = oacc_get_fn_attrib (current_function_decl); if (attr) { /* If we determined this mask before RTL expansion, we could @@ -4243,7 +4244,7 @@ nvptx_record_offload_symbol (tree decl) case FUNCTION_DECL: { - tree attr = get_oacc_fn_attrib (decl); + tree attr = oacc_get_fn_attrib (decl); /* OpenMP offloading does not set this attribute. */ tree dims = attr ? TREE_VALUE (attr) : NULL_TREE; diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index a08c93a755e..60b514b389c 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,22 @@ +2016-12-14 Martin Jambor <mjambor@suse.cz> + + * parser.c: Include omp-general.h and omp-offload.h instead of + omp-low.h. + (cp_parser_omp_simd): Adjusted calls to find_omp_clause to use its new + name. + (cp_parser_omp_target_update): Likewise. + (cp_parser_oacc_declare): Likewise. + (cp_parser_oacc_enter_exit_data): Likewise. + (cp_parser_oacc_update): Likewise. 
+ (cp_finalize_oacc_routine): Adjusted call to get_oacc_fn_attrib, + build_oacc_routine_dims and replace_oacc_fn_attrib to use their new + names. + * semantics.c: Include omp-general instead of omp-low.h. + (finish_omp_for): Adjusted calls to find_omp_clause to use its new + name. + (finish_omp_cancel): Likewise. + (finish_omp_cancellation_point): Likewise. + 2016-12-14 Marek Polacek <polacek@redhat.com> PR c++/72775 diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index 8ed75c70daa..e2a0a499450 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -35,8 +35,9 @@ along with GCC; see the file COPYING3. If not see #include "plugin.h" #include "tree-pretty-print.h" #include "parser.h" -#include "omp-low.h" #include "gomp-constants.h" +#include "omp-general.h" +#include "omp-offload.h" #include "c-family/c-indentation.h" #include "context.h" #include "cp-cilkplus.h" @@ -34675,7 +34676,7 @@ cp_parser_omp_simd (cp_parser *parser, cp_token *pragma_tok, { cp_omp_split_clauses (loc, OMP_SIMD, mask, clauses, cclauses); clauses = cclauses[C_OMP_CLAUSE_SPLIT_SIMD]; - tree c = find_omp_clause (cclauses[C_OMP_CLAUSE_SPLIT_FOR], + tree c = omp_find_clause (cclauses[C_OMP_CLAUSE_SPLIT_FOR], OMP_CLAUSE_ORDERED); if (c && OMP_CLAUSE_ORDERED_EXPR (c)) { @@ -35703,8 +35704,8 @@ cp_parser_omp_target_update (cp_parser *parser, cp_token *pragma_tok, tree clauses = cp_parser_omp_all_clauses (parser, OMP_TARGET_UPDATE_CLAUSE_MASK, "#pragma omp target update", pragma_tok); - if (find_omp_clause (clauses, OMP_CLAUSE_TO) == NULL_TREE - && find_omp_clause (clauses, OMP_CLAUSE_FROM) == NULL_TREE) + if (omp_find_clause (clauses, OMP_CLAUSE_TO) == NULL_TREE + && omp_find_clause (clauses, OMP_CLAUSE_FROM) == NULL_TREE) { error_at (pragma_tok->location, "%<#pragma omp target update%> must contain at least one " @@ -36038,7 +36039,7 @@ cp_parser_oacc_declare (cp_parser *parser, cp_token *pragma_tok) "#pragma acc declare", pragma_tok, true); - if (find_omp_clause (clauses, OMP_CLAUSE_MAP) == NULL_TREE 
+ if (omp_find_clause (clauses, OMP_CLAUSE_MAP) == NULL_TREE) { error_at (pragma_tok->location, "no valid clauses specified in %<#pragma acc declare%>"); @@ -36211,7 +36212,7 @@ cp_parser_oacc_enter_exit_data (cp_parser *parser, cp_token *pragma_tok, clauses = cp_parser_oacc_all_clauses (parser, OACC_EXIT_DATA_CLAUSE_MASK, "#pragma acc exit data", pragma_tok); - if (find_omp_clause (clauses, OMP_CLAUSE_MAP) == NULL_TREE) + if (omp_find_clause (clauses, OMP_CLAUSE_MAP) == NULL_TREE) { error_at (loc, "%<#pragma acc %s data%> has no data movement clause", enter ? "enter" : "exit"); @@ -36385,7 +36386,7 @@ cp_parser_oacc_update (cp_parser *parser, cp_token *pragma_tok) clauses = cp_parser_oacc_all_clauses (parser, OACC_UPDATE_CLAUSE_MASK, "#pragma acc update", pragma_tok); - if (find_omp_clause (clauses, OMP_CLAUSE_MAP) == NULL_TREE) + if (omp_find_clause (clauses, OMP_CLAUSE_MAP) == NULL_TREE) { error_at (pragma_tok->location, "%<#pragma acc update%> must contain at least one " @@ -37461,7 +37462,7 @@ cp_finalize_oacc_routine (cp_parser *parser, tree fndecl, bool is_defn) return; } - if (get_oacc_fn_attrib (fndecl)) + if (oacc_get_fn_attrib (fndecl)) { error_at (parser->oacc_routine->loc, "%<#pragma acc routine%> already applied to %qD", fndecl); @@ -37479,9 +37480,9 @@ cp_finalize_oacc_routine (cp_parser *parser, tree fndecl, bool is_defn) } /* Process the routine's dimension clauses. */ - tree dims = build_oacc_routine_dims (parser->oacc_routine->clauses); - replace_oacc_fn_attrib (fndecl, dims); - + tree dims = oacc_build_routine_dims (parser->oacc_routine->clauses); + oacc_replace_fn_attrib (fndecl, dims); + /* Add an "omp declare target" attribute. */ DECL_ATTRIBUTES (fndecl) = tree_cons (get_identifier ("omp declare target"), diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c index 389e7f1b25b..a41bc73ed30 100644 --- a/gcc/cp/semantics.c +++ b/gcc/cp/semantics.c @@ -38,7 +38,7 @@ along with GCC; see the file COPYING3. 
If not see #include "tree-inline.h" #include "intl.h" #include "tree-iterator.h" -#include "omp-low.h" +#include "omp-general.h" #include "convert.h" #include "gomp-constants.h" @@ -8001,7 +8001,7 @@ finish_omp_for (location_t locus, enum tree_code code, tree declv, gcc_assert (TREE_VEC_LENGTH (declv) == TREE_VEC_LENGTH (incrv)); if (TREE_VEC_LENGTH (declv) > 1) { - tree c = find_omp_clause (clauses, OMP_CLAUSE_COLLAPSE); + tree c = omp_find_clause (clauses, OMP_CLAUSE_COLLAPSE); if (c) collapse = tree_to_shwi (OMP_CLAUSE_COLLAPSE_EXPR (c)); if (collapse != TREE_VEC_LENGTH (declv)) @@ -8264,8 +8264,8 @@ finish_omp_for (location_t locus, enum tree_code code, tree declv, step at this point, fill it in. */ if (code == OMP_SIMD && !processing_template_decl && TREE_VEC_LENGTH (OMP_FOR_INCR (omp_for)) == 1) - for (tree c = find_omp_clause (clauses, OMP_CLAUSE_LINEAR); c; - c = find_omp_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE_LINEAR)) + for (tree c = omp_find_clause (clauses, OMP_CLAUSE_LINEAR); c; + c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE_LINEAR)) if (OMP_CLAUSE_LINEAR_STEP (c) == NULL_TREE) { decl = TREE_OPERAND (TREE_VEC_ELT (OMP_FOR_INIT (omp_for), 0), 0); @@ -8586,13 +8586,13 @@ finish_omp_cancel (tree clauses) { tree fn = builtin_decl_explicit (BUILT_IN_GOMP_CANCEL); int mask = 0; - if (find_omp_clause (clauses, OMP_CLAUSE_PARALLEL)) + if (omp_find_clause (clauses, OMP_CLAUSE_PARALLEL)) mask = 1; - else if (find_omp_clause (clauses, OMP_CLAUSE_FOR)) + else if (omp_find_clause (clauses, OMP_CLAUSE_FOR)) mask = 2; - else if (find_omp_clause (clauses, OMP_CLAUSE_SECTIONS)) + else if (omp_find_clause (clauses, OMP_CLAUSE_SECTIONS)) mask = 4; - else if (find_omp_clause (clauses, OMP_CLAUSE_TASKGROUP)) + else if (omp_find_clause (clauses, OMP_CLAUSE_TASKGROUP)) mask = 8; else { @@ -8601,7 +8601,7 @@ finish_omp_cancel (tree clauses) return; } vec<tree, va_gc> *vec = make_tree_vector (); - tree ifc = find_omp_clause (clauses, OMP_CLAUSE_IF); + tree ifc = 
omp_find_clause (clauses, OMP_CLAUSE_IF); if (ifc != NULL_TREE) { tree type = TREE_TYPE (OMP_CLAUSE_IF_EXPR (ifc)); @@ -8623,13 +8623,13 @@ finish_omp_cancellation_point (tree clauses) { tree fn = builtin_decl_explicit (BUILT_IN_GOMP_CANCELLATION_POINT); int mask = 0; - if (find_omp_clause (clauses, OMP_CLAUSE_PARALLEL)) + if (omp_find_clause (clauses, OMP_CLAUSE_PARALLEL)) mask = 1; - else if (find_omp_clause (clauses, OMP_CLAUSE_FOR)) + else if (omp_find_clause (clauses, OMP_CLAUSE_FOR)) mask = 2; - else if (find_omp_clause (clauses, OMP_CLAUSE_SECTIONS)) + else if (omp_find_clause (clauses, OMP_CLAUSE_SECTIONS)) mask = 4; - else if (find_omp_clause (clauses, OMP_CLAUSE_TASKGROUP)) + else if (omp_find_clause (clauses, OMP_CLAUSE_TASKGROUP)) mask = 8; else { diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index c7acf44c884..17bc404583f 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,7 @@ +2016-12-14 Martin Jambor <mjambor@suse.cz> + + * trans-openmp.c: Include omp-general.h. + 2016-12-14 Andre Vehreschild <vehre@gcc.gnu.org> PR fortran/78780 diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c index 6bc2dcdbaeb..53f92b049ab 100644 --- a/gcc/fortran/trans-openmp.c +++ b/gcc/fortran/trans-openmp.c @@ -35,8 +35,9 @@ along with GCC; see the file COPYING3. 
If not see #include "trans-array.h" #include "trans-const.h" #include "arith.h" -#include "omp-low.h" #include "gomp-constants.h" +#include "omp-general.h" +#include "omp-low.h" int ompws_flags; diff --git a/gcc/gengtype.c b/gcc/gengtype.c index a5795473b73..dcc2ff5c358 100644 --- a/gcc/gengtype.c +++ b/gcc/gengtype.c @@ -1719,7 +1719,7 @@ open_base_files (void) "tree-dfa.h", "tree-ssa.h", "reload.h", "cpp-id-data.h", "tree-chrec.h", "except.h", "output.h", "cfgloop.h", "target.h", "lto-streamer.h", "target-globals.h", "ipa-ref.h", "cgraph.h", "symbol-summary.h", - "ipa-prop.h", "ipa-inline.h", "dwarf2out.h", "omp-low.h", NULL + "ipa-prop.h", "ipa-inline.h", "dwarf2out.h", "omp-offload.h", NULL }; const char *const *ifp; outf_p gtype_desc_c; diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c index d00625bec88..9c86f158503 100644 --- a/gcc/gimple-fold.c +++ b/gcc/gimple-fold.c @@ -52,7 +52,7 @@ along with GCC; see the file COPYING3. If not see #include "gimple-match.h" #include "gomp-constants.h" #include "optabs-query.h" -#include "omp-low.h" +#include "omp-general.h" #include "ipa-chkp.h" #include "tree-cfg.h" #include "fold-const-call.h" @@ -3416,8 +3416,8 @@ gimple_fold_builtin (gimple_stmt_iterator *gsi) static tree fold_internal_goacc_dim (const gimple *call) { - int axis = get_oacc_ifn_dim_arg (call); - int size = get_oacc_fn_dim_size (current_function_decl, axis); + int axis = oacc_get_ifn_dim_arg (call); + int size = oacc_get_fn_dim_size (current_function_decl, axis); bool is_pos = gimple_call_internal_fn (call) == IFN_GOACC_DIM_POS; tree result = NULL_TREE; diff --git a/gcc/gimplify.c b/gcc/gimplify.c index 87615a95fb5..a3001331181 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -51,6 +51,7 @@ along with GCC; see the file COPYING3. 
If not see #include "langhooks.h" #include "tree-cfg.h" #include "tree-ssa.h" +#include "omp-general.h" #include "omp-low.h" #include "gimple-low.h" #include "cilk.h" @@ -6959,7 +6960,7 @@ omp_notice_variable (struct gimplify_omp_ctx *ctx, tree decl, bool in_code) if (gimplify_omp_ctxp->outer_context == NULL && VAR_P (decl) - && get_oacc_fn_attrib (current_function_decl)) + && oacc_get_fn_attrib (current_function_decl)) { location_t loc = DECL_SOURCE_LOCATION (decl); @@ -9314,7 +9315,7 @@ gimplify_omp_task (tree *expr_p, gimple_seq *pre_p) gimple_seq body = NULL; gimplify_scan_omp_clauses (&OMP_TASK_CLAUSES (expr), pre_p, - find_omp_clause (OMP_TASK_CLAUSES (expr), + omp_find_clause (OMP_TASK_CLAUSES (expr), OMP_CLAUSE_UNTIED) ? ORT_UNTIED_TASK : ORT_TASK, OMP_TASK); @@ -9390,7 +9391,7 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p) ort = ORT_ACC; break; case OMP_TASKLOOP: - if (find_omp_clause (OMP_FOR_CLAUSES (for_stmt), OMP_CLAUSE_UNTIED)) + if (omp_find_clause (OMP_FOR_CLAUSES (for_stmt), OMP_CLAUSE_UNTIED)) ort = ORT_UNTIED_TASK; else ort = ORT_TASK; @@ -9555,7 +9556,7 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p) gcc_assert (TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt)) == TREE_VEC_LENGTH (OMP_FOR_INCR (for_stmt))); - tree c = find_omp_clause (OMP_FOR_CLAUSES (for_stmt), OMP_CLAUSE_ORDERED); + tree c = omp_find_clause (OMP_FOR_CLAUSES (for_stmt), OMP_CLAUSE_ORDERED); bool is_doacross = false; if (c && OMP_CLAUSE_ORDERED_EXPR (c)) { @@ -9565,7 +9566,7 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p) * 2); } int collapse = 1; - c = find_omp_clause (OMP_FOR_CLAUSES (for_stmt), OMP_CLAUSE_COLLAPSE); + c = omp_find_clause (OMP_FOR_CLAUSES (for_stmt), OMP_CLAUSE_COLLAPSE); if (c) collapse = tree_to_shwi (OMP_CLAUSE_COLLAPSE_EXPR (c)); for (i = 0; i < TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt)); i++) diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c index 26420411b0d..947d0877539 100644 --- a/gcc/lto-cgraph.c +++ b/gcc/lto-cgraph.c @@ -36,7 +36,7 @@ 
along with GCC; see the file COPYING3. If not see #include "context.h" #include "pass_manager.h" #include "ipa-utils.h" -#include "omp-low.h" +#include "omp-offload.h" #include "ipa-chkp.h" /* True when asm nodes has been output. */ diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c new file mode 100644 index 00000000000..a953c8b0750 --- /dev/null +++ b/gcc/omp-expand.c @@ -0,0 +1,8195 @@ +/* Expansion pass for OMP directives. Outlines regions of certain OMP + directives to separate functions, converts others into explicit calls to the + runtime library (libgomp) and so forth + +Copyright (C) 2005-2016 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. 
*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "memmodel.h" +#include "backend.h" +#include "target.h" +#include "rtl.h" +#include "tree.h" +#include "gimple.h" +#include "cfghooks.h" +#include "tree-pass.h" +#include "ssa.h" +#include "optabs.h" +#include "cgraph.h" +#include "pretty-print.h" +#include "diagnostic-core.h" +#include "fold-const.h" +#include "stor-layout.h" +#include "cfganal.h" +#include "internal-fn.h" +#include "gimplify.h" +#include "gimple-iterator.h" +#include "gimplify-me.h" +#include "gimple-walk.h" +#include "tree-cfg.h" +#include "tree-into-ssa.h" +#include "tree-ssa.h" +#include "splay-tree.h" +#include "cfgloop.h" +#include "omp-general.h" +#include "omp-offload.h" +#include "tree-cfgcleanup.h" +#include "symbol-summary.h" +#include "cilk.h" +#include "gomp-constants.h" +#include "gimple-pretty-print.h" +#include "hsa.h" + + +/* OMP region information. Every parallel and workshare + directive is enclosed between two markers, the OMP_* directive + and a corresponding GIMPLE_OMP_RETURN statement. */ + +struct omp_region +{ + /* The enclosing region. */ + struct omp_region *outer; + + /* First child region. */ + struct omp_region *inner; + + /* Next peer region. */ + struct omp_region *next; + + /* Block containing the omp directive as its last stmt. */ + basic_block entry; + + /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */ + basic_block exit; + + /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */ + basic_block cont; + + /* If this is a combined parallel+workshare region, this is a list + of additional arguments needed by the combined parallel+workshare + library call. */ + vec<tree, va_gc> *ws_args; + + /* The code for the omp directive of this region. */ + enum gimple_code type; + + /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */ + enum omp_clause_schedule_kind sched_kind; + + /* Schedule modifiers. 
*/ + unsigned char sched_modifiers; + + /* True if this is a combined parallel+workshare region. */ + bool is_combined_parallel; + + /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has + a depend clause. */ + gomp_ordered *ord_stmt; +}; + +static struct omp_region *root_omp_region; +static bool omp_any_child_fn_dumped; + +static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree, + bool = false); +static gphi *find_phi_with_arg_on_edge (tree, edge); +static void expand_omp (struct omp_region *region); + +/* Return true if REGION is a combined parallel+workshare region. */ + +static inline bool +is_combined_parallel (struct omp_region *region) +{ + return region->is_combined_parallel; +} + +/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB + is the immediate dominator of PAR_ENTRY_BB, return true if there + are no data dependencies that would prevent expanding the parallel + directive at PAR_ENTRY_BB as a combined parallel+workshare region. + + When expanding a combined parallel+workshare region, the call to + the child function may need additional arguments in the case of + GIMPLE_OMP_FOR regions. In some cases, these arguments are + computed out of variables passed in from the parent to the child + via 'struct .omp_data_s'. For instance: + + #pragma omp parallel for schedule (guided, i * 4) + for (j ...) + + Is lowered into: + + # BLOCK 2 (PAR_ENTRY_BB) + .omp_data_o.i = i; + #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598) + + # BLOCK 3 (WS_ENTRY_BB) + .omp_data_i = &.omp_data_o; + D.1667 = .omp_data_i->i; + D.1598 = D.1667 * 4; + #pragma omp for schedule (guided, D.1598) + + When we outline the parallel region, the call to the child function + 'bar.omp_fn.0' will need the value D.1598 in its argument list, but + that value is computed *after* the call site. So, in principle we + cannot do the transformation. 
+ + To see whether the code in WS_ENTRY_BB blocks the combined + parallel+workshare call, we collect all the variables used in the + GIMPLE_OMP_FOR header check whether they appear on the LHS of any + statement in WS_ENTRY_BB. If so, then we cannot emit the combined + call. + + FIXME. If we had the SSA form built at this point, we could merely + hoist the code in block 3 into block 2 and be done with it. But at + this point we don't have dataflow information and though we could + hack something up here, it is really not worth the aggravation. */ + +static bool +workshare_safe_to_combine_p (basic_block ws_entry_bb) +{ + struct omp_for_data fd; + gimple *ws_stmt = last_stmt (ws_entry_bb); + + if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS) + return true; + + gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR); + + omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL); + + if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST) + return false; + if (fd.iter_type != long_integer_type_node) + return false; + + /* FIXME. We give up too easily here. If any of these arguments + are not constants, they will likely involve variables that have + been mapped into fields of .omp_data_s for sharing with the child + function. With appropriate data flow, it would be possible to + see through this. */ + if (!is_gimple_min_invariant (fd.loop.n1) + || !is_gimple_min_invariant (fd.loop.n2) + || !is_gimple_min_invariant (fd.loop.step) + || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size))) + return false; + + return true; +} + +/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier + presence (SIMD_SCHEDULE). 
*/ + +static tree +omp_adjust_chunk_size (tree chunk_size, bool simd_schedule) +{ + if (!simd_schedule) + return chunk_size; + + int vf = omp_max_vf (); + if (vf == 1) + return chunk_size; + + tree type = TREE_TYPE (chunk_size); + chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size, + build_int_cst (type, vf - 1)); + return fold_build2 (BIT_AND_EXPR, type, chunk_size, + build_int_cst (type, -vf)); +} + +/* Collect additional arguments needed to emit a combined + parallel+workshare call. WS_STMT is the workshare directive being + expanded. */ + +static vec<tree, va_gc> * +get_ws_args_for (gimple *par_stmt, gimple *ws_stmt) +{ + tree t; + location_t loc = gimple_location (ws_stmt); + vec<tree, va_gc> *ws_args; + + if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt)) + { + struct omp_for_data fd; + tree n1, n2; + + omp_extract_for_data (for_stmt, &fd, NULL); + n1 = fd.loop.n1; + n2 = fd.loop.n2; + + if (gimple_omp_for_combined_into_p (for_stmt)) + { + tree innerc + = omp_find_clause (gimple_omp_parallel_clauses (par_stmt), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + n1 = OMP_CLAUSE_DECL (innerc); + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + n2 = OMP_CLAUSE_DECL (innerc); + } + + vec_alloc (ws_args, 3 + (fd.chunk_size != 0)); + + t = fold_convert_loc (loc, long_integer_type_node, n1); + ws_args->quick_push (t); + + t = fold_convert_loc (loc, long_integer_type_node, n2); + ws_args->quick_push (t); + + t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step); + ws_args->quick_push (t); + + if (fd.chunk_size) + { + t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size); + t = omp_adjust_chunk_size (t, fd.simd_schedule); + ws_args->quick_push (t); + } + + return ws_args; + } + else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS) + { + /* Number of sections is equal to the number of edges from the + GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to + the exit of the 
sections region. */ + basic_block bb = single_succ (gimple_bb (ws_stmt)); + t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1); + vec_alloc (ws_args, 1); + ws_args->quick_push (t); + return ws_args; + } + + gcc_unreachable (); +} + +/* Discover whether REGION is a combined parallel+workshare region. */ + +static void +determine_parallel_type (struct omp_region *region) +{ + basic_block par_entry_bb, par_exit_bb; + basic_block ws_entry_bb, ws_exit_bb; + + if (region == NULL || region->inner == NULL + || region->exit == NULL || region->inner->exit == NULL + || region->inner->cont == NULL) + return; + + /* We only support parallel+for and parallel+sections. */ + if (region->type != GIMPLE_OMP_PARALLEL + || (region->inner->type != GIMPLE_OMP_FOR + && region->inner->type != GIMPLE_OMP_SECTIONS)) + return; + + /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and + WS_EXIT_BB -> PAR_EXIT_BB. */ + par_entry_bb = region->entry; + par_exit_bb = region->exit; + ws_entry_bb = region->inner->entry; + ws_exit_bb = region->inner->exit; + + if (single_succ (par_entry_bb) == ws_entry_bb + && single_succ (ws_exit_bb) == par_exit_bb + && workshare_safe_to_combine_p (ws_entry_bb) + && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb)) + || (last_and_only_stmt (ws_entry_bb) + && last_and_only_stmt (par_exit_bb)))) + { + gimple *par_stmt = last_stmt (par_entry_bb); + gimple *ws_stmt = last_stmt (ws_entry_bb); + + if (region->inner->type == GIMPLE_OMP_FOR) + { + /* If this is a combined parallel loop, we need to determine + whether or not to use the combined library calls. There + are two cases where we do not apply the transformation: + static loops and any kind of ordered loop. In the first + case, we already open code the loop so there is no need + to do anything else. In the latter case, the combined + parallel loop call would still need extra synchronization + to implement ordered semantics, so there would not be any + gain in using the combined call. 
*/ + tree clauses = gimple_omp_for_clauses (ws_stmt); + tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE); + if (c == NULL + || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK) + == OMP_CLAUSE_SCHEDULE_STATIC) + || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)) + { + region->is_combined_parallel = false; + region->inner->is_combined_parallel = false; + return; + } + } + + region->is_combined_parallel = true; + region->inner->is_combined_parallel = true; + region->ws_args = get_ws_args_for (par_stmt, ws_stmt); + } +} + +/* Debugging dumps for parallel regions. */ +void dump_omp_region (FILE *, struct omp_region *, int); +void debug_omp_region (struct omp_region *); +void debug_all_omp_regions (void); + +/* Dump the parallel region tree rooted at REGION. */ + +void +dump_omp_region (FILE *file, struct omp_region *region, int indent) +{ + fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index, + gimple_code_name[region->type]); + + if (region->inner) + dump_omp_region (file, region->inner, indent + 4); + + if (region->cont) + { + fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "", + region->cont->index); + } + + if (region->exit) + fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "", + region->exit->index); + else + fprintf (file, "%*s[no exit marker]\n", indent, ""); + + if (region->next) + dump_omp_region (file, region->next, indent); +} + +DEBUG_FUNCTION void +debug_omp_region (struct omp_region *region) +{ + dump_omp_region (stderr, region, 0); +} + +DEBUG_FUNCTION void +debug_all_omp_regions (void) +{ + dump_omp_region (stderr, root_omp_region, 0); +} + +/* Create a new parallel region starting at STMT inside region PARENT. */ + +static struct omp_region * +new_omp_region (basic_block bb, enum gimple_code type, + struct omp_region *parent) +{ + struct omp_region *region = XCNEW (struct omp_region); + + region->outer = parent; + region->entry = bb; + region->type = type; + + if (parent) + { + /* This is a nested region. 
Add it to the list of inner + regions in PARENT. */ + region->next = parent->inner; + parent->inner = region; + } + else + { + /* This is a toplevel region. Add it to the list of toplevel + regions in ROOT_OMP_REGION. */ + region->next = root_omp_region; + root_omp_region = region; + } + + return region; +} + +/* Release the memory associated with the region tree rooted at REGION. */ + +static void +free_omp_region_1 (struct omp_region *region) +{ + struct omp_region *i, *n; + + for (i = region->inner; i ; i = n) + { + n = i->next; + free_omp_region_1 (i); + } + + free (region); +} + +/* Release the memory for the entire omp region tree. */ + +void +omp_free_regions (void) +{ + struct omp_region *r, *n; + for (r = root_omp_region; r ; r = n) + { + n = r->next; + free_omp_region_1 (r); + } + root_omp_region = NULL; +} + +/* A convenience function to build an empty GIMPLE_COND with just the + condition. */ + +static gcond * +gimple_build_cond_empty (tree cond) +{ + enum tree_code pred_code; + tree lhs, rhs; + + gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs); + return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE); +} + +/* Return true if a parallel REGION is within a declare target function or + within a target region and is not a part of a gridified target. 
*/ + +static bool +parallel_needs_hsa_kernel_p (struct omp_region *region) +{ + bool indirect = false; + for (region = region->outer; region; region = region->outer) + { + if (region->type == GIMPLE_OMP_PARALLEL) + indirect = true; + else if (region->type == GIMPLE_OMP_TARGET) + { + gomp_target *tgt_stmt + = as_a <gomp_target *> (last_stmt (region->entry)); + + if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt), + OMP_CLAUSE__GRIDDIM_)) + return indirect; + else + return true; + } + } + + if (lookup_attribute ("omp declare target", + DECL_ATTRIBUTES (current_function_decl))) + return true; + + return false; +} + +/* Build the function calls to GOMP_parallel_start etc to actually + generate the parallel operation. REGION is the parallel region + being expanded. BB is the block where to insert the code. WS_ARGS + will be set if this is a call to a combined parallel+workshare + construct, it contains the list of additional arguments needed by + the workshare construct. */ + +static void +expand_parallel_call (struct omp_region *region, basic_block bb, + gomp_parallel *entry_stmt, + vec<tree, va_gc> *ws_args) +{ + tree t, t1, t2, val, cond, c, clauses, flags; + gimple_stmt_iterator gsi; + gimple *stmt; + enum built_in_function start_ix; + int start_ix2; + location_t clause_loc; + vec<tree, va_gc> *args; + + clauses = gimple_omp_parallel_clauses (entry_stmt); + + /* Determine what flavor of GOMP_parallel we will be + emitting. 
*/ + start_ix = BUILT_IN_GOMP_PARALLEL; + if (is_combined_parallel (region)) + { + switch (region->inner->type) + { + case GIMPLE_OMP_FOR: + gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO); + switch (region->inner->sched_kind) + { + case OMP_CLAUSE_SCHEDULE_RUNTIME: + start_ix2 = 3; + break; + case OMP_CLAUSE_SCHEDULE_DYNAMIC: + case OMP_CLAUSE_SCHEDULE_GUIDED: + if (region->inner->sched_modifiers + & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) + { + start_ix2 = 3 + region->inner->sched_kind; + break; + } + /* FALLTHRU */ + default: + start_ix2 = region->inner->sched_kind; + break; + } + start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC; + start_ix = (enum built_in_function) start_ix2; + break; + case GIMPLE_OMP_SECTIONS: + start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS; + break; + default: + gcc_unreachable (); + } + } + + /* By default, the value of NUM_THREADS is zero (selected at run time) + and there is no conditional. */ + cond = NULL_TREE; + val = build_int_cst (unsigned_type_node, 0); + flags = build_int_cst (unsigned_type_node, 0); + + c = omp_find_clause (clauses, OMP_CLAUSE_IF); + if (c) + cond = OMP_CLAUSE_IF_EXPR (c); + + c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS); + if (c) + { + val = OMP_CLAUSE_NUM_THREADS_EXPR (c); + clause_loc = OMP_CLAUSE_LOCATION (c); + } + else + clause_loc = gimple_location (entry_stmt); + + c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND); + if (c) + flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c)); + + /* Ensure 'val' is of the correct type. */ + val = fold_convert_loc (clause_loc, unsigned_type_node, val); + + /* If we found the clause 'if (cond)', build either + (cond != 0) or (cond ? val : 1u). 
*/ + if (cond) + { + cond = gimple_boolify (cond); + + if (integer_zerop (val)) + val = fold_build2_loc (clause_loc, + EQ_EXPR, unsigned_type_node, cond, + build_int_cst (TREE_TYPE (cond), 0)); + else + { + basic_block cond_bb, then_bb, else_bb; + edge e, e_then, e_else; + tree tmp_then, tmp_else, tmp_join, tmp_var; + + tmp_var = create_tmp_var (TREE_TYPE (val)); + if (gimple_in_ssa_p (cfun)) + { + tmp_then = make_ssa_name (tmp_var); + tmp_else = make_ssa_name (tmp_var); + tmp_join = make_ssa_name (tmp_var); + } + else + { + tmp_then = tmp_var; + tmp_else = tmp_var; + tmp_join = tmp_var; + } + + e = split_block_after_labels (bb); + cond_bb = e->src; + bb = e->dest; + remove_edge (e); + + then_bb = create_empty_bb (cond_bb); + else_bb = create_empty_bb (then_bb); + set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb); + set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb); + + stmt = gimple_build_cond_empty (cond); + gsi = gsi_start_bb (cond_bb); + gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); + + gsi = gsi_start_bb (then_bb); + expand_omp_build_assign (&gsi, tmp_then, val, true); + + gsi = gsi_start_bb (else_bb); + expand_omp_build_assign (&gsi, tmp_else, + build_int_cst (unsigned_type_node, 1), + true); + + make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE); + make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE); + add_bb_to_loop (then_bb, cond_bb->loop_father); + add_bb_to_loop (else_bb, cond_bb->loop_father); + e_then = make_edge (then_bb, bb, EDGE_FALLTHRU); + e_else = make_edge (else_bb, bb, EDGE_FALLTHRU); + + if (gimple_in_ssa_p (cfun)) + { + gphi *phi = create_phi_node (tmp_join, bb); + add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION); + add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION); + } + + val = tmp_join; + } + + gsi = gsi_start_bb (bb); + val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + } + + gsi = gsi_last_bb (bb); + t = gimple_omp_parallel_data_arg (entry_stmt); + if (t == NULL) + t1 = 
null_pointer_node; + else + t1 = build_fold_addr_expr (t); + tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt); + t2 = build_fold_addr_expr (child_fndecl); + + vec_alloc (args, 4 + vec_safe_length (ws_args)); + args->quick_push (t2); + args->quick_push (t1); + args->quick_push (val); + if (ws_args) + args->splice (*ws_args); + args->quick_push (flags); + + t = build_call_expr_loc_vec (UNKNOWN_LOCATION, + builtin_decl_explicit (start_ix), args); + + force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + + if (hsa_gen_requested_p () + && parallel_needs_hsa_kernel_p (region)) + { + cgraph_node *child_cnode = cgraph_node::get (child_fndecl); + hsa_register_kernel (child_cnode); + } +} + +/* Insert a function call whose name is FUNC_NAME with the information from + ENTRY_STMT into the basic_block BB. */ + +static void +expand_cilk_for_call (basic_block bb, gomp_parallel *entry_stmt, + vec <tree, va_gc> *ws_args) +{ + tree t, t1, t2; + gimple_stmt_iterator gsi; + vec <tree, va_gc> *args; + + gcc_assert (vec_safe_length (ws_args) == 2); + tree func_name = (*ws_args)[0]; + tree grain = (*ws_args)[1]; + + tree clauses = gimple_omp_parallel_clauses (entry_stmt); + tree count = omp_find_clause (clauses, OMP_CLAUSE__CILK_FOR_COUNT_); + gcc_assert (count != NULL_TREE); + count = OMP_CLAUSE_OPERAND (count, 0); + + gsi = gsi_last_bb (bb); + t = gimple_omp_parallel_data_arg (entry_stmt); + if (t == NULL) + t1 = null_pointer_node; + else + t1 = build_fold_addr_expr (t); + t2 = build_fold_addr_expr (gimple_omp_parallel_child_fn (entry_stmt)); + + vec_alloc (args, 4); + args->quick_push (t2); + args->quick_push (t1); + args->quick_push (count); + args->quick_push (grain); + t = build_call_expr_loc_vec (UNKNOWN_LOCATION, func_name, args); + + force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false, + GSI_CONTINUE_LINKING); +} + +/* Build the function call to GOMP_task to actually + generate the task operation. 
BB is the block where to insert the code. */ + +static void +expand_task_call (struct omp_region *region, basic_block bb, + gomp_task *entry_stmt) +{ + tree t1, t2, t3; + gimple_stmt_iterator gsi; + location_t loc = gimple_location (entry_stmt); + + tree clauses = gimple_omp_task_clauses (entry_stmt); + + tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF); + tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED); + tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE); + tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND); + tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL); + tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY); + + unsigned int iflags + = (untied ? GOMP_TASK_FLAG_UNTIED : 0) + | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0) + | (depend ? GOMP_TASK_FLAG_DEPEND : 0); + + bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt); + tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE; + tree num_tasks = NULL_TREE; + bool ull = false; + if (taskloop_p) + { + gimple *g = last_stmt (region->outer->entry); + gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR + && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP); + struct omp_for_data fd; + omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL); + startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_); + endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar), + OMP_CLAUSE__LOOPTEMP_); + startvar = OMP_CLAUSE_DECL (startvar); + endvar = OMP_CLAUSE_DECL (endvar); + step = fold_convert_loc (loc, fd.iter_type, fd.loop.step); + if (fd.loop.cond_code == LT_EXPR) + iflags |= GOMP_TASK_FLAG_UP; + tree tclauses = gimple_omp_for_clauses (g); + num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS); + if (num_tasks) + num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks); + else + { + num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE); + if (num_tasks) + { + iflags |= GOMP_TASK_FLAG_GRAINSIZE; + num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks); + } + 
else + num_tasks = integer_zero_node; + } + num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks); + if (ifc == NULL_TREE) + iflags |= GOMP_TASK_FLAG_IF; + if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP)) + iflags |= GOMP_TASK_FLAG_NOGROUP; + ull = fd.iter_type == long_long_unsigned_type_node; + } + else if (priority) + iflags |= GOMP_TASK_FLAG_PRIORITY; + + tree flags = build_int_cst (unsigned_type_node, iflags); + + tree cond = boolean_true_node; + if (ifc) + { + if (taskloop_p) + { + tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc)); + t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t, + build_int_cst (unsigned_type_node, + GOMP_TASK_FLAG_IF), + build_int_cst (unsigned_type_node, 0)); + flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, + flags, t); + } + else + cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc)); + } + + if (finalc) + { + tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc)); + t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t, + build_int_cst (unsigned_type_node, + GOMP_TASK_FLAG_FINAL), + build_int_cst (unsigned_type_node, 0)); + flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t); + } + if (depend) + depend = OMP_CLAUSE_DECL (depend); + else + depend = build_int_cst (ptr_type_node, 0); + if (priority) + priority = fold_convert (integer_type_node, + OMP_CLAUSE_PRIORITY_EXPR (priority)); + else + priority = integer_zero_node; + + gsi = gsi_last_bb (bb); + tree t = gimple_omp_task_data_arg (entry_stmt); + if (t == NULL) + t2 = null_pointer_node; + else + t2 = build_fold_addr_expr_loc (loc, t); + t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt)); + t = gimple_omp_task_copy_fn (entry_stmt); + if (t == NULL) + t3 = null_pointer_node; + else + t3 = build_fold_addr_expr_loc (loc, t); + + if (taskloop_p) + t = build_call_expr (ull + ? 
builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL) + : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP), + 11, t1, t2, t3, + gimple_omp_task_arg_size (entry_stmt), + gimple_omp_task_arg_align (entry_stmt), flags, + num_tasks, priority, startvar, endvar, step); + else + t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK), + 9, t1, t2, t3, + gimple_omp_task_arg_size (entry_stmt), + gimple_omp_task_arg_align (entry_stmt), cond, flags, + depend, priority); + + force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); +} + +/* Chain all the DECLs in LIST by their TREE_CHAIN fields. */ + +static tree +vec2chain (vec<tree, va_gc> *v) +{ + tree chain = NULL_TREE, t; + unsigned ix; + + FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t) + { + DECL_CHAIN (t) = chain; + chain = t; + } + + return chain; +} + +/* Remove barriers in REGION->EXIT's block. Note that this is only + valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region + is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that + left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be + removed. */ + +static void +remove_exit_barrier (struct omp_region *region) +{ + gimple_stmt_iterator gsi; + basic_block exit_bb; + edge_iterator ei; + edge e; + gimple *stmt; + int any_addressable_vars = -1; + + exit_bb = region->exit; + + /* If the parallel region doesn't return, we don't have REGION->EXIT + block at all. */ + if (! exit_bb) + return; + + /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The + workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of + statements that can appear in between are extremely limited -- no + memory operations at all. Here, we allow nothing at all, so the + only thing we allow to precede this GIMPLE_OMP_RETURN is a label. 
*/ + gsi = gsi_last_bb (exit_bb); + gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); + gsi_prev (&gsi); + if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL) + return; + + FOR_EACH_EDGE (e, ei, exit_bb->preds) + { + gsi = gsi_last_bb (e->src); + if (gsi_end_p (gsi)) + continue; + stmt = gsi_stmt (gsi); + if (gimple_code (stmt) == GIMPLE_OMP_RETURN + && !gimple_omp_return_nowait_p (stmt)) + { + /* OpenMP 3.0 tasks unfortunately prevent this optimization + in many cases. If there could be tasks queued, the barrier + might be needed to let the tasks run before some local + variable of the parallel that the task uses as shared + runs out of scope. The task can be spawned either + from within current function (this would be easy to check) + or from some function it calls and gets passed an address + of such a variable. */ + if (any_addressable_vars < 0) + { + gomp_parallel *parallel_stmt + = as_a <gomp_parallel *> (last_stmt (region->entry)); + tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt); + tree local_decls, block, decl; + unsigned ix; + + any_addressable_vars = 0; + FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl) + if (TREE_ADDRESSABLE (decl)) + { + any_addressable_vars = 1; + break; + } + for (block = gimple_block (stmt); + !any_addressable_vars + && block + && TREE_CODE (block) == BLOCK; + block = BLOCK_SUPERCONTEXT (block)) + { + for (local_decls = BLOCK_VARS (block); + local_decls; + local_decls = DECL_CHAIN (local_decls)) + if (TREE_ADDRESSABLE (local_decls)) + { + any_addressable_vars = 1; + break; + } + if (block == gimple_block (parallel_stmt)) + break; + } + } + if (!any_addressable_vars) + gimple_omp_return_set_nowait (stmt); + } + } +} + +static void +remove_exit_barriers (struct omp_region *region) +{ + if (region->type == GIMPLE_OMP_PARALLEL) + remove_exit_barrier (region); + + if (region->inner) + { + region = region->inner; + remove_exit_barriers (region); + while (region->next) + { + 
region = region->next; + remove_exit_barriers (region); + } + } +} + +/* Optimize omp_get_thread_num () and omp_get_num_threads () + calls. These can't be declared as const functions, but + within one parallel body they are constant, so they can be + transformed there into __builtin_omp_get_{thread_num,num_threads} () + which are declared const. Similarly for task body, except + that in untied task omp_get_thread_num () can change at any task + scheduling point. */ + +static void +optimize_omp_library_calls (gimple *entry_stmt) +{ + basic_block bb; + gimple_stmt_iterator gsi; + tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); + tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree); + tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); + tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree); + bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK + && omp_find_clause (gimple_omp_task_clauses (entry_stmt), + OMP_CLAUSE_UNTIED) != NULL); + + FOR_EACH_BB_FN (bb, cfun) + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *call = gsi_stmt (gsi); + tree decl; + + if (is_gimple_call (call) + && (decl = gimple_call_fndecl (call)) + && DECL_EXTERNAL (decl) + && TREE_PUBLIC (decl) + && DECL_INITIAL (decl) == NULL) + { + tree built_in; + + if (DECL_NAME (decl) == thr_num_id) + { + /* In #pragma omp task untied omp_get_thread_num () can change + during the execution of the task region. 
*/ + if (untied_task) + continue; + built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); + } + else if (DECL_NAME (decl) == num_thr_id) + built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); + else + continue; + + if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in) + || gimple_call_num_args (call) != 0) + continue; + + if (flag_exceptions && !TREE_NOTHROW (decl)) + continue; + + if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE + || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)), + TREE_TYPE (TREE_TYPE (built_in)))) + continue; + + gimple_call_set_fndecl (call, built_in); + } + } +} + +/* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be + regimplified. */ + +static tree +expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *) +{ + tree t = *tp; + + /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */ + if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t)) + return t; + + if (TREE_CODE (t) == ADDR_EXPR) + recompute_tree_invariant_for_addr_expr (t); + + *walk_subtrees = !TYPE_P (t) && !DECL_P (t); + return NULL_TREE; +} + +/* Prepend or append TO = FROM assignment before or after *GSI_P. */ + +static void +expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from, + bool after) +{ + bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to); + from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE, + !after, after ? GSI_CONTINUE_LINKING + : GSI_SAME_STMT); + gimple *stmt = gimple_build_assign (to, from); + if (after) + gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING); + else + gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT); + if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL) + || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL)) + { + gimple_stmt_iterator gsi = gsi_for_stmt (stmt); + gimple_regimplify_operands (stmt, &gsi); + } +} + +/* Expand the OpenMP parallel or task directive starting at REGION. 
*/ + +static void +expand_omp_taskreg (struct omp_region *region) +{ + basic_block entry_bb, exit_bb, new_bb; + struct function *child_cfun; + tree child_fn, block, t; + gimple_stmt_iterator gsi; + gimple *entry_stmt, *stmt; + edge e; + vec<tree, va_gc> *ws_args; + + entry_stmt = last_stmt (region->entry); + child_fn = gimple_omp_taskreg_child_fn (entry_stmt); + child_cfun = DECL_STRUCT_FUNCTION (child_fn); + + entry_bb = region->entry; + if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK) + exit_bb = region->cont; + else + exit_bb = region->exit; + + bool is_cilk_for + = (flag_cilkplus + && gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL + && omp_find_clause (gimple_omp_parallel_clauses (entry_stmt), + OMP_CLAUSE__CILK_FOR_COUNT_) != NULL_TREE); + + if (is_cilk_for) + /* If it is a _Cilk_for statement, it is modelled *like* a parallel for, + and the inner statement contains the name of the built-in function + and grain. */ + ws_args = region->inner->ws_args; + else if (is_combined_parallel (region)) + ws_args = region->ws_args; + else + ws_args = NULL; + + if (child_cfun->cfg) + { + /* Due to inlining, it may happen that we have already outlined + the region, in which case all we need to do is make the + sub-graph unreachable and emit the parallel call. 
*/ + edge entry_succ_e, exit_succ_e; + + entry_succ_e = single_succ_edge (entry_bb); + + gsi = gsi_last_bb (entry_bb); + gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL + || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK); + gsi_remove (&gsi, true); + + new_bb = entry_bb; + if (exit_bb) + { + exit_succ_e = single_succ_edge (exit_bb); + make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU); + } + remove_edge_and_dominated_blocks (entry_succ_e); + } + else + { + unsigned srcidx, dstidx, num; + + /* If the parallel region needs data sent from the parent + function, then the very first statement (except possible + tree profile counter updates) of the parallel body + is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since + &.OMP_DATA_O is passed as an argument to the child function, + we need to replace it with the argument as seen by the child + function. + + In most cases, this will end up being the identity assignment + .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had + a function call that has been inlined, the original PARM_DECL + .OMP_DATA_I may have been converted into a different local + variable. In which case, we need to keep the assignment. */ + if (gimple_omp_taskreg_data_arg (entry_stmt)) + { + basic_block entry_succ_bb + = single_succ_p (entry_bb) ? single_succ (entry_bb) + : FALLTHRU_EDGE (entry_bb)->dest; + tree arg; + gimple *parcopy_stmt = NULL; + + for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi)) + { + gimple *stmt; + + gcc_assert (!gsi_end_p (gsi)); + stmt = gsi_stmt (gsi); + if (gimple_code (stmt) != GIMPLE_ASSIGN) + continue; + + if (gimple_num_ops (stmt) == 2) + { + tree arg = gimple_assign_rhs1 (stmt); + + /* We're ignore the subcode because we're + effectively doing a STRIP_NOPS. 
*/ + + if (TREE_CODE (arg) == ADDR_EXPR + && TREE_OPERAND (arg, 0) + == gimple_omp_taskreg_data_arg (entry_stmt)) + { + parcopy_stmt = stmt; + break; + } + } + } + + gcc_assert (parcopy_stmt != NULL); + arg = DECL_ARGUMENTS (child_fn); + + if (!gimple_in_ssa_p (cfun)) + { + if (gimple_assign_lhs (parcopy_stmt) == arg) + gsi_remove (&gsi, true); + else + { + /* ?? Is setting the subcode really necessary ?? */ + gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg)); + gimple_assign_set_rhs1 (parcopy_stmt, arg); + } + } + else + { + tree lhs = gimple_assign_lhs (parcopy_stmt); + gcc_assert (SSA_NAME_VAR (lhs) == arg); + /* We'd like to set the rhs to the default def in the child_fn, + but it's too early to create ssa names in the child_fn. + Instead, we set the rhs to the parm. In + move_sese_region_to_fn, we introduce a default def for the + parm, map the parm to it's default def, and once we encounter + this stmt, replace the parm with the default def. */ + gimple_assign_set_rhs1 (parcopy_stmt, arg); + update_stmt (parcopy_stmt); + } + } + + /* Declare local variables needed in CHILD_CFUN. */ + block = DECL_INITIAL (child_fn); + BLOCK_VARS (block) = vec2chain (child_cfun->local_decls); + /* The gimplifier could record temporaries in parallel/task block + rather than in containing function's local_decls chain, + which would mean cgraph missed finalizing them. Do it now. */ + for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t)) + if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t)) + varpool_node::finalize_decl (t); + DECL_SAVED_TREE (child_fn) = NULL; + /* We'll create a CFG for child_fn, so no gimple body is needed. */ + gimple_set_body (child_fn, NULL); + TREE_USED (block) = 1; + + /* Reset DECL_CONTEXT on function arguments. */ + for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t)) + DECL_CONTEXT (t) = child_fn; + + /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK, + so that it can be moved to the child function. 
*/ + gsi = gsi_last_bb (entry_bb); + stmt = gsi_stmt (gsi); + gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL + || gimple_code (stmt) == GIMPLE_OMP_TASK)); + e = split_block (entry_bb, stmt); + gsi_remove (&gsi, true); + entry_bb = e->dest; + edge e2 = NULL; + if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL) + single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; + else + { + e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL); + gcc_assert (e2->dest == region->exit); + remove_edge (BRANCH_EDGE (entry_bb)); + set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src); + gsi = gsi_last_bb (region->exit); + gcc_assert (!gsi_end_p (gsi) + && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); + gsi_remove (&gsi, true); + } + + /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */ + if (exit_bb) + { + gsi = gsi_last_bb (exit_bb); + gcc_assert (!gsi_end_p (gsi) + && (gimple_code (gsi_stmt (gsi)) + == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN))); + stmt = gimple_build_return (NULL); + gsi_insert_after (&gsi, stmt, GSI_SAME_STMT); + gsi_remove (&gsi, true); + } + + /* Move the parallel region into CHILD_CFUN. */ + + if (gimple_in_ssa_p (cfun)) + { + init_tree_ssa (child_cfun); + init_ssa_operands (child_cfun); + child_cfun->gimple_df->in_ssa_p = true; + block = NULL_TREE; + } + else + block = gimple_block (entry_stmt); + + new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block); + if (exit_bb) + single_succ_edge (new_bb)->flags = EDGE_FALLTHRU; + if (e2) + { + basic_block dest_bb = e2->dest; + if (!exit_bb) + make_edge (new_bb, dest_bb, EDGE_FALLTHRU); + remove_edge (e2); + set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb); + } + /* When the OMP expansion process cannot guarantee an up-to-date + loop tree arrange for the child function to fixup loops. 
*/ + if (loops_state_satisfies_p (LOOPS_NEED_FIXUP)) + child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP; + + /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */ + num = vec_safe_length (child_cfun->local_decls); + for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++) + { + t = (*child_cfun->local_decls)[srcidx]; + if (DECL_CONTEXT (t) == cfun->decl) + continue; + if (srcidx != dstidx) + (*child_cfun->local_decls)[dstidx] = t; + dstidx++; + } + if (dstidx != num) + vec_safe_truncate (child_cfun->local_decls, dstidx); + + /* Inform the callgraph about the new function. */ + child_cfun->curr_properties = cfun->curr_properties; + child_cfun->has_simduid_loops |= cfun->has_simduid_loops; + child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops; + cgraph_node *node = cgraph_node::get_create (child_fn); + node->parallelized_function = 1; + cgraph_node::add_new_function (child_fn, true); + + bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl) + && !DECL_ASSEMBLER_NAME_SET_P (child_fn); + + /* Fix the callgraph edges for child_cfun. Those for cfun will be + fixed in a following pass. */ + push_cfun (child_cfun); + if (need_asm) + assign_assembler_name_if_neeeded (child_fn); + + if (optimize) + optimize_omp_library_calls (entry_stmt); + cgraph_edge::rebuild_edges (); + + /* Some EH regions might become dead, see PR34608. If + pass_cleanup_cfg isn't the first pass to happen with the + new child, these dead EH edges might cause problems. + Clean them up now. 
*/ + if (flag_exceptions) + { + basic_block bb; + bool changed = false; + + FOR_EACH_BB_FN (bb, cfun) + changed |= gimple_purge_dead_eh_edges (bb); + if (changed) + cleanup_tree_cfg (); + } + if (gimple_in_ssa_p (cfun)) + update_ssa (TODO_update_ssa); + if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP)) + verify_loop_structure (); + pop_cfun (); + + if (dump_file && !gimple_in_ssa_p (cfun)) + { + omp_any_child_fn_dumped = true; + dump_function_header (dump_file, child_fn, dump_flags); + dump_function_to_file (child_fn, dump_file, dump_flags); + } + } + + /* Emit a library call to launch the children threads. */ + if (is_cilk_for) + expand_cilk_for_call (new_bb, + as_a <gomp_parallel *> (entry_stmt), ws_args); + else if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL) + expand_parallel_call (region, new_bb, + as_a <gomp_parallel *> (entry_stmt), ws_args); + else + expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt)); + if (gimple_in_ssa_p (cfun)) + update_ssa (TODO_update_ssa_only_virtuals); +} + +/* Information about members of an OpenACC collapsed loop nest. */ + +struct oacc_collapse +{ + tree base; /* Base value. */ + tree iters; /* Number of steps. */ + tree step; /* step size. */ +}; + +/* Helper for expand_oacc_for. Determine collapsed loop information. + Fill in COUNTS array. Emit any initialization code before GSI. + Return the calculated outer loop bound of BOUND_TYPE. 
*/ + +static tree +expand_oacc_collapse_init (const struct omp_for_data *fd, + gimple_stmt_iterator *gsi, + oacc_collapse *counts, tree bound_type) +{ + tree total = build_int_cst (bound_type, 1); + int ix; + + gcc_assert (integer_onep (fd->loop.step)); + gcc_assert (integer_zerop (fd->loop.n1)); + + for (ix = 0; ix != fd->collapse; ix++) + { + const omp_for_data_loop *loop = &fd->loops[ix]; + + tree iter_type = TREE_TYPE (loop->v); + tree diff_type = iter_type; + tree plus_type = iter_type; + + gcc_assert (loop->cond_code == fd->loop.cond_code); + + if (POINTER_TYPE_P (iter_type)) + plus_type = sizetype; + if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type)) + diff_type = signed_type_for (diff_type); + + tree b = loop->n1; + tree e = loop->n2; + tree s = loop->step; + bool up = loop->cond_code == LT_EXPR; + tree dir = build_int_cst (diff_type, up ? +1 : -1); + bool negating; + tree expr; + + b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE, + true, GSI_SAME_STMT); + e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE, + true, GSI_SAME_STMT); + + /* Convert the step, avoiding possible unsigned->signed overflow. */ + negating = !up && TYPE_UNSIGNED (TREE_TYPE (s)); + if (negating) + s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s); + s = fold_convert (diff_type, s); + if (negating) + s = fold_build1 (NEGATE_EXPR, diff_type, s); + s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE, + true, GSI_SAME_STMT); + + /* Determine the range, avoiding possible unsigned->signed overflow. */ + negating = !up && TYPE_UNSIGNED (iter_type); + expr = fold_build2 (MINUS_EXPR, plus_type, + fold_convert (plus_type, negating ? b : e), + fold_convert (plus_type, negating ? e : b)); + expr = fold_convert (diff_type, expr); + if (negating) + expr = fold_build1 (NEGATE_EXPR, diff_type, expr); + tree range = force_gimple_operand_gsi + (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT); + + /* Determine number of iterations. 
*/ + expr = fold_build2 (MINUS_EXPR, diff_type, range, dir); + expr = fold_build2 (PLUS_EXPR, diff_type, expr, s); + expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s); + + tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, + true, GSI_SAME_STMT); + + counts[ix].base = b; + counts[ix].iters = iters; + counts[ix].step = s; + + total = fold_build2 (MULT_EXPR, bound_type, total, + fold_convert (bound_type, iters)); + } + + return total; +} + +/* Emit initializers for collapsed loop members. IVAR is the outer + loop iteration variable, from which collapsed loop iteration values + are calculated. COUNTS array has been initialized by + expand_oacc_collapse_inits. */ + +static void +expand_oacc_collapse_vars (const struct omp_for_data *fd, + gimple_stmt_iterator *gsi, + const oacc_collapse *counts, tree ivar) +{ + tree ivar_type = TREE_TYPE (ivar); + + /* The most rapidly changing iteration variable is the innermost + one. */ + for (int ix = fd->collapse; ix--;) + { + const omp_for_data_loop *loop = &fd->loops[ix]; + const oacc_collapse *collapse = &counts[ix]; + tree iter_type = TREE_TYPE (loop->v); + tree diff_type = TREE_TYPE (collapse->step); + tree plus_type = iter_type; + enum tree_code plus_code = PLUS_EXPR; + tree expr; + + if (POINTER_TYPE_P (iter_type)) + { + plus_code = POINTER_PLUS_EXPR; + plus_type = sizetype; + } + + expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, ivar, + fold_convert (ivar_type, collapse->iters)); + expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr), + collapse->step); + expr = fold_build2 (plus_code, iter_type, collapse->base, + fold_convert (plus_type, expr)); + expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE, + true, GSI_SAME_STMT); + gassign *ass = gimple_build_assign (loop->v, expr); + gsi_insert_before (gsi, ass, GSI_SAME_STMT); + + if (ix) + { + expr = fold_build2 (TRUNC_DIV_EXPR, ivar_type, ivar, + fold_convert (ivar_type, collapse->iters)); + ivar = force_gimple_operand_gsi 
(gsi, expr, true, NULL_TREE, + true, GSI_SAME_STMT); + } + } +} + +/* Helper function for expand_omp_{for_*,simd}. If this is the outermost + of the combined collapse > 1 loop constructs, generate code like: + if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB; + if (cond3 is <) + adj = STEP3 - 1; + else + adj = STEP3 + 1; + count3 = (adj + N32 - N31) / STEP3; + if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB; + if (cond2 is <) + adj = STEP2 - 1; + else + adj = STEP2 + 1; + count2 = (adj + N22 - N21) / STEP2; + if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB; + if (cond1 is <) + adj = STEP1 - 1; + else + adj = STEP1 + 1; + count1 = (adj + N12 - N11) / STEP1; + count = count1 * count2 * count3; + Furthermore, if ZERO_ITER_BB is NULL, create a BB which does: + count = 0; + and set ZERO_ITER_BB to that bb. If this isn't the outermost + of the combined loop constructs, just initialize COUNTS array + from the _looptemp_ clauses. */ + +/* NOTE: It *could* be better to moosh all of the BBs together, + creating one larger BB with all the computation and the unexpected + jump at the end. I.e. + + bool zero3, zero2, zero1, zero; + + zero3 = N32 c3 N31; + count3 = (N32 - N31) /[cl] STEP3; + zero2 = N22 c2 N21; + count2 = (N22 - N21) /[cl] STEP2; + zero1 = N12 c1 N11; + count1 = (N12 - N11) /[cl] STEP1; + zero = zero3 || zero2 || zero1; + count = count1 * count2 * count3; + if (__builtin_expect(zero, false)) goto zero_iter_bb; + + After all, we expect the zero=false, and thus we expect to have to + evaluate all of the comparison expressions, so short-circuiting + oughtn't be a win. Since the condition isn't protecting a + denominator, we're not concerned about divide-by-zero, so we can + fully evaluate count even if a numerator turned out to be wrong. + + It seems like putting this all together would create much better + scheduling opportunities, and less pressure on the chip's branch + predictor. 
*/ + +static void +expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi, + basic_block &entry_bb, tree *counts, + basic_block &zero_iter1_bb, int &first_zero_iter1, + basic_block &zero_iter2_bb, int &first_zero_iter2, + basic_block &l2_dom_bb) +{ + tree t, type = TREE_TYPE (fd->loop.v); + edge e, ne; + int i; + + /* Collapsed loops need work for expansion into SSA form. */ + gcc_assert (!gimple_in_ssa_p (cfun)); + + if (gimple_omp_for_combined_into_p (fd->for_stmt) + && TREE_CODE (fd->loop.n2) != INTEGER_CST) + { + gcc_assert (fd->ordered == 0); + /* First two _looptemp_ clauses are for istart/iend, counts[0] + isn't supposed to be handled, as the inner loop doesn't + use it. */ + tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + for (i = 0; i < fd->collapse; i++) + { + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + if (i) + counts[i] = OMP_CLAUSE_DECL (innerc); + else + counts[0] = NULL_TREE; + } + return; + } + + for (i = fd->collapse; i < fd->ordered; i++) + { + tree itype = TREE_TYPE (fd->loops[i].v); + counts[i] = NULL_TREE; + t = fold_binary (fd->loops[i].cond_code, boolean_type_node, + fold_convert (itype, fd->loops[i].n1), + fold_convert (itype, fd->loops[i].n2)); + if (t && integer_zerop (t)) + { + for (i = fd->collapse; i < fd->ordered; i++) + counts[i] = build_int_cst (type, 0); + break; + } + } + for (i = 0; i < (fd->ordered ? 
fd->ordered : fd->collapse); i++) + { + tree itype = TREE_TYPE (fd->loops[i].v); + + if (i >= fd->collapse && counts[i]) + continue; + if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse) + && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node, + fold_convert (itype, fd->loops[i].n1), + fold_convert (itype, fd->loops[i].n2))) + == NULL_TREE || !integer_onep (t))) + { + gcond *cond_stmt; + tree n1, n2; + n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1)); + n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE, + true, GSI_SAME_STMT); + n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2)); + n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE, + true, GSI_SAME_STMT); + cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2, + NULL_TREE, NULL_TREE); + gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT); + if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), + expand_omp_regimplify_p, NULL, NULL) + || walk_tree (gimple_cond_rhs_ptr (cond_stmt), + expand_omp_regimplify_p, NULL, NULL)) + { + *gsi = gsi_for_stmt (cond_stmt); + gimple_regimplify_operands (cond_stmt, gsi); + } + e = split_block (entry_bb, cond_stmt); + basic_block &zero_iter_bb + = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb; + int &first_zero_iter + = i < fd->collapse ? 
first_zero_iter1 : first_zero_iter2; + if (zero_iter_bb == NULL) + { + gassign *assign_stmt; + first_zero_iter = i; + zero_iter_bb = create_empty_bb (entry_bb); + add_bb_to_loop (zero_iter_bb, entry_bb->loop_father); + *gsi = gsi_after_labels (zero_iter_bb); + if (i < fd->collapse) + assign_stmt = gimple_build_assign (fd->loop.n2, + build_zero_cst (type)); + else + { + counts[i] = create_tmp_reg (type, ".count"); + assign_stmt + = gimple_build_assign (counts[i], build_zero_cst (type)); + } + gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT); + set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb, + entry_bb); + } + ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE); + ne->probability = REG_BR_PROB_BASE / 2000 - 1; + e->flags = EDGE_TRUE_VALUE; + e->probability = REG_BR_PROB_BASE - ne->probability; + if (l2_dom_bb == NULL) + l2_dom_bb = entry_bb; + entry_bb = e->dest; + *gsi = gsi_last_bb (entry_bb); + } + + if (POINTER_TYPE_P (itype)) + itype = signed_type_for (itype); + t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR + ? -1 : 1)); + t = fold_build2 (PLUS_EXPR, itype, + fold_convert (itype, fd->loops[i].step), t); + t = fold_build2 (PLUS_EXPR, itype, t, + fold_convert (itype, fd->loops[i].n2)); + t = fold_build2 (MINUS_EXPR, itype, t, + fold_convert (itype, fd->loops[i].n1)); + /* ?? We could probably use CEIL_DIV_EXPR instead of + TRUNC_DIV_EXPR and adjusting by hand. Unless we can't + generate the same code in the end because generically we + don't know that the values involved must be negative for + GT?? 
	 */
      if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
	/* Counting-down loop with unsigned iteration type: negate both
	   the span and the step so TRUNC_DIV_EXPR rounds correctly.  */
	t = fold_build2 (TRUNC_DIV_EXPR, itype,
			 fold_build1 (NEGATE_EXPR, itype, t),
			 fold_build1 (NEGATE_EXPR, itype,
				      fold_convert (itype,
						    fd->loops[i].step)));
      else
	t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
			 fold_convert (itype, fd->loops[i].step));
      t = fold_convert (type, t);
      if (TREE_CODE (t) == INTEGER_CST)
	counts[i] = t;
      else
	{
	  /* Non-constant count: materialize it in a temporary, unless
	     the zero-iterations path above already created counts[i].  */
	  if (i < fd->collapse || i != first_zero_iter2)
	    counts[i] = create_tmp_reg (type, ".count");
	  expand_omp_build_assign (gsi, counts[i], t);
	}
      if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
	{
	  /* Accumulate the product of the collapsed loop counts into
	     fd->loop.n2, the total logical iteration count.  */
	  if (i == 0)
	    t = counts[0];
	  else
	    t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
	  expand_omp_build_assign (gsi, fd->loop.n2, t);
	}
    }
}

/* Helper function for expand_omp_{for_*,simd}.  Generate code like:
	T = V;
	V3 = N31 + (T % count3) * STEP3;
	T = T / count3;
	V2 = N21 + (T % count2) * STEP2;
	T = T / count2;
	V1 = N11 + T * STEP1;
   if this loop doesn't have an inner loop construct combined with it.
   If it does have an inner loop construct combined with it and the
   iteration count isn't known constant, store values from counts array
   into its _looptemp_ temporaries instead.

   FD describes the collapsed loop nest, GSI is where the new statements
   are linked, COUNTS holds the per-dimension iteration counts computed
   by expand_omp_for_init_counts, INNER_STMT is the combined inner
   construct (if any) and STARTVAR is the logical iteration number to
   decompose.  */

static void
expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
			  tree *counts, gimple *inner_stmt, tree startvar)
{
  int i;
  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      /* If fd->loop.n2 is constant, then no propagation of the counts
	 is needed, they are constant.  */
      if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
	return;

      tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
		     ? gimple_omp_taskreg_clauses (inner_stmt)
		     : gimple_omp_for_clauses (inner_stmt);
      /* First two _looptemp_ clauses are for istart/iend, counts[0]
	 isn't supposed to be handled, as the inner loop doesn't
	 use it.  */
      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      for (i = 0; i < fd->collapse; i++)
	{
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  if (i)
	    {
	      /* Copy counts[i] into the inner construct's _looptemp_
		 decl so the combined loop can rediscover the counts.  */
	      tree tem = OMP_CLAUSE_DECL (innerc);
	      tree t = fold_convert (TREE_TYPE (tem), counts[i]);
	      t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
					    false, GSI_CONTINUE_LINKING);
	      gassign *stmt = gimple_build_assign (tem, t);
	      gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
	    }
	}
      return;
    }

  tree type = TREE_TYPE (fd->loop.v);
  tree tem = create_tmp_reg (type, ".tem");
  gassign *stmt = gimple_build_assign (tem, startvar);
  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);

  /* Walk the collapsed dimensions innermost to outermost, peeling off
     T % count_i for each loop variable and then dividing T by count_i.  */
  for (i = fd->collapse - 1; i >= 0; i--)
    {
      tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
      itype = vtype;
      if (POINTER_TYPE_P (vtype))
	itype = signed_type_for (vtype);
      if (i != 0)
	t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
      else
	t = tem;
      t = fold_convert (itype, t);
      t = fold_build2 (MULT_EXPR, itype, t,
		       fold_convert (itype, fd->loops[i].step));
      if (POINTER_TYPE_P (vtype))
	t = fold_build_pointer_plus (fd->loops[i].n1, t);
      else
	t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
      /* Addressable iteration vars must go through a temporary so the
	 computed value is a valid gimple operand.  */
      t = force_gimple_operand_gsi (gsi, t,
				    DECL_P (fd->loops[i].v)
				    && TREE_ADDRESSABLE (fd->loops[i].v),
				    NULL_TREE, false,
				    GSI_CONTINUE_LINKING);
      stmt = gimple_build_assign (fd->loops[i].v, t);
      gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
      if (i != 0)
	{
	  t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
	  t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
					false, GSI_CONTINUE_LINKING);
	  stmt = gimple_build_assign (tem, t);
	  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
	}
    }
}

/* Helper function for expand_omp_for_*.
   Generate code like:
	L10:
	V3 += STEP3;
	if (V3 cond3 N32) goto BODY_BB; else goto L11;
	L11:
	V3 = N31;
	V2 += STEP2;
	if (V2 cond2 N22) goto BODY_BB; else goto L12;
	L12:
	V2 = N21;
	V1 += STEP1;
	goto BODY_BB;  */

static basic_block
extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
			     basic_block body_bb)
{
  basic_block last_bb, bb, collapse_bb = NULL;
  int i;
  gimple_stmt_iterator gsi;
  edge e;
  tree t;
  gimple *stmt;

  last_bb = cont_bb;
  /* Build one update block per collapsed dimension, innermost first;
     each block steps loop I and (except for the outermost) tests its
     bound, chaining to the next-outer block on exhaustion.  */
  for (i = fd->collapse - 1; i >= 0; i--)
    {
      tree vtype = TREE_TYPE (fd->loops[i].v);

      bb = create_empty_bb (last_bb);
      add_bb_to_loop (bb, last_bb->loop_father);
      gsi = gsi_start_bb (bb);

      if (i < fd->collapse - 1)
	{
	  /* The inner loop ran out; reset its variable to N1 before
	     stepping this one.  */
	  e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
	  e->probability = REG_BR_PROB_BASE / 8;

	  t = fd->loops[i + 1].n1;
	  t = force_gimple_operand_gsi (&gsi, t,
					DECL_P (fd->loops[i + 1].v)
					&& TREE_ADDRESSABLE (fd->loops[i
								       + 1].v),
					NULL_TREE, false,
					GSI_CONTINUE_LINKING);
	  stmt = gimple_build_assign (fd->loops[i + 1].v, t);
	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
	}
      else
	collapse_bb = bb;

      set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);

      if (POINTER_TYPE_P (vtype))
	t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
      else
	t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
      t = force_gimple_operand_gsi (&gsi, t,
				    DECL_P (fd->loops[i].v)
				    && TREE_ADDRESSABLE (fd->loops[i].v),
				    NULL_TREE, false, GSI_CONTINUE_LINKING);
      stmt = gimple_build_assign (fd->loops[i].v, t);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      if (i > 0)
	{
	  t = fd->loops[i].n2;
	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
					false, GSI_CONTINUE_LINKING);
	  tree v = fd->loops[i].v;
	  if (DECL_P (v) && TREE_ADDRESSABLE (v))
	    v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
					  false, GSI_CONTINUE_LINKING);
	  t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
	  stmt = gimple_build_cond_empty (t);
	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
	  /* Loop back into the body while the bound still holds.  */
	  e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
	  e->probability = REG_BR_PROB_BASE * 7 / 8;
	}
      else
	make_edge (bb, body_bb, EDGE_FALLTHRU);
      last_bb = bb;
    }

  return collapse_bb;
}

/* Expand #pragma omp ordered depend(source).  */

static void
expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
			   tree *counts, location_t loc)
{
  /* Select the long vs. unsigned-long-long flavor of the doacross post
     builtin from the iteration type; counts[fd->ordered] is the array
     holding the current iteration vector, passed by address.  */
  enum built_in_function source_ix
    = fd->iter_type == long_integer_type_node
      ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
  gimple *g
    = gimple_build_call (builtin_decl_explicit (source_ix), 1,
			 build_fold_addr_expr (counts[fd->ordered]));
  gimple_set_location (g, loc);
  gsi_insert_before (gsi, g, GSI_SAME_STMT);
}

/* Expand a single depend from #pragma omp ordered depend(sink:...).  */

static void
expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
			 tree *counts, tree c, location_t loc)
{
  auto_vec<tree, 10> args;
  enum built_in_function sink_ix
    = fd->iter_type == long_integer_type_node
      ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
  tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
  int i;
  gimple_stmt_iterator gsi2 = *gsi;
  bool warned_step = false;

  /* First pass over the offsets: diagnose a sink that waits for a
     lexically later iteration (stops at the first non-zero offset).  */
  for (i = 0; i < fd->ordered; i++)
    {
      tree step = NULL_TREE;
      off = TREE_PURPOSE (deps);
      if (TREE_CODE (off) == TRUNC_DIV_EXPR)
	{
	  step = TREE_OPERAND (off, 1);
	  off = TREE_OPERAND (off, 0);
	}
      if (!integer_zerop (off))
	{
	  gcc_assert (fd->loops[i].cond_code == LT_EXPR
		      || fd->loops[i].cond_code == GT_EXPR);
	  bool forward = fd->loops[i].cond_code == LT_EXPR;
	  if (step)
	    {
	      /* Non-simple Fortran DO loops.  If step is variable,
		 we don't know at compile even the direction, so can't
		 warn.  */
	      if (TREE_CODE (step) != INTEGER_CST)
		break;
	      forward = tree_int_cst_sgn (step) != -1;
	    }
	  if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
	    warning_at (loc, 0, "%<depend(sink)%> clause waiting for "
			"lexically later iteration");
	  break;
	}
      deps = TREE_CHAIN (deps);
    }
  /* If all offsets corresponding to the collapsed loops are zero,
     this depend clause can be ignored.  FIXME: but there is still a
     flush needed.  We need to emit one __sync_synchronize () for it
     though (perhaps conditionally)?  Solve this together with the
     conservative dependence folding optimization.
  if (i >= fd->collapse)
    return;  */

  deps = OMP_CLAUSE_DECL (c);
  gsi_prev (&gsi2);
  edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
  edge e2 = split_block_after_labels (e1->dest);

  gsi2 = gsi_after_labels (e1->dest);
  *gsi = gsi_last_bb (e1->src);
  /* Second pass: build the runtime condition COND checking that the
     sunk-to iteration lies within the iteration space, and collect the
     per-dimension iteration numbers for the wait call.  */
  for (i = 0; i < fd->ordered; i++)
    {
      tree itype = TREE_TYPE (fd->loops[i].v);
      tree step = NULL_TREE;
      tree orig_off = NULL_TREE;
      if (POINTER_TYPE_P (itype))
	itype = sizetype;
      if (i)
	deps = TREE_CHAIN (deps);
      off = TREE_PURPOSE (deps);
      if (TREE_CODE (off) == TRUNC_DIV_EXPR)
	{
	  /* TRUNC_DIV_EXPR encodes a Fortran DO loop offset together
	     with its (possibly variable) step.  */
	  step = TREE_OPERAND (off, 1);
	  off = TREE_OPERAND (off, 0);
	  gcc_assert (fd->loops[i].cond_code == LT_EXPR
		      && integer_onep (fd->loops[i].step)
		      && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
	}
      tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
      if (step)
	{
	  off = fold_convert_loc (loc, itype, off);
	  orig_off = off;
	  off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
	}

      if (integer_zerop (off))
	t = boolean_true_node;
      else
	{
	  /* A is the iteration the sink refers to in this dimension.  */
	  tree a;
	  tree co = fold_convert_loc (loc, itype, off);
	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
	    {
	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
		co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
	      a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
				   TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
				   co);
	    }
	  else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
	    a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
				 fd->loops[i].v, co);
	  else
	    a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
				 fd->loops[i].v, co);
	  if (step)
	    {
	      /* Variable step: select the bound check at runtime based
		 on the sign of STEP.  */
	      tree t1, t2;
	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
		t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
				      fd->loops[i].n1);
	      else
		t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
				      fd->loops[i].n2);
	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
		t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
				      fd->loops[i].n2);
	      else
		t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
				      fd->loops[i].n1);
	      t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
				   step, build_int_cst (TREE_TYPE (step), 0));
	      if (TREE_CODE (step) != INTEGER_CST)
		{
		  t1 = unshare_expr (t1);
		  t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
						 false, GSI_CONTINUE_LINKING);
		  t2 = unshare_expr (t2);
		  t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
						 false, GSI_CONTINUE_LINKING);
		}
	      t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
				   t, t2, t1);
	    }
	  else if (fd->loops[i].cond_code == LT_EXPR)
	    {
	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
		t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
				     fd->loops[i].n1);
	      else
		t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
				     fd->loops[i].n2);
	    }
	  else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
	    t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
				 fd->loops[i].n2);
	  else
	    t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
				 fd->loops[i].n1);
	}
      if (cond)
	cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
      else
	cond = t;

      off = fold_convert_loc (loc, itype, off);

      if (step
	  || (fd->loops[i].cond_code == LT_EXPR
	      ? !integer_onep (fd->loops[i].step)
	      : !integer_minus_onep (fd->loops[i].step)))
	{
	  /* Non-unit step: also require that OFF is an exact multiple
	     of the step, otherwise the sink names a non-iteration.  */
	  if (step == NULL_TREE
	      && TYPE_UNSIGNED (itype)
	      && fd->loops[i].cond_code == GT_EXPR)
	    t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
				 fold_build1_loc (loc, NEGATE_EXPR, itype,
						  s));
	  else
	    t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
				 orig_off ? orig_off : off, s);
	  t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
			       build_int_cst (itype, 0));
	  if (integer_zerop (t) && !warned_step)
	    {
	      warning_at (loc, 0, "%<depend(sink)%> refers to iteration never "
			  "in the iteration space");
	      warned_step = true;
	    }
	  cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
				  cond, t);
	}

      /* T is the current iteration number in this dimension.  */
      if (i <= fd->collapse - 1 && fd->collapse > 1)
	t = fd->loop.v;
      else if (counts[i])
	t = counts[i];
      else
	{
	  t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
			       fd->loops[i].v, fd->loops[i].n1);
	  t = fold_convert_loc (loc, fd->iter_type, t);
	}
      if (step)
	/* We have divided off by step already earlier.  */;
      else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
	off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
			       fold_build1_loc (loc, NEGATE_EXPR, itype,
						s));
      else
	off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
	off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
      off = fold_convert_loc (loc, fd->iter_type, off);
      if (i <= fd->collapse - 1 && fd->collapse > 1)
	{
	  /* Collapsed dimensions are folded into one combined argument;
	     COFF accumulates the scaled partial offsets.  */
	  if (i)
	    off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
				   off);
	  if (i < fd->collapse - 1)
	    {
	      coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
				      counts[i]);
	      continue;
	    }
	}
      off = unshare_expr (off);
      t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
      t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
				    true, GSI_SAME_STMT);
      args.safe_push (t);
    }
  gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
  gimple_set_location (g, loc);
  gsi_insert_before (&gsi2, g, GSI_SAME_STMT);

  /* Guard the wait call with COND so that out-of-range sinks skip it.  */
  cond = unshare_expr (cond);
  cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
				   GSI_CONTINUE_LINKING);
  gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
  edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
  e3->probability = REG_BR_PROB_BASE / 8;
  e1->probability = REG_BR_PROB_BASE - e3->probability;
  e1->flags = EDGE_TRUE_VALUE;
  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);

  *gsi = gsi_after_labels (e2->dest);
}

/* Expand all #pragma omp ordered depend(source) and
   #pragma omp ordered depend(sink:...) constructs in the current
   #pragma omp for ordered(n) region.
 */

static void
expand_omp_ordered_source_sink (struct omp_region *region,
				struct omp_for_data *fd, tree *counts,
				basic_block cont_bb)
{
  struct omp_region *inner;
  int i;
  /* Decide how each ordered dimension tracks its current iteration
     number: NULL_TREE when it can be recomputed from the loop variable
     (or when the combined collapsed counter is used), a constant zero
     when there is no continue block, otherwise a fresh counter var.  */
  for (i = fd->collapse - 1; i < fd->ordered; i++)
    if (i == fd->collapse - 1 && fd->collapse > 1)
      counts[i] = NULL_TREE;
    else if (i >= fd->collapse && !cont_bb)
      counts[i] = build_zero_cst (fd->iter_type);
    else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
	     && integer_onep (fd->loops[i].step))
      counts[i] = NULL_TREE;
    else
      counts[i] = create_tmp_var (fd->iter_type, ".orditer");
  /* counts[fd->ordered] is the array holding the current iteration
     vector; it must be addressable because it is passed by address
     to the doacross runtime calls.  */
  tree atype
    = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
  counts[fd->ordered] = create_tmp_var (atype, ".orditera");
  TREE_ADDRESSABLE (counts[fd->ordered]) = 1;

  for (inner = region->inner; inner; inner = inner->next)
    if (inner->type == GIMPLE_OMP_ORDERED)
      {
	gomp_ordered *ord_stmt = inner->ord_stmt;
	gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
	location_t loc = gimple_location (ord_stmt);
	tree c;
	for (c = gimple_omp_ordered_clauses (ord_stmt);
	     c; c = OMP_CLAUSE_CHAIN (c))
	  if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
	    break;
	if (c)
	  expand_omp_ordered_source (&gsi, fd, counts, loc);
	for (c = gimple_omp_ordered_clauses (ord_stmt);
	     c; c = OMP_CLAUSE_CHAIN (c))
	  if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
	    expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
	/* The GIMPLE_OMP_ORDERED statement is fully replaced by the
	   runtime calls emitted above; remove it.  */
	gsi_remove (&gsi, true);
      }
}

/* Wrap the body into fd->ordered - fd->collapse loops that aren't
   collapsed.  */

static basic_block
expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
			      basic_block cont_bb, basic_block body_bb,
			      bool ordered_lastprivate)
{
  if (fd->ordered == fd->collapse)
    return cont_bb;

  if (!cont_bb)
    {
      /* Broken loop (no continue block): just initialize the loop
	 variables and the doacross counter array elements once.  */
      gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
      for (int i = fd->collapse; i < fd->ordered; i++)
	{
	  tree type = TREE_TYPE (fd->loops[i].v);
	  tree n1 = fold_convert (type, fd->loops[i].n1);
	  expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
	  tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
			      size_int (i - fd->collapse + 1),
			      NULL_TREE, NULL_TREE);
	  expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
	}
      return NULL;
    }

  /* Materialize one loop per non-collapsed ordered dimension, innermost
     first, restructuring the CFG around BODY_BB/CONT_BB each time.  */
  for (int i = fd->ordered - 1; i >= fd->collapse; i--)
    {
      tree t, type = TREE_TYPE (fd->loops[i].v);
      gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
      expand_omp_build_assign (&gsi, fd->loops[i].v,
			       fold_convert (type, fd->loops[i].n1));
      if (counts[i])
	expand_omp_build_assign (&gsi, counts[i],
				 build_zero_cst (fd->iter_type));
      tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
			  size_int (i - fd->collapse + 1),
			  NULL_TREE, NULL_TREE);
      expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
      if (!gsi_end_p (gsi))
	gsi_prev (&gsi);
      else
	gsi = gsi_last_bb (body_bb);
      edge e1 = split_block (body_bb, gsi_stmt (gsi));
      basic_block new_body = e1->dest;
      if (body_bb == cont_bb)
	cont_bb = new_body;
      edge e2 = NULL;
      basic_block new_header;
      if (EDGE_COUNT (cont_bb->preds) > 0)
	{
	  /* Emit the increment of V, the per-dimension counter update
	     and the doacross array element store in the continue block.  */
	  gsi = gsi_last_bb (cont_bb);
	  if (POINTER_TYPE_P (type))
	    t = fold_build_pointer_plus (fd->loops[i].v,
					 fold_convert (sizetype,
						       fd->loops[i].step));
	  else
	    t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
			     fold_convert (type, fd->loops[i].step));
	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);
	  if (counts[i])
	    {
	      t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
			       build_int_cst (fd->iter_type, 1));
	      expand_omp_build_assign (&gsi, counts[i], t);
	      t = counts[i];
	    }
	  else
	    {
	      /* Unit-step loop: the iteration number is simply V - N1.  */
	      t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
			       fd->loops[i].v, fd->loops[i].n1);
	      t = fold_convert (fd->iter_type, t);
	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
					    true, GSI_SAME_STMT);
	    }
	  aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
			 size_int (i - fd->collapse + 1),
			 NULL_TREE, NULL_TREE);
	  expand_omp_build_assign (&gsi, aref, t);
	  gsi_prev (&gsi);
	  e2 = split_block (cont_bb, gsi_stmt (gsi));
	  new_header = e2->dest;
	}
      else
	new_header = cont_bb;
      gsi = gsi_after_labels (new_header);
      tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
					 true, GSI_SAME_STMT);
      tree n2
	= force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
				    true, NULL_TREE, true, GSI_SAME_STMT);
      t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
      gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
      edge e3 = split_block (new_header, gsi_stmt (gsi));
      cont_bb = e3->dest;
      remove_edge (e1);
      make_edge (body_bb, new_header, EDGE_FALLTHRU);
      e3->flags = EDGE_FALSE_VALUE;
      e3->probability = REG_BR_PROB_BASE / 8;
      e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
      e1->probability = REG_BR_PROB_BASE - e3->probability;

      set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
      set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);

      if (e2)
	{
	  /* Register the newly created natural loop in the loop tree.  */
	  struct loop *loop = alloc_loop ();
	  loop->header = new_header;
	  loop->latch = e2->src;
	  add_loop (loop, body_bb->loop_father);
	}
    }

  /* If there are any lastprivate clauses and it is possible some loops
     might have zero iterations, ensure all the decls are initialized,
     otherwise we could crash evaluating C++ class iterators with lastprivate
     clauses.  */
  bool need_inits = false;
  for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
    if (need_inits)
      {
	tree type = TREE_TYPE (fd->loops[i].v);
	gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
	expand_omp_build_assign (&gsi, fd->loops[i].v,
				 fold_convert (type, fd->loops[i].n1));
      }
    else
      {
	/* Once any loop in the nest might iterate zero times, all
	   inner variables need explicit initialization.  */
	tree type = TREE_TYPE (fd->loops[i].v);
	tree this_cond = fold_build2 (fd->loops[i].cond_code,
				      boolean_type_node,
				      fold_convert (type, fd->loops[i].n1),
				      fold_convert (type, fd->loops[i].n2));
	if (!integer_onep (this_cond))
	  need_inits = true;
      }

  return cont_bb;
}

/* A subroutine of expand_omp_for.  Generate code for a parallel
   loop with any schedule.  Given parameters:

	for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">", we generate pseudocode

	more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
	if (more) goto L0; else goto L3;
    L0:
	V = istart0;
	iend = iend0;
    L1:
	BODY;
	V += STEP;
	if (V cond iend) goto L1; else goto L2;
    L2:
	if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
    L3:

    If this is a combined omp parallel loop, instead of the call to
    GOMP_loop_foo_start, we call GOMP_loop_foo_next.
    If this is gimple_omp_for_combined_p loop, then instead of assigning
    V and iend in L0 we assign the first two _looptemp_ clause decls of the
    inner GIMPLE_OMP_FOR and V += STEP; and
    if (V cond iend) goto L1; else goto L2; are removed.
+ + For collapsed loops, given parameters: + collapse(3) + for (V1 = N11; V1 cond1 N12; V1 += STEP1) + for (V2 = N21; V2 cond2 N22; V2 += STEP2) + for (V3 = N31; V3 cond3 N32; V3 += STEP3) + BODY; + + we generate pseudocode + + if (__builtin_expect (N32 cond3 N31, 0)) goto Z0; + if (cond3 is <) + adj = STEP3 - 1; + else + adj = STEP3 + 1; + count3 = (adj + N32 - N31) / STEP3; + if (__builtin_expect (N22 cond2 N21, 0)) goto Z0; + if (cond2 is <) + adj = STEP2 - 1; + else + adj = STEP2 + 1; + count2 = (adj + N22 - N21) / STEP2; + if (__builtin_expect (N12 cond1 N11, 0)) goto Z0; + if (cond1 is <) + adj = STEP1 - 1; + else + adj = STEP1 + 1; + count1 = (adj + N12 - N11) / STEP1; + count = count1 * count2 * count3; + goto Z1; + Z0: + count = 0; + Z1: + more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0); + if (more) goto L0; else goto L3; + L0: + V = istart0; + T = V; + V3 = N31 + (T % count3) * STEP3; + T = T / count3; + V2 = N21 + (T % count2) * STEP2; + T = T / count2; + V1 = N11 + T * STEP1; + iend = iend0; + L1: + BODY; + V += 1; + if (V < iend) goto L10; else goto L2; + L10: + V3 += STEP3; + if (V3 cond3 N32) goto L1; else goto L11; + L11: + V3 = N31; + V2 += STEP2; + if (V2 cond2 N22) goto L1; else goto L12; + L12: + V2 = N21; + V1 += STEP1; + goto L1; + L2: + if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3; + L3: + + */ + +static void +expand_omp_for_generic (struct omp_region *region, + struct omp_for_data *fd, + enum built_in_function start_fn, + enum built_in_function next_fn, + gimple *inner_stmt) +{ + tree type, istart0, iend0, iend; + tree t, vmain, vback, bias = NULL_TREE; + basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb; + basic_block l2_bb = NULL, l3_bb = NULL; + gimple_stmt_iterator gsi; + gassign *assign_stmt; + bool in_combined_parallel = is_combined_parallel (region); + bool broken_loop = region->cont == NULL; + edge e, ne; + tree *counts = NULL; + int i; + bool ordered_lastprivate = false; + + 
gcc_assert (!broken_loop || !in_combined_parallel); + gcc_assert (fd->iter_type == long_integer_type_node + || !in_combined_parallel); + + entry_bb = region->entry; + cont_bb = region->cont; + collapse_bb = NULL; + gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); + gcc_assert (broken_loop + || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest); + l0_bb = split_edge (FALLTHRU_EDGE (entry_bb)); + l1_bb = single_succ (l0_bb); + if (!broken_loop) + { + l2_bb = create_empty_bb (cont_bb); + gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb + || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest + == l1_bb)); + gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); + } + else + l2_bb = NULL; + l3_bb = BRANCH_EDGE (entry_bb)->dest; + exit_bb = region->exit; + + gsi = gsi_last_bb (entry_bb); + + gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); + if (fd->ordered + && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)), + OMP_CLAUSE_LASTPRIVATE)) + ordered_lastprivate = false; + if (fd->collapse > 1 || fd->ordered) + { + int first_zero_iter1 = -1, first_zero_iter2 = -1; + basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL; + + counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse); + expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, + zero_iter1_bb, first_zero_iter1, + zero_iter2_bb, first_zero_iter2, l2_dom_bb); + + if (zero_iter1_bb) + { + /* Some counts[i] vars might be uninitialized if + some loop has zero iterations. But the body shouldn't + be executed in that case, so just avoid uninit warnings. */ + for (i = first_zero_iter1; + i < (fd->ordered ? 
fd->ordered : fd->collapse); i++) + if (SSA_VAR_P (counts[i])) + TREE_NO_WARNING (counts[i]) = 1; + gsi_prev (&gsi); + e = split_block (entry_bb, gsi_stmt (gsi)); + entry_bb = e->dest; + make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU); + gsi = gsi_last_bb (entry_bb); + set_immediate_dominator (CDI_DOMINATORS, entry_bb, + get_immediate_dominator (CDI_DOMINATORS, + zero_iter1_bb)); + } + if (zero_iter2_bb) + { + /* Some counts[i] vars might be uninitialized if + some loop has zero iterations. But the body shouldn't + be executed in that case, so just avoid uninit warnings. */ + for (i = first_zero_iter2; i < fd->ordered; i++) + if (SSA_VAR_P (counts[i])) + TREE_NO_WARNING (counts[i]) = 1; + if (zero_iter1_bb) + make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU); + else + { + gsi_prev (&gsi); + e = split_block (entry_bb, gsi_stmt (gsi)); + entry_bb = e->dest; + make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU); + gsi = gsi_last_bb (entry_bb); + set_immediate_dominator (CDI_DOMINATORS, entry_bb, + get_immediate_dominator + (CDI_DOMINATORS, zero_iter2_bb)); + } + } + if (fd->collapse == 1) + { + counts[0] = fd->loop.n2; + fd->loop = fd->loops[0]; + } + } + + type = TREE_TYPE (fd->loop.v); + istart0 = create_tmp_var (fd->iter_type, ".istart0"); + iend0 = create_tmp_var (fd->iter_type, ".iend0"); + TREE_ADDRESSABLE (istart0) = 1; + TREE_ADDRESSABLE (iend0) = 1; + + /* See if we need to bias by LLONG_MIN. 
*/ + if (fd->iter_type == long_long_unsigned_type_node + && TREE_CODE (type) == INTEGER_TYPE + && !TYPE_UNSIGNED (type) + && fd->ordered == 0) + { + tree n1, n2; + + if (fd->loop.cond_code == LT_EXPR) + { + n1 = fd->loop.n1; + n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step); + } + else + { + n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step); + n2 = fd->loop.n1; + } + if (TREE_CODE (n1) != INTEGER_CST + || TREE_CODE (n2) != INTEGER_CST + || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0))) + bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type)); + } + + gimple_stmt_iterator gsif = gsi; + gsi_prev (&gsif); + + tree arr = NULL_TREE; + if (in_combined_parallel) + { + gcc_assert (fd->ordered == 0); + /* In a combined parallel loop, emit a call to + GOMP_loop_foo_next. */ + t = build_call_expr (builtin_decl_explicit (next_fn), 2, + build_fold_addr_expr (istart0), + build_fold_addr_expr (iend0)); + } + else + { + tree t0, t1, t2, t3, t4; + /* If this is not a combined parallel loop, emit a call to + GOMP_loop_foo_start in ENTRY_BB. 
*/ + t4 = build_fold_addr_expr (iend0); + t3 = build_fold_addr_expr (istart0); + if (fd->ordered) + { + t0 = build_int_cst (unsigned_type_node, + fd->ordered - fd->collapse + 1); + arr = create_tmp_var (build_array_type_nelts (fd->iter_type, + fd->ordered + - fd->collapse + 1), + ".omp_counts"); + DECL_NAMELESS (arr) = 1; + TREE_ADDRESSABLE (arr) = 1; + TREE_STATIC (arr) = 1; + vec<constructor_elt, va_gc> *v; + vec_alloc (v, fd->ordered - fd->collapse + 1); + int idx; + + for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++) + { + tree c; + if (idx == 0 && fd->collapse > 1) + c = fd->loop.n2; + else + c = counts[idx + fd->collapse - 1]; + tree purpose = size_int (idx); + CONSTRUCTOR_APPEND_ELT (v, purpose, c); + if (TREE_CODE (c) != INTEGER_CST) + TREE_STATIC (arr) = 0; + } + + DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v); + if (!TREE_STATIC (arr)) + force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR, + void_type_node, arr), + true, NULL_TREE, true, GSI_SAME_STMT); + t1 = build_fold_addr_expr (arr); + t2 = NULL_TREE; + } + else + { + t2 = fold_convert (fd->iter_type, fd->loop.step); + t1 = fd->loop.n2; + t0 = fd->loop.n1; + if (gimple_omp_for_combined_into_p (fd->for_stmt)) + { + tree innerc + = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + t0 = OMP_CLAUSE_DECL (innerc); + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + t1 = OMP_CLAUSE_DECL (innerc); + } + if (POINTER_TYPE_P (TREE_TYPE (t0)) + && TYPE_PRECISION (TREE_TYPE (t0)) + != TYPE_PRECISION (fd->iter_type)) + { + /* Avoid casting pointers to integer of a different size. 
*/ + tree itype = signed_type_for (type); + t1 = fold_convert (fd->iter_type, fold_convert (itype, t1)); + t0 = fold_convert (fd->iter_type, fold_convert (itype, t0)); + } + else + { + t1 = fold_convert (fd->iter_type, t1); + t0 = fold_convert (fd->iter_type, t0); + } + if (bias) + { + t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias); + t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias); + } + } + if (fd->iter_type == long_integer_type_node || fd->ordered) + { + if (fd->chunk_size) + { + t = fold_convert (fd->iter_type, fd->chunk_size); + t = omp_adjust_chunk_size (t, fd->simd_schedule); + if (fd->ordered) + t = build_call_expr (builtin_decl_explicit (start_fn), + 5, t0, t1, t, t3, t4); + else + t = build_call_expr (builtin_decl_explicit (start_fn), + 6, t0, t1, t2, t, t3, t4); + } + else if (fd->ordered) + t = build_call_expr (builtin_decl_explicit (start_fn), + 4, t0, t1, t3, t4); + else + t = build_call_expr (builtin_decl_explicit (start_fn), + 5, t0, t1, t2, t3, t4); + } + else + { + tree t5; + tree c_bool_type; + tree bfn_decl; + + /* The GOMP_loop_ull_*start functions have additional boolean + argument, true for < loops and false for > loops. + In Fortran, the C bool type can be different from + boolean_type_node. */ + bfn_decl = builtin_decl_explicit (start_fn); + c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl)); + t5 = build_int_cst (c_bool_type, + fd->loop.cond_code == LT_EXPR ? 
1 : 0); + if (fd->chunk_size) + { + tree bfn_decl = builtin_decl_explicit (start_fn); + t = fold_convert (fd->iter_type, fd->chunk_size); + t = omp_adjust_chunk_size (t, fd->simd_schedule); + t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4); + } + else + t = build_call_expr (builtin_decl_explicit (start_fn), + 6, t5, t0, t1, t2, t3, t4); + } + } + if (TREE_TYPE (t) != boolean_type_node) + t = fold_build2 (NE_EXPR, boolean_type_node, + t, build_int_cst (TREE_TYPE (t), 0)); + t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + true, GSI_SAME_STMT); + if (arr && !TREE_STATIC (arr)) + { + tree clobber = build_constructor (TREE_TYPE (arr), NULL); + TREE_THIS_VOLATILE (clobber) = 1; + gsi_insert_before (&gsi, gimple_build_assign (arr, clobber), + GSI_SAME_STMT); + } + gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); + + /* Remove the GIMPLE_OMP_FOR statement. */ + gsi_remove (&gsi, true); + + if (gsi_end_p (gsif)) + gsif = gsi_after_labels (gsi_bb (gsif)); + gsi_next (&gsif); + + /* Iteration setup for sequential loop goes in L0_BB. 
*/ + tree startvar = fd->loop.v; + tree endvar = NULL_TREE; + + if (gimple_omp_for_combined_p (fd->for_stmt)) + { + gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR + && gimple_omp_for_kind (inner_stmt) + == GF_OMP_FOR_KIND_SIMD); + tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + startvar = OMP_CLAUSE_DECL (innerc); + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + endvar = OMP_CLAUSE_DECL (innerc); + } + + gsi = gsi_start_bb (l0_bb); + t = istart0; + if (fd->ordered && fd->collapse == 1) + t = fold_build2 (MULT_EXPR, fd->iter_type, t, + fold_convert (fd->iter_type, fd->loop.step)); + else if (bias) + t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias); + if (fd->ordered && fd->collapse == 1) + { + if (POINTER_TYPE_P (TREE_TYPE (startvar))) + t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar), + fd->loop.n1, fold_convert (sizetype, t)); + else + { + t = fold_convert (TREE_TYPE (startvar), t); + t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar), + fd->loop.n1, t); + } + } + else + { + if (POINTER_TYPE_P (TREE_TYPE (startvar))) + t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t); + t = fold_convert (TREE_TYPE (startvar), t); + } + t = force_gimple_operand_gsi (&gsi, t, + DECL_P (startvar) + && TREE_ADDRESSABLE (startvar), + NULL_TREE, false, GSI_CONTINUE_LINKING); + assign_stmt = gimple_build_assign (startvar, t); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + + t = iend0; + if (fd->ordered && fd->collapse == 1) + t = fold_build2 (MULT_EXPR, fd->iter_type, t, + fold_convert (fd->iter_type, fd->loop.step)); + else if (bias) + t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias); + if (fd->ordered && fd->collapse == 1) + { + if (POINTER_TYPE_P (TREE_TYPE (startvar))) + t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar), + fd->loop.n1, fold_convert (sizetype, t)); + else + { + t = fold_convert 
(TREE_TYPE (startvar), t); + t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar), + fd->loop.n1, t); + } + } + else + { + if (POINTER_TYPE_P (TREE_TYPE (startvar))) + t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t); + t = fold_convert (TREE_TYPE (startvar), t); + } + iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + if (endvar) + { + assign_stmt = gimple_build_assign (endvar, iend); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend))) + assign_stmt = gimple_build_assign (fd->loop.v, iend); + else + assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + } + /* Handle linear clause adjustments. */ + tree itercnt = NULL_TREE; + if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR) + for (tree c = gimple_omp_for_clauses (fd->for_stmt); + c; c = OMP_CLAUSE_CHAIN (c)) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR + && !OMP_CLAUSE_LINEAR_NO_COPYIN (c)) + { + tree d = OMP_CLAUSE_DECL (c); + bool is_ref = omp_is_reference (d); + tree t = d, a, dest; + if (is_ref) + t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t); + tree type = TREE_TYPE (t); + if (POINTER_TYPE_P (type)) + type = sizetype; + dest = unshare_expr (t); + tree v = create_tmp_var (TREE_TYPE (t), NULL); + expand_omp_build_assign (&gsif, v, t); + if (itercnt == NULL_TREE) + { + itercnt = startvar; + tree n1 = fd->loop.n1; + if (POINTER_TYPE_P (TREE_TYPE (itercnt))) + { + itercnt + = fold_convert (signed_type_for (TREE_TYPE (itercnt)), + itercnt); + n1 = fold_convert (TREE_TYPE (itercnt), n1); + } + itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt), + itercnt, n1); + itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt), + itercnt, fd->loop.step); + itercnt = force_gimple_operand_gsi (&gsi, itercnt, true, + NULL_TREE, false, + GSI_CONTINUE_LINKING); + } + a = fold_build2 
(MULT_EXPR, type, + fold_convert (type, itercnt), + fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c))); + t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR + : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a); + t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + assign_stmt = gimple_build_assign (dest, t); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + } + if (fd->collapse > 1) + expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); + + if (fd->ordered) + { + /* Until now, counts array contained number of iterations or + variable containing it for ith loop. From now on, we need + those counts only for collapsed loops, and only for the 2nd + till the last collapsed one. Move those one element earlier, + we'll use counts[fd->collapse - 1] for the first source/sink + iteration counter and so on and counts[fd->ordered] + as the array holding the current counter values for + depend(source). */ + if (fd->collapse > 1) + memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0])); + if (broken_loop) + { + int i; + for (i = fd->collapse; i < fd->ordered; i++) + { + tree type = TREE_TYPE (fd->loops[i].v); + tree this_cond + = fold_build2 (fd->loops[i].cond_code, boolean_type_node, + fold_convert (type, fd->loops[i].n1), + fold_convert (type, fd->loops[i].n2)); + if (!integer_onep (this_cond)) + break; + } + if (i < fd->ordered) + { + cont_bb + = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb); + add_bb_to_loop (cont_bb, l1_bb->loop_father); + gimple_stmt_iterator gsi = gsi_after_labels (cont_bb); + gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v); + gsi_insert_before (&gsi, g, GSI_SAME_STMT); + make_edge (cont_bb, l3_bb, EDGE_FALLTHRU); + make_edge (cont_bb, l1_bb, 0); + l2_bb = create_empty_bb (cont_bb); + broken_loop = false; + } + } + expand_omp_ordered_source_sink (region, fd, counts, cont_bb); + cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb, + 
ordered_lastprivate); + if (counts[fd->collapse - 1]) + { + gcc_assert (fd->collapse == 1); + gsi = gsi_last_bb (l0_bb); + expand_omp_build_assign (&gsi, counts[fd->collapse - 1], + istart0, true); + gsi = gsi_last_bb (cont_bb); + t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1], + build_int_cst (fd->iter_type, 1)); + expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t); + tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], + size_zero_node, NULL_TREE, NULL_TREE); + expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]); + t = counts[fd->collapse - 1]; + } + else if (fd->collapse > 1) + t = fd->loop.v; + else + { + t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v), + fd->loops[0].v, fd->loops[0].n1); + t = fold_convert (fd->iter_type, t); + } + gsi = gsi_last_bb (l0_bb); + tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], + size_zero_node, NULL_TREE, NULL_TREE); + t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + expand_omp_build_assign (&gsi, aref, t, true); + } + + if (!broken_loop) + { + /* Code to control the increment and predicate for the sequential + loop goes in the CONT_BB. 
*/ + gsi = gsi_last_bb (cont_bb); + gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); + gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE); + vmain = gimple_omp_continue_control_use (cont_stmt); + vback = gimple_omp_continue_control_def (cont_stmt); + + if (!gimple_omp_for_combined_p (fd->for_stmt)) + { + if (POINTER_TYPE_P (type)) + t = fold_build_pointer_plus (vmain, fd->loop.step); + else + t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step); + t = force_gimple_operand_gsi (&gsi, t, + DECL_P (vback) + && TREE_ADDRESSABLE (vback), + NULL_TREE, true, GSI_SAME_STMT); + assign_stmt = gimple_build_assign (vback, t); + gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); + + if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE) + { + if (fd->collapse > 1) + t = fd->loop.v; + else + { + t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v), + fd->loops[0].v, fd->loops[0].n1); + t = fold_convert (fd->iter_type, t); + } + tree aref = build4 (ARRAY_REF, fd->iter_type, + counts[fd->ordered], size_zero_node, + NULL_TREE, NULL_TREE); + t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + true, GSI_SAME_STMT); + expand_omp_build_assign (&gsi, aref, t); + } + + t = build2 (fd->loop.cond_code, boolean_type_node, + DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback, + iend); + gcond *cond_stmt = gimple_build_cond_empty (t); + gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); + } + + /* Remove GIMPLE_OMP_CONTINUE. */ + gsi_remove (&gsi, true); + + if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) + collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb); + + /* Emit code to get the next parallel iteration in L2_BB. 
*/ + gsi = gsi_start_bb (l2_bb); + + t = build_call_expr (builtin_decl_explicit (next_fn), 2, + build_fold_addr_expr (istart0), + build_fold_addr_expr (iend0)); + t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + if (TREE_TYPE (t) != boolean_type_node) + t = fold_build2 (NE_EXPR, boolean_type_node, + t, build_int_cst (TREE_TYPE (t), 0)); + gcond *cond_stmt = gimple_build_cond_empty (t); + gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING); + } + + /* Add the loop cleanup function. */ + gsi = gsi_last_bb (exit_bb); + if (gimple_omp_return_nowait_p (gsi_stmt (gsi))) + t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT); + else if (gimple_omp_return_lhs (gsi_stmt (gsi))) + t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL); + else + t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END); + gcall *call_stmt = gimple_build_call (t, 0); + if (gimple_omp_return_lhs (gsi_stmt (gsi))) + gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi))); + gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT); + if (fd->ordered) + { + tree arr = counts[fd->ordered]; + tree clobber = build_constructor (TREE_TYPE (arr), NULL); + TREE_THIS_VOLATILE (clobber) = 1; + gsi_insert_after (&gsi, gimple_build_assign (arr, clobber), + GSI_SAME_STMT); + } + gsi_remove (&gsi, true); + + /* Connect the new blocks. 
*/ + find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE; + find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE; + + if (!broken_loop) + { + gimple_seq phis; + + e = find_edge (cont_bb, l3_bb); + ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE); + + phis = phi_nodes (l3_bb); + for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *phi = gsi_stmt (gsi); + SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne), + PHI_ARG_DEF_FROM_EDGE (phi, e)); + } + remove_edge (e); + + make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE); + e = find_edge (cont_bb, l1_bb); + if (e == NULL) + { + e = BRANCH_EDGE (cont_bb); + gcc_assert (single_succ (e->dest) == l1_bb); + } + if (gimple_omp_for_combined_p (fd->for_stmt)) + { + remove_edge (e); + e = NULL; + } + else if (fd->collapse > 1) + { + remove_edge (e); + e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE); + } + else + e->flags = EDGE_TRUE_VALUE; + if (e) + { + e->probability = REG_BR_PROB_BASE * 7 / 8; + find_edge (cont_bb, l2_bb)->probability = REG_BR_PROB_BASE / 8; + } + else + { + e = find_edge (cont_bb, l2_bb); + e->flags = EDGE_FALLTHRU; + } + make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE); + + if (gimple_in_ssa_p (cfun)) + { + /* Add phis to the outer loop that connect to the phis in the inner, + original loop, and move the loop entry value of the inner phi to + the loop entry value of the outer phi. 
*/ + gphi_iterator psi; + for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi)) + { + source_location locus; + gphi *nphi; + gphi *exit_phi = psi.phi (); + + edge l2_to_l3 = find_edge (l2_bb, l3_bb); + tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3); + + basic_block latch = BRANCH_EDGE (cont_bb)->dest; + edge latch_to_l1 = find_edge (latch, l1_bb); + gphi *inner_phi + = find_phi_with_arg_on_edge (exit_res, latch_to_l1); + + tree t = gimple_phi_result (exit_phi); + tree new_res = copy_ssa_name (t, NULL); + nphi = create_phi_node (new_res, l0_bb); + + edge l0_to_l1 = find_edge (l0_bb, l1_bb); + t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1); + locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1); + edge entry_to_l0 = find_edge (entry_bb, l0_bb); + add_phi_arg (nphi, t, entry_to_l0, locus); + + edge l2_to_l0 = find_edge (l2_bb, l0_bb); + add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION); + + add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION); + }; + } + + set_immediate_dominator (CDI_DOMINATORS, l2_bb, + recompute_dominator (CDI_DOMINATORS, l2_bb)); + set_immediate_dominator (CDI_DOMINATORS, l3_bb, + recompute_dominator (CDI_DOMINATORS, l3_bb)); + set_immediate_dominator (CDI_DOMINATORS, l0_bb, + recompute_dominator (CDI_DOMINATORS, l0_bb)); + set_immediate_dominator (CDI_DOMINATORS, l1_bb, + recompute_dominator (CDI_DOMINATORS, l1_bb)); + + /* We enter expand_omp_for_generic with a loop. This original loop may + have its own loop struct, or it may be part of an outer loop struct + (which may be the fake loop). */ + struct loop *outer_loop = entry_bb->loop_father; + bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop; + + add_bb_to_loop (l2_bb, outer_loop); + + /* We've added a new loop around the original loop. Allocate the + corresponding loop struct. 
*/ + struct loop *new_loop = alloc_loop (); + new_loop->header = l0_bb; + new_loop->latch = l2_bb; + add_loop (new_loop, outer_loop); + + /* Allocate a loop structure for the original loop unless we already + had one. */ + if (!orig_loop_has_loop_struct + && !gimple_omp_for_combined_p (fd->for_stmt)) + { + struct loop *orig_loop = alloc_loop (); + orig_loop->header = l1_bb; + /* The loop may have multiple latches. */ + add_loop (orig_loop, new_loop); + } + } +} + +/* A subroutine of expand_omp_for. Generate code for a parallel + loop with static schedule and no specified chunk size. Given + parameters: + + for (V = N1; V cond N2; V += STEP) BODY; + + where COND is "<" or ">", we generate pseudocode + + if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2; + if (cond is <) + adj = STEP - 1; + else + adj = STEP + 1; + if ((__typeof (V)) -1 > 0 && cond is >) + n = -(adj + N2 - N1) / -STEP; + else + n = (adj + N2 - N1) / STEP; + q = n / nthreads; + tt = n % nthreads; + if (threadid < tt) goto L3; else goto L4; + L3: + tt = 0; + q = q + 1; + L4: + s0 = q * threadid + tt; + e0 = s0 + q; + V = s0 * STEP + N1; + if (s0 >= e0) goto L2; else goto L0; + L0: + e = e0 * STEP + N1; + L1: + BODY; + V += STEP; + if (V cond e) goto L1; + L2: +*/ + +static void +expand_omp_for_static_nochunk (struct omp_region *region, + struct omp_for_data *fd, + gimple *inner_stmt) +{ + tree n, q, s0, e0, e, t, tt, nthreads, threadid; + tree type, itype, vmain, vback; + basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb; + basic_block body_bb, cont_bb, collapse_bb = NULL; + basic_block fin_bb; + gimple_stmt_iterator gsi; + edge ep; + bool broken_loop = region->cont == NULL; + tree *counts = NULL; + tree n1, n2, step; + + itype = type = TREE_TYPE (fd->loop.v); + if (POINTER_TYPE_P (type)) + itype = signed_type_for (type); + + entry_bb = region->entry; + cont_bb = region->cont; + gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); + fin_bb = BRANCH_EDGE (entry_bb)->dest; + gcc_assert 
(broken_loop + || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest)); + seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb)); + body_bb = single_succ (seq_start_bb); + if (!broken_loop) + { + gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb + || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb); + gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); + } + exit_bb = region->exit; + + /* Iteration space partitioning goes in ENTRY_BB. */ + gsi = gsi_last_bb (entry_bb); + gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); + + if (fd->collapse > 1) + { + int first_zero_iter = -1, dummy = -1; + basic_block l2_dom_bb = NULL, dummy_bb = NULL; + + counts = XALLOCAVEC (tree, fd->collapse); + expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, + fin_bb, first_zero_iter, + dummy_bb, dummy, l2_dom_bb); + t = NULL_TREE; + } + else if (gimple_omp_for_combined_into_p (fd->for_stmt)) + t = integer_one_node; + else + t = fold_binary (fd->loop.cond_code, boolean_type_node, + fold_convert (type, fd->loop.n1), + fold_convert (type, fd->loop.n2)); + if (fd->collapse == 1 + && TYPE_UNSIGNED (type) + && (t == NULL_TREE || !integer_onep (t))) + { + n1 = fold_convert (type, unshare_expr (fd->loop.n1)); + n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE, + true, GSI_SAME_STMT); + n2 = fold_convert (type, unshare_expr (fd->loop.n2)); + n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE, + true, GSI_SAME_STMT); + gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2, + NULL_TREE, NULL_TREE); + gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); + if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), + expand_omp_regimplify_p, NULL, NULL) + || walk_tree (gimple_cond_rhs_ptr (cond_stmt), + expand_omp_regimplify_p, NULL, NULL)) + { + gsi = gsi_for_stmt (cond_stmt); + gimple_regimplify_operands (cond_stmt, &gsi); + } + ep = split_block (entry_bb, cond_stmt); + ep->flags = EDGE_TRUE_VALUE; + entry_bb = ep->dest; + ep->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 
2000 - 1); + ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE); + ep->probability = REG_BR_PROB_BASE / 2000 - 1; + if (gimple_in_ssa_p (cfun)) + { + int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx; + for (gphi_iterator gpi = gsi_start_phis (fin_bb); + !gsi_end_p (gpi); gsi_next (&gpi)) + { + gphi *phi = gpi.phi (); + add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx), + ep, UNKNOWN_LOCATION); + } + } + gsi = gsi_last_bb (entry_bb); + } + + switch (gimple_omp_for_kind (fd->for_stmt)) + { + case GF_OMP_FOR_KIND_FOR: + nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); + threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); + break; + case GF_OMP_FOR_KIND_DISTRIBUTE: + nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS); + threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM); + break; + default: + gcc_unreachable (); + } + nthreads = build_call_expr (nthreads, 0); + nthreads = fold_convert (itype, nthreads); + nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE, + true, GSI_SAME_STMT); + threadid = build_call_expr (threadid, 0); + threadid = fold_convert (itype, threadid); + threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE, + true, GSI_SAME_STMT); + + n1 = fd->loop.n1; + n2 = fd->loop.n2; + step = fd->loop.step; + if (gimple_omp_for_combined_into_p (fd->for_stmt)) + { + tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + n1 = OMP_CLAUSE_DECL (innerc); + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + n2 = OMP_CLAUSE_DECL (innerc); + } + n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1), + true, NULL_TREE, true, GSI_SAME_STMT); + n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2), + true, NULL_TREE, true, GSI_SAME_STMT); + step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step), + true, NULL_TREE, true, GSI_SAME_STMT); + + t = 
build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1)); + t = fold_build2 (PLUS_EXPR, itype, step, t); + t = fold_build2 (PLUS_EXPR, itype, t, n2); + t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1)); + if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR) + t = fold_build2 (TRUNC_DIV_EXPR, itype, + fold_build1 (NEGATE_EXPR, itype, t), + fold_build1 (NEGATE_EXPR, itype, step)); + else + t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); + t = fold_convert (itype, t); + n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT); + + q = create_tmp_reg (itype, "q"); + t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads); + t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT); + gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT); + + tt = create_tmp_reg (itype, "tt"); + t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads); + t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT); + gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT); + + t = build2 (LT_EXPR, boolean_type_node, threadid, tt); + gcond *cond_stmt = gimple_build_cond_empty (t); + gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); + + second_bb = split_block (entry_bb, cond_stmt)->dest; + gsi = gsi_last_bb (second_bb); + gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); + + gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)), + GSI_SAME_STMT); + gassign *assign_stmt + = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1)); + gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); + + third_bb = split_block (second_bb, assign_stmt)->dest; + gsi = gsi_last_bb (third_bb); + gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); + + t = build2 (MULT_EXPR, itype, q, threadid); + t = build2 (PLUS_EXPR, itype, t, tt); + s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT); + + t = fold_build2 (PLUS_EXPR, itype, 
s0, q); + e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT); + + t = build2 (GE_EXPR, boolean_type_node, s0, e0); + gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); + + /* Remove the GIMPLE_OMP_FOR statement. */ + gsi_remove (&gsi, true); + + /* Setup code for sequential iteration goes in SEQ_START_BB. */ + gsi = gsi_start_bb (seq_start_bb); + + tree startvar = fd->loop.v; + tree endvar = NULL_TREE; + + if (gimple_omp_for_combined_p (fd->for_stmt)) + { + tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL + ? gimple_omp_parallel_clauses (inner_stmt) + : gimple_omp_for_clauses (inner_stmt); + tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + startvar = OMP_CLAUSE_DECL (innerc); + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + endvar = OMP_CLAUSE_DECL (innerc); + if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST + && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE) + { + int i; + for (i = 1; i < fd->collapse; i++) + { + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + } + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), + OMP_CLAUSE__LOOPTEMP_); + if (innerc) + { + /* If needed (distribute parallel for with lastprivate), + propagate down the total number of iterations. 
*/ + tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)), + fd->loop.n2); + t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false, + GSI_CONTINUE_LINKING); + assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + } + } + } + t = fold_convert (itype, s0); + t = fold_build2 (MULT_EXPR, itype, t, step); + if (POINTER_TYPE_P (type)) + t = fold_build_pointer_plus (n1, t); + else + t = fold_build2 (PLUS_EXPR, type, t, n1); + t = fold_convert (TREE_TYPE (startvar), t); + t = force_gimple_operand_gsi (&gsi, t, + DECL_P (startvar) + && TREE_ADDRESSABLE (startvar), + NULL_TREE, false, GSI_CONTINUE_LINKING); + assign_stmt = gimple_build_assign (startvar, t); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + + t = fold_convert (itype, e0); + t = fold_build2 (MULT_EXPR, itype, t, step); + if (POINTER_TYPE_P (type)) + t = fold_build_pointer_plus (n1, t); + else + t = fold_build2 (PLUS_EXPR, type, t, n1); + t = fold_convert (TREE_TYPE (startvar), t); + e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + if (endvar) + { + assign_stmt = gimple_build_assign (endvar, e); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e))) + assign_stmt = gimple_build_assign (fd->loop.v, e); + else + assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + } + /* Handle linear clause adjustments. 
*/ + tree itercnt = NULL_TREE; + if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR) + for (tree c = gimple_omp_for_clauses (fd->for_stmt); + c; c = OMP_CLAUSE_CHAIN (c)) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR + && !OMP_CLAUSE_LINEAR_NO_COPYIN (c)) + { + tree d = OMP_CLAUSE_DECL (c); + bool is_ref = omp_is_reference (d); + tree t = d, a, dest; + if (is_ref) + t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t); + if (itercnt == NULL_TREE) + { + if (gimple_omp_for_combined_into_p (fd->for_stmt)) + { + itercnt = fold_build2 (MINUS_EXPR, itype, + fold_convert (itype, n1), + fold_convert (itype, fd->loop.n1)); + itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step); + itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0); + itercnt = force_gimple_operand_gsi (&gsi, itercnt, true, + NULL_TREE, false, + GSI_CONTINUE_LINKING); + } + else + itercnt = s0; + } + tree type = TREE_TYPE (t); + if (POINTER_TYPE_P (type)) + type = sizetype; + a = fold_build2 (MULT_EXPR, type, + fold_convert (type, itercnt), + fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c))); + dest = unshare_expr (t); + t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR + : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a); + t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + assign_stmt = gimple_build_assign (dest, t); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + } + if (fd->collapse > 1) + expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); + + if (!broken_loop) + { + /* The code controlling the sequential loop replaces the + GIMPLE_OMP_CONTINUE. 
*/ + gsi = gsi_last_bb (cont_bb); + gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); + gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE); + vmain = gimple_omp_continue_control_use (cont_stmt); + vback = gimple_omp_continue_control_def (cont_stmt); + + if (!gimple_omp_for_combined_p (fd->for_stmt)) + { + if (POINTER_TYPE_P (type)) + t = fold_build_pointer_plus (vmain, step); + else + t = fold_build2 (PLUS_EXPR, type, vmain, step); + t = force_gimple_operand_gsi (&gsi, t, + DECL_P (vback) + && TREE_ADDRESSABLE (vback), + NULL_TREE, true, GSI_SAME_STMT); + assign_stmt = gimple_build_assign (vback, t); + gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); + + t = build2 (fd->loop.cond_code, boolean_type_node, + DECL_P (vback) && TREE_ADDRESSABLE (vback) + ? t : vback, e); + gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); + } + + /* Remove the GIMPLE_OMP_CONTINUE statement. */ + gsi_remove (&gsi, true); + + if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) + collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb); + } + + /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */ + gsi = gsi_last_bb (exit_bb); + if (!gimple_omp_return_nowait_p (gsi_stmt (gsi))) + { + t = gimple_omp_return_lhs (gsi_stmt (gsi)); + gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT); + } + gsi_remove (&gsi, true); + + /* Connect all the blocks. 
*/ + ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE); + ep->probability = REG_BR_PROB_BASE / 4 * 3; + ep = find_edge (entry_bb, second_bb); + ep->flags = EDGE_TRUE_VALUE; + ep->probability = REG_BR_PROB_BASE / 4; + find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE; + find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE; + + if (!broken_loop) + { + ep = find_edge (cont_bb, body_bb); + if (ep == NULL) + { + ep = BRANCH_EDGE (cont_bb); + gcc_assert (single_succ (ep->dest) == body_bb); + } + if (gimple_omp_for_combined_p (fd->for_stmt)) + { + remove_edge (ep); + ep = NULL; + } + else if (fd->collapse > 1) + { + remove_edge (ep); + ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE); + } + else + ep->flags = EDGE_TRUE_VALUE; + find_edge (cont_bb, fin_bb)->flags + = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU; + } + + set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb); + set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb); + set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb); + + set_immediate_dominator (CDI_DOMINATORS, body_bb, + recompute_dominator (CDI_DOMINATORS, body_bb)); + set_immediate_dominator (CDI_DOMINATORS, fin_bb, + recompute_dominator (CDI_DOMINATORS, fin_bb)); + + struct loop *loop = body_bb->loop_father; + if (loop != entry_bb->loop_father) + { + gcc_assert (broken_loop || loop->header == body_bb); + gcc_assert (broken_loop + || loop->latch == region->cont + || single_pred (loop->latch) == region->cont); + return; + } + + if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt)) + { + loop = alloc_loop (); + loop->header = body_bb; + if (collapse_bb == NULL) + loop->latch = cont_bb; + add_loop (loop, body_bb->loop_father); + } +} + +/* Return phi in E->DEST with ARG on edge E. 
*/ + +static gphi * +find_phi_with_arg_on_edge (tree arg, edge e) +{ + basic_block bb = e->dest; + + for (gphi_iterator gpi = gsi_start_phis (bb); + !gsi_end_p (gpi); + gsi_next (&gpi)) + { + gphi *phi = gpi.phi (); + if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg) + return phi; + } + + return NULL; +} + +/* A subroutine of expand_omp_for. Generate code for a parallel + loop with static schedule and a specified chunk size. Given + parameters: + + for (V = N1; V cond N2; V += STEP) BODY; + + where COND is "<" or ">", we generate pseudocode + + if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2; + if (cond is <) + adj = STEP - 1; + else + adj = STEP + 1; + if ((__typeof (V)) -1 > 0 && cond is >) + n = -(adj + N2 - N1) / -STEP; + else + n = (adj + N2 - N1) / STEP; + trip = 0; + V = threadid * CHUNK * STEP + N1; -- this extra definition of V is + here so that V is defined + if the loop is not entered + L0: + s0 = (trip * nthreads + threadid) * CHUNK; + e0 = min(s0 + CHUNK, n); + if (s0 < n) goto L1; else goto L4; + L1: + V = s0 * STEP + N1; + e = e0 * STEP + N1; + L2: + BODY; + V += STEP; + if (V cond e) goto L2; else goto L3; + L3: + trip += 1; + goto L0; + L4: +*/ + +static void +expand_omp_for_static_chunk (struct omp_region *region, + struct omp_for_data *fd, gimple *inner_stmt) +{ + tree n, s0, e0, e, t; + tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid; + tree type, itype, vmain, vback, vextra; + basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb; + basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb; + gimple_stmt_iterator gsi; + edge se; + bool broken_loop = region->cont == NULL; + tree *counts = NULL; + tree n1, n2, step; + + itype = type = TREE_TYPE (fd->loop.v); + if (POINTER_TYPE_P (type)) + itype = signed_type_for (type); + + entry_bb = region->entry; + se = split_block (entry_bb, last_stmt (entry_bb)); + entry_bb = se->src; + iter_part_bb = se->dest; + cont_bb = region->cont; + gcc_assert (EDGE_COUNT 
(iter_part_bb->succs) == 2); + fin_bb = BRANCH_EDGE (iter_part_bb)->dest; + gcc_assert (broken_loop + || fin_bb == FALLTHRU_EDGE (cont_bb)->dest); + seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb)); + body_bb = single_succ (seq_start_bb); + if (!broken_loop) + { + gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb + || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb); + gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); + trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb)); + } + exit_bb = region->exit; + + /* Trip and adjustment setup goes in ENTRY_BB. */ + gsi = gsi_last_bb (entry_bb); + gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); + + if (fd->collapse > 1) + { + int first_zero_iter = -1, dummy = -1; + basic_block l2_dom_bb = NULL, dummy_bb = NULL; + + counts = XALLOCAVEC (tree, fd->collapse); + expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, + fin_bb, first_zero_iter, + dummy_bb, dummy, l2_dom_bb); + t = NULL_TREE; + } + else if (gimple_omp_for_combined_into_p (fd->for_stmt)) + t = integer_one_node; + else + t = fold_binary (fd->loop.cond_code, boolean_type_node, + fold_convert (type, fd->loop.n1), + fold_convert (type, fd->loop.n2)); + if (fd->collapse == 1 + && TYPE_UNSIGNED (type) + && (t == NULL_TREE || !integer_onep (t))) + { + n1 = fold_convert (type, unshare_expr (fd->loop.n1)); + n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE, + true, GSI_SAME_STMT); + n2 = fold_convert (type, unshare_expr (fd->loop.n2)); + n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE, + true, GSI_SAME_STMT); + gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2, + NULL_TREE, NULL_TREE); + gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); + if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), + expand_omp_regimplify_p, NULL, NULL) + || walk_tree (gimple_cond_rhs_ptr (cond_stmt), + expand_omp_regimplify_p, NULL, NULL)) + { + gsi = gsi_for_stmt (cond_stmt); + gimple_regimplify_operands (cond_stmt, &gsi); + } + se = 
split_block (entry_bb, cond_stmt); + se->flags = EDGE_TRUE_VALUE; + entry_bb = se->dest; + se->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1); + se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE); + se->probability = REG_BR_PROB_BASE / 2000 - 1; + if (gimple_in_ssa_p (cfun)) + { + int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx; + for (gphi_iterator gpi = gsi_start_phis (fin_bb); + !gsi_end_p (gpi); gsi_next (&gpi)) + { + gphi *phi = gpi.phi (); + add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx), + se, UNKNOWN_LOCATION); + } + } + gsi = gsi_last_bb (entry_bb); + } + + switch (gimple_omp_for_kind (fd->for_stmt)) + { + case GF_OMP_FOR_KIND_FOR: + nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); + threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); + break; + case GF_OMP_FOR_KIND_DISTRIBUTE: + nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS); + threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM); + break; + default: + gcc_unreachable (); + } + nthreads = build_call_expr (nthreads, 0); + nthreads = fold_convert (itype, nthreads); + nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE, + true, GSI_SAME_STMT); + threadid = build_call_expr (threadid, 0); + threadid = fold_convert (itype, threadid); + threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE, + true, GSI_SAME_STMT); + + n1 = fd->loop.n1; + n2 = fd->loop.n2; + step = fd->loop.step; + if (gimple_omp_for_combined_into_p (fd->for_stmt)) + { + tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + n1 = OMP_CLAUSE_DECL (innerc); + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + n2 = OMP_CLAUSE_DECL (innerc); + } + n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1), + true, NULL_TREE, true, GSI_SAME_STMT); + n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2), + true, 
NULL_TREE, true, GSI_SAME_STMT); + step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step), + true, NULL_TREE, true, GSI_SAME_STMT); + tree chunk_size = fold_convert (itype, fd->chunk_size); + chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule); + chunk_size + = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true, + GSI_SAME_STMT); + + t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1)); + t = fold_build2 (PLUS_EXPR, itype, step, t); + t = fold_build2 (PLUS_EXPR, itype, t, n2); + t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1)); + if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR) + t = fold_build2 (TRUNC_DIV_EXPR, itype, + fold_build1 (NEGATE_EXPR, itype, t), + fold_build1 (NEGATE_EXPR, itype, step)); + else + t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); + t = fold_convert (itype, t); + n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + true, GSI_SAME_STMT); + + trip_var = create_tmp_reg (itype, ".trip"); + if (gimple_in_ssa_p (cfun)) + { + trip_init = make_ssa_name (trip_var); + trip_main = make_ssa_name (trip_var); + trip_back = make_ssa_name (trip_var); + } + else + { + trip_init = trip_var; + trip_main = trip_var; + trip_back = trip_var; + } + + gassign *assign_stmt + = gimple_build_assign (trip_init, build_int_cst (itype, 0)); + gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); + + t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size); + t = fold_build2 (MULT_EXPR, itype, t, step); + if (POINTER_TYPE_P (type)) + t = fold_build_pointer_plus (n1, t); + else + t = fold_build2 (PLUS_EXPR, type, t, n1); + vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + true, GSI_SAME_STMT); + + /* Remove the GIMPLE_OMP_FOR. */ + gsi_remove (&gsi, true); + + gimple_stmt_iterator gsif = gsi; + + /* Iteration space partitioning goes in ITER_PART_BB. 
*/ + gsi = gsi_last_bb (iter_part_bb); + + t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads); + t = fold_build2 (PLUS_EXPR, itype, t, threadid); + t = fold_build2 (MULT_EXPR, itype, t, chunk_size); + s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + + t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size); + t = fold_build2 (MIN_EXPR, itype, t, n); + e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + + t = build2 (LT_EXPR, boolean_type_node, s0, n); + gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING); + + /* Setup code for sequential iteration goes in SEQ_START_BB. */ + gsi = gsi_start_bb (seq_start_bb); + + tree startvar = fd->loop.v; + tree endvar = NULL_TREE; + + if (gimple_omp_for_combined_p (fd->for_stmt)) + { + tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL + ? gimple_omp_parallel_clauses (inner_stmt) + : gimple_omp_for_clauses (inner_stmt); + tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + startvar = OMP_CLAUSE_DECL (innerc); + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + endvar = OMP_CLAUSE_DECL (innerc); + if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST + && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE) + { + int i; + for (i = 1; i < fd->collapse; i++) + { + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + } + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), + OMP_CLAUSE__LOOPTEMP_); + if (innerc) + { + /* If needed (distribute parallel for with lastprivate), + propagate down the total number of iterations. 
*/ + tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)), + fd->loop.n2); + t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false, + GSI_CONTINUE_LINKING); + assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + } + } + } + + t = fold_convert (itype, s0); + t = fold_build2 (MULT_EXPR, itype, t, step); + if (POINTER_TYPE_P (type)) + t = fold_build_pointer_plus (n1, t); + else + t = fold_build2 (PLUS_EXPR, type, t, n1); + t = fold_convert (TREE_TYPE (startvar), t); + t = force_gimple_operand_gsi (&gsi, t, + DECL_P (startvar) + && TREE_ADDRESSABLE (startvar), + NULL_TREE, false, GSI_CONTINUE_LINKING); + assign_stmt = gimple_build_assign (startvar, t); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + + t = fold_convert (itype, e0); + t = fold_build2 (MULT_EXPR, itype, t, step); + if (POINTER_TYPE_P (type)) + t = fold_build_pointer_plus (n1, t); + else + t = fold_build2 (PLUS_EXPR, type, t, n1); + t = fold_convert (TREE_TYPE (startvar), t); + e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + if (endvar) + { + assign_stmt = gimple_build_assign (endvar, e); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e))) + assign_stmt = gimple_build_assign (fd->loop.v, e); + else + assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + } + /* Handle linear clause adjustments. 
*/ + tree itercnt = NULL_TREE, itercntbias = NULL_TREE; + if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR) + for (tree c = gimple_omp_for_clauses (fd->for_stmt); + c; c = OMP_CLAUSE_CHAIN (c)) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR + && !OMP_CLAUSE_LINEAR_NO_COPYIN (c)) + { + tree d = OMP_CLAUSE_DECL (c); + bool is_ref = omp_is_reference (d); + tree t = d, a, dest; + if (is_ref) + t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t); + tree type = TREE_TYPE (t); + if (POINTER_TYPE_P (type)) + type = sizetype; + dest = unshare_expr (t); + tree v = create_tmp_var (TREE_TYPE (t), NULL); + expand_omp_build_assign (&gsif, v, t); + if (itercnt == NULL_TREE) + { + if (gimple_omp_for_combined_into_p (fd->for_stmt)) + { + itercntbias + = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1), + fold_convert (itype, fd->loop.n1)); + itercntbias = fold_build2 (EXACT_DIV_EXPR, itype, + itercntbias, step); + itercntbias + = force_gimple_operand_gsi (&gsif, itercntbias, true, + NULL_TREE, true, + GSI_SAME_STMT); + itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0); + itercnt = force_gimple_operand_gsi (&gsi, itercnt, true, + NULL_TREE, false, + GSI_CONTINUE_LINKING); + } + else + itercnt = s0; + } + a = fold_build2 (MULT_EXPR, type, + fold_convert (type, itercnt), + fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c))); + t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR + : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a); + t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + assign_stmt = gimple_build_assign (dest, t); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + } + if (fd->collapse > 1) + expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); + + if (!broken_loop) + { + /* The code controlling the sequential loop goes in CONT_BB, + replacing the GIMPLE_OMP_CONTINUE. 
*/ + gsi = gsi_last_bb (cont_bb); + gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); + vmain = gimple_omp_continue_control_use (cont_stmt); + vback = gimple_omp_continue_control_def (cont_stmt); + + if (!gimple_omp_for_combined_p (fd->for_stmt)) + { + if (POINTER_TYPE_P (type)) + t = fold_build_pointer_plus (vmain, step); + else + t = fold_build2 (PLUS_EXPR, type, vmain, step); + if (DECL_P (vback) && TREE_ADDRESSABLE (vback)) + t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + true, GSI_SAME_STMT); + assign_stmt = gimple_build_assign (vback, t); + gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); + + if (tree_int_cst_equal (fd->chunk_size, integer_one_node)) + t = build2 (EQ_EXPR, boolean_type_node, + build_int_cst (itype, 0), + build_int_cst (itype, 1)); + else + t = build2 (fd->loop.cond_code, boolean_type_node, + DECL_P (vback) && TREE_ADDRESSABLE (vback) + ? t : vback, e); + gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); + } + + /* Remove GIMPLE_OMP_CONTINUE. */ + gsi_remove (&gsi, true); + + if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) + collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb); + + /* Trip update code goes into TRIP_UPDATE_BB. */ + gsi = gsi_start_bb (trip_update_bb); + + t = build_int_cst (itype, 1); + t = build2 (PLUS_EXPR, itype, trip_main, t); + assign_stmt = gimple_build_assign (trip_back, t); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + } + + /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */ + gsi = gsi_last_bb (exit_bb); + if (!gimple_omp_return_nowait_p (gsi_stmt (gsi))) + { + t = gimple_omp_return_lhs (gsi_stmt (gsi)); + gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT); + } + gsi_remove (&gsi, true); + + /* Connect the new blocks. 
*/ + find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE; + find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE; + + if (!broken_loop) + { + se = find_edge (cont_bb, body_bb); + if (se == NULL) + { + se = BRANCH_EDGE (cont_bb); + gcc_assert (single_succ (se->dest) == body_bb); + } + if (gimple_omp_for_combined_p (fd->for_stmt)) + { + remove_edge (se); + se = NULL; + } + else if (fd->collapse > 1) + { + remove_edge (se); + se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE); + } + else + se->flags = EDGE_TRUE_VALUE; + find_edge (cont_bb, trip_update_bb)->flags + = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU; + + redirect_edge_and_branch (single_succ_edge (trip_update_bb), iter_part_bb); + } + + if (gimple_in_ssa_p (cfun)) + { + gphi_iterator psi; + gphi *phi; + edge re, ene; + edge_var_map *vm; + size_t i; + + gcc_assert (fd->collapse == 1 && !broken_loop); + + /* When we redirect the edge from trip_update_bb to iter_part_bb, we + remove arguments of the phi nodes in fin_bb. We need to create + appropriate phi nodes in iter_part_bb instead. */ + se = find_edge (iter_part_bb, fin_bb); + re = single_succ_edge (trip_update_bb); + vec<edge_var_map> *head = redirect_edge_var_map_vector (re); + ene = single_succ_edge (entry_bb); + + psi = gsi_start_phis (fin_bb); + for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm); + gsi_next (&psi), ++i) + { + gphi *nphi; + source_location locus; + + phi = psi.phi (); + t = gimple_phi_result (phi); + gcc_assert (t == redirect_edge_var_map_result (vm)); + + if (!single_pred_p (fin_bb)) + t = copy_ssa_name (t, phi); + + nphi = create_phi_node (t, iter_part_bb); + + t = PHI_ARG_DEF_FROM_EDGE (phi, se); + locus = gimple_phi_arg_location_from_edge (phi, se); + + /* A special case -- fd->loop.v is not yet computed in + iter_part_bb, we need to use vextra instead. 
*/ + if (t == fd->loop.v) + t = vextra; + add_phi_arg (nphi, t, ene, locus); + locus = redirect_edge_var_map_location (vm); + tree back_arg = redirect_edge_var_map_def (vm); + add_phi_arg (nphi, back_arg, re, locus); + edge ce = find_edge (cont_bb, body_bb); + if (ce == NULL) + { + ce = BRANCH_EDGE (cont_bb); + gcc_assert (single_succ (ce->dest) == body_bb); + ce = single_succ_edge (ce->dest); + } + gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce); + gcc_assert (inner_loop_phi != NULL); + add_phi_arg (inner_loop_phi, gimple_phi_result (nphi), + find_edge (seq_start_bb, body_bb), locus); + + if (!single_pred_p (fin_bb)) + add_phi_arg (phi, gimple_phi_result (nphi), se, locus); + } + gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ())); + redirect_edge_var_map_clear (re); + if (single_pred_p (fin_bb)) + while (1) + { + psi = gsi_start_phis (fin_bb); + if (gsi_end_p (psi)) + break; + remove_phi_node (&psi, false); + } + + /* Make phi node for trip. */ + phi = create_phi_node (trip_main, iter_part_bb); + add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb), + UNKNOWN_LOCATION); + add_phi_arg (phi, trip_init, single_succ_edge (entry_bb), + UNKNOWN_LOCATION); + } + + if (!broken_loop) + set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb); + set_immediate_dominator (CDI_DOMINATORS, iter_part_bb, + recompute_dominator (CDI_DOMINATORS, iter_part_bb)); + set_immediate_dominator (CDI_DOMINATORS, fin_bb, + recompute_dominator (CDI_DOMINATORS, fin_bb)); + set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, + recompute_dominator (CDI_DOMINATORS, seq_start_bb)); + set_immediate_dominator (CDI_DOMINATORS, body_bb, + recompute_dominator (CDI_DOMINATORS, body_bb)); + + if (!broken_loop) + { + struct loop *loop = body_bb->loop_father; + struct loop *trip_loop = alloc_loop (); + trip_loop->header = iter_part_bb; + trip_loop->latch = trip_update_bb; + add_loop (trip_loop, iter_part_bb->loop_father); + + if (loop != 
entry_bb->loop_father) + { + gcc_assert (loop->header == body_bb); + gcc_assert (loop->latch == region->cont + || single_pred (loop->latch) == region->cont); + trip_loop->inner = loop; + return; + } + + if (!gimple_omp_for_combined_p (fd->for_stmt)) + { + loop = alloc_loop (); + loop->header = body_bb; + if (collapse_bb == NULL) + loop->latch = cont_bb; + add_loop (loop, trip_loop); + } + } +} + +/* A subroutine of expand_omp_for. Generate code for _Cilk_for loop. + Given parameters: + for (V = N1; V cond N2; V += STEP) BODY; + + where COND is "<" or ">" or "!=", we generate pseudocode + + for (ind_var = low; ind_var < high; ind_var++) + { + V = n1 + (ind_var * STEP) + + <BODY> + } + + In the above pseudocode, low and high are function parameters of the + child function. In the function below, we are inserting a temp. + variable that will be making a call to two OMP functions that will not be + found in the body of _Cilk_for (since OMP_FOR cannot be mixed + with _Cilk_for). These functions are replaced with low and high + by the function that handles taskreg. 
*/ + + +static void +expand_cilk_for (struct omp_region *region, struct omp_for_data *fd) +{ + bool broken_loop = region->cont == NULL; + basic_block entry_bb = region->entry; + basic_block cont_bb = region->cont; + + gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); + gcc_assert (broken_loop + || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest); + basic_block l0_bb = FALLTHRU_EDGE (entry_bb)->dest; + basic_block l1_bb, l2_bb; + + if (!broken_loop) + { + gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb); + gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); + l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest; + l2_bb = BRANCH_EDGE (entry_bb)->dest; + } + else + { + BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL; + l1_bb = split_edge (BRANCH_EDGE (entry_bb)); + l2_bb = single_succ (l1_bb); + } + basic_block exit_bb = region->exit; + basic_block l2_dom_bb = NULL; + + gimple_stmt_iterator gsi = gsi_last_bb (entry_bb); + + /* Below statements until the "tree high_val = ..." are pseudo statements + used to pass information to be used by expand_omp_taskreg. + low_val and high_val will be replaced by the __low and __high + parameter from the child function. + + The call_exprs part is a place-holder, it is mainly used + to distinctly identify to the top-level part that this is + where we should put low and high (reasoning given in header + comment). 
*/ + + tree child_fndecl + = gimple_omp_parallel_child_fn ( + as_a <gomp_parallel *> (last_stmt (region->outer->entry))); + tree t, low_val = NULL_TREE, high_val = NULL_TREE; + for (t = DECL_ARGUMENTS (child_fndecl); t; t = TREE_CHAIN (t)) + { + if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (t)), "__high")) + high_val = t; + else if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (t)), "__low")) + low_val = t; + } + gcc_assert (low_val && high_val); + + tree type = TREE_TYPE (low_val); + tree ind_var = create_tmp_reg (type, "__cilk_ind_var"); + gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); + + /* Not needed in SSA form right now. */ + gcc_assert (!gimple_in_ssa_p (cfun)); + if (l2_dom_bb == NULL) + l2_dom_bb = l1_bb; + + tree n1 = low_val; + tree n2 = high_val; + + gimple *stmt = gimple_build_assign (ind_var, n1); + + /* Replace the GIMPLE_OMP_FOR statement. */ + gsi_replace (&gsi, stmt, true); + + if (!broken_loop) + { + /* Code to control the increment goes in the CONT_BB. */ + gsi = gsi_last_bb (cont_bb); + stmt = gsi_stmt (gsi); + gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE); + stmt = gimple_build_assign (ind_var, PLUS_EXPR, ind_var, + build_one_cst (type)); + + /* Replace GIMPLE_OMP_CONTINUE. */ + gsi_replace (&gsi, stmt, true); + } + + /* Emit the condition in L1_BB. */ + gsi = gsi_after_labels (l1_bb); + t = fold_build2 (MULT_EXPR, TREE_TYPE (fd->loop.step), + fold_convert (TREE_TYPE (fd->loop.step), ind_var), + fd->loop.step); + if (POINTER_TYPE_P (TREE_TYPE (fd->loop.n1))) + t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (fd->loop.n1), + fd->loop.n1, fold_convert (sizetype, t)); + else + t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loop.n1), + fd->loop.n1, fold_convert (TREE_TYPE (fd->loop.n1), t)); + t = fold_convert (TREE_TYPE (fd->loop.v), t); + expand_omp_build_assign (&gsi, fd->loop.v, t); + + /* The condition is always '<' since the runtime will fill in the low + and high values. 
*/ + stmt = gimple_build_cond (LT_EXPR, ind_var, n2, NULL_TREE, NULL_TREE); + gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); + + /* Remove GIMPLE_OMP_RETURN. */ + gsi = gsi_last_bb (exit_bb); + gsi_remove (&gsi, true); + + /* Connect the new blocks. */ + remove_edge (FALLTHRU_EDGE (entry_bb)); + + edge e, ne; + if (!broken_loop) + { + remove_edge (BRANCH_EDGE (entry_bb)); + make_edge (entry_bb, l1_bb, EDGE_FALLTHRU); + + e = BRANCH_EDGE (l1_bb); + ne = FALLTHRU_EDGE (l1_bb); + e->flags = EDGE_TRUE_VALUE; + } + else + { + single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; + + ne = single_succ_edge (l1_bb); + e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE); + + } + ne->flags = EDGE_FALSE_VALUE; + e->probability = REG_BR_PROB_BASE * 7 / 8; + ne->probability = REG_BR_PROB_BASE / 8; + + set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb); + set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb); + set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb); + + if (!broken_loop) + { + struct loop *loop = alloc_loop (); + loop->header = l1_bb; + loop->latch = cont_bb; + add_loop (loop, l1_bb->loop_father); + loop->safelen = INT_MAX; + } + + /* Pick the correct library function based on the precision of the + induction variable type. */ + tree lib_fun = NULL_TREE; + if (TYPE_PRECISION (type) == 32) + lib_fun = cilk_for_32_fndecl; + else if (TYPE_PRECISION (type) == 64) + lib_fun = cilk_for_64_fndecl; + else + gcc_unreachable (); + + gcc_assert (fd->sched_kind == OMP_CLAUSE_SCHEDULE_CILKFOR); + + /* WS_ARGS contains the library function flavor to call: + __libcilkrts_cilk_for_64 or __libcilkrts_cilk_for_32), and the + user-defined grain value. If the user does not define one, then zero + is passed in by the parser. */ + vec_alloc (region->ws_args, 2); + region->ws_args->quick_push (lib_fun); + region->ws_args->quick_push (fd->chunk_size); +} + +/* A subroutine of expand_omp_for. Generate code for a simd non-worksharing + loop. 
Given parameters: + + for (V = N1; V cond N2; V += STEP) BODY; + + where COND is "<" or ">", we generate pseudocode + + V = N1; + goto L1; + L0: + BODY; + V += STEP; + L1: + if (V cond N2) goto L0; else goto L2; + L2: + + For collapsed loops, given parameters: + collapse(3) + for (V1 = N11; V1 cond1 N12; V1 += STEP1) + for (V2 = N21; V2 cond2 N22; V2 += STEP2) + for (V3 = N31; V3 cond3 N32; V3 += STEP3) + BODY; + + we generate pseudocode + + if (cond3 is <) + adj = STEP3 - 1; + else + adj = STEP3 + 1; + count3 = (adj + N32 - N31) / STEP3; + if (cond2 is <) + adj = STEP2 - 1; + else + adj = STEP2 + 1; + count2 = (adj + N22 - N21) / STEP2; + if (cond1 is <) + adj = STEP1 - 1; + else + adj = STEP1 + 1; + count1 = (adj + N12 - N11) / STEP1; + count = count1 * count2 * count3; + V = 0; + V1 = N11; + V2 = N21; + V3 = N31; + goto L1; + L0: + BODY; + V += 1; + V3 += STEP3; + V2 += (V3 cond3 N32) ? 0 : STEP2; + V3 = (V3 cond3 N32) ? V3 : N31; + V1 += (V2 cond2 N22) ? 0 : STEP1; + V2 = (V2 cond2 N22) ? 
V2 : N21; + L1: + if (V < count) goto L0; else goto L2; + L2: + + */ + +static void +expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) +{ + tree type, t; + basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb; + gimple_stmt_iterator gsi; + gimple *stmt; + gcond *cond_stmt; + bool broken_loop = region->cont == NULL; + edge e, ne; + tree *counts = NULL; + int i; + int safelen_int = INT_MAX; + tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), + OMP_CLAUSE_SAFELEN); + tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), + OMP_CLAUSE__SIMDUID_); + tree n1, n2; + + if (safelen) + { + safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen); + if (TREE_CODE (safelen) != INTEGER_CST) + safelen_int = 0; + else if (tree_fits_uhwi_p (safelen) && tree_to_uhwi (safelen) < INT_MAX) + safelen_int = tree_to_uhwi (safelen); + if (safelen_int == 1) + safelen_int = 0; + } + type = TREE_TYPE (fd->loop.v); + entry_bb = region->entry; + cont_bb = region->cont; + gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); + gcc_assert (broken_loop + || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest); + l0_bb = FALLTHRU_EDGE (entry_bb)->dest; + if (!broken_loop) + { + gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb); + gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); + l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest; + l2_bb = BRANCH_EDGE (entry_bb)->dest; + } + else + { + BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL; + l1_bb = split_edge (BRANCH_EDGE (entry_bb)); + l2_bb = single_succ (l1_bb); + } + exit_bb = region->exit; + l2_dom_bb = NULL; + + gsi = gsi_last_bb (entry_bb); + + gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); + /* Not needed in SSA form right now. 
*/ + gcc_assert (!gimple_in_ssa_p (cfun)); + if (fd->collapse > 1) + { + int first_zero_iter = -1, dummy = -1; + basic_block zero_iter_bb = l2_bb, dummy_bb = NULL; + + counts = XALLOCAVEC (tree, fd->collapse); + expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, + zero_iter_bb, first_zero_iter, + dummy_bb, dummy, l2_dom_bb); + } + if (l2_dom_bb == NULL) + l2_dom_bb = l1_bb; + + n1 = fd->loop.n1; + n2 = fd->loop.n2; + if (gimple_omp_for_combined_into_p (fd->for_stmt)) + { + tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + n1 = OMP_CLAUSE_DECL (innerc); + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + n2 = OMP_CLAUSE_DECL (innerc); + } + tree step = fd->loop.step; + + bool is_simt = (safelen_int > 1 + && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), + OMP_CLAUSE__SIMT_)); + tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE; + if (is_simt) + { + cfun->curr_properties &= ~PROP_gimple_lomp_dev; + simt_lane = create_tmp_var (unsigned_type_node); + gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0); + gimple_call_set_lhs (g, simt_lane); + gsi_insert_before (&gsi, g, GSI_SAME_STMT); + tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, + fold_convert (TREE_TYPE (step), simt_lane)); + n1 = fold_convert (type, n1); + if (POINTER_TYPE_P (type)) + n1 = fold_build_pointer_plus (n1, offset); + else + n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset)); + + /* Collapsed loops not handled for SIMT yet: limit to one lane only. 
*/ + if (fd->collapse > 1) + simt_maxlane = build_one_cst (unsigned_type_node); + else if (safelen_int < omp_max_simt_vf ()) + simt_maxlane = build_int_cst (unsigned_type_node, safelen_int); + tree vf + = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF, + unsigned_type_node, 0); + if (simt_maxlane) + vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane); + vf = fold_convert (TREE_TYPE (step), vf); + step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf); + } + + expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1)); + if (fd->collapse > 1) + { + if (gimple_omp_for_combined_into_p (fd->for_stmt)) + { + gsi_prev (&gsi); + expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1); + gsi_next (&gsi); + } + else + for (i = 0; i < fd->collapse; i++) + { + tree itype = TREE_TYPE (fd->loops[i].v); + if (POINTER_TYPE_P (itype)) + itype = signed_type_for (itype); + t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1); + expand_omp_build_assign (&gsi, fd->loops[i].v, t); + } + } + + /* Remove the GIMPLE_OMP_FOR statement. */ + gsi_remove (&gsi, true); + + if (!broken_loop) + { + /* Code to control the increment goes in the CONT_BB. 
*/ + gsi = gsi_last_bb (cont_bb); + stmt = gsi_stmt (gsi); + gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE); + + if (POINTER_TYPE_P (type)) + t = fold_build_pointer_plus (fd->loop.v, step); + else + t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step); + expand_omp_build_assign (&gsi, fd->loop.v, t); + + if (fd->collapse > 1) + { + i = fd->collapse - 1; + if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))) + { + t = fold_convert (sizetype, fd->loops[i].step); + t = fold_build_pointer_plus (fd->loops[i].v, t); + } + else + { + t = fold_convert (TREE_TYPE (fd->loops[i].v), + fd->loops[i].step); + t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v), + fd->loops[i].v, t); + } + expand_omp_build_assign (&gsi, fd->loops[i].v, t); + + for (i = fd->collapse - 1; i > 0; i--) + { + tree itype = TREE_TYPE (fd->loops[i].v); + tree itype2 = TREE_TYPE (fd->loops[i - 1].v); + if (POINTER_TYPE_P (itype2)) + itype2 = signed_type_for (itype2); + t = build3 (COND_EXPR, itype2, + build2 (fd->loops[i].cond_code, boolean_type_node, + fd->loops[i].v, + fold_convert (itype, fd->loops[i].n2)), + build_int_cst (itype2, 0), + fold_convert (itype2, fd->loops[i - 1].step)); + if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v))) + t = fold_build_pointer_plus (fd->loops[i - 1].v, t); + else + t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t); + expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t); + + t = build3 (COND_EXPR, itype, + build2 (fd->loops[i].cond_code, boolean_type_node, + fd->loops[i].v, + fold_convert (itype, fd->loops[i].n2)), + fd->loops[i].v, + fold_convert (itype, fd->loops[i].n1)); + expand_omp_build_assign (&gsi, fd->loops[i].v, t); + } + } + + /* Remove GIMPLE_OMP_CONTINUE. */ + gsi_remove (&gsi, true); + } + + /* Emit the condition in L1_BB. 
*/ + gsi = gsi_start_bb (l1_bb); + + t = fold_convert (type, n2); + t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + tree v = fd->loop.v; + if (DECL_P (v) && TREE_ADDRESSABLE (v)) + v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + t = build2 (fd->loop.cond_code, boolean_type_node, v, t); + cond_stmt = gimple_build_cond_empty (t); + gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING); + if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p, + NULL, NULL) + || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p, + NULL, NULL)) + { + gsi = gsi_for_stmt (cond_stmt); + gimple_regimplify_operands (cond_stmt, &gsi); + } + + /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */ + if (is_simt) + { + gsi = gsi_start_bb (l2_bb); + step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step); + if (POINTER_TYPE_P (type)) + t = fold_build_pointer_plus (fd->loop.v, step); + else + t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step); + expand_omp_build_assign (&gsi, fd->loop.v, t); + } + + /* Remove GIMPLE_OMP_RETURN. */ + gsi = gsi_last_bb (exit_bb); + gsi_remove (&gsi, true); + + /* Connect the new blocks. 
*/ + remove_edge (FALLTHRU_EDGE (entry_bb)); + + if (!broken_loop) + { + remove_edge (BRANCH_EDGE (entry_bb)); + make_edge (entry_bb, l1_bb, EDGE_FALLTHRU); + + e = BRANCH_EDGE (l1_bb); + ne = FALLTHRU_EDGE (l1_bb); + e->flags = EDGE_TRUE_VALUE; + } + else + { + single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; + + ne = single_succ_edge (l1_bb); + e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE); + + } + ne->flags = EDGE_FALSE_VALUE; + e->probability = REG_BR_PROB_BASE * 7 / 8; + ne->probability = REG_BR_PROB_BASE / 8; + + set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb); + set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb); + + if (simt_maxlane) + { + cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane, + NULL_TREE, NULL_TREE); + gsi = gsi_last_bb (entry_bb); + gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT); + make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE); + FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE; + FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE * 7 / 8; + BRANCH_EDGE (entry_bb)->probability = REG_BR_PROB_BASE / 8; + l2_dom_bb = entry_bb; + } + set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb); + + if (!broken_loop) + { + struct loop *loop = alloc_loop (); + loop->header = l1_bb; + loop->latch = cont_bb; + add_loop (loop, l1_bb->loop_father); + loop->safelen = safelen_int; + if (simduid) + { + loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid); + cfun->has_simduid_loops = true; + } + /* If not -fno-tree-loop-vectorize, hint that we want to vectorize + the loop. 
*/ + if ((flag_tree_loop_vectorize + || (!global_options_set.x_flag_tree_loop_vectorize + && !global_options_set.x_flag_tree_vectorize)) + && flag_tree_loop_optimize + && loop->safelen > 1) + { + loop->force_vectorize = true; + cfun->has_force_vectorize_loops = true; + } + } + else if (simduid) + cfun->has_simduid_loops = true; +} + +/* Taskloop construct is represented after gimplification with + two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched + in between them. This routine expands the outer GIMPLE_OMP_FOR, + which should just compute all the needed loop temporaries + for GIMPLE_OMP_TASK. */ + +static void +expand_omp_taskloop_for_outer (struct omp_region *region, + struct omp_for_data *fd, + gimple *inner_stmt) +{ + tree type, bias = NULL_TREE; + basic_block entry_bb, cont_bb, exit_bb; + gimple_stmt_iterator gsi; + gassign *assign_stmt; + tree *counts = NULL; + int i; + + gcc_assert (inner_stmt); + gcc_assert (region->cont); + gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK + && gimple_omp_task_taskloop_p (inner_stmt)); + type = TREE_TYPE (fd->loop.v); + + /* See if we need to bias by LLONG_MIN. 
*/ + if (fd->iter_type == long_long_unsigned_type_node + && TREE_CODE (type) == INTEGER_TYPE + && !TYPE_UNSIGNED (type)) + { + tree n1, n2; + + if (fd->loop.cond_code == LT_EXPR) + { + n1 = fd->loop.n1; + n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step); + } + else + { + n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step); + n2 = fd->loop.n1; + } + if (TREE_CODE (n1) != INTEGER_CST + || TREE_CODE (n2) != INTEGER_CST + || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0))) + bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type)); + } + + entry_bb = region->entry; + cont_bb = region->cont; + gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); + gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest); + exit_bb = region->exit; + + gsi = gsi_last_bb (entry_bb); + gimple *for_stmt = gsi_stmt (gsi); + gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR); + if (fd->collapse > 1) + { + int first_zero_iter = -1, dummy = -1; + basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL; + + counts = XALLOCAVEC (tree, fd->collapse); + expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, + zero_iter_bb, first_zero_iter, + dummy_bb, dummy, l2_dom_bb); + + if (zero_iter_bb) + { + /* Some counts[i] vars might be uninitialized if + some loop has zero iterations. But the body shouldn't + be executed in that case, so just avoid uninit warnings. 
*/ + for (i = first_zero_iter; i < fd->collapse; i++) + if (SSA_VAR_P (counts[i])) + TREE_NO_WARNING (counts[i]) = 1; + gsi_prev (&gsi); + edge e = split_block (entry_bb, gsi_stmt (gsi)); + entry_bb = e->dest; + make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU); + gsi = gsi_last_bb (entry_bb); + set_immediate_dominator (CDI_DOMINATORS, entry_bb, + get_immediate_dominator (CDI_DOMINATORS, + zero_iter_bb)); + } + } + + tree t0, t1; + t1 = fd->loop.n2; + t0 = fd->loop.n1; + if (POINTER_TYPE_P (TREE_TYPE (t0)) + && TYPE_PRECISION (TREE_TYPE (t0)) + != TYPE_PRECISION (fd->iter_type)) + { + /* Avoid casting pointers to integer of a different size. */ + tree itype = signed_type_for (type); + t1 = fold_convert (fd->iter_type, fold_convert (itype, t1)); + t0 = fold_convert (fd->iter_type, fold_convert (itype, t0)); + } + else + { + t1 = fold_convert (fd->iter_type, t1); + t0 = fold_convert (fd->iter_type, t0); + } + if (bias) + { + t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias); + t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias); + } + + tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + tree startvar = OMP_CLAUSE_DECL (innerc); + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + tree endvar = OMP_CLAUSE_DECL (innerc); + if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST) + { + gcc_assert (innerc); + for (i = 1; i < fd->collapse; i++) + { + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + } + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), + OMP_CLAUSE__LOOPTEMP_); + if (innerc) + { + /* If needed (inner taskloop has lastprivate clause), propagate + down the total number of iterations. 
*/ + tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false, + NULL_TREE, false, + GSI_CONTINUE_LINKING); + assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + } + } + + t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false, + GSI_CONTINUE_LINKING); + assign_stmt = gimple_build_assign (startvar, t0); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + + t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false, + GSI_CONTINUE_LINKING); + assign_stmt = gimple_build_assign (endvar, t1); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + if (fd->collapse > 1) + expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); + + /* Remove the GIMPLE_OMP_FOR statement. */ + gsi = gsi_for_stmt (for_stmt); + gsi_remove (&gsi, true); + + gsi = gsi_last_bb (cont_bb); + gsi_remove (&gsi, true); + + gsi = gsi_last_bb (exit_bb); + gsi_remove (&gsi, true); + + FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE; + remove_edge (BRANCH_EDGE (entry_bb)); + FALLTHRU_EDGE (cont_bb)->probability = REG_BR_PROB_BASE; + remove_edge (BRANCH_EDGE (cont_bb)); + set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb); + set_immediate_dominator (CDI_DOMINATORS, region->entry, + recompute_dominator (CDI_DOMINATORS, region->entry)); +} + +/* Taskloop construct is represented after gimplification with + two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched + in between them. This routine expands the inner GIMPLE_OMP_FOR. + GOMP_taskloop{,_ull} function arranges for each task to be given just + a single range of iterations. 
*/

static void
expand_omp_taskloop_for_inner (struct omp_region *region,
			       struct omp_for_data *fd,
			       gimple *inner_stmt)
{
  tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
  basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
  basic_block fin_bb;
  gimple_stmt_iterator gsi;
  edge ep;
  bool broken_loop = region->cont == NULL;
  tree *counts = NULL;
  tree n1, n2, step;

  /* Iterate in the loop variable's type; pointer-typed loops step in the
     corresponding signed integer type.  */
  itype = type = TREE_TYPE (fd->loop.v);
  if (POINTER_TYPE_P (type))
    itype = signed_type_for (type);

  /* See if we need to bias by LLONG_MIN.  */
  if (fd->iter_type == long_long_unsigned_type_node
      && TREE_CODE (type) == INTEGER_TYPE
      && !TYPE_UNSIGNED (type))
    {
      tree n1, n2;

      if (fd->loop.cond_code == LT_EXPR)
	{
	  n1 = fd->loop.n1;
	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
	}
      else
	{
	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
	  n2 = fd->loop.n1;
	}
      if (TREE_CODE (n1) != INTEGER_CST
	  || TREE_CODE (n2) != INTEGER_CST
	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
    }

  /* Discover and sanity-check the region's basic-block layout.  */
  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  fin_bb = BRANCH_EDGE (entry_bb)->dest;
  gcc_assert (broken_loop
	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
  body_bb = FALLTHRU_EDGE (entry_bb)->dest;
  if (!broken_loop)
    {
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
    }
  exit_bb = region->exit;

  /* Iteration space partitioning goes in ENTRY_BB.  */
  gsi = gsi_last_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block l2_dom_bb = NULL, dummy_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  fin_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);
      t = NULL_TREE;
    }
  else
    t = integer_one_node;

  /* The per-task iteration range is supplied by the runtime through the
     first two _looptemp_ clauses on the GIMPLE_OMP_FOR.  */
  step = fd->loop.step;
  tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				 OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  n1 = OMP_CLAUSE_DECL (innerc);
  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  n2 = OMP_CLAUSE_DECL (innerc);
  /* Adjust the incoming bounds by the LLONG_MIN bias computed above;
     NOTE(review): presumably this mirrors the biasing done when the outer
     construct passed the bounds to the runtime — confirm against the outer
     taskloop expansion.  */
  if (bias)
    {
      n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
      n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
    }
  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
				   true, NULL_TREE, true, GSI_SAME_STMT);

  tree startvar = fd->loop.v;
  tree endvar = NULL_TREE;

  /* If this loop is itself combined with an inner construct, the actual
     control variables live in that construct's _looptemp_ clauses.  */
  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      tree clauses = gimple_omp_for_clauses (inner_stmt);
      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      startvar = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      endvar = OMP_CLAUSE_DECL (innerc);
    }
  t = fold_convert (TREE_TYPE (startvar), n1);
  t = force_gimple_operand_gsi (&gsi, t,
				DECL_P (startvar)
				&& TREE_ADDRESSABLE (startvar),
				NULL_TREE, false, GSI_CONTINUE_LINKING);
  gimple *assign_stmt = gimple_build_assign (startvar, t);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);

  t = fold_convert (TREE_TYPE (startvar), n2);
  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				false, GSI_CONTINUE_LINKING);
  if (endvar)
    {
      assign_stmt = gimple_build_assign (endvar, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
	assign_stmt = gimple_build_assign (fd->loop.v, e);
      else
	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }
  if (fd->collapse > 1)
    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);

  if (!broken_loop)
    {
      /* The code controlling the sequential loop replaces the
	 GIMPLE_OMP_CONTINUE.  */
      gsi = gsi_last_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
      gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
      vmain = gimple_omp_continue_control_use (cont_stmt);
      vback = gimple_omp_continue_control_def (cont_stmt);

      if (!gimple_omp_for_combined_p (fd->for_stmt))
	{
	  /* Emit V += STEP and the back-edge test V COND E.  */
	  if (POINTER_TYPE_P (type))
	    t = fold_build_pointer_plus (vmain, step);
	  else
	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
	  t = force_gimple_operand_gsi (&gsi, t,
					DECL_P (vback)
					&& TREE_ADDRESSABLE (vback),
					NULL_TREE, true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (vback, t);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

	  t = build2 (fd->loop.cond_code, boolean_type_node,
		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
		      ? t : vback, e);
	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
	}

      /* Remove the GIMPLE_OMP_CONTINUE statement.  */
      gsi_remove (&gsi, true);

      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
    }

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi = gsi_for_stmt (fd->for_stmt);
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_RETURN statement.  */
  gsi = gsi_last_bb (exit_bb);
  gsi_remove (&gsi, true);

  FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE;
  if (!broken_loop)
    remove_edge (BRANCH_EDGE (entry_bb));
  else
    {
      remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
      region->outer->cont = NULL;
    }

  /* Connect all the blocks.  */
  if (!broken_loop)
    {
      ep = find_edge (cont_bb, body_bb);
      if (gimple_omp_for_combined_p (fd->for_stmt))
	{
	  remove_edge (ep);
	  ep = NULL;
	}
      else if (fd->collapse > 1)
	{
	  remove_edge (ep);
	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
	}
      else
	ep->flags = EDGE_TRUE_VALUE;
      find_edge (cont_bb, fin_bb)->flags
	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
    }

  set_immediate_dominator (CDI_DOMINATORS, body_bb,
			   recompute_dominator (CDI_DOMINATORS, body_bb));
  if (!broken_loop)
    set_immediate_dominator (CDI_DOMINATORS, fin_bb,
			     recompute_dominator (CDI_DOMINATORS, fin_bb));

  /* Register the new natural loop with the loop tree.  */
  if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
    {
      struct loop *loop = alloc_loop ();
      loop->header = body_bb;
      if (collapse_bb == NULL)
	loop->latch = cont_bb;
      add_loop (loop, body_bb->loop_father);
    }
}

/* A subroutine of expand_omp_for.  Generate code for an OpenACC
   partitioned loop.  The lowering here is abstracted, in that the
   loop parameters are passed through internal functions, which are
   further lowered by oacc_device_lower, once we get to the target
   compiler.  The loop is of the form:

   for (V = B; V LTGT E; V += S) {BODY}

   where LTGT is < or >.  We may have a specified chunking size, CHUNKING
   (constant 0 for no chunking) and we will have a GWV partitioning
   mask, specifying dimensions over which the loop is to be
   partitioned (see note below).  We generate code that looks like:

   <entry_bb> [incoming FALL->body, BRANCH->exit]
     typedef signedintify (typeof (V)) T;  // underlying signed integral type
     T range = E - B;
     T chunk_no = 0;
     T DIR = LTGT == '<' ? 
+1 : -1; + T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV); + T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV); + + <head_bb> [created by splitting end of entry_bb] + T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no); + T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset); + if (!(offset LTGT bound)) goto bottom_bb; + + <body_bb> [incoming] + V = B + offset; + {BODY} + + <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb] + offset += step; + if (offset LTGT bound) goto body_bb; [*] + + <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb + chunk_no++; + if (chunk < chunk_max) goto head_bb; + + <exit_bb> [incoming] + V = B + ((range -/+ 1) / S +/- 1) * S [*] + + [*] Needed if V live at end of loop + + Note: CHUNKING & GWV mask are specified explicitly here. This is a + transition, and will be specified by a more general mechanism shortly. + */ + +static void +expand_oacc_for (struct omp_region *region, struct omp_for_data *fd) +{ + tree v = fd->loop.v; + enum tree_code cond_code = fd->loop.cond_code; + enum tree_code plus_code = PLUS_EXPR; + + tree chunk_size = integer_minus_one_node; + tree gwv = integer_zero_node; + tree iter_type = TREE_TYPE (v); + tree diff_type = iter_type; + tree plus_type = iter_type; + struct oacc_collapse *counts = NULL; + + gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt) + == GF_OMP_FOR_KIND_OACC_LOOP); + gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt)); + gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR); + + if (POINTER_TYPE_P (iter_type)) + { + plus_code = POINTER_PLUS_EXPR; + plus_type = sizetype; + } + if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type)) + diff_type = signed_type_for (diff_type); + + basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */ + basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */ + basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */ + 
basic_block bottom_bb = NULL; + + /* entry_bb has two sucessors; the branch edge is to the exit + block, fallthrough edge to body. */ + gcc_assert (EDGE_COUNT (entry_bb->succs) == 2 + && BRANCH_EDGE (entry_bb)->dest == exit_bb); + + /* If cont_bb non-NULL, it has 2 successors. The branch successor is + body_bb, or to a block whose only successor is the body_bb. Its + fallthrough successor is the final block (same as the branch + successor of the entry_bb). */ + if (cont_bb) + { + basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest; + basic_block bed = BRANCH_EDGE (cont_bb)->dest; + + gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb); + gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb); + } + else + gcc_assert (!gimple_in_ssa_p (cfun)); + + /* The exit block only has entry_bb and cont_bb as predecessors. */ + gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL)); + + tree chunk_no; + tree chunk_max = NULL_TREE; + tree bound, offset; + tree step = create_tmp_var (diff_type, ".step"); + bool up = cond_code == LT_EXPR; + tree dir = build_int_cst (diff_type, up ? +1 : -1); + bool chunking = !gimple_in_ssa_p (cfun);; + bool negating; + + /* SSA instances. */ + tree offset_incr = NULL_TREE; + tree offset_init = NULL_TREE; + + gimple_stmt_iterator gsi; + gassign *ass; + gcall *call; + gimple *stmt; + tree expr; + location_t loc; + edge split, be, fte; + + /* Split the end of entry_bb to create head_bb. */ + split = split_block (entry_bb, last_stmt (entry_bb)); + basic_block head_bb = split->dest; + entry_bb = split->src; + + /* Chunk setup goes at end of entry_bb, replacing the omp_for. */ + gsi = gsi_last_bb (entry_bb); + gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi)); + loc = gimple_location (for_stmt); + + if (gimple_in_ssa_p (cfun)) + { + offset_init = gimple_omp_for_index (for_stmt, 0); + gcc_assert (integer_zerop (fd->loop.n1)); + /* The SSA parallelizer does gang parallelism. 
*/ + gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG)); + } + + if (fd->collapse > 1) + { + counts = XALLOCAVEC (struct oacc_collapse, fd->collapse); + tree total = expand_oacc_collapse_init (fd, &gsi, counts, + TREE_TYPE (fd->loop.n2)); + + if (SSA_VAR_P (fd->loop.n2)) + { + total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE, + true, GSI_SAME_STMT); + ass = gimple_build_assign (fd->loop.n2, total); + gsi_insert_before (&gsi, ass, GSI_SAME_STMT); + } + + } + + tree b = fd->loop.n1; + tree e = fd->loop.n2; + tree s = fd->loop.step; + + b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT); + e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT); + + /* Convert the step, avoiding possible unsigned->signed overflow. */ + negating = !up && TYPE_UNSIGNED (TREE_TYPE (s)); + if (negating) + s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s); + s = fold_convert (diff_type, s); + if (negating) + s = fold_build1 (NEGATE_EXPR, diff_type, s); + s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT); + + if (!chunking) + chunk_size = integer_zero_node; + expr = fold_convert (diff_type, chunk_size); + chunk_size = force_gimple_operand_gsi (&gsi, expr, true, + NULL_TREE, true, GSI_SAME_STMT); + /* Determine the range, avoiding possible unsigned->signed overflow. */ + negating = !up && TYPE_UNSIGNED (iter_type); + expr = fold_build2 (MINUS_EXPR, plus_type, + fold_convert (plus_type, negating ? b : e), + fold_convert (plus_type, negating ? 
e : b)); + expr = fold_convert (diff_type, expr); + if (negating) + expr = fold_build1 (NEGATE_EXPR, diff_type, expr); + tree range = force_gimple_operand_gsi (&gsi, expr, true, + NULL_TREE, true, GSI_SAME_STMT); + + chunk_no = build_int_cst (diff_type, 0); + if (chunking) + { + gcc_assert (!gimple_in_ssa_p (cfun)); + + expr = chunk_no; + chunk_max = create_tmp_var (diff_type, ".chunk_max"); + chunk_no = create_tmp_var (diff_type, ".chunk_no"); + + ass = gimple_build_assign (chunk_no, expr); + gsi_insert_before (&gsi, ass, GSI_SAME_STMT); + + call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, + build_int_cst (integer_type_node, + IFN_GOACC_LOOP_CHUNKS), + dir, range, s, chunk_size, gwv); + gimple_call_set_lhs (call, chunk_max); + gimple_set_location (call, loc); + gsi_insert_before (&gsi, call, GSI_SAME_STMT); + } + else + chunk_size = chunk_no; + + call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, + build_int_cst (integer_type_node, + IFN_GOACC_LOOP_STEP), + dir, range, s, chunk_size, gwv); + gimple_call_set_lhs (call, step); + gimple_set_location (call, loc); + gsi_insert_before (&gsi, call, GSI_SAME_STMT); + + /* Remove the GIMPLE_OMP_FOR. */ + gsi_remove (&gsi, true); + + /* Fixup edges from head_bb */ + be = BRANCH_EDGE (head_bb); + fte = FALLTHRU_EDGE (head_bb); + be->flags |= EDGE_FALSE_VALUE; + fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE; + + basic_block body_bb = fte->dest; + + if (gimple_in_ssa_p (cfun)) + { + gsi = gsi_last_bb (cont_bb); + gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); + + offset = gimple_omp_continue_control_use (cont_stmt); + offset_incr = gimple_omp_continue_control_def (cont_stmt); + } + else + { + offset = create_tmp_var (diff_type, ".offset"); + offset_init = offset_incr = offset; + } + bound = create_tmp_var (TREE_TYPE (offset), ".bound"); + + /* Loop offset & bound go into head_bb. 
*/ + gsi = gsi_start_bb (head_bb); + + call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, + build_int_cst (integer_type_node, + IFN_GOACC_LOOP_OFFSET), + dir, range, s, + chunk_size, gwv, chunk_no); + gimple_call_set_lhs (call, offset_init); + gimple_set_location (call, loc); + gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING); + + call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, + build_int_cst (integer_type_node, + IFN_GOACC_LOOP_BOUND), + dir, range, s, + chunk_size, gwv, offset_init); + gimple_call_set_lhs (call, bound); + gimple_set_location (call, loc); + gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING); + + expr = build2 (cond_code, boolean_type_node, offset_init, bound); + gsi_insert_after (&gsi, gimple_build_cond_empty (expr), + GSI_CONTINUE_LINKING); + + /* V assignment goes into body_bb. */ + if (!gimple_in_ssa_p (cfun)) + { + gsi = gsi_start_bb (body_bb); + + expr = build2 (plus_code, iter_type, b, + fold_convert (plus_type, offset)); + expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE, + true, GSI_SAME_STMT); + ass = gimple_build_assign (v, expr); + gsi_insert_before (&gsi, ass, GSI_SAME_STMT); + if (fd->collapse > 1) + expand_oacc_collapse_vars (fd, &gsi, counts, v); + } + + /* Loop increment goes into cont_bb. If this is not a loop, we + will have spawned threads as if it was, and each one will + execute one iteration. The specification is not explicit about + whether such constructs are ill-formed or not, and they can + occur, especially when noreturn routines are involved. */ + if (cont_bb) + { + gsi = gsi_last_bb (cont_bb); + gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); + loc = gimple_location (cont_stmt); + + /* Increment offset. 
*/ + if (gimple_in_ssa_p (cfun)) + expr= build2 (plus_code, iter_type, offset, + fold_convert (plus_type, step)); + else + expr = build2 (PLUS_EXPR, diff_type, offset, step); + expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE, + true, GSI_SAME_STMT); + ass = gimple_build_assign (offset_incr, expr); + gsi_insert_before (&gsi, ass, GSI_SAME_STMT); + expr = build2 (cond_code, boolean_type_node, offset_incr, bound); + gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT); + + /* Remove the GIMPLE_OMP_CONTINUE. */ + gsi_remove (&gsi, true); + + /* Fixup edges from cont_bb */ + be = BRANCH_EDGE (cont_bb); + fte = FALLTHRU_EDGE (cont_bb); + be->flags |= EDGE_TRUE_VALUE; + fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE; + + if (chunking) + { + /* Split the beginning of exit_bb to make bottom_bb. We + need to insert a nop at the start, because splitting is + after a stmt, not before. */ + gsi = gsi_start_bb (exit_bb); + stmt = gimple_build_nop (); + gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); + split = split_block (exit_bb, stmt); + bottom_bb = split->src; + exit_bb = split->dest; + gsi = gsi_last_bb (bottom_bb); + + /* Chunk increment and test goes into bottom_bb. */ + expr = build2 (PLUS_EXPR, diff_type, chunk_no, + build_int_cst (diff_type, 1)); + ass = gimple_build_assign (chunk_no, expr); + gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING); + + /* Chunk test at end of bottom_bb. */ + expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max); + gsi_insert_after (&gsi, gimple_build_cond_empty (expr), + GSI_CONTINUE_LINKING); + + /* Fixup edges from bottom_bb. */ + split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE; + make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE); + } + } + + gsi = gsi_last_bb (exit_bb); + gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); + loc = gimple_location (gsi_stmt (gsi)); + + if (!gimple_in_ssa_p (cfun)) + { + /* Insert the final value of V, in case it is live. 
This is the + value for the only thread that survives past the join. */ + expr = fold_build2 (MINUS_EXPR, diff_type, range, dir); + expr = fold_build2 (PLUS_EXPR, diff_type, expr, s); + expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s); + expr = fold_build2 (MULT_EXPR, diff_type, expr, s); + expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr)); + expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE, + true, GSI_SAME_STMT); + ass = gimple_build_assign (v, expr); + gsi_insert_before (&gsi, ass, GSI_SAME_STMT); + } + + /* Remove the OMP_RETURN. */ + gsi_remove (&gsi, true); + + if (cont_bb) + { + /* We now have one or two nested loops. Update the loop + structures. */ + struct loop *parent = entry_bb->loop_father; + struct loop *body = body_bb->loop_father; + + if (chunking) + { + struct loop *chunk_loop = alloc_loop (); + chunk_loop->header = head_bb; + chunk_loop->latch = bottom_bb; + add_loop (chunk_loop, parent); + parent = chunk_loop; + } + else if (parent != body) + { + gcc_assert (body->header == body_bb); + gcc_assert (body->latch == cont_bb + || single_pred (body->latch) == cont_bb); + parent = NULL; + } + + if (parent) + { + struct loop *body_loop = alloc_loop (); + body_loop->header = body_bb; + body_loop->latch = cont_bb; + add_loop (body_loop, parent); + } + } +} + +/* Expand the OMP loop defined by REGION. 
*/

static void
expand_omp_for (struct omp_region *region, gimple *inner_stmt)
{
  struct omp_for_data fd;
  struct omp_for_data_loop *loops;

  /* Extract the loop descriptor (one omp_for_data_loop per collapsed
     dimension) from the GIMPLE_OMP_FOR terminating the entry block.  */
  loops
    = (struct omp_for_data_loop *)
      alloca (gimple_omp_for_collapse (last_stmt (region->entry))
	      * sizeof (struct omp_for_data_loop));
  omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
			&fd, loops);
  region->sched_kind = fd.sched_kind;
  region->sched_modifiers = fd.sched_modifiers;

  gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
  BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
  FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
  if (region->cont)
    {
      gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
      BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
      FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
    }
  else
    /* If there isn't a continue then this is a degenerate case where
       the introduction of abnormal edges during lowering will prevent
       original loops from being detected.  Fix that up.  */
    loops_state_set (LOOPS_NEED_FIXUP);

  /* Dispatch on the loop kind and schedule to the matching expander.  */
  if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
    expand_omp_simd (region, &fd);
  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_CILKFOR)
    expand_cilk_for (region, &fd);
  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
    {
      gcc_assert (!inner_stmt);
      expand_oacc_for (region, &fd);
    }
  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
    {
      if (gimple_omp_for_combined_into_p (fd.for_stmt))
	expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
      else
	expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
    }
  else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
	   && !fd.have_ordered)
    {
      if (fd.chunk_size == NULL)
	expand_omp_for_static_nochunk (region, &fd, inner_stmt);
      else
	expand_omp_for_static_chunk (region, &fd, inner_stmt);
    }
  else
    {
      /* Generic case: select the libgomp GOMP_loop*_start/next entry
	 points by computing offsets into the BUILT_IN_GOMP_LOOP_*
	 enumerators; the *_START and *_NEXT builtins are laid out in
	 matching order, so a single FN_INDEX selects both.  */
      int fn_index, start_ix, next_ix;

      gcc_assert (gimple_omp_for_kind (fd.for_stmt)
		  == GF_OMP_FOR_KIND_FOR);
      if (fd.chunk_size == NULL
	  && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
	fd.chunk_size = integer_zero_node;
      gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
      switch (fd.sched_kind)
	{
	case OMP_CLAUSE_SCHEDULE_RUNTIME:
	  fn_index = 3;
	  break;
	case OMP_CLAUSE_SCHEDULE_DYNAMIC:
	case OMP_CLAUSE_SCHEDULE_GUIDED:
	  if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
	      && !fd.ordered
	      && !fd.have_ordered)
	    {
	      fn_index = 3 + fd.sched_kind;
	      break;
	    }
	  /* FALLTHRU */
	default:
	  fn_index = fd.sched_kind;
	  break;
	}
      if (!fd.ordered)
	fn_index += fd.have_ordered * 6;
      if (fd.ordered)
	start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
      else
	start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
      next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
      if (fd.iter_type == long_long_unsigned_type_node)
	{
	  start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
		       - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
	  next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
		      - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
	}
      expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
			      (enum built_in_function) next_ix, inner_stmt);
    }

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_only_virtuals);
}

/* Expand code for an OpenMP sections directive.  In pseudo code, we generate

	v = GOMP_sections_start (n);
    L0:
	switch (v)
	  {
	  case 0:
	    goto L2;
	  case 1:
	    section 1;
	    goto L1;
	  case 2:
	    ...
	  case n:
	    ...
	  default:
	    abort ();
	  }
    L1:
	v = GOMP_sections_next ();
	goto L0;
    L2:
	reduction;

    If this is a combined parallel sections, replace the call to
    GOMP_sections_start with call to GOMP_sections_next.  */

static void
expand_omp_sections (struct omp_region *region)
{
  tree t, u, vin = NULL, vmain, vnext, l2;
  unsigned len;
  basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
  gimple_stmt_iterator si, switch_si;
  gomp_sections *sections_stmt;
  gimple *stmt;
  gomp_continue *cont;
  edge_iterator ei;
  edge e;
  struct omp_region *inner;
  unsigned i, casei;
  bool exit_reachable = region->cont != NULL;

  gcc_assert (region->exit != NULL);
  entry_bb = region->entry;
  l0_bb = single_succ (entry_bb);
  l1_bb = region->cont;
  l2_bb = region->exit;
  if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
    l2 = gimple_block_label (l2_bb);
  else
    {
      /* This can happen if there are reductions.  */
      len = EDGE_COUNT (l0_bb->succs);
      gcc_assert (len > 0);
      e = EDGE_SUCC (l0_bb, len - 1);
      si = gsi_last_bb (e->dest);
      l2 = NULL_TREE;
      if (gsi_end_p (si)
	  || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
	l2 = gimple_block_label (e->dest);
      else
	FOR_EACH_EDGE (e, ei, l0_bb->succs)
	  {
	    si = gsi_last_bb (e->dest);
	    if (gsi_end_p (si)
		|| gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
	      {
		l2 = gimple_block_label (e->dest);
		break;
	      }
	  }
    }
  if (exit_reachable)
    default_bb = create_empty_bb (l1_bb->prev_bb);
  else
    default_bb = create_empty_bb (l0_bb);

  /* We will build a switch() with enough cases for all the
     GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
     and a default case to abort if something goes wrong.  */
  len = EDGE_COUNT (l0_bb->succs);

  /* Use vec::quick_push on label_vec throughout, since we know the size
     in advance.  */
  auto_vec<tree> label_vec (len);

  /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
     GIMPLE_OMP_SECTIONS statement.  */
  si = gsi_last_bb (entry_bb);
  sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
  gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
  vin = gimple_omp_sections_control (sections_stmt);
  if (!is_combined_parallel (region))
    {
      /* If we are not inside a combined parallel+sections region,
	 call GOMP_sections_start.  */
      t = build_int_cst (unsigned_type_node, len - 1);
      u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
      stmt = gimple_build_call (u, 1, t);
    }
  else
    {
      /* Otherwise, call GOMP_sections_next.  */
      u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
      stmt = gimple_build_call (u, 0);
    }
  gimple_call_set_lhs (stmt, vin);
  gsi_insert_after (&si, stmt, GSI_SAME_STMT);
  gsi_remove (&si, true);

  /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
     L0_BB.  */
  switch_si = gsi_last_bb (l0_bb);
  gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
  if (exit_reachable)
    {
      cont = as_a <gomp_continue *> (last_stmt (l1_bb));
      gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
      vmain = gimple_omp_continue_control_use (cont);
      vnext = gimple_omp_continue_control_def (cont);
    }
  else
    {
      vmain = vin;
      vnext = NULL_TREE;
    }

  /* Case 0 means "no more work": jump straight to L2.  */
  t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
  label_vec.quick_push (t);
  i = 1;

  /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR.  */
  for (inner = region->inner, casei = 1;
       inner;
       inner = inner->next, i++, casei++)
    {
      basic_block s_entry_bb, s_exit_bb;

      /* Skip optional reduction region.  */
      if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
	{
	  --i;
	  --casei;
	  continue;
	}

      s_entry_bb = inner->entry;
      s_exit_bb = inner->exit;

      t = gimple_block_label (s_entry_bb);
      u = build_int_cst (unsigned_type_node, casei);
      u = build_case_label (u, NULL, t);
      label_vec.quick_push (u);

      si = gsi_last_bb (s_entry_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
      gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
      gsi_remove (&si, true);
      single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;

      if (s_exit_bb == NULL)
	continue;

      si = gsi_last_bb (s_exit_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
      gsi_remove (&si, true);

      single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
    }

  /* Error handling code goes in DEFAULT_BB.  */
  t = gimple_block_label (default_bb);
  u = build_case_label (NULL, NULL, t);
  make_edge (l0_bb, default_bb, 0);
  add_bb_to_loop (default_bb, current_loops->tree_root);

  stmt = gimple_build_switch (vmain, u, label_vec);
  gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
  gsi_remove (&switch_si, true);

  si = gsi_start_bb (default_bb);
  stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
  gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);

  if (exit_reachable)
    {
      tree bfn_decl;

      /* Code to get the next section goes in L1_BB.  */
      si = gsi_last_bb (l1_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);

      bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
      stmt = gimple_build_call (bfn_decl, 0);
      gimple_call_set_lhs (stmt, vnext);
      gsi_insert_after (&si, stmt, GSI_SAME_STMT);
      gsi_remove (&si, true);

      single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
    }

  /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB.  */
  si = gsi_last_bb (l2_bb);
  if (gimple_omp_return_nowait_p (gsi_stmt (si)))
    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
  else if (gimple_omp_return_lhs (gsi_stmt (si)))
    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
  else
    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
  stmt = gimple_build_call (t, 0);
  if (gimple_omp_return_lhs (gsi_stmt (si)))
    gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
  gsi_insert_after (&si, stmt, GSI_SAME_STMT);
  gsi_remove (&si, true);

  set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
}

/* Expand code for an OpenMP single directive.  We've already expanded
   much of the code, here we simply place the GOMP_barrier call. 
 */

static void
expand_omp_single (struct omp_region *region)
{
  basic_block entry_bb, exit_bb;
  gimple_stmt_iterator si;

  entry_bb = region->entry;
  exit_bb = region->exit;

  /* The body has already been expanded; only the GIMPLE_OMP_SINGLE
     marker ending the entry block remains to be removed.  */
  si = gsi_last_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
  gsi_remove (&si, true);
  single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

  /* Unless the directive has a nowait clause, all threads synchronize
     at the end of the single region: place a GOMP_barrier call where
     the GIMPLE_OMP_RETURN marker was.  */
  si = gsi_last_bb (exit_bb);
  if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
    {
      tree t = gimple_omp_return_lhs (gsi_stmt (si));
      gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
    }
  gsi_remove (&si, true);
  single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
}

/* Generic expansion for OpenMP synchronization directives: master,
   ordered and critical.  All we need to do here is remove the entry
   and exit markers for REGION.  */

static void
expand_omp_synch (struct omp_region *region)
{
  basic_block entry_bb, exit_bb;
  gimple_stmt_iterator si;

  entry_bb = region->entry;
  exit_bb = region->exit;

  /* Remove the entry marker; any of these directive codes may start
     the region handled by this generic routine.  */
  si = gsi_last_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
  gsi_remove (&si, true);
  single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

  /* EXIT_BB may be NULL when the region has no exit marker.  */
  if (exit_bb)
    {
      si = gsi_last_bb (exit_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
      gsi_remove (&si, true);
      single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
    }
}

/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   operation as a normal volatile load.  LOAD_BB ends in a
   GIMPLE_OMP_ATOMIC_LOAD, ADDR is the location loaded from, LOADED_VAL
   receives the value, and INDEX is log2 of the size of the data type.
   Returns false if the required __atomic_load_N builtin is not
   available.  */

static bool
expand_omp_atomic_load (basic_block load_bb, tree addr,
			tree loaded_val, int index)
{
  enum built_in_function tmpbase;
  gimple_stmt_iterator gsi;
  basic_block store_bb;
  location_t loc;
  gimple *stmt;
  tree decl, call, type, itype;

  gsi = gsi_last_bb (load_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
  loc = gimple_location (stmt);

  /* ??? If the target does not implement atomic_load_optab[mode], and mode
     is smaller than word size, then expand_atomic_load assumes that the load
     is atomic.  We could avoid the builtin entirely in this case.  */

  tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;

  type = TREE_TYPE (loaded_val);
  itype = TREE_TYPE (TREE_TYPE (decl));

  /* A seq_cst atomic requires the corresponding memory model; relaxed
     ordering suffices otherwise.  */
  call = build_call_expr_loc (loc, decl, 2, addr,
			      build_int_cst (NULL,
					     gimple_omp_atomic_seq_cst_p (stmt)
					     ? MEMMODEL_SEQ_CST
					     : MEMMODEL_RELAXED));
  if (!useless_type_conversion_p (type, itype))
    call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
  call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);

  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* The matching GIMPLE_OMP_ATOMIC_STORE marker is also consumed.  */
  store_bb = single_succ (load_bb);
  gsi = gsi_last_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
  gsi_remove (&gsi, true);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);

  return true;
}

/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   operation as a normal volatile store.
 */

static bool
expand_omp_atomic_store (basic_block load_bb, tree addr,
			 tree loaded_val, tree stored_val, int index)
{
  enum built_in_function tmpbase;
  gimple_stmt_iterator gsi;
  basic_block store_bb = single_succ (load_bb);
  location_t loc;
  gimple *stmt;
  tree decl, call, type, itype;
  machine_mode imode;
  bool exchange;

  gsi = gsi_last_bb (load_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);

  /* If the load value is needed, then this isn't a store but an exchange.  */
  exchange = gimple_omp_atomic_need_value_p (stmt);

  gsi = gsi_last_bb (store_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
  loc = gimple_location (stmt);

  /* ??? If the target does not implement atomic_store_optab[mode], and mode
     is smaller than word size, then expand_atomic_store assumes that the store
     is atomic.  We could avoid the builtin entirely in this case.  */

  tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
  tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;

  type = TREE_TYPE (stored_val);

  /* Dig out the type of the function's second argument.  */
  itype = TREE_TYPE (decl);
  itype = TYPE_ARG_TYPES (itype);
  itype = TREE_CHAIN (itype);
  itype = TREE_VALUE (itype);
  imode = TYPE_MODE (itype);

  /* An exchange additionally needs target support for atomic exchange
     in this mode.  */
  if (exchange && !can_atomic_exchange_p (imode, true))
    return false;

  if (!useless_type_conversion_p (itype, type))
    stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
  call = build_call_expr_loc (loc, decl, 3, addr, stored_val,
			      build_int_cst (NULL,
					     gimple_omp_atomic_seq_cst_p (stmt)
					     ? MEMMODEL_SEQ_CST
					     : MEMMODEL_RELAXED));
  if (exchange)
    {
      /* For an exchange, assign the returned previous value to
	 LOADED_VAL.  */
      if (!useless_type_conversion_p (type, itype))
	call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
      call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
    }

  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above.  */
  gsi = gsi_last_bb (load_bb);
  gsi_remove (&gsi, true);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);

  return true;
}

/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   operation as a __atomic_fetch_op builtin.  INDEX is log2 of the
   size of the data type, and thus usable to find the index of the builtin
   decl.  Returns false if the expression is not of the proper form.  */

static bool
expand_omp_atomic_fetch_op (basic_block load_bb,
			    tree addr, tree loaded_val,
			    tree stored_val, int index)
{
  enum built_in_function oldbase, newbase, tmpbase;
  tree decl, itype, call;
  tree lhs, rhs;
  basic_block store_bb = single_succ (load_bb);
  gimple_stmt_iterator gsi;
  gimple *stmt;
  location_t loc;
  enum tree_code code;
  bool need_old, need_new;
  machine_mode imode;
  bool seq_cst;

  /* We expect to find the following sequences:

   load_bb:
       GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)

   store_bb:
       val = tmp OP something; (or: something OP tmp)
       GIMPLE_OMP_STORE (val)

  ???FIXME: Allow a more flexible sequence.
  Perhaps use data flow to pick the statements.

  */

  gsi = gsi_after_labels (store_bb);
  stmt = gsi_stmt (gsi);
  loc = gimple_location (stmt);
  if (!is_gimple_assign (stmt))
    return false;
  gsi_next (&gsi);
  if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
    return false;
  need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
  need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
  seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb));
  /* At most one of the old and the new value can be required.  */
  gcc_checking_assert (!need_old || !need_new);

  if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
    return false;

  /* Check for one of the supported fetch-op operations.  */
  code = gimple_assign_rhs_code (stmt);
  switch (code)
    {
    case PLUS_EXPR:
    case POINTER_PLUS_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
      newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
      break;
    case MINUS_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
      newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
      break;
    case BIT_AND_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
      newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
      break;
    case BIT_IOR_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
      newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
      break;
    case BIT_XOR_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
      newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
      break;
    default:
      return false;
    }

  /* Make sure the expression is of the proper form.  */
  if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
    rhs = gimple_assign_rhs2 (stmt);
  else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
	   && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
    rhs = gimple_assign_rhs1 (stmt);
  else
    return false;

  tmpbase = ((enum built_in_function)
	     ((need_new ? newbase : oldbase) + index + 1));
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;
  itype = TREE_TYPE (TREE_TYPE (decl));
  imode = TYPE_MODE (itype);

  /* We could test all of the various optabs involved, but the fact of the
     matter is that (with the exception of i486 vs i586 and xadd) all targets
     that support any atomic operation optab also implements compare-and-swap.
     Let optabs.c take care of expanding any compare-and-swap loop.  */
  if (!can_compare_and_swap_p (imode, true))
    return false;

  gsi = gsi_last_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);

  /* OpenMP does not imply any barrier-like semantics on its atomic ops.
     It only requires that the operation happen atomically.  Thus we can
     use the RELAXED memory model.  */
  call = build_call_expr_loc (loc, decl, 3, addr,
			      fold_convert_loc (loc, itype, rhs),
			      build_int_cst (NULL,
					     seq_cst ? MEMMODEL_SEQ_CST
						     : MEMMODEL_RELAXED));

  if (need_old || need_new)
    {
      lhs = need_old ? loaded_val : stored_val;
      call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
      call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
    }
  else
    call = fold_convert_loc (loc, void_type_node, call);
  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_ATOMIC_STORE marker and the now-dead
     assignment computing STORED_VAL.  */
  gsi = gsi_last_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
  gsi_remove (&gsi, true);
  gsi = gsi_last_bb (store_bb);
  stmt = gsi_stmt (gsi);
  gsi_remove (&gsi, true);

  if (gimple_in_ssa_p (cfun))
    {
      release_defs (stmt);
      update_ssa (TODO_update_ssa_no_phi);
    }

  return true;
}

/* A subroutine of expand_omp_atomic.
   Implement the atomic operation as:

      oldval = *addr;
      repeat:
	newval = rhs;	 // with oldval replacing *addr in rhs
	oldval = __sync_val_compare_and_swap (addr, oldval, newval);
	if (oldval != newval)
	  goto repeat;

   INDEX is log2 of the size of the data type, and thus usable to find the
   index of the builtin decl.  */

static bool
expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
			    tree addr, tree loaded_val, tree stored_val,
			    int index)
{
  tree loadedi, storedi, initial, new_storedi, old_vali;
  tree type, itype, cmpxchg, iaddr;
  gimple_stmt_iterator si;
  basic_block loop_header = single_succ (load_bb);
  gimple *phi, *stmt;
  edge e;
  enum built_in_function fncode;

  /* ??? We need a non-pointer interface to __atomic_compare_exchange in
     order to use the RELAXED memory model effectively.  */
  fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
				    + index + 1);
  cmpxchg = builtin_decl_explicit (fncode);
  if (cmpxchg == NULL_TREE)
    return false;
  type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
  itype = TREE_TYPE (TREE_TYPE (cmpxchg));

  if (!can_compare_and_swap_p (TYPE_MODE (itype), true))
    return false;

  /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD.  */
  si = gsi_last_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);

  /* For floating-point values, we'll need to view-convert them to integers
     so that we can perform the atomic compare and swap.  Simplify the
     following code by always setting up the "i"ntegral variables.  */
  if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
    {
      tree iaddr_val;

      iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
							   true));
      iaddr_val
	= force_gimple_operand_gsi (&si,
				    fold_convert (TREE_TYPE (iaddr), addr),
				    false, NULL_TREE, true, GSI_SAME_STMT);
      stmt = gimple_build_assign (iaddr, iaddr_val);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
      loadedi = create_tmp_var (itype);
      if (gimple_in_ssa_p (cfun))
	loadedi = make_ssa_name (loadedi);
    }
  else
    {
      iaddr = addr;
      loadedi = loaded_val;
    }

  /* Prefer a relaxed __atomic_load_N for the initial read when the
     builtin exists; otherwise fall back to a plain memory reference.  */
  fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
  tree loaddecl = builtin_decl_explicit (fncode);
  if (loaddecl)
    initial
      = fold_convert (TREE_TYPE (TREE_TYPE (iaddr)),
		      build_call_expr (loaddecl, 2, iaddr,
				       build_int_cst (NULL_TREE,
						      MEMMODEL_RELAXED)));
  else
    initial = build2 (MEM_REF, TREE_TYPE (TREE_TYPE (iaddr)), iaddr,
		      build_int_cst (TREE_TYPE (iaddr), 0));

  initial
    = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
				GSI_SAME_STMT);

  /* Move the value to the LOADEDI temporary.  In SSA form LOADEDI is
     carried around the retry loop by a PHI node in LOOP_HEADER.  */
  if (gimple_in_ssa_p (cfun))
    {
      gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
      phi = create_phi_node (loadedi, loop_header);
      SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
	       initial);
    }
  else
    gsi_insert_before (&si,
		       gimple_build_assign (loadedi, initial),
		       GSI_SAME_STMT);
  if (loadedi != loaded_val)
    {
      /* View-convert the integral working copy back to the original
	 (e.g. floating-point) type for uses of LOADED_VAL in the body.  */
      gimple_stmt_iterator gsi2;
      tree x;

      x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
      gsi2 = gsi_start_bb (loop_header);
      if (gimple_in_ssa_p (cfun))
	{
	  gassign *stmt;
	  x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
					true, GSI_SAME_STMT);
	  stmt = gimple_build_assign (loaded_val, x);
	  gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
	}
      else
	{
	  x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
	  force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
				    true, GSI_SAME_STMT);
	}
    }
  gsi_remove (&si, true);

  si = gsi_last_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);

  if (iaddr == addr)
    storedi = stored_val;
  else
    storedi =
      force_gimple_operand_gsi (&si,
				build1 (VIEW_CONVERT_EXPR, itype,
					stored_val), true, NULL_TREE, true,
				GSI_SAME_STMT);

  /* Build the compare&swap statement.  */
  new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
  new_storedi = force_gimple_operand_gsi (&si,
					  fold_convert (TREE_TYPE (loadedi),
							new_storedi),
					  true, NULL_TREE,
					  true, GSI_SAME_STMT);

  if (gimple_in_ssa_p (cfun))
    old_vali = loadedi;
  else
    {
      old_vali = create_tmp_var (TREE_TYPE (loadedi));
      stmt = gimple_build_assign (old_vali, loadedi);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);

      stmt = gimple_build_assign (loadedi, new_storedi);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
    }

  /* Note that we always perform the comparison as an integer, even for
     floating point.  This allows the atomic operation to properly
     succeed even with NaNs and -0.0.  */
  stmt = gimple_build_cond_empty
    (build2 (NE_EXPR, boolean_type_node,
	     new_storedi, old_vali));
  gsi_insert_before (&si, stmt, GSI_SAME_STMT);

  /* Update cfg.  The fallthru edge becomes the loop-exit (false) edge
     and a new true edge back to LOOP_HEADER forms the retry loop.  */
  e = single_succ_edge (store_bb);
  e->flags &= ~EDGE_FALLTHRU;
  e->flags |= EDGE_FALSE_VALUE;

  e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);

  /* Copy the new value to loadedi (we already did that before the condition
     if we are not in SSA).  */
  if (gimple_in_ssa_p (cfun))
    {
      phi = gimple_seq_first_stmt (phi_nodes (loop_header));
      SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
    }

  /* Remove GIMPLE_OMP_ATOMIC_STORE.  */
  gsi_remove (&si, true);

  /* Register the retry loop with the loop tree.  */
  struct loop *loop = alloc_loop ();
  loop->header = loop_header;
  loop->latch = store_bb;
  add_loop (loop, loop_header->loop_father);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);

  return true;
}

/* A subroutine of expand_omp_atomic.  Implement the atomic operation as:

      GOMP_atomic_start ();
      *addr = rhs;
      GOMP_atomic_end ();

   The result is not globally atomic, but works so long as all parallel
   references are within #pragma omp atomic directives.  According to
   responses received from omp@openmp.org, appears to be within spec.
   Which makes sense, since that's how several other compilers handle
   this situation as well.
   LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
   expanding.  STORED_VAL is the operand of the matching
   GIMPLE_OMP_ATOMIC_STORE.

   We replace
   GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
   loaded_val = *addr;

   and replace
   GIMPLE_OMP_ATOMIC_STORE (stored_val) with
   *addr = stored_val;
*/

static bool
expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
			 tree addr, tree loaded_val, tree stored_val)
{
  gimple_stmt_iterator si;
  gassign *stmt;
  tree t;

  /* Replace the load marker with GOMP_atomic_start () followed by the
     plain load.  */
  si = gsi_last_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);

  t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
  t = build_call_expr (t, 0);
  force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);

  stmt = gimple_build_assign (loaded_val, build_simple_mem_ref (addr));
  gsi_insert_before (&si, stmt, GSI_SAME_STMT);
  gsi_remove (&si, true);

  /* Replace the store marker with the plain store followed by
     GOMP_atomic_end ().  */
  si = gsi_last_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);

  stmt = gimple_build_assign (build_simple_mem_ref (unshare_expr (addr)),
			      stored_val);
  gsi_insert_before (&si, stmt, GSI_SAME_STMT);

  t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
  t = build_call_expr (t, 0);
  force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&si, true);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);
  return true;
}

/* Expand a GIMPLE_OMP_ATOMIC statement.  We try to expand
   using expand_omp_atomic_fetch_op.  If it failed, we try to
   call expand_omp_atomic_pipeline, and if it fails too, the
   ultimate fallback is wrapping the operation in a mutex
   (expand_omp_atomic_mutex).  REGION is the atomic region built
   by build_omp_regions_1().  */

static void
expand_omp_atomic (struct omp_region *region)
{
  basic_block load_bb = region->entry, store_bb = region->exit;
  gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
  gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
  tree loaded_val = gimple_omp_atomic_load_lhs (load);
  tree addr = gimple_omp_atomic_load_rhs (load);
  tree stored_val = gimple_omp_atomic_store_val (store);
  tree type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
  HOST_WIDE_INT index;

  /* Make sure the type is one of the supported sizes.  */
  index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
  index = exact_log2 (index);
  if (index >= 0 && index <= 4)
    {
      unsigned int align = TYPE_ALIGN_UNIT (type);

      /* __sync builtins require strict data alignment.  */
      if (exact_log2 (align) >= index)
	{
	  /* Atomic load.  */
	  if (loaded_val == stored_val
	      && (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT
		  || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT)
	      && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD
	      && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
	    return;

	  /* Atomic store.  */
	  if ((GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT
	       || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT)
	      && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD
	      && store_bb == single_succ (load_bb)
	      && first_stmt (store_bb) == store
	      && expand_omp_atomic_store (load_bb, addr, loaded_val,
					  stored_val, index))
	    return;

	  /* When possible, use specialized atomic update functions.  */
	  if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
	      && store_bb == single_succ (load_bb)
	      && expand_omp_atomic_fetch_op (load_bb, addr,
					     loaded_val, stored_val, index))
	    return;

	  /* If we don't have specialized __sync builtins, try and implement
	     as a compare and swap loop.  */
	  if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
					  loaded_val, stored_val, index))
	    return;
	}
    }

  /* The ultimate fallback is wrapping the operation in a mutex.  */
  expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
}

/* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
   at REGION_EXIT.  */

static void
mark_loops_in_oacc_kernels_region (basic_block region_entry,
				   basic_block region_exit)
{
  struct loop *outer = region_entry->loop_father;
  gcc_assert (region_exit == NULL || outer == region_exit->loop_father);

  /* Don't parallelize the kernels region if it contains more than one outer
     loop.  */
  unsigned int nr_outer_loops = 0;
  struct loop *single_outer = NULL;
  for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
    {
      gcc_assert (loop_outer (loop) == outer);

      /* Loops dominated by the region entry (and not by the region exit)
	 are the ones inside the region.  */
      if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
	continue;

      if (region_exit != NULL
	  && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
	continue;

      nr_outer_loops++;
      single_outer = loop;
    }
  if (nr_outer_loops != 1)
    return;

  /* Require a single chain of nested loops: bail out if any nesting
     level contains a sibling loop.  */
  for (struct loop *loop = single_outer->inner; loop != NULL; loop = loop->inner)
    if (loop->next)
      return;

  /* Mark the loops in the region.  */
  for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
    loop->in_oacc_kernels_region = true;
}

/* Types used to pass grid and workgroup sizes to kernel invocation.  */

struct GTY(()) grid_launch_attributes_trees
{
  /* Array type of three uint32s, one per possible grid dimension.  */
  tree kernel_dim_array_type;
  /* FIELD_DECL for the number of dimensions ("ndim").  */
  tree kernel_lattrs_dimnum_decl;
  /* FIELD_DECL for the grid sizes ("grid_size").  */
  tree kernel_lattrs_grid_decl;
  /* FIELD_DECL for the work-group sizes ("group_size").  */
  tree kernel_lattrs_group_decl;
  /* The RECORD_TYPE combining the fields above.  */
  tree kernel_launch_attributes_type;
};

/* Cached launch-attribute trees, created on first use and preserved
   across garbage collection.  */
static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;

/* Create types used to pass kernel launch attributes to target.
 */

static void
grid_create_kernel_launch_attr_types (void)
{
  /* The types are built only once and cached in GRID_ATTR_TREES.  */
  if (grid_attr_trees)
    return;
  grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();

  tree dim_arr_index_type
    = build_index_type (build_int_cst (integer_type_node, 2));
  grid_attr_trees->kernel_dim_array_type
    = build_array_type (uint32_type_node, dim_arr_index_type);

  grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
  grid_attr_trees->kernel_lattrs_dimnum_decl
    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
		  uint32_type_node);
  DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;

  grid_attr_trees->kernel_lattrs_grid_decl
    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
		  grid_attr_trees->kernel_dim_array_type);
  DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
    = grid_attr_trees->kernel_lattrs_dimnum_decl;
  grid_attr_trees->kernel_lattrs_group_decl
    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
		  grid_attr_trees->kernel_dim_array_type);
  DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
    = grid_attr_trees->kernel_lattrs_grid_decl;
  finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
			 "__gomp_kernel_launch_attributes",
			 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
}

/* Insert before the current statement in GSI a store of VALUE to INDEX of
   array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR.  VALUE must be
   of type uint32_type_node.  */

static void
grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
			     tree fld_decl, int index, tree value)
{
  tree ref = build4 (ARRAY_REF, uint32_type_node,
		     build3 (COMPONENT_REF,
			     grid_attr_trees->kernel_dim_array_type,
			     range_var, fld_decl, NULL_TREE),
		     build_int_cst (integer_type_node, index),
		     NULL_TREE, NULL_TREE);
  gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
}

/* Return a tree representation of a pointer to a structure with grid and
   work-group size information.  Statements filling that information will be
   inserted before GSI, TGT_STMT is the target statement which has the
   necessary information in it.  */

static tree
grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
				   gomp_target *tgt_stmt)
{
  grid_create_kernel_launch_attr_types ();
  tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
				"__kernel_launch_attrs");

  /* Fill the grid_size and group_size arrays from the _griddim_ clauses
     of TGT_STMT, remembering the highest dimension seen.  */
  unsigned max_dim = 0;
  for (tree clause = gimple_omp_target_clauses (tgt_stmt);
       clause;
       clause = OMP_CLAUSE_CHAIN (clause))
    {
      if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
	continue;

      unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
      max_dim = MAX (dim, max_dim);

      grid_insert_store_range_dim (gsi, lattrs,
				   grid_attr_trees->kernel_lattrs_grid_decl,
				   dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
      grid_insert_store_range_dim (gsi, lattrs,
				   grid_attr_trees->kernel_lattrs_group_decl,
				   dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
    }

  tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
			grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
  /* At most three dimensions (0, 1 and 2) are supported.  */
  gcc_checking_assert (max_dim <= 2);
  tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
  gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
		     GSI_SAME_STMT);
  TREE_ADDRESSABLE (lattrs) = 1;
  return build_fold_addr_expr (lattrs);
}

/* Build target argument identifier from the DEVICE identifier, value
   identifier ID
   and whether the element also has a SUBSEQUENT_PARAM.  */

static tree
get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
{
  /* OR the device, the optional subsequent-parameter flag and the value
     identifier into a single integer constant.  */
  tree t = build_int_cst (integer_type_node, device);
  if (subseqent_param)
    t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
		     build_int_cst (integer_type_node,
				    GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
  t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
		   build_int_cst (integer_type_node, id));
  return t;
}

/* Like above but return it in type that can be directly stored as an element
   of the argument array.  */

static tree
get_target_argument_identifier (int device, bool subseqent_param, int id)
{
  tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
  return fold_convert (ptr_type_node, t);
}

/* Return a target argument consisting of DEVICE identifier, value identifier
   ID, and the actual VALUE.  Statements needed to compute the result are
   inserted before GSI.  */

static tree
get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
			   tree value)
{
  /* The value occupies the bits above GOMP_TARGET_ARG_VALUE_SHIFT; the
     identifier bits sit below it.  */
  tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
			fold_convert (integer_type_node, value),
			build_int_cst (unsigned_type_node,
				       GOMP_TARGET_ARG_VALUE_SHIFT));
  t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
		   get_target_argument_identifier_1 (device, false, id));
  t = fold_convert (ptr_type_node, t);
  return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
}

/* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
   push one argument to ARGS with both the DEVICE, ID and VALUE embedded in it,
   otherwise push an identifier (with DEVICE and ID) and the VALUE in two
   arguments.  */

static void
push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
					 int id, tree value, vec <tree> *args)
{
  /* Small compile-time constants fit in the value bits of a single
     argument word; anything else goes as an identifier/value pair.  */
  if (tree_fits_shwi_p (value)
      && tree_to_shwi (value) > -(1 << 15)
      && tree_to_shwi (value) < (1 << 15))
    args->quick_push (get_target_argument_value (gsi, device, id, value));
  else
    {
      args->quick_push (get_target_argument_identifier (device, true, id));
      value = fold_convert (ptr_type_node, value);
      value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
					GSI_SAME_STMT);
      args->quick_push (value);
    }
}

/* Create an array of arguments that is then passed to GOMP_target.  */

static tree
get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
{
  auto_vec <tree, 6> args;
  tree clauses = gimple_omp_target_clauses (tgt_stmt);
  /* num_teams and thread_limit default to -1 when the respective clause
     is absent.  */
  tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
  if (c)
    t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
  else
    t = integer_minus_one_node;
  push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
					   GOMP_TARGET_ARG_NUM_TEAMS, t, &args);

  c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
  if (c)
    t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
  else
    t = integer_minus_one_node;
  push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
					   GOMP_TARGET_ARG_THREAD_LIMIT, t,
					   &args);

  /* Add HSA-specific grid sizes, if available.  */
  if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
		       OMP_CLAUSE__GRIDDIM_))
    {
      t = get_target_argument_identifier (GOMP_DEVICE_HSA, true,
					  GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES);
      args.quick_push (t);
      args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
    }

  /* Produce more, perhaps device specific, arguments here.
*/ + + tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node, + args.length () + 1), + ".omp_target_args"); + for (unsigned i = 0; i < args.length (); i++) + { + tree ref = build4 (ARRAY_REF, ptr_type_node, argarray, + build_int_cst (integer_type_node, i), + NULL_TREE, NULL_TREE); + gsi_insert_before (gsi, gimple_build_assign (ref, args[i]), + GSI_SAME_STMT); + } + tree ref = build4 (ARRAY_REF, ptr_type_node, argarray, + build_int_cst (integer_type_node, args.length ()), + NULL_TREE, NULL_TREE); + gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node), + GSI_SAME_STMT); + TREE_ADDRESSABLE (argarray) = 1; + return build_fold_addr_expr (argarray); +} + +/* Expand the GIMPLE_OMP_TARGET starting at REGION. */ + +static void +expand_omp_target (struct omp_region *region) +{ + basic_block entry_bb, exit_bb, new_bb; + struct function *child_cfun; + tree child_fn, block, t; + gimple_stmt_iterator gsi; + gomp_target *entry_stmt; + gimple *stmt; + edge e; + bool offloaded, data_region; + + entry_stmt = as_a <gomp_target *> (last_stmt (region->entry)); + new_bb = region->entry; + + offloaded = is_gimple_omp_offloaded (entry_stmt); + switch (gimple_omp_target_kind (entry_stmt)) + { + case GF_OMP_TARGET_KIND_REGION: + case GF_OMP_TARGET_KIND_UPDATE: + case GF_OMP_TARGET_KIND_ENTER_DATA: + case GF_OMP_TARGET_KIND_EXIT_DATA: + case GF_OMP_TARGET_KIND_OACC_PARALLEL: + case GF_OMP_TARGET_KIND_OACC_KERNELS: + case GF_OMP_TARGET_KIND_OACC_UPDATE: + case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: + case GF_OMP_TARGET_KIND_OACC_DECLARE: + data_region = false; + break; + case GF_OMP_TARGET_KIND_DATA: + case GF_OMP_TARGET_KIND_OACC_DATA: + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: + data_region = true; + break; + default: + gcc_unreachable (); + } + + child_fn = NULL_TREE; + child_cfun = NULL; + if (offloaded) + { + child_fn = gimple_omp_target_child_fn (entry_stmt); + child_cfun = DECL_STRUCT_FUNCTION (child_fn); + } + + /* Supported by expand_omp_taskreg, but 
not here. */ + if (child_cfun != NULL) + gcc_checking_assert (!child_cfun->cfg); + gcc_checking_assert (!gimple_in_ssa_p (cfun)); + + entry_bb = region->entry; + exit_bb = region->exit; + + if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS) + mark_loops_in_oacc_kernels_region (region->entry, region->exit); + + if (offloaded) + { + unsigned srcidx, dstidx, num; + + /* If the offloading region needs data sent from the parent + function, then the very first statement (except possible + tree profile counter updates) of the offloading body + is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since + &.OMP_DATA_O is passed as an argument to the child function, + we need to replace it with the argument as seen by the child + function. + + In most cases, this will end up being the identity assignment + .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had + a function call that has been inlined, the original PARM_DECL + .OMP_DATA_I may have been converted into a different local + variable. In which case, we need to keep the assignment. */ + tree data_arg = gimple_omp_target_data_arg (entry_stmt); + if (data_arg) + { + basic_block entry_succ_bb = single_succ (entry_bb); + gimple_stmt_iterator gsi; + tree arg; + gimple *tgtcopy_stmt = NULL; + tree sender = TREE_VEC_ELT (data_arg, 0); + + for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi)) + { + gcc_assert (!gsi_end_p (gsi)); + stmt = gsi_stmt (gsi); + if (gimple_code (stmt) != GIMPLE_ASSIGN) + continue; + + if (gimple_num_ops (stmt) == 2) + { + tree arg = gimple_assign_rhs1 (stmt); + + /* We're ignoring the subcode because we're + effectively doing a STRIP_NOPS. 
*/ + + if (TREE_CODE (arg) == ADDR_EXPR + && TREE_OPERAND (arg, 0) == sender) + { + tgtcopy_stmt = stmt; + break; + } + } + } + + gcc_assert (tgtcopy_stmt != NULL); + arg = DECL_ARGUMENTS (child_fn); + + gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg); + gsi_remove (&gsi, true); + } + + /* Declare local variables needed in CHILD_CFUN. */ + block = DECL_INITIAL (child_fn); + BLOCK_VARS (block) = vec2chain (child_cfun->local_decls); + /* The gimplifier could record temporaries in the offloading block + rather than in containing function's local_decls chain, + which would mean cgraph missed finalizing them. Do it now. */ + for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t)) + if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t)) + varpool_node::finalize_decl (t); + DECL_SAVED_TREE (child_fn) = NULL; + /* We'll create a CFG for child_fn, so no gimple body is needed. */ + gimple_set_body (child_fn, NULL); + TREE_USED (block) = 1; + + /* Reset DECL_CONTEXT on function arguments. */ + for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t)) + DECL_CONTEXT (t) = child_fn; + + /* Split ENTRY_BB at GIMPLE_*, + so that it can be moved to the child function. */ + gsi = gsi_last_bb (entry_bb); + stmt = gsi_stmt (gsi); + gcc_assert (stmt + && gimple_code (stmt) == gimple_code (entry_stmt)); + e = split_block (entry_bb, stmt); + gsi_remove (&gsi, true); + entry_bb = e->dest; + single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; + + /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */ + if (exit_bb) + { + gsi = gsi_last_bb (exit_bb); + gcc_assert (!gsi_end_p (gsi) + && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); + stmt = gimple_build_return (NULL); + gsi_insert_after (&gsi, stmt, GSI_SAME_STMT); + gsi_remove (&gsi, true); + } + + /* Move the offloading region into CHILD_CFUN. 
*/ + + block = gimple_block (entry_stmt); + + new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block); + if (exit_bb) + single_succ_edge (new_bb)->flags = EDGE_FALLTHRU; + /* When the OMP expansion process cannot guarantee an up-to-date + loop tree arrange for the child function to fixup loops. */ + if (loops_state_satisfies_p (LOOPS_NEED_FIXUP)) + child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP; + + /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */ + num = vec_safe_length (child_cfun->local_decls); + for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++) + { + t = (*child_cfun->local_decls)[srcidx]; + if (DECL_CONTEXT (t) == cfun->decl) + continue; + if (srcidx != dstidx) + (*child_cfun->local_decls)[dstidx] = t; + dstidx++; + } + if (dstidx != num) + vec_safe_truncate (child_cfun->local_decls, dstidx); + + /* Inform the callgraph about the new function. */ + child_cfun->curr_properties = cfun->curr_properties; + child_cfun->has_simduid_loops |= cfun->has_simduid_loops; + child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops; + cgraph_node *node = cgraph_node::get_create (child_fn); + node->parallelized_function = 1; + cgraph_node::add_new_function (child_fn, true); + + /* Add the new function to the offload table. */ + if (ENABLE_OFFLOADING) + vec_safe_push (offload_funcs, child_fn); + + bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl) + && !DECL_ASSEMBLER_NAME_SET_P (child_fn); + + /* Fix the callgraph edges for child_cfun. Those for cfun will be + fixed in a following pass. */ + push_cfun (child_cfun); + if (need_asm) + assign_assembler_name_if_neeeded (child_fn); + cgraph_edge::rebuild_edges (); + + /* Some EH regions might become dead, see PR34608. If + pass_cleanup_cfg isn't the first pass to happen with the + new child, these dead EH edges might cause problems. + Clean them up now. 
*/ + if (flag_exceptions) + { + basic_block bb; + bool changed = false; + + FOR_EACH_BB_FN (bb, cfun) + changed |= gimple_purge_dead_eh_edges (bb); + if (changed) + cleanup_tree_cfg (); + } + if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP)) + verify_loop_structure (); + pop_cfun (); + + if (dump_file && !gimple_in_ssa_p (cfun)) + { + omp_any_child_fn_dumped = true; + dump_function_header (dump_file, child_fn, dump_flags); + dump_function_to_file (child_fn, dump_file, dump_flags); + } + } + + /* Emit a library call to launch the offloading region, or do data + transfers. */ + tree t1, t2, t3, t4, device, cond, depend, c, clauses; + enum built_in_function start_ix; + location_t clause_loc; + unsigned int flags_i = 0; + bool oacc_kernels_p = false; + + switch (gimple_omp_target_kind (entry_stmt)) + { + case GF_OMP_TARGET_KIND_REGION: + start_ix = BUILT_IN_GOMP_TARGET; + break; + case GF_OMP_TARGET_KIND_DATA: + start_ix = BUILT_IN_GOMP_TARGET_DATA; + break; + case GF_OMP_TARGET_KIND_UPDATE: + start_ix = BUILT_IN_GOMP_TARGET_UPDATE; + break; + case GF_OMP_TARGET_KIND_ENTER_DATA: + start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA; + break; + case GF_OMP_TARGET_KIND_EXIT_DATA: + start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA; + flags_i |= GOMP_TARGET_FLAG_EXIT_DATA; + break; + case GF_OMP_TARGET_KIND_OACC_KERNELS: + oacc_kernels_p = true; + /* FALLTHROUGH */ + case GF_OMP_TARGET_KIND_OACC_PARALLEL: + start_ix = BUILT_IN_GOACC_PARALLEL; + break; + case GF_OMP_TARGET_KIND_OACC_DATA: + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: + start_ix = BUILT_IN_GOACC_DATA_START; + break; + case GF_OMP_TARGET_KIND_OACC_UPDATE: + start_ix = BUILT_IN_GOACC_UPDATE; + break; + case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: + start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA; + break; + case GF_OMP_TARGET_KIND_OACC_DECLARE: + start_ix = BUILT_IN_GOACC_DECLARE; + break; + default: + gcc_unreachable (); + } + + clauses = gimple_omp_target_clauses (entry_stmt); + + /* By default, the value 
of DEVICE is GOMP_DEVICE_ICV (let runtime + library choose) and there is no conditional. */ + cond = NULL_TREE; + device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV); + + c = omp_find_clause (clauses, OMP_CLAUSE_IF); + if (c) + cond = OMP_CLAUSE_IF_EXPR (c); + + c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE); + if (c) + { + /* Even if we pass it to all library function calls, it is currently only + defined/used for the OpenMP target ones. */ + gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET + || start_ix == BUILT_IN_GOMP_TARGET_DATA + || start_ix == BUILT_IN_GOMP_TARGET_UPDATE + || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA); + + device = OMP_CLAUSE_DEVICE_ID (c); + clause_loc = OMP_CLAUSE_LOCATION (c); + } + else + clause_loc = gimple_location (entry_stmt); + + c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT); + if (c) + flags_i |= GOMP_TARGET_FLAG_NOWAIT; + + /* Ensure 'device' is of the correct type. */ + device = fold_convert_loc (clause_loc, integer_type_node, device); + + /* If we found the clause 'if (cond)', build + (cond ? device : GOMP_DEVICE_HOST_FALLBACK). 
*/ + if (cond) + { + cond = gimple_boolify (cond); + + basic_block cond_bb, then_bb, else_bb; + edge e; + tree tmp_var; + + tmp_var = create_tmp_var (TREE_TYPE (device)); + if (offloaded) + e = split_block_after_labels (new_bb); + else + { + gsi = gsi_last_bb (new_bb); + gsi_prev (&gsi); + e = split_block (new_bb, gsi_stmt (gsi)); + } + cond_bb = e->src; + new_bb = e->dest; + remove_edge (e); + + then_bb = create_empty_bb (cond_bb); + else_bb = create_empty_bb (then_bb); + set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb); + set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb); + + stmt = gimple_build_cond_empty (cond); + gsi = gsi_last_bb (cond_bb); + gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); + + gsi = gsi_start_bb (then_bb); + stmt = gimple_build_assign (tmp_var, device); + gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); + + gsi = gsi_start_bb (else_bb); + stmt = gimple_build_assign (tmp_var, + build_int_cst (integer_type_node, + GOMP_DEVICE_HOST_FALLBACK)); + gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); + + make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE); + make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE); + add_bb_to_loop (then_bb, cond_bb->loop_father); + add_bb_to_loop (else_bb, cond_bb->loop_father); + make_edge (then_bb, new_bb, EDGE_FALLTHRU); + make_edge (else_bb, new_bb, EDGE_FALLTHRU); + + device = tmp_var; + gsi = gsi_last_bb (new_bb); + } + else + { + gsi = gsi_last_bb (new_bb); + device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE, + true, GSI_SAME_STMT); + } + + t = gimple_omp_target_data_arg (entry_stmt); + if (t == NULL) + { + t1 = size_zero_node; + t2 = build_zero_cst (ptr_type_node); + t3 = t2; + t4 = t2; + } + else + { + t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1)))); + t1 = size_binop (PLUS_EXPR, t1, size_int (1)); + t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0)); + t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1)); + t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2)); + } + + 
gimple *g; + bool tagging = false; + /* The maximum number used by any start_ix, without varargs. */ + auto_vec<tree, 11> args; + args.quick_push (device); + if (offloaded) + args.quick_push (build_fold_addr_expr (child_fn)); + args.quick_push (t1); + args.quick_push (t2); + args.quick_push (t3); + args.quick_push (t4); + switch (start_ix) + { + case BUILT_IN_GOACC_DATA_START: + case BUILT_IN_GOACC_DECLARE: + case BUILT_IN_GOMP_TARGET_DATA: + break; + case BUILT_IN_GOMP_TARGET: + case BUILT_IN_GOMP_TARGET_UPDATE: + case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA: + args.quick_push (build_int_cst (unsigned_type_node, flags_i)); + c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND); + if (c) + depend = OMP_CLAUSE_DECL (c); + else + depend = build_int_cst (ptr_type_node, 0); + args.quick_push (depend); + if (start_ix == BUILT_IN_GOMP_TARGET) + args.quick_push (get_target_arguments (&gsi, entry_stmt)); + break; + case BUILT_IN_GOACC_PARALLEL: + { + oacc_set_fn_attrib (child_fn, clauses, oacc_kernels_p, &args); + tagging = true; + } + /* FALLTHRU */ + case BUILT_IN_GOACC_ENTER_EXIT_DATA: + case BUILT_IN_GOACC_UPDATE: + { + tree t_async = NULL_TREE; + + /* If present, use the value specified by the respective + clause, making sure that is of the correct type. */ + c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC); + if (c) + t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c), + integer_type_node, + OMP_CLAUSE_ASYNC_EXPR (c)); + else if (!tagging) + /* Default values for t_async. */ + t_async = fold_convert_loc (gimple_location (entry_stmt), + integer_type_node, + build_int_cst (integer_type_node, + GOMP_ASYNC_SYNC)); + if (tagging && t_async) + { + unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX; + + if (TREE_CODE (t_async) == INTEGER_CST) + { + /* See if we can pack the async arg in to the tag's + operand. 
*/ + i_async = TREE_INT_CST_LOW (t_async); + if (i_async < GOMP_LAUNCH_OP_MAX) + t_async = NULL_TREE; + else + i_async = GOMP_LAUNCH_OP_MAX; + } + args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE, + i_async)); + } + if (t_async) + args.safe_push (t_async); + + /* Save the argument index, and ... */ + unsigned t_wait_idx = args.length (); + unsigned num_waits = 0; + c = omp_find_clause (clauses, OMP_CLAUSE_WAIT); + if (!tagging || c) + /* ... push a placeholder. */ + args.safe_push (integer_zero_node); + + for (; c; c = OMP_CLAUSE_CHAIN (c)) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT) + { + args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c), + integer_type_node, + OMP_CLAUSE_WAIT_EXPR (c))); + num_waits++; + } + + if (!tagging || num_waits) + { + tree len; + + /* Now that we know the number, update the placeholder. */ + if (tagging) + len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits); + else + len = build_int_cst (integer_type_node, num_waits); + len = fold_convert_loc (gimple_location (entry_stmt), + unsigned_type_node, len); + args[t_wait_idx] = len; + } + } + break; + default: + gcc_unreachable (); + } + if (tagging) + /* Push terminal marker - zero. */ + args.safe_push (oacc_launch_pack (0, NULL_TREE, 0)); + + g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args); + gimple_set_location (g, gimple_location (entry_stmt)); + gsi_insert_before (&gsi, g, GSI_SAME_STMT); + if (!offloaded) + { + g = gsi_stmt (gsi); + gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET); + gsi_remove (&gsi, true); + } + if (data_region && region->exit) + { + gsi = gsi_last_bb (region->exit); + g = gsi_stmt (gsi); + gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN); + gsi_remove (&gsi, true); + } +} + +/* Expand KFOR loop as a HSA grifidied kernel, i.e. as a body only with + iteration variable derived from the thread number. 
INTRA_GROUP means this + is an expansion of a loop iterating over work-items within a separate + iteration over groups. */ + +static void +grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group) +{ + gimple_stmt_iterator gsi; + gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry)); + gcc_checking_assert (gimple_omp_for_kind (for_stmt) + == GF_OMP_FOR_KIND_GRID_LOOP); + size_t collapse = gimple_omp_for_collapse (for_stmt); + struct omp_for_data_loop *loops + = XALLOCAVEC (struct omp_for_data_loop, + gimple_omp_for_collapse (for_stmt)); + struct omp_for_data fd; + + remove_edge (BRANCH_EDGE (kfor->entry)); + basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest; + + gcc_assert (kfor->cont); + omp_extract_for_data (for_stmt, &fd, loops); + + gsi = gsi_start_bb (body_bb); + + for (size_t dim = 0; dim < collapse; dim++) + { + tree type, itype; + itype = type = TREE_TYPE (fd.loops[dim].v); + if (POINTER_TYPE_P (type)) + itype = signed_type_for (type); + + tree n1 = fd.loops[dim].n1; + tree step = fd.loops[dim].step; + n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1), + true, NULL_TREE, true, GSI_SAME_STMT); + step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step), + true, NULL_TREE, true, GSI_SAME_STMT); + tree threadid; + if (gimple_omp_for_grid_group_iter (for_stmt)) + { + gcc_checking_assert (!intra_group); + threadid = build_call_expr (builtin_decl_explicit + (BUILT_IN_HSA_WORKGROUPID), 1, + build_int_cstu (unsigned_type_node, dim)); + } + else if (intra_group) + threadid = build_call_expr (builtin_decl_explicit + (BUILT_IN_HSA_WORKITEMID), 1, + build_int_cstu (unsigned_type_node, dim)); + else + threadid = build_call_expr (builtin_decl_explicit + (BUILT_IN_HSA_WORKITEMABSID), 1, + build_int_cstu (unsigned_type_node, dim)); + threadid = fold_convert (itype, threadid); + threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE, + true, GSI_SAME_STMT); + + tree startvar = fd.loops[dim].v; + tree t = 
fold_build2 (MULT_EXPR, itype, threadid, step); + if (POINTER_TYPE_P (type)) + t = fold_build_pointer_plus (n1, t); + else + t = fold_build2 (PLUS_EXPR, type, t, n1); + t = fold_convert (type, t); + t = force_gimple_operand_gsi (&gsi, t, + DECL_P (startvar) + && TREE_ADDRESSABLE (startvar), + NULL_TREE, true, GSI_SAME_STMT); + gassign *assign_stmt = gimple_build_assign (startvar, t); + gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); + } + /* Remove the omp for statement */ + gsi = gsi_last_bb (kfor->entry); + gsi_remove (&gsi, true); + + /* Remove the GIMPLE_OMP_CONTINUE statement. */ + gsi = gsi_last_bb (kfor->cont); + gcc_assert (!gsi_end_p (gsi) + && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE); + gsi_remove (&gsi, true); + + /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */ + gsi = gsi_last_bb (kfor->exit); + gcc_assert (!gsi_end_p (gsi) + && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); + if (intra_group) + gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT); + gsi_remove (&gsi, true); + + /* Fixup the much simpler CFG. */ + remove_edge (find_edge (kfor->cont, body_bb)); + + if (kfor->cont != body_bb) + set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb); + set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont); +} + +/* Structure passed to grid_remap_kernel_arg_accesses so that it can remap + argument_decls. */ + +struct grid_arg_decl_map +{ + tree old_arg; + tree new_arg; +}; + +/* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones + pertaining to kernel function. 
*/ + +static tree +grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data) +{ + struct walk_stmt_info *wi = (struct walk_stmt_info *) data; + struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info; + tree t = *tp; + + if (t == adm->old_arg) + *tp = adm->new_arg; + *walk_subtrees = !TYPE_P (t) && !DECL_P (t); + return NULL_TREE; +} + +/* If TARGET region contains a kernel body for loop, remove its region from the + TARGET and expand it in HSA gridified kernel fashion. */ + +static void +grid_expand_target_grid_body (struct omp_region *target) +{ + if (!hsa_gen_requested_p ()) + return; + + gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry)); + struct omp_region **pp; + + for (pp = &target->inner; *pp; pp = &(*pp)->next) + if ((*pp)->type == GIMPLE_OMP_GRID_BODY) + break; + + struct omp_region *gpukernel = *pp; + + tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt); + if (!gpukernel) + { + /* HSA cannot handle OACC stuff. */ + if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION) + return; + gcc_checking_assert (orig_child_fndecl); + gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt), + OMP_CLAUSE__GRIDDIM_)); + cgraph_node *n = cgraph_node::get (orig_child_fndecl); + + hsa_register_kernel (n); + return; + } + + gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt), + OMP_CLAUSE__GRIDDIM_)); + tree inside_block = gimple_block (first_stmt (single_succ (gpukernel->entry))); + *pp = gpukernel->next; + for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next) + if ((*pp)->type == GIMPLE_OMP_FOR) + break; + + struct omp_region *kfor = *pp; + gcc_assert (kfor); + gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry)); + gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP); + *pp = kfor->next; + if (kfor->inner) + { + if (gimple_omp_for_grid_group_iter (for_stmt)) + { + struct omp_region **next_pp; + for (pp = &kfor->inner; *pp; pp = next_pp) 
+ { + next_pp = &(*pp)->next; + if ((*pp)->type != GIMPLE_OMP_FOR) + continue; + gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry)); + gcc_assert (gimple_omp_for_kind (inner) + == GF_OMP_FOR_KIND_GRID_LOOP); + grid_expand_omp_for_loop (*pp, true); + *pp = (*pp)->next; + next_pp = pp; + } + } + expand_omp (kfor->inner); + } + if (gpukernel->inner) + expand_omp (gpukernel->inner); + + tree kern_fndecl = copy_node (orig_child_fndecl); + DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel"); + SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl)); + tree tgtblock = gimple_block (tgt_stmt); + tree fniniblock = make_node (BLOCK); + BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock; + BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock); + BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock); + BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl; + DECL_INITIAL (kern_fndecl) = fniniblock; + push_struct_function (kern_fndecl); + cfun->function_end_locus = gimple_location (tgt_stmt); + init_tree_ssa (cfun); + pop_cfun (); + + tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl); + gcc_assert (!DECL_CHAIN (old_parm_decl)); + tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl)); + DECL_CONTEXT (new_parm_decl) = kern_fndecl; + DECL_ARGUMENTS (kern_fndecl) = new_parm_decl; + gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl)))); + DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl)); + DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl; + struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl); + kern_cfun->curr_properties = cfun->curr_properties; + + grid_expand_omp_for_loop (kfor, false); + + /* Remove the omp for statement */ + gimple_stmt_iterator gsi = gsi_last_bb (gpukernel->entry); + gsi_remove (&gsi, true); + /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real + return. 
*/ + gsi = gsi_last_bb (gpukernel->exit); + gcc_assert (!gsi_end_p (gsi) + && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); + gimple *ret_stmt = gimple_build_return (NULL); + gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT); + gsi_remove (&gsi, true); + + /* Statements in the first BB in the target construct have been produced by + target lowering and must be copied inside the GPUKERNEL, with the two + exceptions of the first OMP statement and the OMP_DATA assignment + statement. */ + gsi = gsi_start_bb (single_succ (gpukernel->entry)); + tree data_arg = gimple_omp_target_data_arg (tgt_stmt); + tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL; + for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry)); + !gsi_end_p (tsi); gsi_next (&tsi)) + { + gimple *stmt = gsi_stmt (tsi); + if (is_gimple_omp (stmt)) + break; + if (sender + && is_gimple_assign (stmt) + && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR + && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender) + continue; + gimple *copy = gimple_copy (stmt); + gsi_insert_before (&gsi, copy, GSI_SAME_STMT); + gimple_set_block (copy, fniniblock); + } + + move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry), + gpukernel->exit, inside_block); + + cgraph_node *kcn = cgraph_node::get_create (kern_fndecl); + kcn->mark_force_output (); + cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl); + + hsa_register_kernel (kcn, orig_child); + + cgraph_node::add_new_function (kern_fndecl, true); + push_cfun (kern_cfun); + cgraph_edge::rebuild_edges (); + + /* Re-map any mention of the PARM_DECL of the original function to the + PARM_DECL of the new one. + + TODO: It would be great if lowering produced references into the GPU + kernel decl straight away and we did not have to do this. 
*/ + struct grid_arg_decl_map adm; + adm.old_arg = old_parm_decl; + adm.new_arg = new_parm_decl; + basic_block bb; + FOR_EACH_BB_FN (bb, kern_cfun) + { + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + struct walk_stmt_info wi; + memset (&wi, 0, sizeof (wi)); + wi.info = &adm; + walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi); + } + } + pop_cfun (); + + return; +} + +/* Expand the parallel region tree rooted at REGION. Expansion + proceeds in depth-first order. Innermost regions are expanded + first. This way, parallel regions that require a new function to + be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any + internal dependencies in their body. */ + +static void +expand_omp (struct omp_region *region) +{ + omp_any_child_fn_dumped = false; + while (region) + { + location_t saved_location; + gimple *inner_stmt = NULL; + + /* First, determine whether this is a combined parallel+workshare + region. */ + if (region->type == GIMPLE_OMP_PARALLEL) + determine_parallel_type (region); + else if (region->type == GIMPLE_OMP_TARGET) + grid_expand_target_grid_body (region); + + if (region->type == GIMPLE_OMP_FOR + && gimple_omp_for_combined_p (last_stmt (region->entry))) + inner_stmt = last_stmt (region->inner->entry); + + if (region->inner) + expand_omp (region->inner); + + saved_location = input_location; + if (gimple_has_location (last_stmt (region->entry))) + input_location = gimple_location (last_stmt (region->entry)); + + switch (region->type) + { + case GIMPLE_OMP_PARALLEL: + case GIMPLE_OMP_TASK: + expand_omp_taskreg (region); + break; + + case GIMPLE_OMP_FOR: + expand_omp_for (region, inner_stmt); + break; + + case GIMPLE_OMP_SECTIONS: + expand_omp_sections (region); + break; + + case GIMPLE_OMP_SECTION: + /* Individual omp sections are handled together with their + parent GIMPLE_OMP_SECTIONS region. 
*/ + break; + + case GIMPLE_OMP_SINGLE: + expand_omp_single (region); + break; + + case GIMPLE_OMP_ORDERED: + { + gomp_ordered *ord_stmt + = as_a <gomp_ordered *> (last_stmt (region->entry)); + if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt), + OMP_CLAUSE_DEPEND)) + { + /* We'll expand these when expanding corresponding + worksharing region with ordered(n) clause. */ + gcc_assert (region->outer + && region->outer->type == GIMPLE_OMP_FOR); + region->ord_stmt = ord_stmt; + break; + } + } + /* FALLTHRU */ + case GIMPLE_OMP_MASTER: + case GIMPLE_OMP_TASKGROUP: + case GIMPLE_OMP_CRITICAL: + case GIMPLE_OMP_TEAMS: + expand_omp_synch (region); + break; + + case GIMPLE_OMP_ATOMIC_LOAD: + expand_omp_atomic (region); + break; + + case GIMPLE_OMP_TARGET: + expand_omp_target (region); + break; + + default: + gcc_unreachable (); + } + + input_location = saved_location; + region = region->next; + } + if (omp_any_child_fn_dumped) + { + if (dump_file) + dump_function_header (dump_file, current_function_decl, dump_flags); + omp_any_child_fn_dumped = false; + } +} + +/* Helper for build_omp_regions. Scan the dominator tree starting at + block BB. PARENT is the region that contains BB. If SINGLE_TREE is + true, the function ends once a single tree is built (otherwise, whole + forest of OMP constructs may be built). */ + +static void +build_omp_regions_1 (basic_block bb, struct omp_region *parent, + bool single_tree) +{ + gimple_stmt_iterator gsi; + gimple *stmt; + basic_block son; + + gsi = gsi_last_bb (bb); + if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi))) + { + struct omp_region *region; + enum gimple_code code; + + stmt = gsi_stmt (gsi); + code = gimple_code (stmt); + if (code == GIMPLE_OMP_RETURN) + { + /* STMT is the return point out of region PARENT. Mark it + as the exit point and make PARENT the immediately + enclosing region. 
*/ + gcc_assert (parent); + region = parent; + region->exit = bb; + parent = parent->outer; + } + else if (code == GIMPLE_OMP_ATOMIC_STORE) + { + /* GIMPLE_OMP_ATOMIC_STORE is analoguous to + GIMPLE_OMP_RETURN, but matches with + GIMPLE_OMP_ATOMIC_LOAD. */ + gcc_assert (parent); + gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD); + region = parent; + region->exit = bb; + parent = parent->outer; + } + else if (code == GIMPLE_OMP_CONTINUE) + { + gcc_assert (parent); + parent->cont = bb; + } + else if (code == GIMPLE_OMP_SECTIONS_SWITCH) + { + /* GIMPLE_OMP_SECTIONS_SWITCH is part of + GIMPLE_OMP_SECTIONS, and we do nothing for it. */ + } + else + { + region = new_omp_region (bb, code, parent); + /* Otherwise... */ + if (code == GIMPLE_OMP_TARGET) + { + switch (gimple_omp_target_kind (stmt)) + { + case GF_OMP_TARGET_KIND_REGION: + case GF_OMP_TARGET_KIND_DATA: + case GF_OMP_TARGET_KIND_OACC_PARALLEL: + case GF_OMP_TARGET_KIND_OACC_KERNELS: + case GF_OMP_TARGET_KIND_OACC_DATA: + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: + break; + case GF_OMP_TARGET_KIND_UPDATE: + case GF_OMP_TARGET_KIND_ENTER_DATA: + case GF_OMP_TARGET_KIND_EXIT_DATA: + case GF_OMP_TARGET_KIND_OACC_UPDATE: + case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: + case GF_OMP_TARGET_KIND_OACC_DECLARE: + /* ..., other than for those stand-alone directives... */ + region = NULL; + break; + default: + gcc_unreachable (); + } + } + else if (code == GIMPLE_OMP_ORDERED + && omp_find_clause (gimple_omp_ordered_clauses + (as_a <gomp_ordered *> (stmt)), + OMP_CLAUSE_DEPEND)) + /* #pragma omp ordered depend is also just a stand-alone + directive. */ + region = NULL; + /* ..., this directive becomes the parent for a new region. 
*/ + if (region) + parent = region; + } + } + + if (single_tree && !parent) + return; + + for (son = first_dom_son (CDI_DOMINATORS, bb); + son; + son = next_dom_son (CDI_DOMINATORS, son)) + build_omp_regions_1 (son, parent, single_tree); +} + +/* Builds the tree of OMP regions rooted at ROOT, storing it to + root_omp_region. */ + +static void +build_omp_regions_root (basic_block root) +{ + gcc_assert (root_omp_region == NULL); + build_omp_regions_1 (root, NULL, true); + gcc_assert (root_omp_region != NULL); +} + +/* Expands omp construct (and its subconstructs) starting in HEAD. */ + +void +omp_expand_local (basic_block head) +{ + build_omp_regions_root (head); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "\nOMP region tree\n\n"); + dump_omp_region (dump_file, root_omp_region, 0); + fprintf (dump_file, "\n"); + } + + remove_exit_barriers (root_omp_region); + expand_omp (root_omp_region); + + omp_free_regions (); +} + +/* Scan the CFG and build a tree of OMP regions. Return the root of + the OMP region tree. */ + +static void +build_omp_regions (void) +{ + gcc_assert (root_omp_region == NULL); + calculate_dominance_info (CDI_DOMINATORS); + build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false); +} + +/* Main entry point for expanding OMP-GIMPLE into runtime calls. */ + +static unsigned int +execute_expand_omp (void) +{ + build_omp_regions (); + + if (!root_omp_region) + return 0; + + if (dump_file) + { + fprintf (dump_file, "\nOMP region tree\n\n"); + dump_omp_region (dump_file, root_omp_region, 0); + fprintf (dump_file, "\n"); + } + + remove_exit_barriers (root_omp_region); + + expand_omp (root_omp_region); + + if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP)) + verify_loop_structure (); + cleanup_tree_cfg (); + + omp_free_regions (); + + return 0; +} + +/* OMP expansion -- the default pass, run before creation of SSA form. 
*/ + +namespace { + +const pass_data pass_data_expand_omp = +{ + GIMPLE_PASS, /* type */ + "ompexp", /* name */ + OPTGROUP_OPENMP, /* optinfo_flags */ + TV_NONE, /* tv_id */ + PROP_gimple_any, /* properties_required */ + PROP_gimple_eomp, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_expand_omp : public gimple_opt_pass +{ +public: + pass_expand_omp (gcc::context *ctxt) + : gimple_opt_pass (pass_data_expand_omp, ctxt) + {} + + /* opt_pass methods: */ + virtual unsigned int execute (function *) + { + bool gate = ((flag_cilkplus != 0 || flag_openacc != 0 || flag_openmp != 0 + || flag_openmp_simd != 0) + && !seen_error ()); + + /* This pass always runs, to provide PROP_gimple_eomp. + But often, there is nothing to do. */ + if (!gate) + return 0; + + return execute_expand_omp (); + } + +}; // class pass_expand_omp + +} // anon namespace + +gimple_opt_pass * +make_pass_expand_omp (gcc::context *ctxt) +{ + return new pass_expand_omp (ctxt); +} + +namespace { + +const pass_data pass_data_expand_omp_ssa = +{ + GIMPLE_PASS, /* type */ + "ompexpssa", /* name */ + OPTGROUP_OPENMP, /* optinfo_flags */ + TV_NONE, /* tv_id */ + PROP_cfg | PROP_ssa, /* properties_required */ + PROP_gimple_eomp, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */ +}; + +class pass_expand_omp_ssa : public gimple_opt_pass +{ +public: + pass_expand_omp_ssa (gcc::context *ctxt) + : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *fun) + { + return !(fun->curr_properties & PROP_gimple_eomp); + } + virtual unsigned int execute (function *) { return execute_expand_omp (); } + opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); } + +}; // class pass_expand_omp_ssa + +} // anon namespace + +gimple_opt_pass * +make_pass_expand_omp_ssa (gcc::context 
*ctxt) +{ + return new pass_expand_omp_ssa (ctxt); +} + +/* Called from tree-cfg.c::make_edges to create cfg edges for all relevant + GIMPLE_* codes. */ + +bool +omp_make_gimple_edges (basic_block bb, struct omp_region **region, + int *region_idx) +{ + gimple *last = last_stmt (bb); + enum gimple_code code = gimple_code (last); + struct omp_region *cur_region = *region; + bool fallthru = false; + + switch (code) + { + case GIMPLE_OMP_PARALLEL: + case GIMPLE_OMP_TASK: + case GIMPLE_OMP_FOR: + case GIMPLE_OMP_SINGLE: + case GIMPLE_OMP_TEAMS: + case GIMPLE_OMP_MASTER: + case GIMPLE_OMP_TASKGROUP: + case GIMPLE_OMP_CRITICAL: + case GIMPLE_OMP_SECTION: + case GIMPLE_OMP_GRID_BODY: + cur_region = new_omp_region (bb, code, cur_region); + fallthru = true; + break; + + case GIMPLE_OMP_ORDERED: + cur_region = new_omp_region (bb, code, cur_region); + fallthru = true; + if (omp_find_clause (gimple_omp_ordered_clauses + (as_a <gomp_ordered *> (last)), + OMP_CLAUSE_DEPEND)) + cur_region = cur_region->outer; + break; + + case GIMPLE_OMP_TARGET: + cur_region = new_omp_region (bb, code, cur_region); + fallthru = true; + switch (gimple_omp_target_kind (last)) + { + case GF_OMP_TARGET_KIND_REGION: + case GF_OMP_TARGET_KIND_DATA: + case GF_OMP_TARGET_KIND_OACC_PARALLEL: + case GF_OMP_TARGET_KIND_OACC_KERNELS: + case GF_OMP_TARGET_KIND_OACC_DATA: + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: + break; + case GF_OMP_TARGET_KIND_UPDATE: + case GF_OMP_TARGET_KIND_ENTER_DATA: + case GF_OMP_TARGET_KIND_EXIT_DATA: + case GF_OMP_TARGET_KIND_OACC_UPDATE: + case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: + case GF_OMP_TARGET_KIND_OACC_DECLARE: + cur_region = cur_region->outer; + break; + default: + gcc_unreachable (); + } + break; + + case GIMPLE_OMP_SECTIONS: + cur_region = new_omp_region (bb, code, cur_region); + fallthru = true; + break; + + case GIMPLE_OMP_SECTIONS_SWITCH: + fallthru = false; + break; + + case GIMPLE_OMP_ATOMIC_LOAD: + case GIMPLE_OMP_ATOMIC_STORE: + fallthru = true; + break; + + 
case GIMPLE_OMP_RETURN: + /* In the case of a GIMPLE_OMP_SECTION, the edge will go + somewhere other than the next block. This will be + created later. */ + cur_region->exit = bb; + if (cur_region->type == GIMPLE_OMP_TASK) + /* Add an edge corresponding to not scheduling the task + immediately. */ + make_edge (cur_region->entry, bb, EDGE_ABNORMAL); + fallthru = cur_region->type != GIMPLE_OMP_SECTION; + cur_region = cur_region->outer; + break; + + case GIMPLE_OMP_CONTINUE: + cur_region->cont = bb; + switch (cur_region->type) + { + case GIMPLE_OMP_FOR: + /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE + succs edges as abnormal to prevent splitting + them. */ + single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL; + /* Make the loopback edge. */ + make_edge (bb, single_succ (cur_region->entry), + EDGE_ABNORMAL); + + /* Create an edge from GIMPLE_OMP_FOR to exit, which + corresponds to the case that the body of the loop + is not executed at all. */ + make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL); + make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL); + fallthru = false; + break; + + case GIMPLE_OMP_SECTIONS: + /* Wire up the edges into and out of the nested sections. */ + { + basic_block switch_bb = single_succ (cur_region->entry); + + struct omp_region *i; + for (i = cur_region->inner; i ; i = i->next) + { + gcc_assert (i->type == GIMPLE_OMP_SECTION); + make_edge (switch_bb, i->entry, 0); + make_edge (i->exit, bb, EDGE_FALLTHRU); + } + + /* Make the loopback edge to the block with + GIMPLE_OMP_SECTIONS_SWITCH. */ + make_edge (bb, switch_bb, 0); + + /* Make the edge from the switch to exit. 
*/ + make_edge (switch_bb, bb->next_bb, 0); + fallthru = false; + } + break; + + case GIMPLE_OMP_TASK: + fallthru = true; + break; + + default: + gcc_unreachable (); + } + break; + + default: + gcc_unreachable (); + } + + if (*region != cur_region) + { + *region = cur_region; + if (cur_region) + *region_idx = cur_region->entry->index; + else + *region_idx = 0; + } + + return fallthru; +} + +#include "gt-omp-expand.h" diff --git a/gcc/omp-expand.h b/gcc/omp-expand.h new file mode 100644 index 00000000000..a81b9c7253d --- /dev/null +++ b/gcc/omp-expand.h @@ -0,0 +1,32 @@ +/* Expansion pass for OMP directives. Outlines regions of certain OMP + directives to separate functions, converts others into explicit calls to the + runtime library (libgomp) and so forth + +Copyright (C) 2005-2016 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. 
*/ + +#ifndef GCC_OMP_EXPAND_H +#define GCC_OMP_EXPAND_H + +struct omp_region; +extern void omp_expand_local (basic_block head); +extern void omp_free_regions (void); +extern bool omp_make_gimple_edges (basic_block bb, struct omp_region **region, + int *region_idx); + +#endif /* GCC_OMP_EXPAND_H */ diff --git a/gcc/omp-general.c b/gcc/omp-general.c new file mode 100644 index 00000000000..0cad8a51fc1 --- /dev/null +++ b/gcc/omp-general.c @@ -0,0 +1,650 @@ +/* General types and functions that are uselful for processing of OpenMP, + OpenACC and similar directivers at various stages of compilation. + + Copyright (C) 2005-2016 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* Find an OMP clause of type KIND within CLAUSES. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "backend.h" +#include "target.h" +#include "tree.h" +#include "gimple.h" +#include "ssa.h" +#include "diagnostic-core.h" +#include "fold-const.h" +#include "langhooks.h" +#include "omp-general.h" + + +tree +omp_find_clause (tree clauses, enum omp_clause_code kind) +{ + for (; clauses ; clauses = OMP_CLAUSE_CHAIN (clauses)) + if (OMP_CLAUSE_CODE (clauses) == kind) + return clauses; + + return NULL_TREE; +} + +/* Return true if DECL is a reference type. 
*/ + +bool +omp_is_reference (tree decl) +{ + return lang_hooks.decls.omp_privatize_by_reference (decl); +} + +/* Adjust *COND_CODE and *N2 so that the former is either LT_EXPR or + GT_EXPR. */ + +void +omp_adjust_for_condition (location_t loc, enum tree_code *cond_code, tree *n2) +{ + switch (*cond_code) + { + case LT_EXPR: + case GT_EXPR: + case NE_EXPR: + break; + case LE_EXPR: + if (POINTER_TYPE_P (TREE_TYPE (*n2))) + *n2 = fold_build_pointer_plus_hwi_loc (loc, *n2, 1); + else + *n2 = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (*n2), *n2, + build_int_cst (TREE_TYPE (*n2), 1)); + *cond_code = LT_EXPR; + break; + case GE_EXPR: + if (POINTER_TYPE_P (TREE_TYPE (*n2))) + *n2 = fold_build_pointer_plus_hwi_loc (loc, *n2, -1); + else + *n2 = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (*n2), *n2, + build_int_cst (TREE_TYPE (*n2), 1)); + *cond_code = GT_EXPR; + break; + default: + gcc_unreachable (); + } +} + +/* Return the looping step from INCR, extracted from the step of a gimple omp + for statement. */ + +tree +omp_get_for_step_from_incr (location_t loc, tree incr) +{ + tree step; + switch (TREE_CODE (incr)) + { + case PLUS_EXPR: + step = TREE_OPERAND (incr, 1); + break; + case POINTER_PLUS_EXPR: + step = fold_convert (ssizetype, TREE_OPERAND (incr, 1)); + break; + case MINUS_EXPR: + step = TREE_OPERAND (incr, 1); + step = fold_build1_loc (loc, NEGATE_EXPR, TREE_TYPE (step), step); + break; + default: + gcc_unreachable (); + } + return step; +} + +/* Extract the header elements of parallel loop FOR_STMT and store + them into *FD. 
*/ + +void +omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd, + struct omp_for_data_loop *loops) +{ + tree t, var, *collapse_iter, *collapse_count; + tree count = NULL_TREE, iter_type = long_integer_type_node; + struct omp_for_data_loop *loop; + int i; + struct omp_for_data_loop dummy_loop; + location_t loc = gimple_location (for_stmt); + bool simd = gimple_omp_for_kind (for_stmt) & GF_OMP_FOR_SIMD; + bool distribute = gimple_omp_for_kind (for_stmt) + == GF_OMP_FOR_KIND_DISTRIBUTE; + bool taskloop = gimple_omp_for_kind (for_stmt) + == GF_OMP_FOR_KIND_TASKLOOP; + tree iterv, countv; + + fd->for_stmt = for_stmt; + fd->pre = NULL; + if (gimple_omp_for_collapse (for_stmt) > 1) + fd->loops = loops; + else + fd->loops = &fd->loop; + + fd->have_nowait = distribute || simd; + fd->have_ordered = false; + fd->collapse = 1; + fd->ordered = 0; + fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC; + fd->sched_modifiers = 0; + fd->chunk_size = NULL_TREE; + fd->simd_schedule = false; + if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_CILKFOR) + fd->sched_kind = OMP_CLAUSE_SCHEDULE_CILKFOR; + collapse_iter = NULL; + collapse_count = NULL; + + for (t = gimple_omp_for_clauses (for_stmt); t ; t = OMP_CLAUSE_CHAIN (t)) + switch (OMP_CLAUSE_CODE (t)) + { + case OMP_CLAUSE_NOWAIT: + fd->have_nowait = true; + break; + case OMP_CLAUSE_ORDERED: + fd->have_ordered = true; + if (OMP_CLAUSE_ORDERED_EXPR (t)) + fd->ordered = tree_to_shwi (OMP_CLAUSE_ORDERED_EXPR (t)); + break; + case OMP_CLAUSE_SCHEDULE: + gcc_assert (!distribute && !taskloop); + fd->sched_kind + = (enum omp_clause_schedule_kind) + (OMP_CLAUSE_SCHEDULE_KIND (t) & OMP_CLAUSE_SCHEDULE_MASK); + fd->sched_modifiers = (OMP_CLAUSE_SCHEDULE_KIND (t) + & ~OMP_CLAUSE_SCHEDULE_MASK); + fd->chunk_size = OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t); + fd->simd_schedule = OMP_CLAUSE_SCHEDULE_SIMD (t); + break; + case OMP_CLAUSE_DIST_SCHEDULE: + gcc_assert (distribute); + fd->chunk_size = OMP_CLAUSE_DIST_SCHEDULE_CHUNK_EXPR (t); + 
break; + case OMP_CLAUSE_COLLAPSE: + fd->collapse = tree_to_shwi (OMP_CLAUSE_COLLAPSE_EXPR (t)); + if (fd->collapse > 1) + { + collapse_iter = &OMP_CLAUSE_COLLAPSE_ITERVAR (t); + collapse_count = &OMP_CLAUSE_COLLAPSE_COUNT (t); + } + break; + default: + break; + } + if (fd->ordered && fd->collapse == 1 && loops != NULL) + { + fd->loops = loops; + iterv = NULL_TREE; + countv = NULL_TREE; + collapse_iter = &iterv; + collapse_count = &countv; + } + + /* FIXME: for now map schedule(auto) to schedule(static). + There should be analysis to determine whether all iterations + are approximately the same amount of work (then schedule(static) + is best) or if it varies (then schedule(dynamic,N) is better). */ + if (fd->sched_kind == OMP_CLAUSE_SCHEDULE_AUTO) + { + fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC; + gcc_assert (fd->chunk_size == NULL); + } + gcc_assert (fd->collapse == 1 || collapse_iter != NULL); + if (taskloop) + fd->sched_kind = OMP_CLAUSE_SCHEDULE_RUNTIME; + if (fd->sched_kind == OMP_CLAUSE_SCHEDULE_RUNTIME) + gcc_assert (fd->chunk_size == NULL); + else if (fd->chunk_size == NULL) + { + /* We only need to compute a default chunk size for ordered + static loops and dynamic loops. */ + if (fd->sched_kind != OMP_CLAUSE_SCHEDULE_STATIC + || fd->have_ordered) + fd->chunk_size = (fd->sched_kind == OMP_CLAUSE_SCHEDULE_STATIC) + ? integer_zero_node : integer_one_node; + } + + int cnt = fd->ordered ? fd->ordered : fd->collapse; + for (i = 0; i < cnt; i++) + { + if (i == 0 && fd->collapse == 1 && (fd->ordered == 0 || loops == NULL)) + loop = &fd->loop; + else if (loops != NULL) + loop = loops + i; + else + loop = &dummy_loop; + + loop->v = gimple_omp_for_index (for_stmt, i); + gcc_assert (SSA_VAR_P (loop->v)); + gcc_assert (TREE_CODE (TREE_TYPE (loop->v)) == INTEGER_TYPE + || TREE_CODE (TREE_TYPE (loop->v)) == POINTER_TYPE); + var = TREE_CODE (loop->v) == SSA_NAME ? 
SSA_NAME_VAR (loop->v) : loop->v; + loop->n1 = gimple_omp_for_initial (for_stmt, i); + + loop->cond_code = gimple_omp_for_cond (for_stmt, i); + loop->n2 = gimple_omp_for_final (for_stmt, i); + gcc_assert (loop->cond_code != NE_EXPR + || gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_CILKSIMD + || gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_CILKFOR); + omp_adjust_for_condition (loc, &loop->cond_code, &loop->n2); + + t = gimple_omp_for_incr (for_stmt, i); + gcc_assert (TREE_OPERAND (t, 0) == var); + loop->step = omp_get_for_step_from_incr (loc, t); + + if (simd + || (fd->sched_kind == OMP_CLAUSE_SCHEDULE_STATIC + && !fd->have_ordered)) + { + if (fd->collapse == 1) + iter_type = TREE_TYPE (loop->v); + else if (i == 0 + || TYPE_PRECISION (iter_type) + < TYPE_PRECISION (TREE_TYPE (loop->v))) + iter_type + = build_nonstandard_integer_type + (TYPE_PRECISION (TREE_TYPE (loop->v)), 1); + } + else if (iter_type != long_long_unsigned_type_node) + { + if (POINTER_TYPE_P (TREE_TYPE (loop->v))) + iter_type = long_long_unsigned_type_node; + else if (TYPE_UNSIGNED (TREE_TYPE (loop->v)) + && TYPE_PRECISION (TREE_TYPE (loop->v)) + >= TYPE_PRECISION (iter_type)) + { + tree n; + + if (loop->cond_code == LT_EXPR) + n = fold_build2_loc (loc, + PLUS_EXPR, TREE_TYPE (loop->v), + loop->n2, loop->step); + else + n = loop->n1; + if (TREE_CODE (n) != INTEGER_CST + || tree_int_cst_lt (TYPE_MAX_VALUE (iter_type), n)) + iter_type = long_long_unsigned_type_node; + } + else if (TYPE_PRECISION (TREE_TYPE (loop->v)) + > TYPE_PRECISION (iter_type)) + { + tree n1, n2; + + if (loop->cond_code == LT_EXPR) + { + n1 = loop->n1; + n2 = fold_build2_loc (loc, + PLUS_EXPR, TREE_TYPE (loop->v), + loop->n2, loop->step); + } + else + { + n1 = fold_build2_loc (loc, + MINUS_EXPR, TREE_TYPE (loop->v), + loop->n2, loop->step); + n2 = loop->n1; + } + if (TREE_CODE (n1) != INTEGER_CST + || TREE_CODE (n2) != INTEGER_CST + || !tree_int_cst_lt (TYPE_MIN_VALUE (iter_type), n1) + || !tree_int_cst_lt (n2, 
TYPE_MAX_VALUE (iter_type))) + iter_type = long_long_unsigned_type_node; + } + } + + if (i >= fd->collapse) + continue; + + if (collapse_count && *collapse_count == NULL) + { + t = fold_binary (loop->cond_code, boolean_type_node, + fold_convert (TREE_TYPE (loop->v), loop->n1), + fold_convert (TREE_TYPE (loop->v), loop->n2)); + if (t && integer_zerop (t)) + count = build_zero_cst (long_long_unsigned_type_node); + else if ((i == 0 || count != NULL_TREE) + && TREE_CODE (TREE_TYPE (loop->v)) == INTEGER_TYPE + && TREE_CONSTANT (loop->n1) + && TREE_CONSTANT (loop->n2) + && TREE_CODE (loop->step) == INTEGER_CST) + { + tree itype = TREE_TYPE (loop->v); + + if (POINTER_TYPE_P (itype)) + itype = signed_type_for (itype); + t = build_int_cst (itype, (loop->cond_code == LT_EXPR ? -1 : 1)); + t = fold_build2_loc (loc, + PLUS_EXPR, itype, + fold_convert_loc (loc, itype, loop->step), t); + t = fold_build2_loc (loc, PLUS_EXPR, itype, t, + fold_convert_loc (loc, itype, loop->n2)); + t = fold_build2_loc (loc, MINUS_EXPR, itype, t, + fold_convert_loc (loc, itype, loop->n1)); + if (TYPE_UNSIGNED (itype) && loop->cond_code == GT_EXPR) + t = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, + fold_build1_loc (loc, NEGATE_EXPR, itype, t), + fold_build1_loc (loc, NEGATE_EXPR, itype, + fold_convert_loc (loc, itype, + loop->step))); + else + t = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, t, + fold_convert_loc (loc, itype, loop->step)); + t = fold_convert_loc (loc, long_long_unsigned_type_node, t); + if (count != NULL_TREE) + count = fold_build2_loc (loc, + MULT_EXPR, long_long_unsigned_type_node, + count, t); + else + count = t; + if (TREE_CODE (count) != INTEGER_CST) + count = NULL_TREE; + } + else if (count && !integer_zerop (count)) + count = NULL_TREE; + } + } + + if (count + && !simd + && (fd->sched_kind != OMP_CLAUSE_SCHEDULE_STATIC + || fd->have_ordered)) + { + if (!tree_int_cst_lt (count, TYPE_MAX_VALUE (long_integer_type_node))) + iter_type = long_long_unsigned_type_node; + else + 
iter_type = long_integer_type_node; + } + else if (collapse_iter && *collapse_iter != NULL) + iter_type = TREE_TYPE (*collapse_iter); + fd->iter_type = iter_type; + if (collapse_iter && *collapse_iter == NULL) + *collapse_iter = create_tmp_var (iter_type, ".iter"); + if (collapse_count && *collapse_count == NULL) + { + if (count) + *collapse_count = fold_convert_loc (loc, iter_type, count); + else + *collapse_count = create_tmp_var (iter_type, ".count"); + } + + if (fd->collapse > 1 || (fd->ordered && loops)) + { + fd->loop.v = *collapse_iter; + fd->loop.n1 = build_int_cst (TREE_TYPE (fd->loop.v), 0); + fd->loop.n2 = *collapse_count; + fd->loop.step = build_int_cst (TREE_TYPE (fd->loop.v), 1); + fd->loop.cond_code = LT_EXPR; + } + else if (loops) + loops[0] = fd->loop; +} + +/* Build a call to GOMP_barrier. */ + +gimple * +omp_build_barrier (tree lhs) +{ + tree fndecl = builtin_decl_explicit (lhs ? BUILT_IN_GOMP_BARRIER_CANCEL + : BUILT_IN_GOMP_BARRIER); + gcall *g = gimple_build_call (fndecl, 0); + if (lhs) + gimple_call_set_lhs (g, lhs); + return g; +} + +/* Return maximum possible vectorization factor for the target. */ + +int +omp_max_vf (void) +{ + if (!optimize + || optimize_debug + || !flag_tree_loop_optimize + || (!flag_tree_loop_vectorize + && (global_options_set.x_flag_tree_loop_vectorize + || global_options_set.x_flag_tree_vectorize))) + return 1; + + int vf = 1; + int vs = targetm.vectorize.autovectorize_vector_sizes (); + if (vs) + vf = 1 << floor_log2 (vs); + else + { + machine_mode vqimode = targetm.vectorize.preferred_simd_mode (QImode); + if (GET_MODE_CLASS (vqimode) == MODE_VECTOR_INT) + vf = GET_MODE_NUNITS (vqimode); + } + return vf; +} + +/* Return maximum SIMT width if offloading may target SIMT hardware. 
*/ + +int +omp_max_simt_vf (void) +{ + if (!optimize) + return 0; + if (ENABLE_OFFLOADING) + for (const char *c = getenv ("OFFLOAD_TARGET_NAMES"); c; ) + { + if (!strncmp (c, "nvptx", strlen ("nvptx"))) + return 32; + else if ((c = strchr (c, ','))) + c++; + } + return 0; +} + +/* Encode an oacc launch argument. This matches the GOMP_LAUNCH_PACK + macro on gomp-constants.h. We do not check for overflow. */ + +tree +oacc_launch_pack (unsigned code, tree device, unsigned op) +{ + tree res; + + res = build_int_cst (unsigned_type_node, GOMP_LAUNCH_PACK (code, 0, op)); + if (device) + { + device = fold_build2 (LSHIFT_EXPR, unsigned_type_node, + device, build_int_cst (unsigned_type_node, + GOMP_LAUNCH_DEVICE_SHIFT)); + res = fold_build2 (BIT_IOR_EXPR, unsigned_type_node, res, device); + } + return res; +} + +/* FIXME: What is the following comment for? */ +/* Look for compute grid dimension clauses and convert to an attribute + attached to FN. This permits the target-side code to (a) massage + the dimensions, (b) emit that data and (c) optimize. Non-constant + dimensions are pushed onto ARGS. + + The attribute value is a TREE_LIST. A set of dimensions is + represented as a list of INTEGER_CST. Those that are runtime + exprs are represented as an INTEGER_CST of zero. + + TOOO. Normally the attribute will just contain a single such list. If + however it contains a list of lists, this will represent the use of + device_type. Each member of the outer list is an assoc list of + dimensions, keyed by the device type. The first entry will be the + default. Well, that's the plan. */ + +/* Replace any existing oacc fn attribute with updated dimensions. */ + +void +oacc_replace_fn_attrib (tree fn, tree dims) +{ + tree ident = get_identifier (OACC_FN_ATTRIB); + tree attribs = DECL_ATTRIBUTES (fn); + + /* If we happen to be present as the first attrib, drop it. 
*/ + if (attribs && TREE_PURPOSE (attribs) == ident) + attribs = TREE_CHAIN (attribs); + DECL_ATTRIBUTES (fn) = tree_cons (ident, dims, attribs); +} + +/* Scan CLAUSES for launch dimensions and attach them to the oacc + function attribute. Push any that are non-constant onto the ARGS + list, along with an appropriate GOMP_LAUNCH_DIM tag. IS_KERNEL is + true, if these are for a kernels region offload function. */ + +void +oacc_set_fn_attrib (tree fn, tree clauses, bool is_kernel, vec<tree> *args) +{ + /* Must match GOMP_DIM ordering. */ + static const omp_clause_code ids[] + = { OMP_CLAUSE_NUM_GANGS, OMP_CLAUSE_NUM_WORKERS, + OMP_CLAUSE_VECTOR_LENGTH }; + unsigned ix; + tree dims[GOMP_DIM_MAX]; + + tree attr = NULL_TREE; + unsigned non_const = 0; + + for (ix = GOMP_DIM_MAX; ix--;) + { + tree clause = omp_find_clause (clauses, ids[ix]); + tree dim = NULL_TREE; + + if (clause) + dim = OMP_CLAUSE_EXPR (clause, ids[ix]); + dims[ix] = dim; + if (dim && TREE_CODE (dim) != INTEGER_CST) + { + dim = integer_zero_node; + non_const |= GOMP_DIM_MASK (ix); + } + attr = tree_cons (NULL_TREE, dim, attr); + /* Note kernelness with TREE_PUBLIC. */ + if (is_kernel) + TREE_PUBLIC (attr) = 1; + } + + oacc_replace_fn_attrib (fn, attr); + + if (non_const) + { + /* Push a dynamic argument set. */ + args->safe_push (oacc_launch_pack (GOMP_LAUNCH_DIM, + NULL_TREE, non_const)); + for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++) + if (non_const & GOMP_DIM_MASK (ix)) + args->safe_push (dims[ix]); + } +} + +/* Process the routine's dimension clauess to generate an attribute + value. Issue diagnostics as appropriate. We default to SEQ + (OpenACC 2.5 clarifies this). All dimensions have a size of zero + (dynamic). TREE_PURPOSE is set to indicate whether that dimension + can have a loop partitioned on it. non-zero indicates + yes, zero indicates no. By construction once a non-zero has been + reached, further inner dimensions must also be non-zero. 
We set + TREE_VALUE to zero for the dimensions that may be partitioned and + 1 for the other ones -- if a loop is (erroneously) spawned at + an outer level, we don't want to try and partition it. */ + +tree +oacc_build_routine_dims (tree clauses) +{ + /* Must match GOMP_DIM ordering. */ + static const omp_clause_code ids[] = + {OMP_CLAUSE_GANG, OMP_CLAUSE_WORKER, OMP_CLAUSE_VECTOR, OMP_CLAUSE_SEQ}; + int ix; + int level = -1; + + for (; clauses; clauses = OMP_CLAUSE_CHAIN (clauses)) + for (ix = GOMP_DIM_MAX + 1; ix--;) + if (OMP_CLAUSE_CODE (clauses) == ids[ix]) + { + if (level >= 0) + error_at (OMP_CLAUSE_LOCATION (clauses), + "multiple loop axes specified for routine"); + level = ix; + break; + } + + /* Default to SEQ. */ + if (level < 0) + level = GOMP_DIM_MAX; + + tree dims = NULL_TREE; + + for (ix = GOMP_DIM_MAX; ix--;) + dims = tree_cons (build_int_cst (boolean_type_node, ix >= level), + build_int_cst (integer_type_node, ix < level), dims); + + return dims; +} + +/* Retrieve the oacc function attrib and return it. Non-oacc + functions will return NULL. */ + +tree +oacc_get_fn_attrib (tree fn) +{ + return lookup_attribute (OACC_FN_ATTRIB, DECL_ATTRIBUTES (fn)); +} + +/* Return true if this oacc fn attrib is for a kernels offload + region. We use the TREE_PUBLIC flag of each dimension -- only + need to check the first one. */ + +bool +oacc_fn_attrib_kernels_p (tree attr) +{ + return TREE_PUBLIC (TREE_VALUE (attr)); +} + +/* Extract an oacc execution dimension from FN. FN must be an + offloaded function or routine that has already had its execution + dimensions lowered to the target-specific values. 
*/ + +int +oacc_get_fn_dim_size (tree fn, int axis) +{ + tree attrs = oacc_get_fn_attrib (fn); + + gcc_assert (axis < GOMP_DIM_MAX); + + tree dims = TREE_VALUE (attrs); + while (axis--) + dims = TREE_CHAIN (dims); + + int size = TREE_INT_CST_LOW (TREE_VALUE (dims)); + + return size; +} + +/* Extract the dimension axis from an IFN_GOACC_DIM_POS or + IFN_GOACC_DIM_SIZE call. */ + +int +oacc_get_ifn_dim_arg (const gimple *stmt) +{ + gcc_checking_assert (gimple_call_internal_fn (stmt) == IFN_GOACC_DIM_SIZE + || gimple_call_internal_fn (stmt) == IFN_GOACC_DIM_POS); + tree arg = gimple_call_arg (stmt, 0); + HOST_WIDE_INT axis = TREE_INT_CST_LOW (arg); + + gcc_checking_assert (axis >= 0 && axis < GOMP_DIM_MAX); + return (int) axis; +} diff --git a/gcc/omp-general.h b/gcc/omp-general.h new file mode 100644 index 00000000000..634fdccb357 --- /dev/null +++ b/gcc/omp-general.h @@ -0,0 +1,91 @@ +/* General types and functions that are uselful for processing of OpenMP, + OpenACC and similar directivers at various stages of compilation. + + Copyright (C) 2005-2016 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#ifndef GCC_OMP_GENERAL_H +#define GCC_OMP_GENERAL_H + +#include "gomp-constants.h" + +/* Flags for an OpenACC loop. */ + +enum oacc_loop_flags { + OLF_SEQ = 1u << 0, /* Explicitly sequential */ + OLF_AUTO = 1u << 1, /* Compiler chooses axes. 
*/ + OLF_INDEPENDENT = 1u << 2, /* Iterations are known independent. */ + OLF_GANG_STATIC = 1u << 3, /* Gang partitioning is static (has op). */ + + /* Explicitly specified loop axes. */ + OLF_DIM_BASE = 4, + OLF_DIM_GANG = 1u << (OLF_DIM_BASE + GOMP_DIM_GANG), + OLF_DIM_WORKER = 1u << (OLF_DIM_BASE + GOMP_DIM_WORKER), + OLF_DIM_VECTOR = 1u << (OLF_DIM_BASE + GOMP_DIM_VECTOR), + + OLF_MAX = OLF_DIM_BASE + GOMP_DIM_MAX +}; + +/* A structure holding the elements of: + for (V = N1; V cond N2; V += STEP) [...] */ + +struct omp_for_data_loop +{ + tree v, n1, n2, step; + enum tree_code cond_code; +}; + +/* A structure describing the main elements of a parallel loop. */ + +struct omp_for_data +{ + struct omp_for_data_loop loop; + tree chunk_size; + gomp_for *for_stmt; + tree pre, iter_type; + int collapse; + int ordered; + bool have_nowait, have_ordered, simd_schedule; + unsigned char sched_modifiers; + enum omp_clause_schedule_kind sched_kind; + struct omp_for_data_loop *loops; +}; + +#define OACC_FN_ATTRIB "oacc function" + +extern tree omp_find_clause (tree clauses, enum omp_clause_code kind); +extern bool omp_is_reference (tree decl); +extern void omp_adjust_for_condition (location_t loc, enum tree_code *cond_code, + tree *n2); +extern tree omp_get_for_step_from_incr (location_t loc, tree incr); +extern void omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd, + struct omp_for_data_loop *loops); +extern gimple *omp_build_barrier (tree lhs); +extern int omp_max_vf (void); +extern int omp_max_simt_vf (void); +extern tree oacc_launch_pack (unsigned code, tree device, unsigned op); +extern void oacc_replace_fn_attrib (tree fn, tree dims); +extern void oacc_set_fn_attrib (tree fn, tree clauses, bool is_kernel, + vec<tree> *args); +extern tree oacc_build_routine_dims (tree clauses); +extern tree oacc_get_fn_attrib (tree fn); +extern bool oacc_fn_attrib_kernels_p (tree attr); +extern int oacc_get_fn_dim_size (tree fn, int axis); +extern int oacc_get_ifn_dim_arg 
(const gimple *stmt); + +#endif /* GCC_OMP_GENERAL_H */ diff --git a/gcc/omp-grid.c b/gcc/omp-grid.c new file mode 100644 index 00000000000..81f6ea5e193 --- /dev/null +++ b/gcc/omp-grid.c @@ -0,0 +1,1407 @@ +/* Lowering and expansion of OpenMP directives for HSA GPU agents. + + Copyright (C) 2013-2016 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "backend.h" +#include "tree.h" +#include "gimple.h" +#include "tree-pass.h" +#include "ssa.h" +#include "cgraph.h" +#include "pretty-print.h" +#include "fold-const.h" +#include "gimplify.h" +#include "gimple-iterator.h" +#include "gimple-walk.h" +#include "tree-inline.h" +#include "langhooks.h" +#include "omp-general.h" +#include "omp-low.h" +#include "omp-grid.h" +#include "gimple-pretty-print.h" + +/* Return the lastprivate predicate for a given gridified loop described by + FD). */ + +tree +omp_grid_lastprivate_predicate (struct omp_for_data *fd) +{ + /* When dealing with a gridified loop, we need to check up to three collapsed + iteration variables but they are not actually captured in this fd. + Fortunately, we can easily rely on HSA builtins to get this + information. 
*/ + + tree id, size; + if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP + && gimple_omp_for_grid_intra_group (fd->for_stmt)) + { + id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMID); + size = builtin_decl_explicit (BUILT_IN_HSA_CURRENTWORKGROUPSIZE); + } + else + { + id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMABSID); + size = builtin_decl_explicit (BUILT_IN_HSA_GRIDSIZE); + } + tree cond = NULL; + for (int dim = 0; dim < fd->collapse; dim++) + { + tree dim_tree = build_int_cstu (unsigned_type_node, dim); + tree u1 = build_int_cstu (unsigned_type_node, 1); + tree c2 + = build2 (EQ_EXPR, boolean_type_node, + build2 (PLUS_EXPR, unsigned_type_node, + build_call_expr (id, 1, dim_tree), u1), + build_call_expr (size, 1, dim_tree)); + if (cond) + cond = build2 (TRUTH_AND_EXPR, boolean_type_node, cond, c2); + else + cond = c2; + } + return cond; +} + +/* Structure describing the basic properties of the loop we ara analyzing + whether it can be gridified and when it is gridified. */ + +struct grid_prop +{ + /* True when we are doing tiling gridification, i.e. when there is a distinct + distribute loop over groups and a loop construct over work-items. False + when distribute and parallel for loops form a combined construct. */ + bool tiling; + /* Location of the target construct for optimization information + messages. */ + location_t target_loc; + /* The collapse clause of the involved loops. Collapse value of all of them + must be the same for gridification to take place. */ + size_t collapse; + /* Group sizes, if requested by the user or NULL if not requested. */ + tree group_sizes[3]; +}; + +#define GRID_MISSED_MSG_PREFIX "Will not turn target construct into a " \ + "gridified HSA kernel because " + +/* Return true if STMT is an assignment of a register-type into a local + VAR_DECL. If GRID is non-NULL, the assignment additionally must not be to + any of the trees specifying group sizes there. 
*/ + +static bool +grid_safe_assignment_p (gimple *stmt, grid_prop *grid) +{ + gassign *assign = dyn_cast <gassign *> (stmt); + if (!assign) + return false; + if (gimple_clobber_p (assign)) + return true; + tree lhs = gimple_assign_lhs (assign); + if (!VAR_P (lhs) + || !is_gimple_reg_type (TREE_TYPE (lhs)) + || is_global_var (lhs)) + return false; + if (grid) + for (unsigned i = 0; i < grid->collapse; i++) + if (lhs == grid->group_sizes[i]) + return false; + return true; +} + +/* Return true if all statements in SEQ are assignments to local register-type + variables that do not hold group size information. */ + +static bool +grid_seq_only_contains_local_assignments (gimple_seq seq, grid_prop *grid) +{ + if (!seq) + return true; + + gimple_stmt_iterator gsi; + for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi)) + if (!grid_safe_assignment_p (gsi_stmt (gsi), grid)) + return false; + return true; +} + +/* Scan statements in SEQ and call itself recursively on any bind. GRID + describes hitherto discovered properties of the loop that is evaluated for + possible gridification. If during whole search only assignments to + register-type local variables (that do not overwrite group size information) + and one single OMP statement is encountered, return true, otherwise return + false. RET is where we store any OMP statement encountered. 
*/ + +static bool +grid_find_single_omp_among_assignments_1 (gimple_seq seq, grid_prop *grid, + const char *name, gimple **ret) +{ + gimple_stmt_iterator gsi; + for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + + if (grid_safe_assignment_p (stmt, grid)) + continue; + if (gbind *bind = dyn_cast <gbind *> (stmt)) + { + if (!grid_find_single_omp_among_assignments_1 (gimple_bind_body (bind), + grid, name, ret)) + return false; + } + else if (is_gimple_omp (stmt)) + { + if (*ret) + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, + GRID_MISSED_MSG_PREFIX "%s construct " + "contains multiple OpenMP constructs\n", + name); + dump_printf_loc (MSG_NOTE, gimple_location (*ret), + "The first OpenMP construct within " + "a parallel\n"); + dump_printf_loc (MSG_NOTE, gimple_location (stmt), + "The second OpenMP construct within " + "a parallel\n"); + } + return false; + } + *ret = stmt; + } + else + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, + GRID_MISSED_MSG_PREFIX "%s construct contains " + "a complex statement\n", name); + dump_printf_loc (MSG_NOTE, gimple_location (stmt), + "This statement cannot be analyzed for " + "gridification\n"); + } + return false; + } + } + return true; +} + +/* Scan statements in SEQ and make sure that it and any binds in it contain + only assignments to local register-type variables (that do not overwrite + group size information) and one OMP construct. If so, return that + construct, otherwise return NULL. GRID describes hitherto discovered + properties of the loop that is evaluated for possible gridification. If + dumping is enabled and function fails, use NAME to dump a note with the + reason for failure. 
*/ + +static gimple * +grid_find_single_omp_among_assignments (gimple_seq seq, grid_prop *grid, + const char *name) +{ + if (!seq) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, + GRID_MISSED_MSG_PREFIX "%s construct has empty body\n", + name); + return NULL; + } + + gimple *ret = NULL; + if (grid_find_single_omp_among_assignments_1 (seq, grid, name, &ret)) + { + if (!ret && dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, + GRID_MISSED_MSG_PREFIX "%s construct does not contain" + "any other OpenMP construct\n", name); + return ret; + } + else + return NULL; +} + +/* Walker function looking for statements there is no point gridifying (and for + noreturn function calls which we cannot do). Return non-NULL if such a + function is found. */ + +static tree +grid_find_ungridifiable_statement (gimple_stmt_iterator *gsi, + bool *handled_ops_p, + struct walk_stmt_info *wi) +{ + *handled_ops_p = false; + gimple *stmt = gsi_stmt (*gsi); + switch (gimple_code (stmt)) + { + case GIMPLE_CALL: + if (gimple_call_noreturn_p (as_a <gcall *> (stmt))) + { + *handled_ops_p = true; + wi->info = stmt; + return error_mark_node; + } + break; + + /* We may reduce the following list if we find a way to implement the + clauses, but now there is no point trying further. */ + case GIMPLE_OMP_CRITICAL: + case GIMPLE_OMP_TASKGROUP: + case GIMPLE_OMP_TASK: + case GIMPLE_OMP_SECTION: + case GIMPLE_OMP_SECTIONS: + case GIMPLE_OMP_SECTIONS_SWITCH: + case GIMPLE_OMP_TARGET: + case GIMPLE_OMP_ORDERED: + *handled_ops_p = true; + wi->info = stmt; + return error_mark_node; + default: + break; + } + return NULL; +} + +/* Examine clauses of omp parallel statement PAR and if any prevents + gridification, issue a missed-optimization diagnostics and return false, + otherwise return true. GRID describes hitherto discovered properties of the + loop that is evaluated for possible gridification. 
*/ + +static bool +grid_parallel_clauses_gridifiable (gomp_parallel *par, location_t tloc) +{ + tree clauses = gimple_omp_parallel_clauses (par); + while (clauses) + { + switch (OMP_CLAUSE_CODE (clauses)) + { + case OMP_CLAUSE_NUM_THREADS: + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, + GRID_MISSED_MSG_PREFIX "because there is " + "a num_threads clause of the parallel " + "construct\n"); + dump_printf_loc (MSG_NOTE, gimple_location (par), + "Parallel construct has a num_threads clause\n"); + } + return false; + + case OMP_CLAUSE_REDUCTION: + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, + GRID_MISSED_MSG_PREFIX "a reduction clause" + "is present\n "); + dump_printf_loc (MSG_NOTE, gimple_location (par), + "Parallel construct has a reduction clause\n"); + } + return false; + + default: + break; + } + clauses = OMP_CLAUSE_CHAIN (clauses); + } + return true; +} + +/* Examine clauses and the body of omp loop statement GFOR and if something + prevents gridification, issue a missed-optimization diagnostics and return + false, otherwise return true. GRID describes hitherto discovered properties + of the loop that is evaluated for possible gridification. 
*/ + +static bool +grid_inner_loop_gridifiable_p (gomp_for *gfor, grid_prop *grid) +{ + if (!grid_seq_only_contains_local_assignments (gimple_omp_for_pre_body (gfor), + grid)) + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, + GRID_MISSED_MSG_PREFIX "the inner loop " + "loop bounds computation contains a complex " + "statement\n"); + dump_printf_loc (MSG_NOTE, gimple_location (gfor), + "Loop construct cannot be analyzed for " + "gridification\n"); + } + return false; + } + + tree clauses = gimple_omp_for_clauses (gfor); + while (clauses) + { + switch (OMP_CLAUSE_CODE (clauses)) + { + case OMP_CLAUSE_SCHEDULE: + if (OMP_CLAUSE_SCHEDULE_KIND (clauses) != OMP_CLAUSE_SCHEDULE_AUTO) + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, + GRID_MISSED_MSG_PREFIX "the inner loop " + "has a non-automatic schedule clause\n"); + dump_printf_loc (MSG_NOTE, gimple_location (gfor), + "Loop construct has a non automatic " + "schedule clause\n"); + } + return false; + } + break; + + case OMP_CLAUSE_REDUCTION: + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, + GRID_MISSED_MSG_PREFIX "a reduction " + "clause is present\n "); + dump_printf_loc (MSG_NOTE, gimple_location (gfor), + "Loop construct has a reduction schedule " + "clause\n"); + } + return false; + + default: + break; + } + clauses = OMP_CLAUSE_CHAIN (clauses); + } + struct walk_stmt_info wi; + memset (&wi, 0, sizeof (wi)); + if (walk_gimple_seq (gimple_omp_body (gfor), + grid_find_ungridifiable_statement, + NULL, &wi)) + { + gimple *bad = (gimple *) wi.info; + if (dump_enabled_p ()) + { + if (is_gimple_call (bad)) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, + GRID_MISSED_MSG_PREFIX "the inner loop contains " + "call to a noreturn function\n"); + else + dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, + GRID_MISSED_MSG_PREFIX "the inner loop contains " + "statement %s 
which cannot be transformed\n", + gimple_code_name[(int) gimple_code (bad)]); + dump_printf_loc (MSG_NOTE, gimple_location (bad), + "This statement cannot be analyzed for " + "gridification\n"); + } + return false; + } + return true; +} + +/* Given distribute omp construct represented by DIST, which in the original + source forms a compound construct with a looping construct, return true if it + can be turned into a gridified HSA kernel. Otherwise return false. GRID + describes hitherto discovered properties of the loop that is evaluated for + possible gridification. */ + +static bool +grid_dist_follows_simple_pattern (gomp_for *dist, grid_prop *grid) +{ + location_t tloc = grid->target_loc; + gimple *stmt = grid_find_single_omp_among_assignments (gimple_omp_body (dist), + grid, "distribute"); + gomp_parallel *par; + if (!stmt + || !(par = dyn_cast <gomp_parallel *> (stmt)) + || !grid_parallel_clauses_gridifiable (par, tloc)) + return false; + + stmt = grid_find_single_omp_among_assignments (gimple_omp_body (par), grid, + "parallel"); + gomp_for *gfor; + if (!stmt || !(gfor = dyn_cast <gomp_for *> (stmt))) + return false; + + if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, + GRID_MISSED_MSG_PREFIX "the inner loop is not " + "a simple for loop\n"); + return false; + } + gcc_assert (gimple_omp_for_collapse (gfor) == grid->collapse); + + if (!grid_inner_loop_gridifiable_p (gfor, grid)) + return false; + + return true; +} + +/* Given an omp loop statement GFOR, return true if it can participate in + tiling gridification, i.e. in one where the distribute and parallel for + loops do not form a compound statement. GRID describes hitherto discovered + properties of the loop that is evaluated for possible gridification. 
*/ + +static bool +grid_gfor_follows_tiling_pattern (gomp_for *gfor, grid_prop *grid) +{ + if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR) + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, + GRID_MISSED_MSG_PREFIX "an inner loop is not " + "a simple for loop\n"); + dump_printf_loc (MSG_NOTE, gimple_location (gfor), + "This statement is not a simple for loop\n"); + } + return false; + } + + if (!grid_inner_loop_gridifiable_p (gfor, grid)) + return false; + + if (gimple_omp_for_collapse (gfor) != grid->collapse) + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, + GRID_MISSED_MSG_PREFIX "an inner loop does not " + "have use the same collapse clause\n"); + dump_printf_loc (MSG_NOTE, gimple_location (gfor), + "Loop construct uses a different collapse clause\n"); + } + return false; + } + + struct omp_for_data fd; + struct omp_for_data_loop *loops + = (struct omp_for_data_loop *)alloca (grid->collapse + * sizeof (struct omp_for_data_loop)); + omp_extract_for_data (gfor, &fd, loops); + for (unsigned i = 0; i < grid->collapse; i++) + { + tree itype, type = TREE_TYPE (fd.loops[i].v); + if (POINTER_TYPE_P (type)) + itype = signed_type_for (type); + else + itype = type; + + tree n1 = fold_convert (itype, fd.loops[i].n1); + tree n2 = fold_convert (itype, fd.loops[i].n2); + tree t = build_int_cst (itype, + (fd.loops[i].cond_code == LT_EXPR ? 
-1 : 1)); + t = fold_build2 (PLUS_EXPR, itype, fd.loops[i].step, t); + t = fold_build2 (PLUS_EXPR, itype, t, n2); + t = fold_build2 (MINUS_EXPR, itype, t, n1); + if (TYPE_UNSIGNED (itype) && fd.loops[i].cond_code == GT_EXPR) + t = fold_build2 (TRUNC_DIV_EXPR, itype, + fold_build1 (NEGATE_EXPR, itype, t), + fold_build1 (NEGATE_EXPR, itype, fd.loops[i].step)); + else + t = fold_build2 (TRUNC_DIV_EXPR, itype, t, fd.loops[i].step); + + if (!operand_equal_p (grid->group_sizes[i], t, 0)) + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, + GRID_MISSED_MSG_PREFIX "the distribute and " + "an internal loop do not agree on tile size\n"); + dump_printf_loc (MSG_NOTE, gimple_location (gfor), + "Loop construct does not seem to loop over " + "a tile size\n"); + } + return false; + } + } + return true; +} + +/* Facing a call to FNDECL in the body of a distribute construct, return true + if we can handle it or false if it precludes gridification. */ + +static bool +grid_call_permissible_in_distribute_p (tree fndecl) +{ + if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl)) + return true; + + const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl)); + if (strstr (name, "omp_") != name) + return false; + + if ((strcmp (name, "omp_get_thread_num") == 0) + || (strcmp (name, "omp_get_num_threads") == 0) + || (strcmp (name, "omp_get_num_teams") == 0) + || (strcmp (name, "omp_get_team_num") == 0) + || (strcmp (name, "omp_get_level") == 0) + || (strcmp (name, "omp_get_active_level") == 0) + || (strcmp (name, "omp_in_parallel") == 0)) + return true; + + return false; +} + +/* Facing a call satisfying grid_call_permissible_in_distribute_p in the body + of a distribute construct that is pointed at by GSI, modify it as necessary + for gridification. If the statement itself got removed, return true. 
*/ + +static bool +grid_handle_call_in_distribute (gimple_stmt_iterator *gsi) +{ + gimple *stmt = gsi_stmt (*gsi); + tree fndecl = gimple_call_fndecl (stmt); + gcc_checking_assert (stmt); + if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl)) + return false; + + const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl)); + if ((strcmp (name, "omp_get_thread_num") == 0) + || (strcmp (name, "omp_get_level") == 0) + || (strcmp (name, "omp_get_active_level") == 0) + || (strcmp (name, "omp_in_parallel") == 0)) + { + tree lhs = gimple_call_lhs (stmt); + if (lhs) + { + gassign *assign + = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs))); + gsi_insert_before (gsi, assign, GSI_SAME_STMT); + } + gsi_remove (gsi, true); + return true; + } + + /* The rest of the omp functions can stay as they are, HSA back-end will + handle them correctly. */ + gcc_checking_assert ((strcmp (name, "omp_get_num_threads") == 0) + || (strcmp (name, "omp_get_num_teams") == 0) + || (strcmp (name, "omp_get_team_num") == 0)); + return false; +} + +/* Given a sequence of statements within a distribute omp construct or a + parallel construct, which in the original source does not form a compound + construct with a looping construct, return true if it does not prevent us + from turning it into a gridified HSA kernel. Otherwise return false. GRID + describes hitherto discovered properties of the loop that is evaluated for + possible gridification. IN_PARALLEL must be true if seq is within a + parallel construct and flase if it is only within a distribute + construct. 
*/ + +static bool +grid_dist_follows_tiling_pattern (gimple_seq seq, grid_prop *grid, + bool in_parallel) +{ + gimple_stmt_iterator gsi; + for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + + if (grid_safe_assignment_p (stmt, grid) + || gimple_code (stmt) == GIMPLE_GOTO + || gimple_code (stmt) == GIMPLE_LABEL + || gimple_code (stmt) == GIMPLE_COND) + continue; + else if (gbind *bind = dyn_cast <gbind *> (stmt)) + { + if (!grid_dist_follows_tiling_pattern (gimple_bind_body (bind), + grid, in_parallel)) + return false; + continue; + } + else if (gtry *try_stmt = dyn_cast <gtry *> (stmt)) + { + if (gimple_try_kind (try_stmt) == GIMPLE_TRY_CATCH) + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, + GRID_MISSED_MSG_PREFIX "the distribute " + "construct contains a try..catch region\n"); + dump_printf_loc (MSG_NOTE, gimple_location (try_stmt), + "This statement cannot be analyzed for " + "tiled gridification\n"); + } + return false; + } + if (!grid_dist_follows_tiling_pattern (gimple_try_eval (try_stmt), + grid, in_parallel)) + return false; + if (!grid_dist_follows_tiling_pattern (gimple_try_cleanup (try_stmt), + grid, in_parallel)) + return false; + continue; + } + else if (is_gimple_call (stmt)) + { + tree fndecl = gimple_call_fndecl (stmt); + if (fndecl && grid_call_permissible_in_distribute_p (fndecl)) + continue; + + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, + GRID_MISSED_MSG_PREFIX "the distribute " + "construct contains a call\n"); + dump_printf_loc (MSG_NOTE, gimple_location (stmt), + "This statement cannot be analyzed for " + "tiled gridification\n"); + } + return false; + } + else if (gomp_parallel *par = dyn_cast <gomp_parallel *> (stmt)) + { + if (in_parallel) + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, + GRID_MISSED_MSG_PREFIX "a parallel " + "construct contains 
another parallel " + "construct\n"); + dump_printf_loc (MSG_NOTE, gimple_location (stmt), + "This parallel construct is nested in " + "another one\n"); + } + return false; + } + if (!grid_parallel_clauses_gridifiable (par, grid->target_loc) + || !grid_dist_follows_tiling_pattern (gimple_omp_body (par), + grid, true)) + return false; + } + else if (gomp_for *gfor = dyn_cast <gomp_for *> (stmt)) + { + if (!in_parallel) + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, + GRID_MISSED_MSG_PREFIX "a loop " + "construct is not nested within a parallel " + "construct\n"); + dump_printf_loc (MSG_NOTE, gimple_location (stmt), + "This loop construct is not nested in " + "a parallel construct\n"); + } + return false; + } + if (!grid_gfor_follows_tiling_pattern (gfor, grid)) + return false; + } + else + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, + GRID_MISSED_MSG_PREFIX "the distribute " + "construct contains a complex statement\n"); + dump_printf_loc (MSG_NOTE, gimple_location (stmt), + "This statement cannot be analyzed for " + "tiled gridification\n"); + } + return false; + } + } + return true; +} + +/* If TARGET follows a pattern that can be turned into a gridified HSA kernel, + return true, otherwise return false. In the case of success, also fill in + GRID with information describing the kernel grid. 
*/ + +static bool +grid_target_follows_gridifiable_pattern (gomp_target *target, grid_prop *grid) +{ + if (gimple_omp_target_kind (target) != GF_OMP_TARGET_KIND_REGION) + return false; + + location_t tloc = gimple_location (target); + grid->target_loc = tloc; + gimple *stmt + = grid_find_single_omp_among_assignments (gimple_omp_body (target), + grid, "target"); + if (!stmt) + return false; + gomp_teams *teams = dyn_cast <gomp_teams *> (stmt); + tree group_size = NULL; + if (!teams) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, + GRID_MISSED_MSG_PREFIX "it does not have a sole teams " + "construct in it.\n"); + return false; + } + + tree clauses = gimple_omp_teams_clauses (teams); + while (clauses) + { + switch (OMP_CLAUSE_CODE (clauses)) + { + case OMP_CLAUSE_NUM_TEAMS: + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, + GRID_MISSED_MSG_PREFIX "the teams construct " + "contains a num_teams clause\n "); + return false; + + case OMP_CLAUSE_REDUCTION: + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, + GRID_MISSED_MSG_PREFIX "a reduction " + "clause is present\n "); + return false; + + case OMP_CLAUSE_THREAD_LIMIT: + if (!integer_zerop (OMP_CLAUSE_OPERAND (clauses, 0))) + group_size = OMP_CLAUSE_OPERAND (clauses, 0); + break; + + default: + break; + } + clauses = OMP_CLAUSE_CHAIN (clauses); + } + + stmt = grid_find_single_omp_among_assignments (gimple_omp_body (teams), grid, + "teams"); + if (!stmt) + return false; + gomp_for *dist = dyn_cast <gomp_for *> (stmt); + if (!dist) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, + GRID_MISSED_MSG_PREFIX "the teams construct does not " + "have a single distribute construct in it.\n"); + return false; + } + + gcc_assert (gimple_omp_for_kind (dist) == GF_OMP_FOR_KIND_DISTRIBUTE); + + grid->collapse = gimple_omp_for_collapse (dist); + if (grid->collapse > 3) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, + GRID_MISSED_MSG_PREFIX "the 
distribute construct " + "contains collapse clause with parameter greater " + "than 3\n"); + return false; + } + + struct omp_for_data fd; + struct omp_for_data_loop *dist_loops + = (struct omp_for_data_loop *)alloca (grid->collapse + * sizeof (struct omp_for_data_loop)); + omp_extract_for_data (dist, &fd, dist_loops); + if (fd.chunk_size) + { + if (group_size && !operand_equal_p (group_size, fd.chunk_size, 0)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, + GRID_MISSED_MSG_PREFIX "the teams " + "thread limit is different from distribute " + "schedule chunk\n"); + return false; + } + group_size = fd.chunk_size; + } + if (group_size && grid->collapse > 1) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, + GRID_MISSED_MSG_PREFIX "group size cannot be " + "set using thread_limit or schedule clauses " + "when also using a collapse clause greater than 1\n"); + return false; + } + + if (gimple_omp_for_combined_p (dist)) + { + grid->tiling = false; + grid->group_sizes[0] = group_size; + for (unsigned i = 1; i < grid->collapse; i++) + grid->group_sizes[i] = NULL; + return grid_dist_follows_simple_pattern (dist, grid); + } + else + { + grid->tiling = true; + if (group_size) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, + GRID_MISSED_MSG_PREFIX "group size cannot be set " + "using thread_limit or schedule clauses when " + "distribute and loop constructs do not form " + "one combined construct\n"); + return false; + } + for (unsigned i = 0; i < grid->collapse; i++) + { + if (fd.loops[i].cond_code == GT_EXPR) + grid->group_sizes[i] = fold_build1 (NEGATE_EXPR, + TREE_TYPE (fd.loops[i].step), + fd.loops[i].step); + else + grid->group_sizes[i] = fd.loops[i].step; + } + return grid_dist_follows_tiling_pattern (gimple_omp_body (dist), grid, + false); + } +} + +/* Operand walker, used to remap pre-body declarations according to a hash map + provided in DATA. 
*/ + +static tree +grid_remap_prebody_decls (tree *tp, int *walk_subtrees, void *data) +{ + tree t = *tp; + + if (DECL_P (t) || TYPE_P (t)) + *walk_subtrees = 0; + else + *walk_subtrees = 1; + + if (VAR_P (t)) + { + struct walk_stmt_info *wi = (struct walk_stmt_info *) data; + hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info; + tree *repl = declmap->get (t); + if (repl) + *tp = *repl; + } + return NULL_TREE; +} + +/* Identifiers of segments into which a particular variable should be places + when gridifying. */ + +enum grid_var_segment {GRID_SEGMENT_PRIVATE, GRID_SEGMENT_GROUP, + GRID_SEGMENT_GLOBAL}; + +/* Mark VAR so that it is eventually placed into SEGMENT. Place an artificial + builtin call into SEQ that will make sure the variable is always considered + address taken. */ + +static void +grid_mark_variable_segment (tree var, enum grid_var_segment segment) +{ + /* Making a non-addressable variables would require that we re-gimplify all + their uses. Fortunately, we do not have to do this because if they are + not addressable, it means they are not used in atomic or parallel + statements and so relaxed GPU consistency rules mean we can just keep them + private. */ + if (!TREE_ADDRESSABLE (var)) + return; + + switch (segment) + { + case GRID_SEGMENT_GROUP: + DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_group_segment"), + NULL, DECL_ATTRIBUTES (var)); + break; + case GRID_SEGMENT_GLOBAL: + DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_global_segment"), + NULL, DECL_ATTRIBUTES (var)); + break; + default: + gcc_unreachable (); + } + + if (!TREE_STATIC (var)) + { + TREE_STATIC (var) = 1; + varpool_node::finalize_decl (var); + } + +} + +/* Copy leading register-type assignments to local variables in SRC to just + before DST, Creating temporaries, adjusting mapping of operands in WI and + remapping operands as necessary. Add any new temporaries to TGT_BIND. 
+ Return the first statement that does not conform to grid_safe_assignment_p + or NULL. If VAR_SEGMENT is not GRID_SEGMENT_PRIVATE, also mark all + variables in traversed bind statements so that they are put into the + appropriate segment. */ + +static gimple * +grid_copy_leading_local_assignments (gimple_seq src, gimple_stmt_iterator *dst, + gbind *tgt_bind, + enum grid_var_segment var_segment, + struct walk_stmt_info *wi) +{ + hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info; + gimple_stmt_iterator gsi; + for (gsi = gsi_start (src); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + if (gbind *bind = dyn_cast <gbind *> (stmt)) + { + gimple *r = grid_copy_leading_local_assignments + (gimple_bind_body (bind), dst, tgt_bind, var_segment, wi); + + if (var_segment != GRID_SEGMENT_PRIVATE) + for (tree var = gimple_bind_vars (bind); var; var = DECL_CHAIN (var)) + grid_mark_variable_segment (var, var_segment); + if (r) + return r; + else + continue; + } + if (!grid_safe_assignment_p (stmt, NULL)) + return stmt; + tree lhs = gimple_assign_lhs (as_a <gassign *> (stmt)); + tree repl = copy_var_decl (lhs, create_tmp_var_name (NULL), + TREE_TYPE (lhs)); + DECL_CONTEXT (repl) = current_function_decl; + gimple_bind_append_vars (tgt_bind, repl); + + declmap->put (lhs, repl); + gassign *copy = as_a <gassign *> (gimple_copy (stmt)); + walk_gimple_op (copy, grid_remap_prebody_decls, wi); + gsi_insert_before (dst, copy, GSI_SAME_STMT); + } + return NULL; +} + +/* Statement walker function to make adjustments to statements within the + gridifed kernel copy. 
*/ + +static tree +grid_process_grid_body (gimple_stmt_iterator *gsi, bool *handled_ops_p, + struct walk_stmt_info *) +{ + *handled_ops_p = false; + gimple *stmt = gsi_stmt (*gsi); + if (gimple_code (stmt) == GIMPLE_OMP_FOR + && (gimple_omp_for_kind (stmt) & GF_OMP_FOR_SIMD)) + { + gomp_for *loop = as_a <gomp_for *> (stmt); + tree clauses = gimple_omp_for_clauses (loop); + tree cl = omp_find_clause (clauses, OMP_CLAUSE_SAFELEN); + if (cl) + OMP_CLAUSE_SAFELEN_EXPR (cl) = integer_one_node; + else + { + tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_SAFELEN); + OMP_CLAUSE_SAFELEN_EXPR (c) = integer_one_node; + OMP_CLAUSE_CHAIN (c) = clauses; + gimple_omp_for_set_clauses (loop, c); + } + } + return NULL_TREE; +} + +/* Given a PARLOOP that is a normal for looping construct but also a part of a + combined construct with a simd loop, eliminate the simd loop. */ + +static void +grid_eliminate_combined_simd_part (gomp_for *parloop) +{ + struct walk_stmt_info wi; + + memset (&wi, 0, sizeof (wi)); + wi.val_only = true; + enum gf_mask msk = GF_OMP_FOR_SIMD; + wi.info = (void *) &msk; + walk_gimple_seq (gimple_omp_body (parloop), omp_find_combined_for, NULL, &wi); + gimple *stmt = (gimple *) wi.info; + /* We expect that the SIMD id the only statement in the parallel loop. */ + gcc_assert (stmt + && gimple_code (stmt) == GIMPLE_OMP_FOR + && (gimple_omp_for_kind (stmt) == GF_OMP_FOR_SIMD) + && gimple_omp_for_combined_into_p (stmt) + && !gimple_omp_for_combined_p (stmt)); + gomp_for *simd = as_a <gomp_for *> (stmt); + + /* Copy over the iteration properties because the body refers to the index in + the bottmom-most loop. 
*/ + unsigned i, collapse = gimple_omp_for_collapse (parloop); + gcc_checking_assert (collapse == gimple_omp_for_collapse (simd)); + for (i = 0; i < collapse; i++) + { + gimple_omp_for_set_index (parloop, i, gimple_omp_for_index (simd, i)); + gimple_omp_for_set_initial (parloop, i, gimple_omp_for_initial (simd, i)); + gimple_omp_for_set_final (parloop, i, gimple_omp_for_final (simd, i)); + gimple_omp_for_set_incr (parloop, i, gimple_omp_for_incr (simd, i)); + } + + tree *tgt= gimple_omp_for_clauses_ptr (parloop); + while (*tgt) + tgt = &OMP_CLAUSE_CHAIN (*tgt); + + /* Copy over all clauses, except for linaer clauses, which are turned into + private clauses, and all other simd-specificl clauses, which are + ignored. */ + tree *pc = gimple_omp_for_clauses_ptr (simd); + while (*pc) + { + tree c = *pc; + switch (TREE_CODE (c)) + { + case OMP_CLAUSE_LINEAR: + { + tree priv = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_PRIVATE); + OMP_CLAUSE_DECL (priv) = OMP_CLAUSE_DECL (c); + OMP_CLAUSE_CHAIN (priv) = NULL; + *tgt = priv; + tgt = &OMP_CLAUSE_CHAIN (priv); + pc = &OMP_CLAUSE_CHAIN (c); + break; + } + + case OMP_CLAUSE_SAFELEN: + case OMP_CLAUSE_SIMDLEN: + case OMP_CLAUSE_ALIGNED: + pc = &OMP_CLAUSE_CHAIN (c); + break; + + default: + *pc = OMP_CLAUSE_CHAIN (c); + OMP_CLAUSE_CHAIN (c) = NULL; + *tgt = c; + tgt = &OMP_CLAUSE_CHAIN(c); + break; + } + } + + /* Finally, throw away the simd and mark the parallel loop as not + combined. */ + gimple_omp_set_body (parloop, gimple_omp_body (simd)); + gimple_omp_for_set_combined_p (parloop, false); +} + +/* Statement walker function marking all parallels as grid_phony and loops as + grid ones representing threads of a particular thread group. 
*/ + +static tree +grid_mark_tiling_loops (gimple_stmt_iterator *gsi, bool *handled_ops_p, + struct walk_stmt_info *wi_in) +{ + *handled_ops_p = false; + if (gomp_for *loop = dyn_cast <gomp_for *> (gsi_stmt (*gsi))) + { + *handled_ops_p = true; + gimple_omp_for_set_kind (loop, GF_OMP_FOR_KIND_GRID_LOOP); + gimple_omp_for_set_grid_intra_group (loop, true); + if (gimple_omp_for_combined_p (loop)) + grid_eliminate_combined_simd_part (loop); + + struct walk_stmt_info body_wi; + memset (&body_wi, 0, sizeof (body_wi)); + walk_gimple_seq_mod (gimple_omp_body_ptr (loop), + grid_process_grid_body, NULL, &body_wi); + + gbind *bind = (gbind *) wi_in->info; + tree c; + for (c = gimple_omp_for_clauses (loop); c; c = OMP_CLAUSE_CHAIN (c)) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE) + { + push_gimplify_context (); + tree ov = OMP_CLAUSE_DECL (c); + tree gv = copy_var_decl (ov, create_tmp_var_name (NULL), + TREE_TYPE (ov)); + + grid_mark_variable_segment (gv, GRID_SEGMENT_GROUP); + DECL_CONTEXT (gv) = current_function_decl; + gimple_bind_append_vars (bind, gv); + tree x = lang_hooks.decls.omp_clause_assign_op (c, gv, ov); + gimplify_and_add (x, &OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c)); + x = lang_hooks.decls.omp_clause_copy_ctor (c, ov, gv); + gimple_seq l = NULL; + gimplify_and_add (x, &l); + gsi_insert_seq_after (gsi, l, GSI_SAME_STMT); + pop_gimplify_context (bind); + } + } + return NULL_TREE; +} + +/* Statement walker function marking all parallels as grid_phony and loops as + grid ones representing threads of a particular thread group. 
*/ + +static tree +grid_mark_tiling_parallels_and_loops (gimple_stmt_iterator *gsi, + bool *handled_ops_p, + struct walk_stmt_info *wi_in) +{ + *handled_ops_p = false; + wi_in->removed_stmt = false; + gimple *stmt = gsi_stmt (*gsi); + if (gbind *bind = dyn_cast <gbind *> (stmt)) + { + for (tree var = gimple_bind_vars (bind); var; var = DECL_CHAIN (var)) + grid_mark_variable_segment (var, GRID_SEGMENT_GROUP); + } + else if (gomp_parallel *parallel = dyn_cast <gomp_parallel *> (stmt)) + { + *handled_ops_p = true; + gimple_omp_parallel_set_grid_phony (parallel, true); + + gbind *new_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK)); + gimple_bind_set_body (new_bind, gimple_omp_body (parallel)); + gimple_seq s = NULL; + gimple_seq_add_stmt (&s, new_bind); + gimple_omp_set_body (parallel, s); + + struct walk_stmt_info wi_par; + memset (&wi_par, 0, sizeof (wi_par)); + wi_par.info = new_bind; + walk_gimple_seq_mod (gimple_bind_body_ptr (new_bind), + grid_mark_tiling_loops, NULL, &wi_par); + } + else if (is_a <gcall *> (stmt)) + wi_in->removed_stmt = grid_handle_call_in_distribute (gsi); + return NULL_TREE; +} + +/* Given freshly copied top level kernel SEQ, identify the individual OMP + components, mark them as part of kernel, copy assignment leading to them + just before DST, remapping them using WI and adding new temporaries to + TGT_BIND, and and return the loop that will be used for kernel dispatch. 
*/ + +static gomp_for * +grid_process_kernel_body_copy (grid_prop *grid, gimple_seq seq, + gimple_stmt_iterator *dst, + gbind *tgt_bind, struct walk_stmt_info *wi) +{ + gimple *stmt = grid_copy_leading_local_assignments (seq, dst, tgt_bind, + GRID_SEGMENT_GLOBAL, wi); + gomp_teams *teams = dyn_cast <gomp_teams *> (stmt); + gcc_assert (teams); + gimple_omp_teams_set_grid_phony (teams, true); + stmt = grid_copy_leading_local_assignments (gimple_omp_body (teams), dst, + tgt_bind, GRID_SEGMENT_GLOBAL, wi); + gcc_checking_assert (stmt); + gomp_for *dist = dyn_cast <gomp_for *> (stmt); + gcc_assert (dist); + gimple_seq prebody = gimple_omp_for_pre_body (dist); + if (prebody) + grid_copy_leading_local_assignments (prebody, dst, tgt_bind, + GRID_SEGMENT_GROUP, wi); + + if (grid->tiling) + { + gimple_omp_for_set_kind (dist, GF_OMP_FOR_KIND_GRID_LOOP); + gimple_omp_for_set_grid_group_iter (dist, true); + + struct walk_stmt_info wi_tiled; + memset (&wi_tiled, 0, sizeof (wi_tiled)); + walk_gimple_seq_mod (gimple_omp_body_ptr (dist), + grid_mark_tiling_parallels_and_loops, NULL, + &wi_tiled); + return dist; + } + else + { + gimple_omp_for_set_grid_phony (dist, true); + stmt = grid_copy_leading_local_assignments (gimple_omp_body (dist), dst, + tgt_bind, + GRID_SEGMENT_PRIVATE, wi); + gcc_checking_assert (stmt); + gomp_parallel *parallel = as_a <gomp_parallel *> (stmt); + gimple_omp_parallel_set_grid_phony (parallel, true); + stmt = grid_copy_leading_local_assignments (gimple_omp_body (parallel), + dst, tgt_bind, + GRID_SEGMENT_PRIVATE, wi); + gomp_for *inner_loop = as_a <gomp_for *> (stmt); + gimple_omp_for_set_kind (inner_loop, GF_OMP_FOR_KIND_GRID_LOOP); + prebody = gimple_omp_for_pre_body (inner_loop); + if (prebody) + grid_copy_leading_local_assignments (prebody, dst, tgt_bind, + GRID_SEGMENT_PRIVATE, wi); + + if (gimple_omp_for_combined_p (inner_loop)) + grid_eliminate_combined_simd_part (inner_loop); + struct walk_stmt_info body_wi;; + memset (&body_wi, 0, sizeof 
(body_wi)); + walk_gimple_seq_mod (gimple_omp_body_ptr (inner_loop), + grid_process_grid_body, NULL, &body_wi); + + return inner_loop; + } +} + +/* If TARGET points to a GOMP_TARGET which follows a gridifiable pattern, + create a GPU kernel for it. GSI must point to the same statement, TGT_BIND + is the bind into which temporaries inserted before TARGET should be + added. */ + +static void +grid_attempt_target_gridification (gomp_target *target, + gimple_stmt_iterator *gsi, + gbind *tgt_bind) +{ + /* removed group_size */ + grid_prop grid; + memset (&grid, 0, sizeof (grid)); + if (!target || !grid_target_follows_gridifiable_pattern (target, &grid)) + return; + + location_t loc = gimple_location (target); + if (dump_enabled_p ()) + dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, + "Target construct will be turned into a gridified HSA " + "kernel\n"); + + /* Copy target body to a GPUKERNEL construct: */ + gimple_seq kernel_seq = copy_gimple_seq_and_replace_locals + (gimple_omp_body (target)); + + hash_map<tree, tree> *declmap = new hash_map<tree, tree>; + struct walk_stmt_info wi; + memset (&wi, 0, sizeof (struct walk_stmt_info)); + wi.info = declmap; + + /* Copy assignments in between OMP statements before target, mark OMP + statements within copy appropriately. 
*/ + gomp_for *inner_loop = grid_process_kernel_body_copy (&grid, kernel_seq, gsi, + tgt_bind, &wi); + + gbind *old_bind = as_a <gbind *> (gimple_seq_first (gimple_omp_body (target))); + gbind *new_bind = as_a <gbind *> (gimple_seq_first (kernel_seq)); + tree new_block = gimple_bind_block (new_bind); + tree enc_block = BLOCK_SUPERCONTEXT (gimple_bind_block (old_bind)); + BLOCK_CHAIN (new_block) = BLOCK_SUBBLOCKS (enc_block); + BLOCK_SUBBLOCKS (enc_block) = new_block; + BLOCK_SUPERCONTEXT (new_block) = enc_block; + gimple *gpukernel = gimple_build_omp_grid_body (kernel_seq); + gimple_seq_add_stmt + (gimple_bind_body_ptr (as_a <gbind *> (gimple_omp_body (target))), + gpukernel); + + for (size_t i = 0; i < grid.collapse; i++) + walk_tree (&grid.group_sizes[i], grid_remap_prebody_decls, &wi, NULL); + push_gimplify_context (); + for (size_t i = 0; i < grid.collapse; i++) + { + tree itype, type = TREE_TYPE (gimple_omp_for_index (inner_loop, i)); + if (POINTER_TYPE_P (type)) + itype = signed_type_for (type); + else + itype = type; + + enum tree_code cond_code = gimple_omp_for_cond (inner_loop, i); + tree n1 = unshare_expr (gimple_omp_for_initial (inner_loop, i)); + walk_tree (&n1, grid_remap_prebody_decls, &wi, NULL); + tree n2 = unshare_expr (gimple_omp_for_final (inner_loop, i)); + walk_tree (&n2, grid_remap_prebody_decls, &wi, NULL); + omp_adjust_for_condition (loc, &cond_code, &n2); + n1 = fold_convert (itype, n1); + n2 = fold_convert (itype, n2); + + tree step + = omp_get_for_step_from_incr (loc, gimple_omp_for_incr (inner_loop, i)); + + tree t = build_int_cst (itype, (cond_code == LT_EXPR ? 
-1 : 1)); + t = fold_build2 (PLUS_EXPR, itype, step, t); + t = fold_build2 (PLUS_EXPR, itype, t, n2); + t = fold_build2 (MINUS_EXPR, itype, t, n1); + if (TYPE_UNSIGNED (itype) && cond_code == GT_EXPR) + t = fold_build2 (TRUNC_DIV_EXPR, itype, + fold_build1 (NEGATE_EXPR, itype, t), + fold_build1 (NEGATE_EXPR, itype, step)); + else + t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); + if (grid.tiling) + { + if (cond_code == GT_EXPR) + step = fold_build1 (NEGATE_EXPR, itype, step); + t = fold_build2 (MULT_EXPR, itype, t, step); + } + + tree gs = fold_convert (uint32_type_node, t); + gimple_seq tmpseq = NULL; + gimplify_expr (&gs, &tmpseq, NULL, is_gimple_val, fb_rvalue); + if (!gimple_seq_empty_p (tmpseq)) + gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT); + + tree ws; + if (grid.group_sizes[i]) + { + ws = fold_convert (uint32_type_node, grid.group_sizes[i]); + tmpseq = NULL; + gimplify_expr (&ws, &tmpseq, NULL, is_gimple_val, fb_rvalue); + if (!gimple_seq_empty_p (tmpseq)) + gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT); + } + else + ws = build_zero_cst (uint32_type_node); + + tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__GRIDDIM_); + OMP_CLAUSE__GRIDDIM__DIMENSION (c) = i; + OMP_CLAUSE__GRIDDIM__SIZE (c) = gs; + OMP_CLAUSE__GRIDDIM__GROUP (c) = ws; + OMP_CLAUSE_CHAIN (c) = gimple_omp_target_clauses (target); + gimple_omp_target_set_clauses (target, c); + } + pop_gimplify_context (tgt_bind); + delete declmap; + return; +} + +/* Walker function doing all the work for create_target_kernels. 
*/ + +static tree +grid_gridify_all_targets_stmt (gimple_stmt_iterator *gsi, + bool *handled_ops_p, + struct walk_stmt_info *incoming) +{ + *handled_ops_p = false; + + gimple *stmt = gsi_stmt (*gsi); + gomp_target *target = dyn_cast <gomp_target *> (stmt); + if (target) + { + gbind *tgt_bind = (gbind *) incoming->info; + gcc_checking_assert (tgt_bind); + grid_attempt_target_gridification (target, gsi, tgt_bind); + return NULL_TREE; + } + gbind *bind = dyn_cast <gbind *> (stmt); + if (bind) + { + *handled_ops_p = true; + struct walk_stmt_info wi; + memset (&wi, 0, sizeof (wi)); + wi.info = bind; + walk_gimple_seq_mod (gimple_bind_body_ptr (bind), + grid_gridify_all_targets_stmt, NULL, &wi); + } + return NULL_TREE; +} + +/* Attempt to gridify all target constructs in BODY_P. All such targets will + have their bodies duplicated, with the new copy being put into a + gimple_omp_grid_body statement. All kernel-related construct within the + grid_body will be marked with phony flags or kernel kinds. Moreover, some + re-structuring is often needed, such as copying pre-bodies before the target + construct so that kernel grid sizes can be computed. */ + +void +omp_grid_gridify_all_targets (gimple_seq *body_p) +{ + struct walk_stmt_info wi; + memset (&wi, 0, sizeof (wi)); + walk_gimple_seq_mod (body_p, grid_gridify_all_targets_stmt, NULL, &wi); +} diff --git a/gcc/omp-grid.h b/gcc/omp-grid.h new file mode 100644 index 00000000000..90a0d355007 --- /dev/null +++ b/gcc/omp-grid.h @@ -0,0 +1,27 @@ +/* Lowering and expansion of OpenMP directives for HSA GPU agents. + + Copyright (C) 2013-2016 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. 
+ +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#ifndef GCC_OMP_GRID_H +#define GCC_OMP_GRID_H + +extern tree omp_grid_lastprivate_predicate (struct omp_for_data *fd); +extern void omp_grid_gridify_all_targets (gimple_seq *body_p); + +#endif /* GCC_OMP_GRID_H */ diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 7bcaeee2409..4fb59eb400f 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -25,26 +25,17 @@ along with GCC; see the file COPYING3. If not see #include "config.h" #include "system.h" #include "coretypes.h" -#include "memmodel.h" #include "backend.h" #include "target.h" -#include "rtl.h" #include "tree.h" #include "gimple.h" -#include "cfghooks.h" -#include "alloc-pool.h" #include "tree-pass.h" #include "ssa.h" -#include "expmed.h" -#include "optabs.h" -#include "emit-rtl.h" #include "cgraph.h" #include "pretty-print.h" #include "diagnostic-core.h" -#include "alias.h" #include "fold-const.h" #include "stor-layout.h" -#include "cfganal.h" #include "internal-fn.h" #include "gimple-fold.h" #include "gimplify.h" @@ -54,36 +45,19 @@ along with GCC; see the file COPYING3. 
If not see #include "tree-iterator.h" #include "tree-inline.h" #include "langhooks.h" -#include "tree-cfg.h" -#include "tree-into-ssa.h" -#include "flags.h" -#include "dojump.h" -#include "explow.h" -#include "calls.h" -#include "varasm.h" -#include "stmt.h" -#include "expr.h" #include "tree-dfa.h" #include "tree-ssa.h" -#include "except.h" #include "splay-tree.h" -#include "cfgloop.h" -#include "common/common-target.h" +#include "omp-general.h" #include "omp-low.h" +#include "omp-grid.h" #include "gimple-low.h" -#include "tree-cfgcleanup.h" #include "symbol-summary.h" -#include "ipa-prop.h" #include "tree-nested.h" -#include "tree-eh.h" -#include "cilk.h" #include "context.h" -#include "lto-section-names.h" #include "gomp-constants.h" #include "gimple-pretty-print.h" -#include "symbol-summary.h" #include "hsa.h" -#include "params.h" /* Lowering of OMP parallel and workshare constructs proceeds in two phases. The first phase scans the function looking for OMP statements @@ -96,52 +70,6 @@ along with GCC; see the file COPYING3. If not see scanned for regions which are then moved to a new function, to be invoked by the thread library, or offloaded. */ -/* OMP region information. Every parallel and workshare - directive is enclosed between two markers, the OMP_* directive - and a corresponding GIMPLE_OMP_RETURN statement. */ - -struct omp_region -{ - /* The enclosing region. */ - struct omp_region *outer; - - /* First child region. */ - struct omp_region *inner; - - /* Next peer region. */ - struct omp_region *next; - - /* Block containing the omp directive as its last stmt. */ - basic_block entry; - - /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */ - basic_block exit; - - /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */ - basic_block cont; - - /* If this is a combined parallel+workshare region, this is a list - of additional arguments needed by the combined parallel+workshare - library call. 
*/ - vec<tree, va_gc> *ws_args; - - /* The code for the omp directive of this region. */ - enum gimple_code type; - - /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */ - enum omp_clause_schedule_kind sched_kind; - - /* Schedule modifiers. */ - unsigned char sched_modifiers; - - /* True if this is a combined parallel+workshare region. */ - bool is_combined_parallel; - - /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has - a depend clause. */ - gomp_ordered *ord_stmt; -}; - /* Context structure. Used to store information about each parallel directive in the code. */ @@ -196,89 +124,14 @@ struct omp_context bool cancellable; }; -/* A structure holding the elements of: - for (V = N1; V cond N2; V += STEP) [...] */ - -struct omp_for_data_loop -{ - tree v, n1, n2, step; - enum tree_code cond_code; -}; - -/* A structure describing the main elements of a parallel loop. */ - -struct omp_for_data -{ - struct omp_for_data_loop loop; - tree chunk_size; - gomp_for *for_stmt; - tree pre, iter_type; - int collapse; - int ordered; - bool have_nowait, have_ordered, simd_schedule; - unsigned char sched_modifiers; - enum omp_clause_schedule_kind sched_kind; - struct omp_for_data_loop *loops; -}; - -/* Describe the OpenACC looping structure of a function. The entire - function is held in a 'NULL' loop. */ - -struct oacc_loop -{ - oacc_loop *parent; /* Containing loop. */ - - oacc_loop *child; /* First inner loop. */ - - oacc_loop *sibling; /* Next loop within same parent. */ - - location_t loc; /* Location of the loop start. */ - - gcall *marker; /* Initial head marker. */ - - gcall *heads[GOMP_DIM_MAX]; /* Head marker functions. */ - gcall *tails[GOMP_DIM_MAX]; /* Tail marker functions. */ - - tree routine; /* Pseudo-loop enclosing a routine. */ - - unsigned mask; /* Partitioning mask. */ - unsigned inner; /* Partitioning of inner loops. */ - unsigned flags; /* Partitioning flags. */ - unsigned ifns; /* Contained loop abstraction functions. 
*/ - tree chunk_size; /* Chunk size. */ - gcall *head_end; /* Final marker of head sequence. */ -}; - -/* Flags for an OpenACC loop. */ - -enum oacc_loop_flags { - OLF_SEQ = 1u << 0, /* Explicitly sequential */ - OLF_AUTO = 1u << 1, /* Compiler chooses axes. */ - OLF_INDEPENDENT = 1u << 2, /* Iterations are known independent. */ - OLF_GANG_STATIC = 1u << 3, /* Gang partitioning is static (has op). */ - - /* Explicitly specified loop axes. */ - OLF_DIM_BASE = 4, - OLF_DIM_GANG = 1u << (OLF_DIM_BASE + GOMP_DIM_GANG), - OLF_DIM_WORKER = 1u << (OLF_DIM_BASE + GOMP_DIM_WORKER), - OLF_DIM_VECTOR = 1u << (OLF_DIM_BASE + GOMP_DIM_VECTOR), - - OLF_MAX = OLF_DIM_BASE + GOMP_DIM_MAX -}; - - static splay_tree all_contexts; static int taskreg_nesting_level; static int target_nesting_level; -static struct omp_region *root_omp_region; static bitmap task_shared_vars; static vec<omp_context *> taskreg_contexts; -static bool omp_any_child_fn_dumped; static void scan_omp (gimple_seq *, omp_context *); static tree scan_omp_1_op (tree *, int *, void *); -static gphi *find_phi_with_arg_on_edge (tree, edge); -static int omp_max_simt_vf (void); #define WALK_SUBSTMTS \ case GIMPLE_BIND: \ @@ -379,9 +232,6 @@ unshare_and_remap (tree x, tree from, tree to) return x; } -/* Holds offload tables with decls. */ -vec<tree, va_gc> *offload_funcs, *offload_vars; - /* Convenience function for calling scan_omp_1_op on tree operands. */ static inline tree @@ -400,18 +250,6 @@ static void lower_omp (gimple_seq *, omp_context *); static tree lookup_decl_in_outer_ctx (tree, omp_context *); static tree maybe_lookup_decl_in_outer_ctx (tree, omp_context *); -/* Find an OMP clause of type KIND within CLAUSES. */ - -tree -find_omp_clause (tree clauses, enum omp_clause_code kind) -{ - for (; clauses ; clauses = OMP_CLAUSE_CHAIN (clauses)) - if (OMP_CLAUSE_CODE (clauses) == kind) - return clauses; - - return NULL_TREE; -} - /* Return true if CTX is for an omp parallel. 
*/ static inline bool @@ -448,592 +286,6 @@ is_taskreg_ctx (omp_context *ctx) return is_parallel_ctx (ctx) || is_task_ctx (ctx); } - -/* Return true if REGION is a combined parallel+workshare region. */ - -static inline bool -is_combined_parallel (struct omp_region *region) -{ - return region->is_combined_parallel; -} - -/* Adjust *COND_CODE and *N2 so that the former is either LT_EXPR or - GT_EXPR. */ - -static void -adjust_for_condition (location_t loc, enum tree_code *cond_code, tree *n2) -{ - switch (*cond_code) - { - case LT_EXPR: - case GT_EXPR: - case NE_EXPR: - break; - case LE_EXPR: - if (POINTER_TYPE_P (TREE_TYPE (*n2))) - *n2 = fold_build_pointer_plus_hwi_loc (loc, *n2, 1); - else - *n2 = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (*n2), *n2, - build_int_cst (TREE_TYPE (*n2), 1)); - *cond_code = LT_EXPR; - break; - case GE_EXPR: - if (POINTER_TYPE_P (TREE_TYPE (*n2))) - *n2 = fold_build_pointer_plus_hwi_loc (loc, *n2, -1); - else - *n2 = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (*n2), *n2, - build_int_cst (TREE_TYPE (*n2), 1)); - *cond_code = GT_EXPR; - break; - default: - gcc_unreachable (); - } -} - -/* Return the looping step from INCR, extracted from the step of a gimple omp - for statement. */ - -static tree -get_omp_for_step_from_incr (location_t loc, tree incr) -{ - tree step; - switch (TREE_CODE (incr)) - { - case PLUS_EXPR: - step = TREE_OPERAND (incr, 1); - break; - case POINTER_PLUS_EXPR: - step = fold_convert (ssizetype, TREE_OPERAND (incr, 1)); - break; - case MINUS_EXPR: - step = TREE_OPERAND (incr, 1); - step = fold_build1_loc (loc, NEGATE_EXPR, TREE_TYPE (step), step); - break; - default: - gcc_unreachable (); - } - return step; -} - -/* Extract the header elements of parallel loop FOR_STMT and store - them into *FD. 
*/ - -static void -extract_omp_for_data (gomp_for *for_stmt, struct omp_for_data *fd, - struct omp_for_data_loop *loops) -{ - tree t, var, *collapse_iter, *collapse_count; - tree count = NULL_TREE, iter_type = long_integer_type_node; - struct omp_for_data_loop *loop; - int i; - struct omp_for_data_loop dummy_loop; - location_t loc = gimple_location (for_stmt); - bool simd = gimple_omp_for_kind (for_stmt) & GF_OMP_FOR_SIMD; - bool distribute = gimple_omp_for_kind (for_stmt) - == GF_OMP_FOR_KIND_DISTRIBUTE; - bool taskloop = gimple_omp_for_kind (for_stmt) - == GF_OMP_FOR_KIND_TASKLOOP; - tree iterv, countv; - - fd->for_stmt = for_stmt; - fd->pre = NULL; - if (gimple_omp_for_collapse (for_stmt) > 1) - fd->loops = loops; - else - fd->loops = &fd->loop; - - fd->have_nowait = distribute || simd; - fd->have_ordered = false; - fd->collapse = 1; - fd->ordered = 0; - fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC; - fd->sched_modifiers = 0; - fd->chunk_size = NULL_TREE; - fd->simd_schedule = false; - if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_CILKFOR) - fd->sched_kind = OMP_CLAUSE_SCHEDULE_CILKFOR; - collapse_iter = NULL; - collapse_count = NULL; - - for (t = gimple_omp_for_clauses (for_stmt); t ; t = OMP_CLAUSE_CHAIN (t)) - switch (OMP_CLAUSE_CODE (t)) - { - case OMP_CLAUSE_NOWAIT: - fd->have_nowait = true; - break; - case OMP_CLAUSE_ORDERED: - fd->have_ordered = true; - if (OMP_CLAUSE_ORDERED_EXPR (t)) - fd->ordered = tree_to_shwi (OMP_CLAUSE_ORDERED_EXPR (t)); - break; - case OMP_CLAUSE_SCHEDULE: - gcc_assert (!distribute && !taskloop); - fd->sched_kind - = (enum omp_clause_schedule_kind) - (OMP_CLAUSE_SCHEDULE_KIND (t) & OMP_CLAUSE_SCHEDULE_MASK); - fd->sched_modifiers = (OMP_CLAUSE_SCHEDULE_KIND (t) - & ~OMP_CLAUSE_SCHEDULE_MASK); - fd->chunk_size = OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t); - fd->simd_schedule = OMP_CLAUSE_SCHEDULE_SIMD (t); - break; - case OMP_CLAUSE_DIST_SCHEDULE: - gcc_assert (distribute); - fd->chunk_size = OMP_CLAUSE_DIST_SCHEDULE_CHUNK_EXPR 
(t); - break; - case OMP_CLAUSE_COLLAPSE: - fd->collapse = tree_to_shwi (OMP_CLAUSE_COLLAPSE_EXPR (t)); - if (fd->collapse > 1) - { - collapse_iter = &OMP_CLAUSE_COLLAPSE_ITERVAR (t); - collapse_count = &OMP_CLAUSE_COLLAPSE_COUNT (t); - } - break; - default: - break; - } - if (fd->ordered && fd->collapse == 1 && loops != NULL) - { - fd->loops = loops; - iterv = NULL_TREE; - countv = NULL_TREE; - collapse_iter = &iterv; - collapse_count = &countv; - } - - /* FIXME: for now map schedule(auto) to schedule(static). - There should be analysis to determine whether all iterations - are approximately the same amount of work (then schedule(static) - is best) or if it varies (then schedule(dynamic,N) is better). */ - if (fd->sched_kind == OMP_CLAUSE_SCHEDULE_AUTO) - { - fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC; - gcc_assert (fd->chunk_size == NULL); - } - gcc_assert (fd->collapse == 1 || collapse_iter != NULL); - if (taskloop) - fd->sched_kind = OMP_CLAUSE_SCHEDULE_RUNTIME; - if (fd->sched_kind == OMP_CLAUSE_SCHEDULE_RUNTIME) - gcc_assert (fd->chunk_size == NULL); - else if (fd->chunk_size == NULL) - { - /* We only need to compute a default chunk size for ordered - static loops and dynamic loops. */ - if (fd->sched_kind != OMP_CLAUSE_SCHEDULE_STATIC - || fd->have_ordered) - fd->chunk_size = (fd->sched_kind == OMP_CLAUSE_SCHEDULE_STATIC) - ? integer_zero_node : integer_one_node; - } - - int cnt = fd->ordered ? fd->ordered : fd->collapse; - for (i = 0; i < cnt; i++) - { - if (i == 0 && fd->collapse == 1 && (fd->ordered == 0 || loops == NULL)) - loop = &fd->loop; - else if (loops != NULL) - loop = loops + i; - else - loop = &dummy_loop; - - loop->v = gimple_omp_for_index (for_stmt, i); - gcc_assert (SSA_VAR_P (loop->v)); - gcc_assert (TREE_CODE (TREE_TYPE (loop->v)) == INTEGER_TYPE - || TREE_CODE (TREE_TYPE (loop->v)) == POINTER_TYPE); - var = TREE_CODE (loop->v) == SSA_NAME ? 
SSA_NAME_VAR (loop->v) : loop->v; - loop->n1 = gimple_omp_for_initial (for_stmt, i); - - loop->cond_code = gimple_omp_for_cond (for_stmt, i); - loop->n2 = gimple_omp_for_final (for_stmt, i); - gcc_assert (loop->cond_code != NE_EXPR - || gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_CILKSIMD - || gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_CILKFOR); - adjust_for_condition (loc, &loop->cond_code, &loop->n2); - - t = gimple_omp_for_incr (for_stmt, i); - gcc_assert (TREE_OPERAND (t, 0) == var); - loop->step = get_omp_for_step_from_incr (loc, t); - - if (simd - || (fd->sched_kind == OMP_CLAUSE_SCHEDULE_STATIC - && !fd->have_ordered)) - { - if (fd->collapse == 1) - iter_type = TREE_TYPE (loop->v); - else if (i == 0 - || TYPE_PRECISION (iter_type) - < TYPE_PRECISION (TREE_TYPE (loop->v))) - iter_type - = build_nonstandard_integer_type - (TYPE_PRECISION (TREE_TYPE (loop->v)), 1); - } - else if (iter_type != long_long_unsigned_type_node) - { - if (POINTER_TYPE_P (TREE_TYPE (loop->v))) - iter_type = long_long_unsigned_type_node; - else if (TYPE_UNSIGNED (TREE_TYPE (loop->v)) - && TYPE_PRECISION (TREE_TYPE (loop->v)) - >= TYPE_PRECISION (iter_type)) - { - tree n; - - if (loop->cond_code == LT_EXPR) - n = fold_build2_loc (loc, - PLUS_EXPR, TREE_TYPE (loop->v), - loop->n2, loop->step); - else - n = loop->n1; - if (TREE_CODE (n) != INTEGER_CST - || tree_int_cst_lt (TYPE_MAX_VALUE (iter_type), n)) - iter_type = long_long_unsigned_type_node; - } - else if (TYPE_PRECISION (TREE_TYPE (loop->v)) - > TYPE_PRECISION (iter_type)) - { - tree n1, n2; - - if (loop->cond_code == LT_EXPR) - { - n1 = loop->n1; - n2 = fold_build2_loc (loc, - PLUS_EXPR, TREE_TYPE (loop->v), - loop->n2, loop->step); - } - else - { - n1 = fold_build2_loc (loc, - MINUS_EXPR, TREE_TYPE (loop->v), - loop->n2, loop->step); - n2 = loop->n1; - } - if (TREE_CODE (n1) != INTEGER_CST - || TREE_CODE (n2) != INTEGER_CST - || !tree_int_cst_lt (TYPE_MIN_VALUE (iter_type), n1) - || !tree_int_cst_lt (n2, 
TYPE_MAX_VALUE (iter_type))) - iter_type = long_long_unsigned_type_node; - } - } - - if (i >= fd->collapse) - continue; - - if (collapse_count && *collapse_count == NULL) - { - t = fold_binary (loop->cond_code, boolean_type_node, - fold_convert (TREE_TYPE (loop->v), loop->n1), - fold_convert (TREE_TYPE (loop->v), loop->n2)); - if (t && integer_zerop (t)) - count = build_zero_cst (long_long_unsigned_type_node); - else if ((i == 0 || count != NULL_TREE) - && TREE_CODE (TREE_TYPE (loop->v)) == INTEGER_TYPE - && TREE_CONSTANT (loop->n1) - && TREE_CONSTANT (loop->n2) - && TREE_CODE (loop->step) == INTEGER_CST) - { - tree itype = TREE_TYPE (loop->v); - - if (POINTER_TYPE_P (itype)) - itype = signed_type_for (itype); - t = build_int_cst (itype, (loop->cond_code == LT_EXPR ? -1 : 1)); - t = fold_build2_loc (loc, - PLUS_EXPR, itype, - fold_convert_loc (loc, itype, loop->step), t); - t = fold_build2_loc (loc, PLUS_EXPR, itype, t, - fold_convert_loc (loc, itype, loop->n2)); - t = fold_build2_loc (loc, MINUS_EXPR, itype, t, - fold_convert_loc (loc, itype, loop->n1)); - if (TYPE_UNSIGNED (itype) && loop->cond_code == GT_EXPR) - t = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, - fold_build1_loc (loc, NEGATE_EXPR, itype, t), - fold_build1_loc (loc, NEGATE_EXPR, itype, - fold_convert_loc (loc, itype, - loop->step))); - else - t = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, t, - fold_convert_loc (loc, itype, loop->step)); - t = fold_convert_loc (loc, long_long_unsigned_type_node, t); - if (count != NULL_TREE) - count = fold_build2_loc (loc, - MULT_EXPR, long_long_unsigned_type_node, - count, t); - else - count = t; - if (TREE_CODE (count) != INTEGER_CST) - count = NULL_TREE; - } - else if (count && !integer_zerop (count)) - count = NULL_TREE; - } - } - - if (count - && !simd - && (fd->sched_kind != OMP_CLAUSE_SCHEDULE_STATIC - || fd->have_ordered)) - { - if (!tree_int_cst_lt (count, TYPE_MAX_VALUE (long_integer_type_node))) - iter_type = long_long_unsigned_type_node; - else - 
iter_type = long_integer_type_node; - } - else if (collapse_iter && *collapse_iter != NULL) - iter_type = TREE_TYPE (*collapse_iter); - fd->iter_type = iter_type; - if (collapse_iter && *collapse_iter == NULL) - *collapse_iter = create_tmp_var (iter_type, ".iter"); - if (collapse_count && *collapse_count == NULL) - { - if (count) - *collapse_count = fold_convert_loc (loc, iter_type, count); - else - *collapse_count = create_tmp_var (iter_type, ".count"); - } - - if (fd->collapse > 1 || (fd->ordered && loops)) - { - fd->loop.v = *collapse_iter; - fd->loop.n1 = build_int_cst (TREE_TYPE (fd->loop.v), 0); - fd->loop.n2 = *collapse_count; - fd->loop.step = build_int_cst (TREE_TYPE (fd->loop.v), 1); - fd->loop.cond_code = LT_EXPR; - } - else if (loops) - loops[0] = fd->loop; -} - - -/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB - is the immediate dominator of PAR_ENTRY_BB, return true if there - are no data dependencies that would prevent expanding the parallel - directive at PAR_ENTRY_BB as a combined parallel+workshare region. - - When expanding a combined parallel+workshare region, the call to - the child function may need additional arguments in the case of - GIMPLE_OMP_FOR regions. In some cases, these arguments are - computed out of variables passed in from the parent to the child - via 'struct .omp_data_s'. For instance: - - #pragma omp parallel for schedule (guided, i * 4) - for (j ...) - - Is lowered into: - - # BLOCK 2 (PAR_ENTRY_BB) - .omp_data_o.i = i; - #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598) - - # BLOCK 3 (WS_ENTRY_BB) - .omp_data_i = &.omp_data_o; - D.1667 = .omp_data_i->i; - D.1598 = D.1667 * 4; - #pragma omp for schedule (guided, D.1598) - - When we outline the parallel region, the call to the child function - 'bar.omp_fn.0' will need the value D.1598 in its argument list, but - that value is computed *after* the call site. So, in principle we - cannot do the transformation. 
- - To see whether the code in WS_ENTRY_BB blocks the combined - parallel+workshare call, we collect all the variables used in the - GIMPLE_OMP_FOR header check whether they appear on the LHS of any - statement in WS_ENTRY_BB. If so, then we cannot emit the combined - call. - - FIXME. If we had the SSA form built at this point, we could merely - hoist the code in block 3 into block 2 and be done with it. But at - this point we don't have dataflow information and though we could - hack something up here, it is really not worth the aggravation. */ - -static bool -workshare_safe_to_combine_p (basic_block ws_entry_bb) -{ - struct omp_for_data fd; - gimple *ws_stmt = last_stmt (ws_entry_bb); - - if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS) - return true; - - gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR); - - extract_omp_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL); - - if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST) - return false; - if (fd.iter_type != long_integer_type_node) - return false; - - /* FIXME. We give up too easily here. If any of these arguments - are not constants, they will likely involve variables that have - been mapped into fields of .omp_data_s for sharing with the child - function. With appropriate data flow, it would be possible to - see through this. */ - if (!is_gimple_min_invariant (fd.loop.n1) - || !is_gimple_min_invariant (fd.loop.n2) - || !is_gimple_min_invariant (fd.loop.step) - || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size))) - return false; - - return true; -} - - -static int omp_max_vf (void); - -/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier - presence (SIMD_SCHEDULE). 
*/ - -static tree -omp_adjust_chunk_size (tree chunk_size, bool simd_schedule) -{ - if (!simd_schedule) - return chunk_size; - - int vf = omp_max_vf (); - if (vf == 1) - return chunk_size; - - tree type = TREE_TYPE (chunk_size); - chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size, - build_int_cst (type, vf - 1)); - return fold_build2 (BIT_AND_EXPR, type, chunk_size, - build_int_cst (type, -vf)); -} - - -/* Collect additional arguments needed to emit a combined - parallel+workshare call. WS_STMT is the workshare directive being - expanded. */ - -static vec<tree, va_gc> * -get_ws_args_for (gimple *par_stmt, gimple *ws_stmt) -{ - tree t; - location_t loc = gimple_location (ws_stmt); - vec<tree, va_gc> *ws_args; - - if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt)) - { - struct omp_for_data fd; - tree n1, n2; - - extract_omp_for_data (for_stmt, &fd, NULL); - n1 = fd.loop.n1; - n2 = fd.loop.n2; - - if (gimple_omp_for_combined_into_p (for_stmt)) - { - tree innerc - = find_omp_clause (gimple_omp_parallel_clauses (par_stmt), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - n1 = OMP_CLAUSE_DECL (innerc); - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - n2 = OMP_CLAUSE_DECL (innerc); - } - - vec_alloc (ws_args, 3 + (fd.chunk_size != 0)); - - t = fold_convert_loc (loc, long_integer_type_node, n1); - ws_args->quick_push (t); - - t = fold_convert_loc (loc, long_integer_type_node, n2); - ws_args->quick_push (t); - - t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step); - ws_args->quick_push (t); - - if (fd.chunk_size) - { - t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size); - t = omp_adjust_chunk_size (t, fd.simd_schedule); - ws_args->quick_push (t); - } - - return ws_args; - } - else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS) - { - /* Number of sections is equal to the number of edges from the - GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to - the exit of the 
sections region. */ - basic_block bb = single_succ (gimple_bb (ws_stmt)); - t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1); - vec_alloc (ws_args, 1); - ws_args->quick_push (t); - return ws_args; - } - - gcc_unreachable (); -} - - -/* Discover whether REGION is a combined parallel+workshare region. */ - -static void -determine_parallel_type (struct omp_region *region) -{ - basic_block par_entry_bb, par_exit_bb; - basic_block ws_entry_bb, ws_exit_bb; - - if (region == NULL || region->inner == NULL - || region->exit == NULL || region->inner->exit == NULL - || region->inner->cont == NULL) - return; - - /* We only support parallel+for and parallel+sections. */ - if (region->type != GIMPLE_OMP_PARALLEL - || (region->inner->type != GIMPLE_OMP_FOR - && region->inner->type != GIMPLE_OMP_SECTIONS)) - return; - - /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and - WS_EXIT_BB -> PAR_EXIT_BB. */ - par_entry_bb = region->entry; - par_exit_bb = region->exit; - ws_entry_bb = region->inner->entry; - ws_exit_bb = region->inner->exit; - - if (single_succ (par_entry_bb) == ws_entry_bb - && single_succ (ws_exit_bb) == par_exit_bb - && workshare_safe_to_combine_p (ws_entry_bb) - && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb)) - || (last_and_only_stmt (ws_entry_bb) - && last_and_only_stmt (par_exit_bb)))) - { - gimple *par_stmt = last_stmt (par_entry_bb); - gimple *ws_stmt = last_stmt (ws_entry_bb); - - if (region->inner->type == GIMPLE_OMP_FOR) - { - /* If this is a combined parallel loop, we need to determine - whether or not to use the combined library calls. There - are two cases where we do not apply the transformation: - static loops and any kind of ordered loop. In the first - case, we already open code the loop so there is no need - to do anything else. 
In the latter case, the combined - parallel loop call would still need extra synchronization - to implement ordered semantics, so there would not be any - gain in using the combined call. */ - tree clauses = gimple_omp_for_clauses (ws_stmt); - tree c = find_omp_clause (clauses, OMP_CLAUSE_SCHEDULE); - if (c == NULL - || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK) - == OMP_CLAUSE_SCHEDULE_STATIC) - || find_omp_clause (clauses, OMP_CLAUSE_ORDERED)) - { - region->is_combined_parallel = false; - region->inner->is_combined_parallel = false; - return; - } - } - - region->is_combined_parallel = true; - region->inner->is_combined_parallel = true; - region->ws_args = get_ws_args_for (par_stmt, ws_stmt); - } -} - - /* Return true if EXPR is variable sized. */ static inline bool @@ -1042,25 +294,6 @@ is_variable_sized (const_tree expr) return !TREE_CONSTANT (TYPE_SIZE_UNIT (TREE_TYPE (expr))); } -/* Return true if DECL is a reference type. */ - -static inline bool -is_reference (tree decl) -{ - return lang_hooks.decls.omp_privatize_by_reference (decl); -} - -/* Return the type of a decl. If the decl is reference type, - return its base type. */ -static inline tree -get_base_type (tree decl) -{ - tree type = TREE_TYPE (decl); - if (is_reference (decl)) - type = TREE_TYPE (type); - return type; -} - /* Lookup variables. The "maybe" form allows for the variable form to not have been entered, otherwise we assert that the variable must have been entered. */ @@ -1359,7 +592,7 @@ build_outer_var_ref (tree var, omp_context *ctx, } x = lookup_decl (var, outer); } - else if (is_reference (var)) + else if (omp_is_reference (var)) /* This can happen with orphaned constructs. If var is reference, it is possible it is shared and as such valid. 
*/ x = var; @@ -1382,7 +615,7 @@ build_outer_var_ref (tree var, omp_context *ctx, } } - if (is_reference (var)) + if (omp_is_reference (var)) x = build_simple_mem_ref (x); return x; @@ -1444,7 +677,7 @@ install_var_field (tree var, bool by_ref, int mask, omp_context *ctx, if (base_pointers_restrict) type = build_qualified_type (type, TYPE_QUAL_RESTRICT); } - else if ((mask & 3) == 1 && is_reference (var)) + else if ((mask & 3) == 1 && omp_is_reference (var)) type = TREE_TYPE (type); field = build_decl (DECL_SOURCE_LOCATION (var), @@ -1585,113 +818,6 @@ omp_copy_decl (tree var, copy_body_data *cb) return error_mark_node; } - -/* Debugging dumps for parallel regions. */ -void dump_omp_region (FILE *, struct omp_region *, int); -void debug_omp_region (struct omp_region *); -void debug_all_omp_regions (void); - -/* Dump the parallel region tree rooted at REGION. */ - -void -dump_omp_region (FILE *file, struct omp_region *region, int indent) -{ - fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index, - gimple_code_name[region->type]); - - if (region->inner) - dump_omp_region (file, region->inner, indent + 4); - - if (region->cont) - { - fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "", - region->cont->index); - } - - if (region->exit) - fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "", - region->exit->index); - else - fprintf (file, "%*s[no exit marker]\n", indent, ""); - - if (region->next) - dump_omp_region (file, region->next, indent); -} - -DEBUG_FUNCTION void -debug_omp_region (struct omp_region *region) -{ - dump_omp_region (stderr, region, 0); -} - -DEBUG_FUNCTION void -debug_all_omp_regions (void) -{ - dump_omp_region (stderr, root_omp_region, 0); -} - - -/* Create a new parallel region starting at STMT inside region PARENT. 
*/ - -static struct omp_region * -new_omp_region (basic_block bb, enum gimple_code type, - struct omp_region *parent) -{ - struct omp_region *region = XCNEW (struct omp_region); - - region->outer = parent; - region->entry = bb; - region->type = type; - - if (parent) - { - /* This is a nested region. Add it to the list of inner - regions in PARENT. */ - region->next = parent->inner; - parent->inner = region; - } - else - { - /* This is a toplevel region. Add it to the list of toplevel - regions in ROOT_OMP_REGION. */ - region->next = root_omp_region; - root_omp_region = region; - } - - return region; -} - -/* Release the memory associated with the region tree rooted at REGION. */ - -static void -free_omp_region_1 (struct omp_region *region) -{ - struct omp_region *i, *n; - - for (i = region->inner; i ; i = n) - { - n = i->next; - free_omp_region_1 (i); - } - - free (region); -} - -/* Release the memory for the entire omp region tree. */ - -void -free_omp_regions (void) -{ - struct omp_region *r, *n; - for (r = root_omp_region; r ; r = n) - { - n = r->next; - free_omp_region_1 (r); - } - root_omp_region = NULL; -} - - /* Create a new context, with OUTER_CTX being the surrounding context. */ static omp_context * @@ -1915,7 +1041,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx, if ((! 
TREE_READONLY (decl) && !OMP_CLAUSE_SHARED_READONLY (c)) || TREE_ADDRESSABLE (decl) || by_ref - || is_reference (decl)) + || omp_is_reference (decl)) { by_ref = use_pointer_for_field (decl, ctx); install_var_field (decl, by_ref, 3, ctx); @@ -1965,7 +1091,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx, && is_gimple_omp_offloaded (ctx->stmt)) { if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE) - install_var_field (decl, !is_reference (decl), 3, ctx); + install_var_field (decl, !omp_is_reference (decl), 3, ctx); else if (TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE) install_var_field (decl, true, 3, ctx); else @@ -1984,7 +1110,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx, by_ref = use_pointer_for_field (decl, NULL); if (is_task_ctx (ctx) - && (global || by_ref || is_reference (decl))) + && (global || by_ref || omp_is_reference (decl))) { install_var_field (decl, false, 1, ctx); if (!global) @@ -2461,7 +1587,7 @@ create_omp_child_function (omp_context *ctx, bool task_copy) tree cilk_for_count = (flag_cilkplus && gimple_code (ctx->stmt) == GIMPLE_OMP_PARALLEL) - ? find_omp_clause (gimple_omp_parallel_clauses (ctx->stmt), + ? omp_find_clause (gimple_omp_parallel_clauses (ctx->stmt), OMP_CLAUSE__CILK_FOR_COUNT_) : NULL_TREE; tree cilk_var_type = NULL_TREE; @@ -2592,10 +1718,10 @@ create_omp_child_function (omp_context *ctx, bool task_copy) /* Callback for walk_gimple_seq. Check if combined parallel contains gimple_omp_for_combined_into_p OMP_FOR. 
*/ -static tree -find_combined_for (gimple_stmt_iterator *gsi_p, - bool *handled_ops_p, - struct walk_stmt_info *wi) +tree +omp_find_combined_for (gimple_stmt_iterator *gsi_p, + bool *handled_ops_p, + struct walk_stmt_info *wi) { gimple *stmt = gsi_stmt (*gsi_p); @@ -2630,12 +1756,12 @@ add_taskreg_looptemp_clauses (enum gf_mask msk, gimple *stmt, memset (&wi, 0, sizeof (wi)); wi.val_only = true; wi.info = (void *) &msk; - walk_gimple_seq (gimple_omp_body (stmt), find_combined_for, NULL, &wi); + walk_gimple_seq (gimple_omp_body (stmt), omp_find_combined_for, NULL, &wi); if (wi.info != (void *) &msk) { gomp_for *for_stmt = as_a <gomp_for *> ((gimple *) wi.info); struct omp_for_data fd; - extract_omp_for_data (for_stmt, &fd, NULL); + omp_extract_for_data (for_stmt, &fd, NULL); /* We need two temporaries with fd.loop.v type (istart/iend) and then (fd.collapse - 1) temporaries with the same type for count2 ... countN-1 vars if not constant. */ @@ -2648,11 +1774,11 @@ add_taskreg_looptemp_clauses (enum gf_mask msk, gimple *stmt, /* If there are lastprivate clauses on the inner GIMPLE_OMP_FOR, add one more temporaries for the total number of iterations (product of count1 ... countN-1). */ - if (find_omp_clause (gimple_omp_for_clauses (for_stmt), + if (omp_find_clause (gimple_omp_for_clauses (for_stmt), OMP_CLAUSE_LASTPRIVATE)) count++; else if (msk == GF_OMP_FOR_KIND_FOR - && find_omp_clause (gimple_omp_parallel_clauses (stmt), + && omp_find_clause (gimple_omp_parallel_clauses (stmt), OMP_CLAUSE_LASTPRIVATE)) count++; } @@ -2681,7 +1807,7 @@ scan_omp_parallel (gimple_stmt_iterator *gsi, omp_context *outer_ctx) are copyin clauses. */ if (optimize > 0 && empty_body_p (gimple_omp_body (stmt)) - && find_omp_clause (gimple_omp_parallel_clauses (stmt), + && omp_find_clause (gimple_omp_parallel_clauses (stmt), OMP_CLAUSE_COPYIN) == NULL) { gsi_replace (gsi, gimple_build_nop (), false); @@ -2866,8 +1992,8 @@ finish_taskreg_scan (omp_context *ctx) clause first. 
There are filled by GOMP_taskloop and thus need to be in specific positions. */ tree c1 = gimple_omp_task_clauses (ctx->stmt); - c1 = find_omp_clause (c1, OMP_CLAUSE__LOOPTEMP_); - tree c2 = find_omp_clause (OMP_CLAUSE_CHAIN (c1), + c1 = omp_find_clause (c1, OMP_CLAUSE__LOOPTEMP_); + tree c2 = omp_find_clause (OMP_CLAUSE_CHAIN (c1), OMP_CLAUSE__LOOPTEMP_); tree f1 = lookup_field (OMP_CLAUSE_DECL (c1), ctx); tree f2 = lookup_field (OMP_CLAUSE_DECL (c2), ctx); @@ -3304,7 +2430,7 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx) && !(gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD || gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE)) { - if (get_oacc_fn_attrib (cfun->decl) != NULL) + if (oacc_get_fn_attrib (cfun->decl) != NULL) { error_at (gimple_location (stmt), "non-OpenACC construct inside of OpenACC routine"); @@ -3330,9 +2456,9 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx) if (gimple_code (stmt) == GIMPLE_OMP_ORDERED) { c = gimple_omp_ordered_clauses (as_a <gomp_ordered *> (stmt)); - if (find_omp_clause (c, OMP_CLAUSE_SIMD)) + if (omp_find_clause (c, OMP_CLAUSE_SIMD)) { - if (find_omp_clause (c, OMP_CLAUSE_THREADS) + if (omp_find_clause (c, OMP_CLAUSE_THREADS) && (ctx->outer == NULL || !gimple_omp_for_combined_into_p (ctx->stmt) || gimple_code (ctx->outer->stmt) != GIMPLE_OMP_FOR @@ -3414,7 +2540,7 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx) default: break; } - else if (get_oacc_fn_attrib (current_function_decl)) + else if (oacc_get_fn_attrib (current_function_decl)) ok = true; if (!ok) { @@ -3467,12 +2593,12 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx) && !integer_zerop (gimple_call_arg (stmt, 1))) { ctx->cancellable = true; - if (find_omp_clause (gimple_omp_for_clauses (ctx->stmt), + if (omp_find_clause (gimple_omp_for_clauses (ctx->stmt), OMP_CLAUSE_NOWAIT)) warning_at (gimple_location (stmt), 0, "%<#pragma omp cancel for%> inside " "%<nowait%> for construct"); - if (find_omp_clause 
(gimple_omp_for_clauses (ctx->stmt), + if (omp_find_clause (gimple_omp_for_clauses (ctx->stmt), OMP_CLAUSE_ORDERED)) warning_at (gimple_location (stmt), 0, "%<#pragma omp cancel for%> inside " @@ -3491,7 +2617,7 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx) if (gimple_code (ctx->stmt) == GIMPLE_OMP_SECTIONS) { ctx->cancellable = true; - if (find_omp_clause (gimple_omp_sections_clauses + if (omp_find_clause (gimple_omp_sections_clauses (ctx->stmt), OMP_CLAUSE_NOWAIT)) warning_at (gimple_location (stmt), 0, @@ -3504,7 +2630,7 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx) && gimple_code (ctx->outer->stmt) == GIMPLE_OMP_SECTIONS); ctx->outer->cancellable = true; - if (find_omp_clause (gimple_omp_sections_clauses + if (omp_find_clause (gimple_omp_sections_clauses (ctx->outer->stmt), OMP_CLAUSE_NOWAIT)) warning_at (gimple_location (stmt), 0, @@ -3666,7 +2792,7 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx) if (ctx == NULL || gimple_code (ctx->stmt) != GIMPLE_OMP_FOR || (oclause - = find_omp_clause (gimple_omp_for_clauses (ctx->stmt), + = omp_find_clause (gimple_omp_for_clauses (ctx->stmt), OMP_CLAUSE_ORDERED)) == NULL_TREE) { error_at (OMP_CLAUSE_LOCATION (c), @@ -3692,7 +2818,7 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx) } } c = gimple_omp_ordered_clauses (as_a <gomp_ordered *> (stmt)); - if (find_omp_clause (c, OMP_CLAUSE_SIMD)) + if (omp_find_clause (c, OMP_CLAUSE_SIMD)) { /* ordered simd must be closely nested inside of simd region, and simd region must not encounter constructs other than @@ -3722,7 +2848,7 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx) case GIMPLE_OMP_FOR: if (gimple_omp_for_kind (ctx->stmt) == GF_OMP_FOR_KIND_TASKLOOP) goto ordered_in_taskloop; - if (find_omp_clause (gimple_omp_for_clauses (ctx->stmt), + if (omp_find_clause (gimple_omp_for_clauses (ctx->stmt), OMP_CLAUSE_ORDERED) == NULL) { error_at (gimple_location (stmt), @@ -3786,7 +2912,7 @@ 
check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx) return false; } if (is_gimple_omp_offloaded (stmt) - && get_oacc_fn_attrib (cfun->decl) != NULL) + && oacc_get_fn_attrib (cfun->decl) != NULL) { error_at (gimple_location (stmt), "OpenACC region inside of OpenACC routine, nested " @@ -4098,19 +3224,6 @@ scan_omp (gimple_seq *body_p, omp_context *ctx) /* Re-gimplification and code generation routines. */ -/* Build a call to GOMP_barrier. */ - -static gimple * -build_omp_barrier (tree lhs) -{ - tree fndecl = builtin_decl_explicit (lhs ? BUILT_IN_GOMP_BARRIER_CANCEL - : BUILT_IN_GOMP_BARRIER); - gcall *g = gimple_build_call (fndecl, 0); - if (lhs) - gimple_call_set_lhs (g, lhs); - return g; -} - /* If a context was created for STMT when it was scanned, return it. */ static omp_context * @@ -4332,51 +3445,6 @@ omp_clause_aligned_alignment (tree clause) return build_int_cst (integer_type_node, al); } - -/* Return maximum SIMT width if offloading may target SIMT hardware. */ - -static int -omp_max_simt_vf (void) -{ - if (!optimize) - return 0; - if (ENABLE_OFFLOADING) - for (const char *c = getenv ("OFFLOAD_TARGET_NAMES"); c; ) - { - if (!strncmp (c, "nvptx", strlen ("nvptx"))) - return 32; - else if ((c = strchr (c, ','))) - c++; - } - return 0; -} - -/* Return maximum possible vectorization factor for the target. */ - -static int -omp_max_vf (void) -{ - if (!optimize - || optimize_debug - || !flag_tree_loop_optimize - || (!flag_tree_loop_vectorize - && (global_options_set.x_flag_tree_loop_vectorize - || global_options_set.x_flag_tree_vectorize))) - return 1; - - int vf = 1; - int vs = targetm.vectorize.autovectorize_vector_sizes (); - if (vs) - vf = 1 << floor_log2 (vs); - else - { - machine_mode vqimode = targetm.vectorize.preferred_simd_mode (QImode); - if (GET_MODE_CLASS (vqimode) == MODE_VECTOR_INT) - vf = GET_MODE_NUNITS (vqimode); - } - return vf; -} - /* Helper function of lower_rec_input_clauses, used for #pragma omp simd privatization. 
*/ @@ -4386,14 +3454,14 @@ lower_rec_simd_input_clauses (tree new_var, omp_context *ctx, int &max_vf, { if (max_vf == 0) { - if (find_omp_clause (gimple_omp_for_clauses (ctx->stmt), + if (omp_find_clause (gimple_omp_for_clauses (ctx->stmt), OMP_CLAUSE__SIMT_)) max_vf = omp_max_simt_vf (); else max_vf = omp_max_vf (); if (max_vf > 1) { - tree c = find_omp_clause (gimple_omp_for_clauses (ctx->stmt), + tree c = omp_find_clause (gimple_omp_for_clauses (ctx->stmt), OMP_CLAUSE_SAFELEN); if (c && (TREE_CODE (OMP_CLAUSE_SAFELEN_EXPR (c)) != INTEGER_CST @@ -4466,7 +3534,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, int pass; bool is_simd = (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD); - bool maybe_simt = is_simd && find_omp_clause (clauses, OMP_CLAUSE__SIMT_); + bool maybe_simt = is_simd && omp_find_clause (clauses, OMP_CLAUSE__SIMT_); int max_vf = 0; tree lane = NULL_TREE, idx = NULL_TREE; tree simt_lane = NULL_TREE; @@ -4768,10 +3836,10 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, tree ref = build_outer_var_ref (var, ctx); /* For ref build_outer_var_ref already performs this. 
*/ if (TREE_CODE (d) == INDIRECT_REF) - gcc_assert (is_reference (var)); + gcc_assert (omp_is_reference (var)); else if (TREE_CODE (d) == ADDR_EXPR) ref = build_fold_addr_expr (ref); - else if (is_reference (var)) + else if (omp_is_reference (var)) ref = build_fold_addr_expr (ref); ref = fold_convert_loc (clause_loc, ptype, ref); if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) @@ -4935,7 +4003,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, gimplify_assign (ptr, x, ilist); } } - else if (is_reference (var)) + else if (omp_is_reference (var)) { /* For references that are being privatized for Fortran, allocate new backing storage for the new pointer @@ -5102,7 +4170,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, case OMP_CLAUSE_FIRSTPRIVATE: if (is_task_ctx (ctx)) { - if (is_reference (var) || is_variable_sized (var)) + if (omp_is_reference (var) || is_variable_sized (var)) goto do_dtor; else if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx)) @@ -5123,7 +4191,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, { tree t = OMP_CLAUSE_LINEAR_STEP (c); tree stept = TREE_TYPE (t); - tree ct = find_omp_clause (clauses, + tree ct = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_); gcc_assert (ct); tree l = OMP_CLAUSE_DECL (ct); @@ -5230,14 +4298,14 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, gimple *tseq; x = build_outer_var_ref (var, ctx); - if (is_reference (var) + if (omp_is_reference (var) && !useless_type_conversion_p (TREE_TYPE (placeholder), TREE_TYPE (x))) x = build_fold_addr_expr_loc (clause_loc, x); SET_DECL_VALUE_EXPR (placeholder, x); DECL_HAS_VALUE_EXPR_P (placeholder) = 1; tree new_vard = new_var; - if (is_reference (var)) + if (omp_is_reference (var)) { gcc_assert (TREE_CODE (new_var) == MEM_REF); new_vard = TREE_OPERAND (new_var, 0); @@ -5295,7 +4363,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, 
gimple_seq *dlist, for it because it is undesirable if SIMD arrays are used. But if they aren't used, we need to emit the deferred initialization now. */ - else if (is_reference (var) && is_simd) + else if (omp_is_reference (var) && is_simd) handle_simd_reference (clause_loc, new_vard, ilist); x = lang_hooks.decls.omp_clause_default_ctor (c, unshare_expr (new_var), @@ -5331,7 +4399,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, code = PLUS_EXPR; tree new_vard = new_var; - if (is_simd && is_reference (var)) + if (is_simd && omp_is_reference (var)) { gcc_assert (TREE_CODE (new_var) == MEM_REF); new_vard = TREE_OPERAND (new_var, 0); @@ -5368,7 +4436,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, } else { - if (is_reference (var) && is_simd) + if (omp_is_reference (var) && is_simd) handle_simd_reference (clause_loc, new_vard, ilist); gimplify_assign (new_var, x, ilist); if (is_simd) @@ -5492,14 +4560,14 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, #pragma omp distribute. */ if (gimple_code (ctx->stmt) != GIMPLE_OMP_FOR || gimple_omp_for_kind (ctx->stmt) == GF_OMP_FOR_KIND_FOR) - gimple_seq_add_stmt (ilist, build_omp_barrier (NULL_TREE)); + gimple_seq_add_stmt (ilist, omp_build_barrier (NULL_TREE)); } /* If max_vf is non-zero, then we can use only a vectorization factor up to the max_vf we chose. So stick it into the safelen clause. 
*/ if (max_vf) { - tree c = find_omp_clause (gimple_omp_for_clauses (ctx->stmt), + tree c = omp_find_clause (gimple_omp_for_clauses (ctx->stmt), OMP_CLAUSE_SAFELEN); if (c == NULL_TREE || (TREE_CODE (OMP_CLAUSE_SAFELEN_EXPR (c)) == INTEGER_CST @@ -5546,7 +4614,7 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list, if (ctx == NULL || !is_parallel_ctx (ctx)) return; - clauses = find_omp_clause (gimple_omp_parallel_clauses (ctx->stmt), + clauses = omp_find_clause (gimple_omp_parallel_clauses (ctx->stmt), OMP_CLAUSE_LASTPRIVATE); if (clauses == NULL) return; @@ -5557,8 +4625,8 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list, if (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD) { - maybe_simt = find_omp_clause (orig_clauses, OMP_CLAUSE__SIMT_); - simduid = find_omp_clause (orig_clauses, OMP_CLAUSE__SIMDUID_); + maybe_simt = omp_find_clause (orig_clauses, OMP_CLAUSE__SIMT_); + simduid = omp_find_clause (orig_clauses, OMP_CLAUSE__SIMDUID_); if (simduid) simduid = OMP_CLAUSE__SIMDUID__DECL (simduid); } @@ -5705,7 +4773,7 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list, } if (!x) x = build_outer_var_ref (var, ctx, OMP_CLAUSE_LASTPRIVATE); - if (is_reference (var)) + if (omp_is_reference (var)) new_var = build_simple_mem_ref_loc (clause_loc, new_var); x = lang_hooks.decls.omp_clause_assign_op (c, x, new_var); gimplify_and_add (x, stmt_list); @@ -5723,7 +4791,7 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list, if (ctx == NULL || !is_parallel_ctx (ctx)) break; - c = find_omp_clause (gimple_omp_parallel_clauses (ctx->stmt), + c = omp_find_clause (gimple_omp_parallel_clauses (ctx->stmt), OMP_CLAUSE_LASTPRIVATE); par_clauses = true; } @@ -5836,7 +4904,7 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner, && maybe_lookup_field (orig, outer) && !is_private) { ref_to_res = 
build_receiver_ref (orig, false, outer); - if (is_reference (orig)) + if (omp_is_reference (orig)) ref_to_res = build_simple_mem_ref (ref_to_res); tree type = TREE_TYPE (var); @@ -5866,7 +4934,7 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner, if (!ref_to_res) ref_to_res = integer_zero_node; - if (is_reference (orig)) + if (omp_is_reference (orig)) { tree type = TREE_TYPE (var); const char *id = IDENTIFIER_POINTER (DECL_NAME (var)); @@ -6021,7 +5089,7 @@ lower_reduction_clauses (tree clauses, gimple_seq *stmt_seqp, omp_context *ctx) } } new_var = lookup_decl (var, ctx); - if (var == OMP_CLAUSE_DECL (c) && is_reference (var)) + if (var == OMP_CLAUSE_DECL (c) && omp_is_reference (var)) new_var = build_simple_mem_ref_loc (clause_loc, new_var); ref = build_outer_var_ref (var, ctx); code = OMP_CLAUSE_REDUCTION_CODE (c); @@ -6075,7 +5143,7 @@ lower_reduction_clauses (tree clauses, gimple_seq *stmt_seqp, omp_context *ctx) if (TREE_CODE (d) == INDIRECT_REF) { new_var = build_simple_mem_ref_loc (clause_loc, new_var); - gcc_assert (is_reference (var) && var == orig_var); + gcc_assert (omp_is_reference (var) && var == orig_var); } else if (TREE_CODE (d) == ADDR_EXPR) { @@ -6088,7 +5156,7 @@ lower_reduction_clauses (tree clauses, gimple_seq *stmt_seqp, omp_context *ctx) else { gcc_assert (orig_var == var); - if (is_reference (var)) + if (omp_is_reference (var)) ref = build_fold_addr_expr (ref); } if (DECL_P (v)) @@ -6162,7 +5230,7 @@ lower_reduction_clauses (tree clauses, gimple_seq *stmt_seqp, omp_context *ctx) { tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c); - if (is_reference (var) + if (omp_is_reference (var) && !useless_type_conversion_p (TREE_TYPE (placeholder), TREE_TYPE (ref))) ref = build_fold_addr_expr_loc (clause_loc, ref); @@ -6230,7 +5298,7 @@ lower_copyprivate_clauses (tree clauses, gimple_seq *slist, gimple_seq *rlist, ref); ref = build_fold_indirect_ref_loc (clause_loc, ref); } - if (is_reference (var)) + if 
(omp_is_reference (var)) { ref = fold_convert_loc (clause_loc, TREE_TYPE (new_var), ref); ref = build_simple_mem_ref_loc (clause_loc, ref); @@ -6369,7 +5437,7 @@ lower_send_clauses (tree clauses, gimple_seq *ilist, gimple_seq *olist, break; case OMP_CLAUSE_LASTPRIVATE: - if (by_ref || is_reference (val)) + if (by_ref || omp_is_reference (val)) { if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c)) continue; @@ -6386,7 +5454,7 @@ lower_send_clauses (tree clauses, gimple_seq *ilist, gimple_seq *olist, case OMP_CLAUSE_REDUCTION: do_in = true; if (val == OMP_CLAUSE_DECL (c)) - do_out = !(by_ref || is_reference (val)); + do_out = !(by_ref || omp_is_reference (val)); else by_ref = TREE_CODE (TREE_TYPE (val)) == ARRAY_TYPE; break; @@ -6647,423 +5715,6 @@ lower_oacc_head_tail (location_t loc, tree clauses, lower_oacc_loop_marker (loc, ddvar, false, NULL_TREE, tail); } -/* A convenience function to build an empty GIMPLE_COND with just the - condition. */ - -static gcond * -gimple_build_cond_empty (tree cond) -{ - enum tree_code pred_code; - tree lhs, rhs; - - gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs); - return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE); -} - -/* Return true if a parallel REGION is within a declare target function or - within a target region and is not a part of a gridified target. 
*/ - -static bool -parallel_needs_hsa_kernel_p (struct omp_region *region) -{ - bool indirect = false; - for (region = region->outer; region; region = region->outer) - { - if (region->type == GIMPLE_OMP_PARALLEL) - indirect = true; - else if (region->type == GIMPLE_OMP_TARGET) - { - gomp_target *tgt_stmt - = as_a <gomp_target *> (last_stmt (region->entry)); - - if (find_omp_clause (gimple_omp_target_clauses (tgt_stmt), - OMP_CLAUSE__GRIDDIM_)) - return indirect; - else - return true; - } - } - - if (lookup_attribute ("omp declare target", - DECL_ATTRIBUTES (current_function_decl))) - return true; - - return false; -} - -static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree, - bool = false); - -/* Build the function calls to GOMP_parallel_start etc to actually - generate the parallel operation. REGION is the parallel region - being expanded. BB is the block where to insert the code. WS_ARGS - will be set if this is a call to a combined parallel+workshare - construct, it contains the list of additional arguments needed by - the workshare construct. */ - -static void -expand_parallel_call (struct omp_region *region, basic_block bb, - gomp_parallel *entry_stmt, - vec<tree, va_gc> *ws_args) -{ - tree t, t1, t2, val, cond, c, clauses, flags; - gimple_stmt_iterator gsi; - gimple *stmt; - enum built_in_function start_ix; - int start_ix2; - location_t clause_loc; - vec<tree, va_gc> *args; - - clauses = gimple_omp_parallel_clauses (entry_stmt); - - /* Determine what flavor of GOMP_parallel we will be - emitting. 
*/ - start_ix = BUILT_IN_GOMP_PARALLEL; - if (is_combined_parallel (region)) - { - switch (region->inner->type) - { - case GIMPLE_OMP_FOR: - gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO); - switch (region->inner->sched_kind) - { - case OMP_CLAUSE_SCHEDULE_RUNTIME: - start_ix2 = 3; - break; - case OMP_CLAUSE_SCHEDULE_DYNAMIC: - case OMP_CLAUSE_SCHEDULE_GUIDED: - if (region->inner->sched_modifiers - & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) - { - start_ix2 = 3 + region->inner->sched_kind; - break; - } - /* FALLTHRU */ - default: - start_ix2 = region->inner->sched_kind; - break; - } - start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC; - start_ix = (enum built_in_function) start_ix2; - break; - case GIMPLE_OMP_SECTIONS: - start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS; - break; - default: - gcc_unreachable (); - } - } - - /* By default, the value of NUM_THREADS is zero (selected at run time) - and there is no conditional. */ - cond = NULL_TREE; - val = build_int_cst (unsigned_type_node, 0); - flags = build_int_cst (unsigned_type_node, 0); - - c = find_omp_clause (clauses, OMP_CLAUSE_IF); - if (c) - cond = OMP_CLAUSE_IF_EXPR (c); - - c = find_omp_clause (clauses, OMP_CLAUSE_NUM_THREADS); - if (c) - { - val = OMP_CLAUSE_NUM_THREADS_EXPR (c); - clause_loc = OMP_CLAUSE_LOCATION (c); - } - else - clause_loc = gimple_location (entry_stmt); - - c = find_omp_clause (clauses, OMP_CLAUSE_PROC_BIND); - if (c) - flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c)); - - /* Ensure 'val' is of the correct type. */ - val = fold_convert_loc (clause_loc, unsigned_type_node, val); - - /* If we found the clause 'if (cond)', build either - (cond != 0) or (cond ? val : 1u). 
*/ - if (cond) - { - cond = gimple_boolify (cond); - - if (integer_zerop (val)) - val = fold_build2_loc (clause_loc, - EQ_EXPR, unsigned_type_node, cond, - build_int_cst (TREE_TYPE (cond), 0)); - else - { - basic_block cond_bb, then_bb, else_bb; - edge e, e_then, e_else; - tree tmp_then, tmp_else, tmp_join, tmp_var; - - tmp_var = create_tmp_var (TREE_TYPE (val)); - if (gimple_in_ssa_p (cfun)) - { - tmp_then = make_ssa_name (tmp_var); - tmp_else = make_ssa_name (tmp_var); - tmp_join = make_ssa_name (tmp_var); - } - else - { - tmp_then = tmp_var; - tmp_else = tmp_var; - tmp_join = tmp_var; - } - - e = split_block_after_labels (bb); - cond_bb = e->src; - bb = e->dest; - remove_edge (e); - - then_bb = create_empty_bb (cond_bb); - else_bb = create_empty_bb (then_bb); - set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb); - set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb); - - stmt = gimple_build_cond_empty (cond); - gsi = gsi_start_bb (cond_bb); - gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); - - gsi = gsi_start_bb (then_bb); - expand_omp_build_assign (&gsi, tmp_then, val, true); - - gsi = gsi_start_bb (else_bb); - expand_omp_build_assign (&gsi, tmp_else, - build_int_cst (unsigned_type_node, 1), - true); - - make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE); - make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE); - add_bb_to_loop (then_bb, cond_bb->loop_father); - add_bb_to_loop (else_bb, cond_bb->loop_father); - e_then = make_edge (then_bb, bb, EDGE_FALLTHRU); - e_else = make_edge (else_bb, bb, EDGE_FALLTHRU); - - if (gimple_in_ssa_p (cfun)) - { - gphi *phi = create_phi_node (tmp_join, bb); - add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION); - add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION); - } - - val = tmp_join; - } - - gsi = gsi_start_bb (bb); - val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - } - - gsi = gsi_last_bb (bb); - t = gimple_omp_parallel_data_arg (entry_stmt); - if (t == NULL) - t1 = 
null_pointer_node; - else - t1 = build_fold_addr_expr (t); - tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt); - t2 = build_fold_addr_expr (child_fndecl); - - vec_alloc (args, 4 + vec_safe_length (ws_args)); - args->quick_push (t2); - args->quick_push (t1); - args->quick_push (val); - if (ws_args) - args->splice (*ws_args); - args->quick_push (flags); - - t = build_call_expr_loc_vec (UNKNOWN_LOCATION, - builtin_decl_explicit (start_ix), args); - - force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - - if (hsa_gen_requested_p () - && parallel_needs_hsa_kernel_p (region)) - { - cgraph_node *child_cnode = cgraph_node::get (child_fndecl); - hsa_register_kernel (child_cnode); - } -} - -/* Insert a function call whose name is FUNC_NAME with the information from - ENTRY_STMT into the basic_block BB. */ - -static void -expand_cilk_for_call (basic_block bb, gomp_parallel *entry_stmt, - vec <tree, va_gc> *ws_args) -{ - tree t, t1, t2; - gimple_stmt_iterator gsi; - vec <tree, va_gc> *args; - - gcc_assert (vec_safe_length (ws_args) == 2); - tree func_name = (*ws_args)[0]; - tree grain = (*ws_args)[1]; - - tree clauses = gimple_omp_parallel_clauses (entry_stmt); - tree count = find_omp_clause (clauses, OMP_CLAUSE__CILK_FOR_COUNT_); - gcc_assert (count != NULL_TREE); - count = OMP_CLAUSE_OPERAND (count, 0); - - gsi = gsi_last_bb (bb); - t = gimple_omp_parallel_data_arg (entry_stmt); - if (t == NULL) - t1 = null_pointer_node; - else - t1 = build_fold_addr_expr (t); - t2 = build_fold_addr_expr (gimple_omp_parallel_child_fn (entry_stmt)); - - vec_alloc (args, 4); - args->quick_push (t2); - args->quick_push (t1); - args->quick_push (count); - args->quick_push (grain); - t = build_call_expr_loc_vec (UNKNOWN_LOCATION, func_name, args); - - force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false, - GSI_CONTINUE_LINKING); -} - -/* Build the function call to GOMP_task to actually - generate the task operation. 
BB is the block where to insert the code. */ - -static void -expand_task_call (struct omp_region *region, basic_block bb, - gomp_task *entry_stmt) -{ - tree t1, t2, t3; - gimple_stmt_iterator gsi; - location_t loc = gimple_location (entry_stmt); - - tree clauses = gimple_omp_task_clauses (entry_stmt); - - tree ifc = find_omp_clause (clauses, OMP_CLAUSE_IF); - tree untied = find_omp_clause (clauses, OMP_CLAUSE_UNTIED); - tree mergeable = find_omp_clause (clauses, OMP_CLAUSE_MERGEABLE); - tree depend = find_omp_clause (clauses, OMP_CLAUSE_DEPEND); - tree finalc = find_omp_clause (clauses, OMP_CLAUSE_FINAL); - tree priority = find_omp_clause (clauses, OMP_CLAUSE_PRIORITY); - - unsigned int iflags - = (untied ? GOMP_TASK_FLAG_UNTIED : 0) - | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0) - | (depend ? GOMP_TASK_FLAG_DEPEND : 0); - - bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt); - tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE; - tree num_tasks = NULL_TREE; - bool ull = false; - if (taskloop_p) - { - gimple *g = last_stmt (region->outer->entry); - gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR - && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP); - struct omp_for_data fd; - extract_omp_for_data (as_a <gomp_for *> (g), &fd, NULL); - startvar = find_omp_clause (clauses, OMP_CLAUSE__LOOPTEMP_); - endvar = find_omp_clause (OMP_CLAUSE_CHAIN (startvar), - OMP_CLAUSE__LOOPTEMP_); - startvar = OMP_CLAUSE_DECL (startvar); - endvar = OMP_CLAUSE_DECL (endvar); - step = fold_convert_loc (loc, fd.iter_type, fd.loop.step); - if (fd.loop.cond_code == LT_EXPR) - iflags |= GOMP_TASK_FLAG_UP; - tree tclauses = gimple_omp_for_clauses (g); - num_tasks = find_omp_clause (tclauses, OMP_CLAUSE_NUM_TASKS); - if (num_tasks) - num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks); - else - { - num_tasks = find_omp_clause (tclauses, OMP_CLAUSE_GRAINSIZE); - if (num_tasks) - { - iflags |= GOMP_TASK_FLAG_GRAINSIZE; - num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks); - } - 
else - num_tasks = integer_zero_node; - } - num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks); - if (ifc == NULL_TREE) - iflags |= GOMP_TASK_FLAG_IF; - if (find_omp_clause (tclauses, OMP_CLAUSE_NOGROUP)) - iflags |= GOMP_TASK_FLAG_NOGROUP; - ull = fd.iter_type == long_long_unsigned_type_node; - } - else if (priority) - iflags |= GOMP_TASK_FLAG_PRIORITY; - - tree flags = build_int_cst (unsigned_type_node, iflags); - - tree cond = boolean_true_node; - if (ifc) - { - if (taskloop_p) - { - tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc)); - t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t, - build_int_cst (unsigned_type_node, - GOMP_TASK_FLAG_IF), - build_int_cst (unsigned_type_node, 0)); - flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, - flags, t); - } - else - cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc)); - } - - if (finalc) - { - tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc)); - t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t, - build_int_cst (unsigned_type_node, - GOMP_TASK_FLAG_FINAL), - build_int_cst (unsigned_type_node, 0)); - flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t); - } - if (depend) - depend = OMP_CLAUSE_DECL (depend); - else - depend = build_int_cst (ptr_type_node, 0); - if (priority) - priority = fold_convert (integer_type_node, - OMP_CLAUSE_PRIORITY_EXPR (priority)); - else - priority = integer_zero_node; - - gsi = gsi_last_bb (bb); - tree t = gimple_omp_task_data_arg (entry_stmt); - if (t == NULL) - t2 = null_pointer_node; - else - t2 = build_fold_addr_expr_loc (loc, t); - t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt)); - t = gimple_omp_task_copy_fn (entry_stmt); - if (t == NULL) - t3 = null_pointer_node; - else - t3 = build_fold_addr_expr_loc (loc, t); - - if (taskloop_p) - t = build_call_expr (ull - ? 
builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL) - : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP), - 11, t1, t2, t3, - gimple_omp_task_arg_size (entry_stmt), - gimple_omp_task_arg_align (entry_stmt), flags, - num_tasks, priority, startvar, endvar, step); - else - t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK), - 9, t1, t2, t3, - gimple_omp_task_arg_size (entry_stmt), - gimple_omp_task_arg_align (entry_stmt), cond, flags, - depend, priority); - - force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); -} - - /* If exceptions are enabled, wrap the statements in BODY in a MUST_NOT_THROW catch handler and return it. This prevents programs from violating the structured block semantics with throws. */ @@ -7089,7403 +5740,6 @@ maybe_catch_exception (gimple_seq body) return gimple_seq_alloc_with_stmt (g); } -/* Chain all the DECLs in LIST by their TREE_CHAIN fields. */ - -static tree -vec2chain (vec<tree, va_gc> *v) -{ - tree chain = NULL_TREE, t; - unsigned ix; - - FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t) - { - DECL_CHAIN (t) = chain; - chain = t; - } - - return chain; -} - - -/* Remove barriers in REGION->EXIT's block. Note that this is only - valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region - is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that - left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be - removed. */ - -static void -remove_exit_barrier (struct omp_region *region) -{ - gimple_stmt_iterator gsi; - basic_block exit_bb; - edge_iterator ei; - edge e; - gimple *stmt; - int any_addressable_vars = -1; - - exit_bb = region->exit; - - /* If the parallel region doesn't return, we don't have REGION->EXIT - block at all. */ - if (! exit_bb) - return; - - /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The - workshare's GIMPLE_OMP_RETURN will be in a preceding block. 
The kinds of - statements that can appear in between are extremely limited -- no - memory operations at all. Here, we allow nothing at all, so the - only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */ - gsi = gsi_last_bb (exit_bb); - gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); - gsi_prev (&gsi); - if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL) - return; - - FOR_EACH_EDGE (e, ei, exit_bb->preds) - { - gsi = gsi_last_bb (e->src); - if (gsi_end_p (gsi)) - continue; - stmt = gsi_stmt (gsi); - if (gimple_code (stmt) == GIMPLE_OMP_RETURN - && !gimple_omp_return_nowait_p (stmt)) - { - /* OpenMP 3.0 tasks unfortunately prevent this optimization - in many cases. If there could be tasks queued, the barrier - might be needed to let the tasks run before some local - variable of the parallel that the task uses as shared - runs out of scope. The task can be spawned either - from within current function (this would be easy to check) - or from some function it calls and gets passed an address - of such a variable. 
*/ - if (any_addressable_vars < 0) - { - gomp_parallel *parallel_stmt - = as_a <gomp_parallel *> (last_stmt (region->entry)); - tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt); - tree local_decls, block, decl; - unsigned ix; - - any_addressable_vars = 0; - FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl) - if (TREE_ADDRESSABLE (decl)) - { - any_addressable_vars = 1; - break; - } - for (block = gimple_block (stmt); - !any_addressable_vars - && block - && TREE_CODE (block) == BLOCK; - block = BLOCK_SUPERCONTEXT (block)) - { - for (local_decls = BLOCK_VARS (block); - local_decls; - local_decls = DECL_CHAIN (local_decls)) - if (TREE_ADDRESSABLE (local_decls)) - { - any_addressable_vars = 1; - break; - } - if (block == gimple_block (parallel_stmt)) - break; - } - } - if (!any_addressable_vars) - gimple_omp_return_set_nowait (stmt); - } - } -} - -static void -remove_exit_barriers (struct omp_region *region) -{ - if (region->type == GIMPLE_OMP_PARALLEL) - remove_exit_barrier (region); - - if (region->inner) - { - region = region->inner; - remove_exit_barriers (region); - while (region->next) - { - region = region->next; - remove_exit_barriers (region); - } - } -} - -/* Optimize omp_get_thread_num () and omp_get_num_threads () - calls. These can't be declared as const functions, but - within one parallel body they are constant, so they can be - transformed there into __builtin_omp_get_{thread_num,num_threads} () - which are declared const. Similarly for task body, except - that in untied task omp_get_thread_num () can change at any task - scheduling point. 
*/ - -static void -optimize_omp_library_calls (gimple *entry_stmt) -{ - basic_block bb; - gimple_stmt_iterator gsi; - tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); - tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree); - tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); - tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree); - bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK - && find_omp_clause (gimple_omp_task_clauses (entry_stmt), - OMP_CLAUSE_UNTIED) != NULL); - - FOR_EACH_BB_FN (bb, cfun) - for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) - { - gimple *call = gsi_stmt (gsi); - tree decl; - - if (is_gimple_call (call) - && (decl = gimple_call_fndecl (call)) - && DECL_EXTERNAL (decl) - && TREE_PUBLIC (decl) - && DECL_INITIAL (decl) == NULL) - { - tree built_in; - - if (DECL_NAME (decl) == thr_num_id) - { - /* In #pragma omp task untied omp_get_thread_num () can change - during the execution of the task region. */ - if (untied_task) - continue; - built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); - } - else if (DECL_NAME (decl) == num_thr_id) - built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); - else - continue; - - if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in) - || gimple_call_num_args (call) != 0) - continue; - - if (flag_exceptions && !TREE_NOTHROW (decl)) - continue; - - if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE - || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)), - TREE_TYPE (TREE_TYPE (built_in)))) - continue; - - gimple_call_set_fndecl (call, built_in); - } - } -} - -/* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be - regimplified. */ - -static tree -expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *) -{ - tree t = *tp; - - /* Any variable with DECL_VALUE_EXPR needs to be regimplified. 
*/ - if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t)) - return t; - - if (TREE_CODE (t) == ADDR_EXPR) - recompute_tree_invariant_for_addr_expr (t); - - *walk_subtrees = !TYPE_P (t) && !DECL_P (t); - return NULL_TREE; -} - -/* Prepend or append TO = FROM assignment before or after *GSI_P. */ - -static void -expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from, - bool after) -{ - bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to); - from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE, - !after, after ? GSI_CONTINUE_LINKING - : GSI_SAME_STMT); - gimple *stmt = gimple_build_assign (to, from); - if (after) - gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING); - else - gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT); - if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL) - || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL)) - { - gimple_stmt_iterator gsi = gsi_for_stmt (stmt); - gimple_regimplify_operands (stmt, &gsi); - } -} - -/* Expand the OpenMP parallel or task directive starting at REGION. */ - -static void -expand_omp_taskreg (struct omp_region *region) -{ - basic_block entry_bb, exit_bb, new_bb; - struct function *child_cfun; - tree child_fn, block, t; - gimple_stmt_iterator gsi; - gimple *entry_stmt, *stmt; - edge e; - vec<tree, va_gc> *ws_args; - - entry_stmt = last_stmt (region->entry); - child_fn = gimple_omp_taskreg_child_fn (entry_stmt); - child_cfun = DECL_STRUCT_FUNCTION (child_fn); - - entry_bb = region->entry; - if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK) - exit_bb = region->cont; - else - exit_bb = region->exit; - - bool is_cilk_for - = (flag_cilkplus - && gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL - && find_omp_clause (gimple_omp_parallel_clauses (entry_stmt), - OMP_CLAUSE__CILK_FOR_COUNT_) != NULL_TREE); - - if (is_cilk_for) - /* If it is a _Cilk_for statement, it is modelled *like* a parallel for, - and the inner statement contains the name of the built-in function - and grain. 
*/ - ws_args = region->inner->ws_args; - else if (is_combined_parallel (region)) - ws_args = region->ws_args; - else - ws_args = NULL; - - if (child_cfun->cfg) - { - /* Due to inlining, it may happen that we have already outlined - the region, in which case all we need to do is make the - sub-graph unreachable and emit the parallel call. */ - edge entry_succ_e, exit_succ_e; - - entry_succ_e = single_succ_edge (entry_bb); - - gsi = gsi_last_bb (entry_bb); - gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL - || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK); - gsi_remove (&gsi, true); - - new_bb = entry_bb; - if (exit_bb) - { - exit_succ_e = single_succ_edge (exit_bb); - make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU); - } - remove_edge_and_dominated_blocks (entry_succ_e); - } - else - { - unsigned srcidx, dstidx, num; - - /* If the parallel region needs data sent from the parent - function, then the very first statement (except possible - tree profile counter updates) of the parallel body - is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since - &.OMP_DATA_O is passed as an argument to the child function, - we need to replace it with the argument as seen by the child - function. - - In most cases, this will end up being the identity assignment - .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had - a function call that has been inlined, the original PARM_DECL - .OMP_DATA_I may have been converted into a different local - variable. In which case, we need to keep the assignment. */ - if (gimple_omp_taskreg_data_arg (entry_stmt)) - { - basic_block entry_succ_bb - = single_succ_p (entry_bb) ? 
single_succ (entry_bb) - : FALLTHRU_EDGE (entry_bb)->dest; - tree arg; - gimple *parcopy_stmt = NULL; - - for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi)) - { - gimple *stmt; - - gcc_assert (!gsi_end_p (gsi)); - stmt = gsi_stmt (gsi); - if (gimple_code (stmt) != GIMPLE_ASSIGN) - continue; - - if (gimple_num_ops (stmt) == 2) - { - tree arg = gimple_assign_rhs1 (stmt); - - /* We're ignore the subcode because we're - effectively doing a STRIP_NOPS. */ - - if (TREE_CODE (arg) == ADDR_EXPR - && TREE_OPERAND (arg, 0) - == gimple_omp_taskreg_data_arg (entry_stmt)) - { - parcopy_stmt = stmt; - break; - } - } - } - - gcc_assert (parcopy_stmt != NULL); - arg = DECL_ARGUMENTS (child_fn); - - if (!gimple_in_ssa_p (cfun)) - { - if (gimple_assign_lhs (parcopy_stmt) == arg) - gsi_remove (&gsi, true); - else - { - /* ?? Is setting the subcode really necessary ?? */ - gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg)); - gimple_assign_set_rhs1 (parcopy_stmt, arg); - } - } - else - { - tree lhs = gimple_assign_lhs (parcopy_stmt); - gcc_assert (SSA_NAME_VAR (lhs) == arg); - /* We'd like to set the rhs to the default def in the child_fn, - but it's too early to create ssa names in the child_fn. - Instead, we set the rhs to the parm. In - move_sese_region_to_fn, we introduce a default def for the - parm, map the parm to it's default def, and once we encounter - this stmt, replace the parm with the default def. */ - gimple_assign_set_rhs1 (parcopy_stmt, arg); - update_stmt (parcopy_stmt); - } - } - - /* Declare local variables needed in CHILD_CFUN. */ - block = DECL_INITIAL (child_fn); - BLOCK_VARS (block) = vec2chain (child_cfun->local_decls); - /* The gimplifier could record temporaries in parallel/task block - rather than in containing function's local_decls chain, - which would mean cgraph missed finalizing them. Do it now. 
*/ - for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t)) - if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t)) - varpool_node::finalize_decl (t); - DECL_SAVED_TREE (child_fn) = NULL; - /* We'll create a CFG for child_fn, so no gimple body is needed. */ - gimple_set_body (child_fn, NULL); - TREE_USED (block) = 1; - - /* Reset DECL_CONTEXT on function arguments. */ - for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t)) - DECL_CONTEXT (t) = child_fn; - - /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK, - so that it can be moved to the child function. */ - gsi = gsi_last_bb (entry_bb); - stmt = gsi_stmt (gsi); - gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL - || gimple_code (stmt) == GIMPLE_OMP_TASK)); - e = split_block (entry_bb, stmt); - gsi_remove (&gsi, true); - entry_bb = e->dest; - edge e2 = NULL; - if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL) - single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; - else - { - e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL); - gcc_assert (e2->dest == region->exit); - remove_edge (BRANCH_EDGE (entry_bb)); - set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src); - gsi = gsi_last_bb (region->exit); - gcc_assert (!gsi_end_p (gsi) - && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); - gsi_remove (&gsi, true); - } - - /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */ - if (exit_bb) - { - gsi = gsi_last_bb (exit_bb); - gcc_assert (!gsi_end_p (gsi) - && (gimple_code (gsi_stmt (gsi)) - == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN))); - stmt = gimple_build_return (NULL); - gsi_insert_after (&gsi, stmt, GSI_SAME_STMT); - gsi_remove (&gsi, true); - } - - /* Move the parallel region into CHILD_CFUN. 
*/ - - if (gimple_in_ssa_p (cfun)) - { - init_tree_ssa (child_cfun); - init_ssa_operands (child_cfun); - child_cfun->gimple_df->in_ssa_p = true; - block = NULL_TREE; - } - else - block = gimple_block (entry_stmt); - - new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block); - if (exit_bb) - single_succ_edge (new_bb)->flags = EDGE_FALLTHRU; - if (e2) - { - basic_block dest_bb = e2->dest; - if (!exit_bb) - make_edge (new_bb, dest_bb, EDGE_FALLTHRU); - remove_edge (e2); - set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb); - } - /* When the OMP expansion process cannot guarantee an up-to-date - loop tree arrange for the child function to fixup loops. */ - if (loops_state_satisfies_p (LOOPS_NEED_FIXUP)) - child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP; - - /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */ - num = vec_safe_length (child_cfun->local_decls); - for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++) - { - t = (*child_cfun->local_decls)[srcidx]; - if (DECL_CONTEXT (t) == cfun->decl) - continue; - if (srcidx != dstidx) - (*child_cfun->local_decls)[dstidx] = t; - dstidx++; - } - if (dstidx != num) - vec_safe_truncate (child_cfun->local_decls, dstidx); - - /* Inform the callgraph about the new function. */ - child_cfun->curr_properties = cfun->curr_properties; - child_cfun->has_simduid_loops |= cfun->has_simduid_loops; - child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops; - cgraph_node *node = cgraph_node::get_create (child_fn); - node->parallelized_function = 1; - cgraph_node::add_new_function (child_fn, true); - - bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl) - && !DECL_ASSEMBLER_NAME_SET_P (child_fn); - - /* Fix the callgraph edges for child_cfun. Those for cfun will be - fixed in a following pass. 
*/ - push_cfun (child_cfun); - if (need_asm) - assign_assembler_name_if_neeeded (child_fn); - - if (optimize) - optimize_omp_library_calls (entry_stmt); - cgraph_edge::rebuild_edges (); - - /* Some EH regions might become dead, see PR34608. If - pass_cleanup_cfg isn't the first pass to happen with the - new child, these dead EH edges might cause problems. - Clean them up now. */ - if (flag_exceptions) - { - basic_block bb; - bool changed = false; - - FOR_EACH_BB_FN (bb, cfun) - changed |= gimple_purge_dead_eh_edges (bb); - if (changed) - cleanup_tree_cfg (); - } - if (gimple_in_ssa_p (cfun)) - update_ssa (TODO_update_ssa); - if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP)) - verify_loop_structure (); - pop_cfun (); - - if (dump_file && !gimple_in_ssa_p (cfun)) - { - omp_any_child_fn_dumped = true; - dump_function_header (dump_file, child_fn, dump_flags); - dump_function_to_file (child_fn, dump_file, dump_flags); - } - } - - /* Emit a library call to launch the children threads. */ - if (is_cilk_for) - expand_cilk_for_call (new_bb, - as_a <gomp_parallel *> (entry_stmt), ws_args); - else if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL) - expand_parallel_call (region, new_bb, - as_a <gomp_parallel *> (entry_stmt), ws_args); - else - expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt)); - if (gimple_in_ssa_p (cfun)) - update_ssa (TODO_update_ssa_only_virtuals); -} - -/* Information about members of an OpenACC collapsed loop nest. */ - -struct oacc_collapse -{ - tree base; /* Base value. */ - tree iters; /* Number of steps. */ - tree step; /* step size. */ -}; - -/* Helper for expand_oacc_for. Determine collapsed loop information. - Fill in COUNTS array. Emit any initialization code before GSI. - Return the calculated outer loop bound of BOUND_TYPE. 
*/ - -static tree -expand_oacc_collapse_init (const struct omp_for_data *fd, - gimple_stmt_iterator *gsi, - oacc_collapse *counts, tree bound_type) -{ - tree total = build_int_cst (bound_type, 1); - int ix; - - gcc_assert (integer_onep (fd->loop.step)); - gcc_assert (integer_zerop (fd->loop.n1)); - - for (ix = 0; ix != fd->collapse; ix++) - { - const omp_for_data_loop *loop = &fd->loops[ix]; - - tree iter_type = TREE_TYPE (loop->v); - tree diff_type = iter_type; - tree plus_type = iter_type; - - gcc_assert (loop->cond_code == fd->loop.cond_code); - - if (POINTER_TYPE_P (iter_type)) - plus_type = sizetype; - if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type)) - diff_type = signed_type_for (diff_type); - - tree b = loop->n1; - tree e = loop->n2; - tree s = loop->step; - bool up = loop->cond_code == LT_EXPR; - tree dir = build_int_cst (diff_type, up ? +1 : -1); - bool negating; - tree expr; - - b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE, - true, GSI_SAME_STMT); - e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE, - true, GSI_SAME_STMT); - - /* Convert the step, avoiding possible unsigned->signed overflow. */ - negating = !up && TYPE_UNSIGNED (TREE_TYPE (s)); - if (negating) - s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s); - s = fold_convert (diff_type, s); - if (negating) - s = fold_build1 (NEGATE_EXPR, diff_type, s); - s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE, - true, GSI_SAME_STMT); - - /* Determine the range, avoiding possible unsigned->signed overflow. */ - negating = !up && TYPE_UNSIGNED (iter_type); - expr = fold_build2 (MINUS_EXPR, plus_type, - fold_convert (plus_type, negating ? b : e), - fold_convert (plus_type, negating ? e : b)); - expr = fold_convert (diff_type, expr); - if (negating) - expr = fold_build1 (NEGATE_EXPR, diff_type, expr); - tree range = force_gimple_operand_gsi - (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT); - - /* Determine number of iterations. 
*/ - expr = fold_build2 (MINUS_EXPR, diff_type, range, dir); - expr = fold_build2 (PLUS_EXPR, diff_type, expr, s); - expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s); - - tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, - true, GSI_SAME_STMT); - - counts[ix].base = b; - counts[ix].iters = iters; - counts[ix].step = s; - - total = fold_build2 (MULT_EXPR, bound_type, total, - fold_convert (bound_type, iters)); - } - - return total; -} - -/* Emit initializers for collapsed loop members. IVAR is the outer - loop iteration variable, from which collapsed loop iteration values - are calculated. COUNTS array has been initialized by - expand_oacc_collapse_inits. */ - -static void -expand_oacc_collapse_vars (const struct omp_for_data *fd, - gimple_stmt_iterator *gsi, - const oacc_collapse *counts, tree ivar) -{ - tree ivar_type = TREE_TYPE (ivar); - - /* The most rapidly changing iteration variable is the innermost - one. */ - for (int ix = fd->collapse; ix--;) - { - const omp_for_data_loop *loop = &fd->loops[ix]; - const oacc_collapse *collapse = &counts[ix]; - tree iter_type = TREE_TYPE (loop->v); - tree diff_type = TREE_TYPE (collapse->step); - tree plus_type = iter_type; - enum tree_code plus_code = PLUS_EXPR; - tree expr; - - if (POINTER_TYPE_P (iter_type)) - { - plus_code = POINTER_PLUS_EXPR; - plus_type = sizetype; - } - - expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, ivar, - fold_convert (ivar_type, collapse->iters)); - expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr), - collapse->step); - expr = fold_build2 (plus_code, iter_type, collapse->base, - fold_convert (plus_type, expr)); - expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE, - true, GSI_SAME_STMT); - gassign *ass = gimple_build_assign (loop->v, expr); - gsi_insert_before (gsi, ass, GSI_SAME_STMT); - - if (ix) - { - expr = fold_build2 (TRUNC_DIV_EXPR, ivar_type, ivar, - fold_convert (ivar_type, collapse->iters)); - ivar = force_gimple_operand_gsi 
(gsi, expr, true, NULL_TREE, - true, GSI_SAME_STMT); - } - } -} - - -/* Helper function for expand_omp_{for_*,simd}. If this is the outermost - of the combined collapse > 1 loop constructs, generate code like: - if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB; - if (cond3 is <) - adj = STEP3 - 1; - else - adj = STEP3 + 1; - count3 = (adj + N32 - N31) / STEP3; - if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB; - if (cond2 is <) - adj = STEP2 - 1; - else - adj = STEP2 + 1; - count2 = (adj + N22 - N21) / STEP2; - if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB; - if (cond1 is <) - adj = STEP1 - 1; - else - adj = STEP1 + 1; - count1 = (adj + N12 - N11) / STEP1; - count = count1 * count2 * count3; - Furthermore, if ZERO_ITER_BB is NULL, create a BB which does: - count = 0; - and set ZERO_ITER_BB to that bb. If this isn't the outermost - of the combined loop constructs, just initialize COUNTS array - from the _looptemp_ clauses. */ - -/* NOTE: It *could* be better to moosh all of the BBs together, - creating one larger BB with all the computation and the unexpected - jump at the end. I.e. - - bool zero3, zero2, zero1, zero; - - zero3 = N32 c3 N31; - count3 = (N32 - N31) /[cl] STEP3; - zero2 = N22 c2 N21; - count2 = (N22 - N21) /[cl] STEP2; - zero1 = N12 c1 N11; - count1 = (N12 - N11) /[cl] STEP1; - zero = zero3 || zero2 || zero1; - count = count1 * count2 * count3; - if (__builtin_expect(zero, false)) goto zero_iter_bb; - - After all, we expect the zero=false, and thus we expect to have to - evaluate all of the comparison expressions, so short-circuiting - oughtn't be a win. Since the condition isn't protecting a - denominator, we're not concerned about divide-by-zero, so we can - fully evaluate count even if a numerator turned out to be wrong. - - It seems like putting this all together would create much better - scheduling opportunities, and less pressure on the chip's branch - predictor. 
*/ - -static void -expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi, - basic_block &entry_bb, tree *counts, - basic_block &zero_iter1_bb, int &first_zero_iter1, - basic_block &zero_iter2_bb, int &first_zero_iter2, - basic_block &l2_dom_bb) -{ - tree t, type = TREE_TYPE (fd->loop.v); - edge e, ne; - int i; - - /* Collapsed loops need work for expansion into SSA form. */ - gcc_assert (!gimple_in_ssa_p (cfun)); - - if (gimple_omp_for_combined_into_p (fd->for_stmt) - && TREE_CODE (fd->loop.n2) != INTEGER_CST) - { - gcc_assert (fd->ordered == 0); - /* First two _looptemp_ clauses are for istart/iend, counts[0] - isn't supposed to be handled, as the inner loop doesn't - use it. */ - tree innerc = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - for (i = 0; i < fd->collapse; i++) - { - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - if (i) - counts[i] = OMP_CLAUSE_DECL (innerc); - else - counts[0] = NULL_TREE; - } - return; - } - - for (i = fd->collapse; i < fd->ordered; i++) - { - tree itype = TREE_TYPE (fd->loops[i].v); - counts[i] = NULL_TREE; - t = fold_binary (fd->loops[i].cond_code, boolean_type_node, - fold_convert (itype, fd->loops[i].n1), - fold_convert (itype, fd->loops[i].n2)); - if (t && integer_zerop (t)) - { - for (i = fd->collapse; i < fd->ordered; i++) - counts[i] = build_int_cst (type, 0); - break; - } - } - for (i = 0; i < (fd->ordered ? 
fd->ordered : fd->collapse); i++) - { - tree itype = TREE_TYPE (fd->loops[i].v); - - if (i >= fd->collapse && counts[i]) - continue; - if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse) - && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node, - fold_convert (itype, fd->loops[i].n1), - fold_convert (itype, fd->loops[i].n2))) - == NULL_TREE || !integer_onep (t))) - { - gcond *cond_stmt; - tree n1, n2; - n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1)); - n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE, - true, GSI_SAME_STMT); - n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2)); - n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE, - true, GSI_SAME_STMT); - cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2, - NULL_TREE, NULL_TREE); - gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT); - if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), - expand_omp_regimplify_p, NULL, NULL) - || walk_tree (gimple_cond_rhs_ptr (cond_stmt), - expand_omp_regimplify_p, NULL, NULL)) - { - *gsi = gsi_for_stmt (cond_stmt); - gimple_regimplify_operands (cond_stmt, gsi); - } - e = split_block (entry_bb, cond_stmt); - basic_block &zero_iter_bb - = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb; - int &first_zero_iter - = i < fd->collapse ? 
first_zero_iter1 : first_zero_iter2; - if (zero_iter_bb == NULL) - { - gassign *assign_stmt; - first_zero_iter = i; - zero_iter_bb = create_empty_bb (entry_bb); - add_bb_to_loop (zero_iter_bb, entry_bb->loop_father); - *gsi = gsi_after_labels (zero_iter_bb); - if (i < fd->collapse) - assign_stmt = gimple_build_assign (fd->loop.n2, - build_zero_cst (type)); - else - { - counts[i] = create_tmp_reg (type, ".count"); - assign_stmt - = gimple_build_assign (counts[i], build_zero_cst (type)); - } - gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT); - set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb, - entry_bb); - } - ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE); - ne->probability = REG_BR_PROB_BASE / 2000 - 1; - e->flags = EDGE_TRUE_VALUE; - e->probability = REG_BR_PROB_BASE - ne->probability; - if (l2_dom_bb == NULL) - l2_dom_bb = entry_bb; - entry_bb = e->dest; - *gsi = gsi_last_bb (entry_bb); - } - - if (POINTER_TYPE_P (itype)) - itype = signed_type_for (itype); - t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR - ? -1 : 1)); - t = fold_build2 (PLUS_EXPR, itype, - fold_convert (itype, fd->loops[i].step), t); - t = fold_build2 (PLUS_EXPR, itype, t, - fold_convert (itype, fd->loops[i].n2)); - t = fold_build2 (MINUS_EXPR, itype, t, - fold_convert (itype, fd->loops[i].n1)); - /* ?? We could probably use CEIL_DIV_EXPR instead of - TRUNC_DIV_EXPR and adjusting by hand. Unless we can't - generate the same code in the end because generically we - don't know that the values involved must be negative for - GT?? 
*/ - if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR) - t = fold_build2 (TRUNC_DIV_EXPR, itype, - fold_build1 (NEGATE_EXPR, itype, t), - fold_build1 (NEGATE_EXPR, itype, - fold_convert (itype, - fd->loops[i].step))); - else - t = fold_build2 (TRUNC_DIV_EXPR, itype, t, - fold_convert (itype, fd->loops[i].step)); - t = fold_convert (type, t); - if (TREE_CODE (t) == INTEGER_CST) - counts[i] = t; - else - { - if (i < fd->collapse || i != first_zero_iter2) - counts[i] = create_tmp_reg (type, ".count"); - expand_omp_build_assign (gsi, counts[i], t); - } - if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse) - { - if (i == 0) - t = counts[0]; - else - t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]); - expand_omp_build_assign (gsi, fd->loop.n2, t); - } - } -} - - -/* Helper function for expand_omp_{for_*,simd}. Generate code like: - T = V; - V3 = N31 + (T % count3) * STEP3; - T = T / count3; - V2 = N21 + (T % count2) * STEP2; - T = T / count2; - V1 = N11 + T * STEP1; - if this loop doesn't have an inner loop construct combined with it. - If it does have an inner loop construct combined with it and the - iteration count isn't known constant, store values from counts array - into its _looptemp_ temporaries instead. */ - -static void -expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi, - tree *counts, gimple *inner_stmt, tree startvar) -{ - int i; - if (gimple_omp_for_combined_p (fd->for_stmt)) - { - /* If fd->loop.n2 is constant, then no propagation of the counts - is needed, they are constant. */ - if (TREE_CODE (fd->loop.n2) == INTEGER_CST) - return; - - tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR - ? gimple_omp_taskreg_clauses (inner_stmt) - : gimple_omp_for_clauses (inner_stmt); - /* First two _looptemp_ clauses are for istart/iend, counts[0] - isn't supposed to be handled, as the inner loop doesn't - use it. 
*/ - tree innerc = find_omp_clause (clauses, OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - for (i = 0; i < fd->collapse; i++) - { - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - if (i) - { - tree tem = OMP_CLAUSE_DECL (innerc); - tree t = fold_convert (TREE_TYPE (tem), counts[i]); - t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE, - false, GSI_CONTINUE_LINKING); - gassign *stmt = gimple_build_assign (tem, t); - gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); - } - } - return; - } - - tree type = TREE_TYPE (fd->loop.v); - tree tem = create_tmp_reg (type, ".tem"); - gassign *stmt = gimple_build_assign (tem, startvar); - gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); - - for (i = fd->collapse - 1; i >= 0; i--) - { - tree vtype = TREE_TYPE (fd->loops[i].v), itype, t; - itype = vtype; - if (POINTER_TYPE_P (vtype)) - itype = signed_type_for (vtype); - if (i != 0) - t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]); - else - t = tem; - t = fold_convert (itype, t); - t = fold_build2 (MULT_EXPR, itype, t, - fold_convert (itype, fd->loops[i].step)); - if (POINTER_TYPE_P (vtype)) - t = fold_build_pointer_plus (fd->loops[i].n1, t); - else - t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t); - t = force_gimple_operand_gsi (gsi, t, - DECL_P (fd->loops[i].v) - && TREE_ADDRESSABLE (fd->loops[i].v), - NULL_TREE, false, - GSI_CONTINUE_LINKING); - stmt = gimple_build_assign (fd->loops[i].v, t); - gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); - if (i != 0) - { - t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]); - t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE, - false, GSI_CONTINUE_LINKING); - stmt = gimple_build_assign (tem, t); - gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); - } - } -} - - -/* Helper function for expand_omp_for_*. 
Generate code like: - L10: - V3 += STEP3; - if (V3 cond3 N32) goto BODY_BB; else goto L11; - L11: - V3 = N31; - V2 += STEP2; - if (V2 cond2 N22) goto BODY_BB; else goto L12; - L12: - V2 = N21; - V1 += STEP1; - goto BODY_BB; */ - -static basic_block -extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb, - basic_block body_bb) -{ - basic_block last_bb, bb, collapse_bb = NULL; - int i; - gimple_stmt_iterator gsi; - edge e; - tree t; - gimple *stmt; - - last_bb = cont_bb; - for (i = fd->collapse - 1; i >= 0; i--) - { - tree vtype = TREE_TYPE (fd->loops[i].v); - - bb = create_empty_bb (last_bb); - add_bb_to_loop (bb, last_bb->loop_father); - gsi = gsi_start_bb (bb); - - if (i < fd->collapse - 1) - { - e = make_edge (last_bb, bb, EDGE_FALSE_VALUE); - e->probability = REG_BR_PROB_BASE / 8; - - t = fd->loops[i + 1].n1; - t = force_gimple_operand_gsi (&gsi, t, - DECL_P (fd->loops[i + 1].v) - && TREE_ADDRESSABLE (fd->loops[i - + 1].v), - NULL_TREE, false, - GSI_CONTINUE_LINKING); - stmt = gimple_build_assign (fd->loops[i + 1].v, t); - gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); - } - else - collapse_bb = bb; - - set_immediate_dominator (CDI_DOMINATORS, bb, last_bb); - - if (POINTER_TYPE_P (vtype)) - t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step); - else - t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step); - t = force_gimple_operand_gsi (&gsi, t, - DECL_P (fd->loops[i].v) - && TREE_ADDRESSABLE (fd->loops[i].v), - NULL_TREE, false, GSI_CONTINUE_LINKING); - stmt = gimple_build_assign (fd->loops[i].v, t); - gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); - - if (i > 0) - { - t = fd->loops[i].n2; - t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - tree v = fd->loops[i].v; - if (DECL_P (v) && TREE_ADDRESSABLE (v)) - v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t); 
- stmt = gimple_build_cond_empty (t); - gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); - e = make_edge (bb, body_bb, EDGE_TRUE_VALUE); - e->probability = REG_BR_PROB_BASE * 7 / 8; - } - else - make_edge (bb, body_bb, EDGE_FALLTHRU); - last_bb = bb; - } - - return collapse_bb; -} - - -/* Expand #pragma omp ordered depend(source). */ - -static void -expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd, - tree *counts, location_t loc) -{ - enum built_in_function source_ix - = fd->iter_type == long_integer_type_node - ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST; - gimple *g - = gimple_build_call (builtin_decl_explicit (source_ix), 1, - build_fold_addr_expr (counts[fd->ordered])); - gimple_set_location (g, loc); - gsi_insert_before (gsi, g, GSI_SAME_STMT); -} - -/* Expand a single depend from #pragma omp ordered depend(sink:...). */ - -static void -expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd, - tree *counts, tree c, location_t loc) -{ - auto_vec<tree, 10> args; - enum built_in_function sink_ix - = fd->iter_type == long_integer_type_node - ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT; - tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE; - int i; - gimple_stmt_iterator gsi2 = *gsi; - bool warned_step = false; - - for (i = 0; i < fd->ordered; i++) - { - tree step = NULL_TREE; - off = TREE_PURPOSE (deps); - if (TREE_CODE (off) == TRUNC_DIV_EXPR) - { - step = TREE_OPERAND (off, 1); - off = TREE_OPERAND (off, 0); - } - if (!integer_zerop (off)) - { - gcc_assert (fd->loops[i].cond_code == LT_EXPR - || fd->loops[i].cond_code == GT_EXPR); - bool forward = fd->loops[i].cond_code == LT_EXPR; - if (step) - { - /* Non-simple Fortran DO loops. If step is variable, - we don't know at compile even the direction, so can't - warn. 
*/ - if (TREE_CODE (step) != INTEGER_CST) - break; - forward = tree_int_cst_sgn (step) != -1; - } - if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) - warning_at (loc, 0, "%<depend(sink)%> clause waiting for " - "lexically later iteration"); - break; - } - deps = TREE_CHAIN (deps); - } - /* If all offsets corresponding to the collapsed loops are zero, - this depend clause can be ignored. FIXME: but there is still a - flush needed. We need to emit one __sync_synchronize () for it - though (perhaps conditionally)? Solve this together with the - conservative dependence folding optimization. - if (i >= fd->collapse) - return; */ - - deps = OMP_CLAUSE_DECL (c); - gsi_prev (&gsi2); - edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2)); - edge e2 = split_block_after_labels (e1->dest); - - gsi2 = gsi_after_labels (e1->dest); - *gsi = gsi_last_bb (e1->src); - for (i = 0; i < fd->ordered; i++) - { - tree itype = TREE_TYPE (fd->loops[i].v); - tree step = NULL_TREE; - tree orig_off = NULL_TREE; - if (POINTER_TYPE_P (itype)) - itype = sizetype; - if (i) - deps = TREE_CHAIN (deps); - off = TREE_PURPOSE (deps); - if (TREE_CODE (off) == TRUNC_DIV_EXPR) - { - step = TREE_OPERAND (off, 1); - off = TREE_OPERAND (off, 0); - gcc_assert (fd->loops[i].cond_code == LT_EXPR - && integer_onep (fd->loops[i].step) - && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))); - } - tree s = fold_convert_loc (loc, itype, step ? 
step : fd->loops[i].step); - if (step) - { - off = fold_convert_loc (loc, itype, off); - orig_off = off; - off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s); - } - - if (integer_zerop (off)) - t = boolean_true_node; - else - { - tree a; - tree co = fold_convert_loc (loc, itype, off); - if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))) - { - if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) - co = fold_build1_loc (loc, NEGATE_EXPR, itype, co); - a = fold_build2_loc (loc, POINTER_PLUS_EXPR, - TREE_TYPE (fd->loops[i].v), fd->loops[i].v, - co); - } - else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) - a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v), - fd->loops[i].v, co); - else - a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v), - fd->loops[i].v, co); - if (step) - { - tree t1, t2; - if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) - t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a, - fd->loops[i].n1); - else - t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a, - fd->loops[i].n2); - if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) - t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a, - fd->loops[i].n2); - else - t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a, - fd->loops[i].n1); - t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, - step, build_int_cst (TREE_TYPE (step), 0)); - if (TREE_CODE (step) != INTEGER_CST) - { - t1 = unshare_expr (t1); - t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - t2 = unshare_expr (t2); - t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - } - t = fold_build3_loc (loc, COND_EXPR, boolean_type_node, - t, t2, t1); - } - else if (fd->loops[i].cond_code == LT_EXPR) - { - if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) - t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a, - fd->loops[i].n1); - else - t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a, - fd->loops[i].n2); - } - else if 
(OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) - t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a, - fd->loops[i].n2); - else - t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a, - fd->loops[i].n1); - } - if (cond) - cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t); - else - cond = t; - - off = fold_convert_loc (loc, itype, off); - - if (step - || (fd->loops[i].cond_code == LT_EXPR - ? !integer_onep (fd->loops[i].step) - : !integer_minus_onep (fd->loops[i].step))) - { - if (step == NULL_TREE - && TYPE_UNSIGNED (itype) - && fd->loops[i].cond_code == GT_EXPR) - t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off, - fold_build1_loc (loc, NEGATE_EXPR, itype, - s)); - else - t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, - orig_off ? orig_off : off, s); - t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t, - build_int_cst (itype, 0)); - if (integer_zerop (t) && !warned_step) - { - warning_at (loc, 0, "%<depend(sink)%> refers to iteration never " - "in the iteration space"); - warned_step = true; - } - cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, - cond, t); - } - - if (i <= fd->collapse - 1 && fd->collapse > 1) - t = fd->loop.v; - else if (counts[i]) - t = counts[i]; - else - { - t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v), - fd->loops[i].v, fd->loops[i].n1); - t = fold_convert_loc (loc, fd->iter_type, t); - } - if (step) - /* We have divided off by step already earlier. 
*/; - else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR) - off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, - fold_build1_loc (loc, NEGATE_EXPR, itype, - s)); - else - off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s); - if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) - off = fold_build1_loc (loc, NEGATE_EXPR, itype, off); - off = fold_convert_loc (loc, fd->iter_type, off); - if (i <= fd->collapse - 1 && fd->collapse > 1) - { - if (i) - off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff, - off); - if (i < fd->collapse - 1) - { - coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off, - counts[i]); - continue; - } - } - off = unshare_expr (off); - t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off); - t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, - true, GSI_SAME_STMT); - args.safe_push (t); - } - gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args); - gimple_set_location (g, loc); - gsi_insert_before (&gsi2, g, GSI_SAME_STMT); - - cond = unshare_expr (cond); - cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false, - GSI_CONTINUE_LINKING); - gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT); - edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE); - e3->probability = REG_BR_PROB_BASE / 8; - e1->probability = REG_BR_PROB_BASE - e3->probability; - e1->flags = EDGE_TRUE_VALUE; - set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src); - - *gsi = gsi_after_labels (e2->dest); -} - -/* Expand all #pragma omp ordered depend(source) and - #pragma omp ordered depend(sink:...) constructs in the current - #pragma omp for ordered(n) region. 
*/ - -static void -expand_omp_ordered_source_sink (struct omp_region *region, - struct omp_for_data *fd, tree *counts, - basic_block cont_bb) -{ - struct omp_region *inner; - int i; - for (i = fd->collapse - 1; i < fd->ordered; i++) - if (i == fd->collapse - 1 && fd->collapse > 1) - counts[i] = NULL_TREE; - else if (i >= fd->collapse && !cont_bb) - counts[i] = build_zero_cst (fd->iter_type); - else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)) - && integer_onep (fd->loops[i].step)) - counts[i] = NULL_TREE; - else - counts[i] = create_tmp_var (fd->iter_type, ".orditer"); - tree atype - = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1); - counts[fd->ordered] = create_tmp_var (atype, ".orditera"); - TREE_ADDRESSABLE (counts[fd->ordered]) = 1; - - for (inner = region->inner; inner; inner = inner->next) - if (inner->type == GIMPLE_OMP_ORDERED) - { - gomp_ordered *ord_stmt = inner->ord_stmt; - gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt); - location_t loc = gimple_location (ord_stmt); - tree c; - for (c = gimple_omp_ordered_clauses (ord_stmt); - c; c = OMP_CLAUSE_CHAIN (c)) - if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE) - break; - if (c) - expand_omp_ordered_source (&gsi, fd, counts, loc); - for (c = gimple_omp_ordered_clauses (ord_stmt); - c; c = OMP_CLAUSE_CHAIN (c)) - if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK) - expand_omp_ordered_sink (&gsi, fd, counts, c, loc); - gsi_remove (&gsi, true); - } -} - -/* Wrap the body into fd->ordered - fd->collapse loops that aren't - collapsed. 
*/ - -static basic_block -expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts, - basic_block cont_bb, basic_block body_bb, - bool ordered_lastprivate) -{ - if (fd->ordered == fd->collapse) - return cont_bb; - - if (!cont_bb) - { - gimple_stmt_iterator gsi = gsi_after_labels (body_bb); - for (int i = fd->collapse; i < fd->ordered; i++) - { - tree type = TREE_TYPE (fd->loops[i].v); - tree n1 = fold_convert (type, fd->loops[i].n1); - expand_omp_build_assign (&gsi, fd->loops[i].v, n1); - tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], - size_int (i - fd->collapse + 1), - NULL_TREE, NULL_TREE); - expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type)); - } - return NULL; - } - - for (int i = fd->ordered - 1; i >= fd->collapse; i--) - { - tree t, type = TREE_TYPE (fd->loops[i].v); - gimple_stmt_iterator gsi = gsi_after_labels (body_bb); - expand_omp_build_assign (&gsi, fd->loops[i].v, - fold_convert (type, fd->loops[i].n1)); - if (counts[i]) - expand_omp_build_assign (&gsi, counts[i], - build_zero_cst (fd->iter_type)); - tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], - size_int (i - fd->collapse + 1), - NULL_TREE, NULL_TREE); - expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type)); - if (!gsi_end_p (gsi)) - gsi_prev (&gsi); - else - gsi = gsi_last_bb (body_bb); - edge e1 = split_block (body_bb, gsi_stmt (gsi)); - basic_block new_body = e1->dest; - if (body_bb == cont_bb) - cont_bb = new_body; - edge e2 = NULL; - basic_block new_header; - if (EDGE_COUNT (cont_bb->preds) > 0) - { - gsi = gsi_last_bb (cont_bb); - if (POINTER_TYPE_P (type)) - t = fold_build_pointer_plus (fd->loops[i].v, - fold_convert (sizetype, - fd->loops[i].step)); - else - t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v, - fold_convert (type, fd->loops[i].step)); - expand_omp_build_assign (&gsi, fd->loops[i].v, t); - if (counts[i]) - { - t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i], - build_int_cst (fd->iter_type, 
1)); - expand_omp_build_assign (&gsi, counts[i], t); - t = counts[i]; - } - else - { - t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v), - fd->loops[i].v, fd->loops[i].n1); - t = fold_convert (fd->iter_type, t); - t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - true, GSI_SAME_STMT); - } - aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], - size_int (i - fd->collapse + 1), - NULL_TREE, NULL_TREE); - expand_omp_build_assign (&gsi, aref, t); - gsi_prev (&gsi); - e2 = split_block (cont_bb, gsi_stmt (gsi)); - new_header = e2->dest; - } - else - new_header = cont_bb; - gsi = gsi_after_labels (new_header); - tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE, - true, GSI_SAME_STMT); - tree n2 - = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2), - true, NULL_TREE, true, GSI_SAME_STMT); - t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2); - gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT); - edge e3 = split_block (new_header, gsi_stmt (gsi)); - cont_bb = e3->dest; - remove_edge (e1); - make_edge (body_bb, new_header, EDGE_FALLTHRU); - e3->flags = EDGE_FALSE_VALUE; - e3->probability = REG_BR_PROB_BASE / 8; - e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE); - e1->probability = REG_BR_PROB_BASE - e3->probability; - - set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb); - set_immediate_dominator (CDI_DOMINATORS, new_body, new_header); - - if (e2) - { - struct loop *loop = alloc_loop (); - loop->header = new_header; - loop->latch = e2->src; - add_loop (loop, body_bb->loop_father); - } - } - - /* If there are any lastprivate clauses and it is possible some loops - might have zero iterations, ensure all the decls are initialized, - otherwise we could crash evaluating C++ class iterators with lastprivate - clauses. 
*/ - bool need_inits = false; - for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++) - if (need_inits) - { - tree type = TREE_TYPE (fd->loops[i].v); - gimple_stmt_iterator gsi = gsi_after_labels (body_bb); - expand_omp_build_assign (&gsi, fd->loops[i].v, - fold_convert (type, fd->loops[i].n1)); - } - else - { - tree type = TREE_TYPE (fd->loops[i].v); - tree this_cond = fold_build2 (fd->loops[i].cond_code, - boolean_type_node, - fold_convert (type, fd->loops[i].n1), - fold_convert (type, fd->loops[i].n2)); - if (!integer_onep (this_cond)) - need_inits = true; - } - - return cont_bb; -} - - -/* A subroutine of expand_omp_for. Generate code for a parallel - loop with any schedule. Given parameters: - - for (V = N1; V cond N2; V += STEP) BODY; - - where COND is "<" or ">", we generate pseudocode - - more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0); - if (more) goto L0; else goto L3; - L0: - V = istart0; - iend = iend0; - L1: - BODY; - V += STEP; - if (V cond iend) goto L1; else goto L2; - L2: - if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3; - L3: - - If this is a combined omp parallel loop, instead of the call to - GOMP_loop_foo_start, we call GOMP_loop_foo_next. - If this is gimple_omp_for_combined_p loop, then instead of assigning - V and iend in L0 we assign the first two _looptemp_ clause decls of the - inner GIMPLE_OMP_FOR and V += STEP; and - if (V cond iend) goto L1; else goto L2; are removed. 
- - For collapsed loops, given parameters: - collapse(3) - for (V1 = N11; V1 cond1 N12; V1 += STEP1) - for (V2 = N21; V2 cond2 N22; V2 += STEP2) - for (V3 = N31; V3 cond3 N32; V3 += STEP3) - BODY; - - we generate pseudocode - - if (__builtin_expect (N32 cond3 N31, 0)) goto Z0; - if (cond3 is <) - adj = STEP3 - 1; - else - adj = STEP3 + 1; - count3 = (adj + N32 - N31) / STEP3; - if (__builtin_expect (N22 cond2 N21, 0)) goto Z0; - if (cond2 is <) - adj = STEP2 - 1; - else - adj = STEP2 + 1; - count2 = (adj + N22 - N21) / STEP2; - if (__builtin_expect (N12 cond1 N11, 0)) goto Z0; - if (cond1 is <) - adj = STEP1 - 1; - else - adj = STEP1 + 1; - count1 = (adj + N12 - N11) / STEP1; - count = count1 * count2 * count3; - goto Z1; - Z0: - count = 0; - Z1: - more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0); - if (more) goto L0; else goto L3; - L0: - V = istart0; - T = V; - V3 = N31 + (T % count3) * STEP3; - T = T / count3; - V2 = N21 + (T % count2) * STEP2; - T = T / count2; - V1 = N11 + T * STEP1; - iend = iend0; - L1: - BODY; - V += 1; - if (V < iend) goto L10; else goto L2; - L10: - V3 += STEP3; - if (V3 cond3 N32) goto L1; else goto L11; - L11: - V3 = N31; - V2 += STEP2; - if (V2 cond2 N22) goto L1; else goto L12; - L12: - V2 = N21; - V1 += STEP1; - goto L1; - L2: - if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3; - L3: - - */ - -static void -expand_omp_for_generic (struct omp_region *region, - struct omp_for_data *fd, - enum built_in_function start_fn, - enum built_in_function next_fn, - gimple *inner_stmt) -{ - tree type, istart0, iend0, iend; - tree t, vmain, vback, bias = NULL_TREE; - basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb; - basic_block l2_bb = NULL, l3_bb = NULL; - gimple_stmt_iterator gsi; - gassign *assign_stmt; - bool in_combined_parallel = is_combined_parallel (region); - bool broken_loop = region->cont == NULL; - edge e, ne; - tree *counts = NULL; - int i; - bool ordered_lastprivate = false; - - 
gcc_assert (!broken_loop || !in_combined_parallel); - gcc_assert (fd->iter_type == long_integer_type_node - || !in_combined_parallel); - - entry_bb = region->entry; - cont_bb = region->cont; - collapse_bb = NULL; - gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); - gcc_assert (broken_loop - || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest); - l0_bb = split_edge (FALLTHRU_EDGE (entry_bb)); - l1_bb = single_succ (l0_bb); - if (!broken_loop) - { - l2_bb = create_empty_bb (cont_bb); - gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb - || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest - == l1_bb)); - gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); - } - else - l2_bb = NULL; - l3_bb = BRANCH_EDGE (entry_bb)->dest; - exit_bb = region->exit; - - gsi = gsi_last_bb (entry_bb); - - gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); - if (fd->ordered - && find_omp_clause (gimple_omp_for_clauses (gsi_stmt (gsi)), - OMP_CLAUSE_LASTPRIVATE)) - ordered_lastprivate = false; - if (fd->collapse > 1 || fd->ordered) - { - int first_zero_iter1 = -1, first_zero_iter2 = -1; - basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL; - - counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse); - expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, - zero_iter1_bb, first_zero_iter1, - zero_iter2_bb, first_zero_iter2, l2_dom_bb); - - if (zero_iter1_bb) - { - /* Some counts[i] vars might be uninitialized if - some loop has zero iterations. But the body shouldn't - be executed in that case, so just avoid uninit warnings. */ - for (i = first_zero_iter1; - i < (fd->ordered ? 
fd->ordered : fd->collapse); i++) - if (SSA_VAR_P (counts[i])) - TREE_NO_WARNING (counts[i]) = 1; - gsi_prev (&gsi); - e = split_block (entry_bb, gsi_stmt (gsi)); - entry_bb = e->dest; - make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU); - gsi = gsi_last_bb (entry_bb); - set_immediate_dominator (CDI_DOMINATORS, entry_bb, - get_immediate_dominator (CDI_DOMINATORS, - zero_iter1_bb)); - } - if (zero_iter2_bb) - { - /* Some counts[i] vars might be uninitialized if - some loop has zero iterations. But the body shouldn't - be executed in that case, so just avoid uninit warnings. */ - for (i = first_zero_iter2; i < fd->ordered; i++) - if (SSA_VAR_P (counts[i])) - TREE_NO_WARNING (counts[i]) = 1; - if (zero_iter1_bb) - make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU); - else - { - gsi_prev (&gsi); - e = split_block (entry_bb, gsi_stmt (gsi)); - entry_bb = e->dest; - make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU); - gsi = gsi_last_bb (entry_bb); - set_immediate_dominator (CDI_DOMINATORS, entry_bb, - get_immediate_dominator - (CDI_DOMINATORS, zero_iter2_bb)); - } - } - if (fd->collapse == 1) - { - counts[0] = fd->loop.n2; - fd->loop = fd->loops[0]; - } - } - - type = TREE_TYPE (fd->loop.v); - istart0 = create_tmp_var (fd->iter_type, ".istart0"); - iend0 = create_tmp_var (fd->iter_type, ".iend0"); - TREE_ADDRESSABLE (istart0) = 1; - TREE_ADDRESSABLE (iend0) = 1; - - /* See if we need to bias by LLONG_MIN. 
*/ - if (fd->iter_type == long_long_unsigned_type_node - && TREE_CODE (type) == INTEGER_TYPE - && !TYPE_UNSIGNED (type) - && fd->ordered == 0) - { - tree n1, n2; - - if (fd->loop.cond_code == LT_EXPR) - { - n1 = fd->loop.n1; - n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step); - } - else - { - n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step); - n2 = fd->loop.n1; - } - if (TREE_CODE (n1) != INTEGER_CST - || TREE_CODE (n2) != INTEGER_CST - || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0))) - bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type)); - } - - gimple_stmt_iterator gsif = gsi; - gsi_prev (&gsif); - - tree arr = NULL_TREE; - if (in_combined_parallel) - { - gcc_assert (fd->ordered == 0); - /* In a combined parallel loop, emit a call to - GOMP_loop_foo_next. */ - t = build_call_expr (builtin_decl_explicit (next_fn), 2, - build_fold_addr_expr (istart0), - build_fold_addr_expr (iend0)); - } - else - { - tree t0, t1, t2, t3, t4; - /* If this is not a combined parallel loop, emit a call to - GOMP_loop_foo_start in ENTRY_BB. 
*/ - t4 = build_fold_addr_expr (iend0); - t3 = build_fold_addr_expr (istart0); - if (fd->ordered) - { - t0 = build_int_cst (unsigned_type_node, - fd->ordered - fd->collapse + 1); - arr = create_tmp_var (build_array_type_nelts (fd->iter_type, - fd->ordered - - fd->collapse + 1), - ".omp_counts"); - DECL_NAMELESS (arr) = 1; - TREE_ADDRESSABLE (arr) = 1; - TREE_STATIC (arr) = 1; - vec<constructor_elt, va_gc> *v; - vec_alloc (v, fd->ordered - fd->collapse + 1); - int idx; - - for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++) - { - tree c; - if (idx == 0 && fd->collapse > 1) - c = fd->loop.n2; - else - c = counts[idx + fd->collapse - 1]; - tree purpose = size_int (idx); - CONSTRUCTOR_APPEND_ELT (v, purpose, c); - if (TREE_CODE (c) != INTEGER_CST) - TREE_STATIC (arr) = 0; - } - - DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v); - if (!TREE_STATIC (arr)) - force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR, - void_type_node, arr), - true, NULL_TREE, true, GSI_SAME_STMT); - t1 = build_fold_addr_expr (arr); - t2 = NULL_TREE; - } - else - { - t2 = fold_convert (fd->iter_type, fd->loop.step); - t1 = fd->loop.n2; - t0 = fd->loop.n1; - if (gimple_omp_for_combined_into_p (fd->for_stmt)) - { - tree innerc - = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - t0 = OMP_CLAUSE_DECL (innerc); - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - t1 = OMP_CLAUSE_DECL (innerc); - } - if (POINTER_TYPE_P (TREE_TYPE (t0)) - && TYPE_PRECISION (TREE_TYPE (t0)) - != TYPE_PRECISION (fd->iter_type)) - { - /* Avoid casting pointers to integer of a different size. 
*/ - tree itype = signed_type_for (type); - t1 = fold_convert (fd->iter_type, fold_convert (itype, t1)); - t0 = fold_convert (fd->iter_type, fold_convert (itype, t0)); - } - else - { - t1 = fold_convert (fd->iter_type, t1); - t0 = fold_convert (fd->iter_type, t0); - } - if (bias) - { - t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias); - t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias); - } - } - if (fd->iter_type == long_integer_type_node || fd->ordered) - { - if (fd->chunk_size) - { - t = fold_convert (fd->iter_type, fd->chunk_size); - t = omp_adjust_chunk_size (t, fd->simd_schedule); - if (fd->ordered) - t = build_call_expr (builtin_decl_explicit (start_fn), - 5, t0, t1, t, t3, t4); - else - t = build_call_expr (builtin_decl_explicit (start_fn), - 6, t0, t1, t2, t, t3, t4); - } - else if (fd->ordered) - t = build_call_expr (builtin_decl_explicit (start_fn), - 4, t0, t1, t3, t4); - else - t = build_call_expr (builtin_decl_explicit (start_fn), - 5, t0, t1, t2, t3, t4); - } - else - { - tree t5; - tree c_bool_type; - tree bfn_decl; - - /* The GOMP_loop_ull_*start functions have additional boolean - argument, true for < loops and false for > loops. - In Fortran, the C bool type can be different from - boolean_type_node. */ - bfn_decl = builtin_decl_explicit (start_fn); - c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl)); - t5 = build_int_cst (c_bool_type, - fd->loop.cond_code == LT_EXPR ? 
1 : 0); - if (fd->chunk_size) - { - tree bfn_decl = builtin_decl_explicit (start_fn); - t = fold_convert (fd->iter_type, fd->chunk_size); - t = omp_adjust_chunk_size (t, fd->simd_schedule); - t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4); - } - else - t = build_call_expr (builtin_decl_explicit (start_fn), - 6, t5, t0, t1, t2, t3, t4); - } - } - if (TREE_TYPE (t) != boolean_type_node) - t = fold_build2 (NE_EXPR, boolean_type_node, - t, build_int_cst (TREE_TYPE (t), 0)); - t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - true, GSI_SAME_STMT); - if (arr && !TREE_STATIC (arr)) - { - tree clobber = build_constructor (TREE_TYPE (arr), NULL); - TREE_THIS_VOLATILE (clobber) = 1; - gsi_insert_before (&gsi, gimple_build_assign (arr, clobber), - GSI_SAME_STMT); - } - gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); - - /* Remove the GIMPLE_OMP_FOR statement. */ - gsi_remove (&gsi, true); - - if (gsi_end_p (gsif)) - gsif = gsi_after_labels (gsi_bb (gsif)); - gsi_next (&gsif); - - /* Iteration setup for sequential loop goes in L0_BB. 
*/ - tree startvar = fd->loop.v; - tree endvar = NULL_TREE; - - if (gimple_omp_for_combined_p (fd->for_stmt)) - { - gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR - && gimple_omp_for_kind (inner_stmt) - == GF_OMP_FOR_KIND_SIMD); - tree innerc = find_omp_clause (gimple_omp_for_clauses (inner_stmt), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - startvar = OMP_CLAUSE_DECL (innerc); - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - endvar = OMP_CLAUSE_DECL (innerc); - } - - gsi = gsi_start_bb (l0_bb); - t = istart0; - if (fd->ordered && fd->collapse == 1) - t = fold_build2 (MULT_EXPR, fd->iter_type, t, - fold_convert (fd->iter_type, fd->loop.step)); - else if (bias) - t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias); - if (fd->ordered && fd->collapse == 1) - { - if (POINTER_TYPE_P (TREE_TYPE (startvar))) - t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar), - fd->loop.n1, fold_convert (sizetype, t)); - else - { - t = fold_convert (TREE_TYPE (startvar), t); - t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar), - fd->loop.n1, t); - } - } - else - { - if (POINTER_TYPE_P (TREE_TYPE (startvar))) - t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t); - t = fold_convert (TREE_TYPE (startvar), t); - } - t = force_gimple_operand_gsi (&gsi, t, - DECL_P (startvar) - && TREE_ADDRESSABLE (startvar), - NULL_TREE, false, GSI_CONTINUE_LINKING); - assign_stmt = gimple_build_assign (startvar, t); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - - t = iend0; - if (fd->ordered && fd->collapse == 1) - t = fold_build2 (MULT_EXPR, fd->iter_type, t, - fold_convert (fd->iter_type, fd->loop.step)); - else if (bias) - t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias); - if (fd->ordered && fd->collapse == 1) - { - if (POINTER_TYPE_P (TREE_TYPE (startvar))) - t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar), - fd->loop.n1, fold_convert (sizetype, t)); - else - { - t = fold_convert 
(TREE_TYPE (startvar), t); - t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar), - fd->loop.n1, t); - } - } - else - { - if (POINTER_TYPE_P (TREE_TYPE (startvar))) - t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t); - t = fold_convert (TREE_TYPE (startvar), t); - } - iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - if (endvar) - { - assign_stmt = gimple_build_assign (endvar, iend); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend))) - assign_stmt = gimple_build_assign (fd->loop.v, iend); - else - assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - } - /* Handle linear clause adjustments. */ - tree itercnt = NULL_TREE; - if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR) - for (tree c = gimple_omp_for_clauses (fd->for_stmt); - c; c = OMP_CLAUSE_CHAIN (c)) - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR - && !OMP_CLAUSE_LINEAR_NO_COPYIN (c)) - { - tree d = OMP_CLAUSE_DECL (c); - bool is_ref = is_reference (d); - tree t = d, a, dest; - if (is_ref) - t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t); - tree type = TREE_TYPE (t); - if (POINTER_TYPE_P (type)) - type = sizetype; - dest = unshare_expr (t); - tree v = create_tmp_var (TREE_TYPE (t), NULL); - expand_omp_build_assign (&gsif, v, t); - if (itercnt == NULL_TREE) - { - itercnt = startvar; - tree n1 = fd->loop.n1; - if (POINTER_TYPE_P (TREE_TYPE (itercnt))) - { - itercnt - = fold_convert (signed_type_for (TREE_TYPE (itercnt)), - itercnt); - n1 = fold_convert (TREE_TYPE (itercnt), n1); - } - itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt), - itercnt, n1); - itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt), - itercnt, fd->loop.step); - itercnt = force_gimple_operand_gsi (&gsi, itercnt, true, - NULL_TREE, false, - GSI_CONTINUE_LINKING); - } - a = fold_build2 (MULT_EXPR, 
type, - fold_convert (type, itercnt), - fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c))); - t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR - : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a); - t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - assign_stmt = gimple_build_assign (dest, t); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - } - if (fd->collapse > 1) - expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); - - if (fd->ordered) - { - /* Until now, counts array contained number of iterations or - variable containing it for ith loop. From now on, we need - those counts only for collapsed loops, and only for the 2nd - till the last collapsed one. Move those one element earlier, - we'll use counts[fd->collapse - 1] for the first source/sink - iteration counter and so on and counts[fd->ordered] - as the array holding the current counter values for - depend(source). */ - if (fd->collapse > 1) - memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0])); - if (broken_loop) - { - int i; - for (i = fd->collapse; i < fd->ordered; i++) - { - tree type = TREE_TYPE (fd->loops[i].v); - tree this_cond - = fold_build2 (fd->loops[i].cond_code, boolean_type_node, - fold_convert (type, fd->loops[i].n1), - fold_convert (type, fd->loops[i].n2)); - if (!integer_onep (this_cond)) - break; - } - if (i < fd->ordered) - { - cont_bb - = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb); - add_bb_to_loop (cont_bb, l1_bb->loop_father); - gimple_stmt_iterator gsi = gsi_after_labels (cont_bb); - gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v); - gsi_insert_before (&gsi, g, GSI_SAME_STMT); - make_edge (cont_bb, l3_bb, EDGE_FALLTHRU); - make_edge (cont_bb, l1_bb, 0); - l2_bb = create_empty_bb (cont_bb); - broken_loop = false; - } - } - expand_omp_ordered_source_sink (region, fd, counts, cont_bb); - cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb, - ordered_lastprivate); - if 
(counts[fd->collapse - 1]) - { - gcc_assert (fd->collapse == 1); - gsi = gsi_last_bb (l0_bb); - expand_omp_build_assign (&gsi, counts[fd->collapse - 1], - istart0, true); - gsi = gsi_last_bb (cont_bb); - t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1], - build_int_cst (fd->iter_type, 1)); - expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t); - tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], - size_zero_node, NULL_TREE, NULL_TREE); - expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]); - t = counts[fd->collapse - 1]; - } - else if (fd->collapse > 1) - t = fd->loop.v; - else - { - t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v), - fd->loops[0].v, fd->loops[0].n1); - t = fold_convert (fd->iter_type, t); - } - gsi = gsi_last_bb (l0_bb); - tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], - size_zero_node, NULL_TREE, NULL_TREE); - t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - expand_omp_build_assign (&gsi, aref, t, true); - } - - if (!broken_loop) - { - /* Code to control the increment and predicate for the sequential - loop goes in the CONT_BB. 
*/ - gsi = gsi_last_bb (cont_bb); - gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); - gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE); - vmain = gimple_omp_continue_control_use (cont_stmt); - vback = gimple_omp_continue_control_def (cont_stmt); - - if (!gimple_omp_for_combined_p (fd->for_stmt)) - { - if (POINTER_TYPE_P (type)) - t = fold_build_pointer_plus (vmain, fd->loop.step); - else - t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step); - t = force_gimple_operand_gsi (&gsi, t, - DECL_P (vback) - && TREE_ADDRESSABLE (vback), - NULL_TREE, true, GSI_SAME_STMT); - assign_stmt = gimple_build_assign (vback, t); - gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); - - if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE) - { - if (fd->collapse > 1) - t = fd->loop.v; - else - { - t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v), - fd->loops[0].v, fd->loops[0].n1); - t = fold_convert (fd->iter_type, t); - } - tree aref = build4 (ARRAY_REF, fd->iter_type, - counts[fd->ordered], size_zero_node, - NULL_TREE, NULL_TREE); - t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - true, GSI_SAME_STMT); - expand_omp_build_assign (&gsi, aref, t); - } - - t = build2 (fd->loop.cond_code, boolean_type_node, - DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback, - iend); - gcond *cond_stmt = gimple_build_cond_empty (t); - gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); - } - - /* Remove GIMPLE_OMP_CONTINUE. */ - gsi_remove (&gsi, true); - - if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) - collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb); - - /* Emit code to get the next parallel iteration in L2_BB. 
*/ - gsi = gsi_start_bb (l2_bb); - - t = build_call_expr (builtin_decl_explicit (next_fn), 2, - build_fold_addr_expr (istart0), - build_fold_addr_expr (iend0)); - t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - if (TREE_TYPE (t) != boolean_type_node) - t = fold_build2 (NE_EXPR, boolean_type_node, - t, build_int_cst (TREE_TYPE (t), 0)); - gcond *cond_stmt = gimple_build_cond_empty (t); - gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING); - } - - /* Add the loop cleanup function. */ - gsi = gsi_last_bb (exit_bb); - if (gimple_omp_return_nowait_p (gsi_stmt (gsi))) - t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT); - else if (gimple_omp_return_lhs (gsi_stmt (gsi))) - t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL); - else - t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END); - gcall *call_stmt = gimple_build_call (t, 0); - if (gimple_omp_return_lhs (gsi_stmt (gsi))) - gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi))); - gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT); - if (fd->ordered) - { - tree arr = counts[fd->ordered]; - tree clobber = build_constructor (TREE_TYPE (arr), NULL); - TREE_THIS_VOLATILE (clobber) = 1; - gsi_insert_after (&gsi, gimple_build_assign (arr, clobber), - GSI_SAME_STMT); - } - gsi_remove (&gsi, true); - - /* Connect the new blocks. 
*/ - find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE; - find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE; - - if (!broken_loop) - { - gimple_seq phis; - - e = find_edge (cont_bb, l3_bb); - ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE); - - phis = phi_nodes (l3_bb); - for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi)) - { - gimple *phi = gsi_stmt (gsi); - SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne), - PHI_ARG_DEF_FROM_EDGE (phi, e)); - } - remove_edge (e); - - make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE); - e = find_edge (cont_bb, l1_bb); - if (e == NULL) - { - e = BRANCH_EDGE (cont_bb); - gcc_assert (single_succ (e->dest) == l1_bb); - } - if (gimple_omp_for_combined_p (fd->for_stmt)) - { - remove_edge (e); - e = NULL; - } - else if (fd->collapse > 1) - { - remove_edge (e); - e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE); - } - else - e->flags = EDGE_TRUE_VALUE; - if (e) - { - e->probability = REG_BR_PROB_BASE * 7 / 8; - find_edge (cont_bb, l2_bb)->probability = REG_BR_PROB_BASE / 8; - } - else - { - e = find_edge (cont_bb, l2_bb); - e->flags = EDGE_FALLTHRU; - } - make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE); - - if (gimple_in_ssa_p (cfun)) - { - /* Add phis to the outer loop that connect to the phis in the inner, - original loop, and move the loop entry value of the inner phi to - the loop entry value of the outer phi. 
*/ - gphi_iterator psi; - for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi)) - { - source_location locus; - gphi *nphi; - gphi *exit_phi = psi.phi (); - - edge l2_to_l3 = find_edge (l2_bb, l3_bb); - tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3); - - basic_block latch = BRANCH_EDGE (cont_bb)->dest; - edge latch_to_l1 = find_edge (latch, l1_bb); - gphi *inner_phi - = find_phi_with_arg_on_edge (exit_res, latch_to_l1); - - tree t = gimple_phi_result (exit_phi); - tree new_res = copy_ssa_name (t, NULL); - nphi = create_phi_node (new_res, l0_bb); - - edge l0_to_l1 = find_edge (l0_bb, l1_bb); - t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1); - locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1); - edge entry_to_l0 = find_edge (entry_bb, l0_bb); - add_phi_arg (nphi, t, entry_to_l0, locus); - - edge l2_to_l0 = find_edge (l2_bb, l0_bb); - add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION); - - add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION); - }; - } - - set_immediate_dominator (CDI_DOMINATORS, l2_bb, - recompute_dominator (CDI_DOMINATORS, l2_bb)); - set_immediate_dominator (CDI_DOMINATORS, l3_bb, - recompute_dominator (CDI_DOMINATORS, l3_bb)); - set_immediate_dominator (CDI_DOMINATORS, l0_bb, - recompute_dominator (CDI_DOMINATORS, l0_bb)); - set_immediate_dominator (CDI_DOMINATORS, l1_bb, - recompute_dominator (CDI_DOMINATORS, l1_bb)); - - /* We enter expand_omp_for_generic with a loop. This original loop may - have its own loop struct, or it may be part of an outer loop struct - (which may be the fake loop). */ - struct loop *outer_loop = entry_bb->loop_father; - bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop; - - add_bb_to_loop (l2_bb, outer_loop); - - /* We've added a new loop around the original loop. Allocate the - corresponding loop struct. 
*/ - struct loop *new_loop = alloc_loop (); - new_loop->header = l0_bb; - new_loop->latch = l2_bb; - add_loop (new_loop, outer_loop); - - /* Allocate a loop structure for the original loop unless we already - had one. */ - if (!orig_loop_has_loop_struct - && !gimple_omp_for_combined_p (fd->for_stmt)) - { - struct loop *orig_loop = alloc_loop (); - orig_loop->header = l1_bb; - /* The loop may have multiple latches. */ - add_loop (orig_loop, new_loop); - } - } -} - - -/* A subroutine of expand_omp_for. Generate code for a parallel - loop with static schedule and no specified chunk size. Given - parameters: - - for (V = N1; V cond N2; V += STEP) BODY; - - where COND is "<" or ">", we generate pseudocode - - if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2; - if (cond is <) - adj = STEP - 1; - else - adj = STEP + 1; - if ((__typeof (V)) -1 > 0 && cond is >) - n = -(adj + N2 - N1) / -STEP; - else - n = (adj + N2 - N1) / STEP; - q = n / nthreads; - tt = n % nthreads; - if (threadid < tt) goto L3; else goto L4; - L3: - tt = 0; - q = q + 1; - L4: - s0 = q * threadid + tt; - e0 = s0 + q; - V = s0 * STEP + N1; - if (s0 >= e0) goto L2; else goto L0; - L0: - e = e0 * STEP + N1; - L1: - BODY; - V += STEP; - if (V cond e) goto L1; - L2: -*/ - -static void -expand_omp_for_static_nochunk (struct omp_region *region, - struct omp_for_data *fd, - gimple *inner_stmt) -{ - tree n, q, s0, e0, e, t, tt, nthreads, threadid; - tree type, itype, vmain, vback; - basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb; - basic_block body_bb, cont_bb, collapse_bb = NULL; - basic_block fin_bb; - gimple_stmt_iterator gsi; - edge ep; - bool broken_loop = region->cont == NULL; - tree *counts = NULL; - tree n1, n2, step; - - itype = type = TREE_TYPE (fd->loop.v); - if (POINTER_TYPE_P (type)) - itype = signed_type_for (type); - - entry_bb = region->entry; - cont_bb = region->cont; - gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); - fin_bb = BRANCH_EDGE (entry_bb)->dest; - gcc_assert 
(broken_loop - || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest)); - seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb)); - body_bb = single_succ (seq_start_bb); - if (!broken_loop) - { - gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb - || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb); - gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); - } - exit_bb = region->exit; - - /* Iteration space partitioning goes in ENTRY_BB. */ - gsi = gsi_last_bb (entry_bb); - gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); - - if (fd->collapse > 1) - { - int first_zero_iter = -1, dummy = -1; - basic_block l2_dom_bb = NULL, dummy_bb = NULL; - - counts = XALLOCAVEC (tree, fd->collapse); - expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, - fin_bb, first_zero_iter, - dummy_bb, dummy, l2_dom_bb); - t = NULL_TREE; - } - else if (gimple_omp_for_combined_into_p (fd->for_stmt)) - t = integer_one_node; - else - t = fold_binary (fd->loop.cond_code, boolean_type_node, - fold_convert (type, fd->loop.n1), - fold_convert (type, fd->loop.n2)); - if (fd->collapse == 1 - && TYPE_UNSIGNED (type) - && (t == NULL_TREE || !integer_onep (t))) - { - n1 = fold_convert (type, unshare_expr (fd->loop.n1)); - n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE, - true, GSI_SAME_STMT); - n2 = fold_convert (type, unshare_expr (fd->loop.n2)); - n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE, - true, GSI_SAME_STMT); - gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2, - NULL_TREE, NULL_TREE); - gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); - if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), - expand_omp_regimplify_p, NULL, NULL) - || walk_tree (gimple_cond_rhs_ptr (cond_stmt), - expand_omp_regimplify_p, NULL, NULL)) - { - gsi = gsi_for_stmt (cond_stmt); - gimple_regimplify_operands (cond_stmt, &gsi); - } - ep = split_block (entry_bb, cond_stmt); - ep->flags = EDGE_TRUE_VALUE; - entry_bb = ep->dest; - ep->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 
2000 - 1); - ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE); - ep->probability = REG_BR_PROB_BASE / 2000 - 1; - if (gimple_in_ssa_p (cfun)) - { - int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx; - for (gphi_iterator gpi = gsi_start_phis (fin_bb); - !gsi_end_p (gpi); gsi_next (&gpi)) - { - gphi *phi = gpi.phi (); - add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx), - ep, UNKNOWN_LOCATION); - } - } - gsi = gsi_last_bb (entry_bb); - } - - switch (gimple_omp_for_kind (fd->for_stmt)) - { - case GF_OMP_FOR_KIND_FOR: - nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); - threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); - break; - case GF_OMP_FOR_KIND_DISTRIBUTE: - nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS); - threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM); - break; - default: - gcc_unreachable (); - } - nthreads = build_call_expr (nthreads, 0); - nthreads = fold_convert (itype, nthreads); - nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE, - true, GSI_SAME_STMT); - threadid = build_call_expr (threadid, 0); - threadid = fold_convert (itype, threadid); - threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE, - true, GSI_SAME_STMT); - - n1 = fd->loop.n1; - n2 = fd->loop.n2; - step = fd->loop.step; - if (gimple_omp_for_combined_into_p (fd->for_stmt)) - { - tree innerc = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - n1 = OMP_CLAUSE_DECL (innerc); - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - n2 = OMP_CLAUSE_DECL (innerc); - } - n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1), - true, NULL_TREE, true, GSI_SAME_STMT); - n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2), - true, NULL_TREE, true, GSI_SAME_STMT); - step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step), - true, NULL_TREE, true, GSI_SAME_STMT); - - t = 
build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1)); - t = fold_build2 (PLUS_EXPR, itype, step, t); - t = fold_build2 (PLUS_EXPR, itype, t, n2); - t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1)); - if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR) - t = fold_build2 (TRUNC_DIV_EXPR, itype, - fold_build1 (NEGATE_EXPR, itype, t), - fold_build1 (NEGATE_EXPR, itype, step)); - else - t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); - t = fold_convert (itype, t); - n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT); - - q = create_tmp_reg (itype, "q"); - t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads); - t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT); - gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT); - - tt = create_tmp_reg (itype, "tt"); - t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads); - t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT); - gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT); - - t = build2 (LT_EXPR, boolean_type_node, threadid, tt); - gcond *cond_stmt = gimple_build_cond_empty (t); - gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); - - second_bb = split_block (entry_bb, cond_stmt)->dest; - gsi = gsi_last_bb (second_bb); - gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); - - gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)), - GSI_SAME_STMT); - gassign *assign_stmt - = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1)); - gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); - - third_bb = split_block (second_bb, assign_stmt)->dest; - gsi = gsi_last_bb (third_bb); - gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); - - t = build2 (MULT_EXPR, itype, q, threadid); - t = build2 (PLUS_EXPR, itype, t, tt); - s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT); - - t = fold_build2 (PLUS_EXPR, itype, 
s0, q); - e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT); - - t = build2 (GE_EXPR, boolean_type_node, s0, e0); - gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); - - /* Remove the GIMPLE_OMP_FOR statement. */ - gsi_remove (&gsi, true); - - /* Setup code for sequential iteration goes in SEQ_START_BB. */ - gsi = gsi_start_bb (seq_start_bb); - - tree startvar = fd->loop.v; - tree endvar = NULL_TREE; - - if (gimple_omp_for_combined_p (fd->for_stmt)) - { - tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL - ? gimple_omp_parallel_clauses (inner_stmt) - : gimple_omp_for_clauses (inner_stmt); - tree innerc = find_omp_clause (clauses, OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - startvar = OMP_CLAUSE_DECL (innerc); - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - endvar = OMP_CLAUSE_DECL (innerc); - if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST - && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE) - { - int i; - for (i = 1; i < fd->collapse; i++) - { - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - } - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), - OMP_CLAUSE__LOOPTEMP_); - if (innerc) - { - /* If needed (distribute parallel for with lastprivate), - propagate down the total number of iterations. 
*/ - tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)), - fd->loop.n2); - t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false, - GSI_CONTINUE_LINKING); - assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - } - } - } - t = fold_convert (itype, s0); - t = fold_build2 (MULT_EXPR, itype, t, step); - if (POINTER_TYPE_P (type)) - t = fold_build_pointer_plus (n1, t); - else - t = fold_build2 (PLUS_EXPR, type, t, n1); - t = fold_convert (TREE_TYPE (startvar), t); - t = force_gimple_operand_gsi (&gsi, t, - DECL_P (startvar) - && TREE_ADDRESSABLE (startvar), - NULL_TREE, false, GSI_CONTINUE_LINKING); - assign_stmt = gimple_build_assign (startvar, t); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - - t = fold_convert (itype, e0); - t = fold_build2 (MULT_EXPR, itype, t, step); - if (POINTER_TYPE_P (type)) - t = fold_build_pointer_plus (n1, t); - else - t = fold_build2 (PLUS_EXPR, type, t, n1); - t = fold_convert (TREE_TYPE (startvar), t); - e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - if (endvar) - { - assign_stmt = gimple_build_assign (endvar, e); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e))) - assign_stmt = gimple_build_assign (fd->loop.v, e); - else - assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - } - /* Handle linear clause adjustments. 
*/ - tree itercnt = NULL_TREE; - if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR) - for (tree c = gimple_omp_for_clauses (fd->for_stmt); - c; c = OMP_CLAUSE_CHAIN (c)) - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR - && !OMP_CLAUSE_LINEAR_NO_COPYIN (c)) - { - tree d = OMP_CLAUSE_DECL (c); - bool is_ref = is_reference (d); - tree t = d, a, dest; - if (is_ref) - t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t); - if (itercnt == NULL_TREE) - { - if (gimple_omp_for_combined_into_p (fd->for_stmt)) - { - itercnt = fold_build2 (MINUS_EXPR, itype, - fold_convert (itype, n1), - fold_convert (itype, fd->loop.n1)); - itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step); - itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0); - itercnt = force_gimple_operand_gsi (&gsi, itercnt, true, - NULL_TREE, false, - GSI_CONTINUE_LINKING); - } - else - itercnt = s0; - } - tree type = TREE_TYPE (t); - if (POINTER_TYPE_P (type)) - type = sizetype; - a = fold_build2 (MULT_EXPR, type, - fold_convert (type, itercnt), - fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c))); - dest = unshare_expr (t); - t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR - : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a); - t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - assign_stmt = gimple_build_assign (dest, t); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - } - if (fd->collapse > 1) - expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); - - if (!broken_loop) - { - /* The code controlling the sequential loop replaces the - GIMPLE_OMP_CONTINUE. 
*/ - gsi = gsi_last_bb (cont_bb); - gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); - gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE); - vmain = gimple_omp_continue_control_use (cont_stmt); - vback = gimple_omp_continue_control_def (cont_stmt); - - if (!gimple_omp_for_combined_p (fd->for_stmt)) - { - if (POINTER_TYPE_P (type)) - t = fold_build_pointer_plus (vmain, step); - else - t = fold_build2 (PLUS_EXPR, type, vmain, step); - t = force_gimple_operand_gsi (&gsi, t, - DECL_P (vback) - && TREE_ADDRESSABLE (vback), - NULL_TREE, true, GSI_SAME_STMT); - assign_stmt = gimple_build_assign (vback, t); - gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); - - t = build2 (fd->loop.cond_code, boolean_type_node, - DECL_P (vback) && TREE_ADDRESSABLE (vback) - ? t : vback, e); - gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); - } - - /* Remove the GIMPLE_OMP_CONTINUE statement. */ - gsi_remove (&gsi, true); - - if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) - collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb); - } - - /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */ - gsi = gsi_last_bb (exit_bb); - if (!gimple_omp_return_nowait_p (gsi_stmt (gsi))) - { - t = gimple_omp_return_lhs (gsi_stmt (gsi)); - gsi_insert_after (&gsi, build_omp_barrier (t), GSI_SAME_STMT); - } - gsi_remove (&gsi, true); - - /* Connect all the blocks. 
*/ - ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE); - ep->probability = REG_BR_PROB_BASE / 4 * 3; - ep = find_edge (entry_bb, second_bb); - ep->flags = EDGE_TRUE_VALUE; - ep->probability = REG_BR_PROB_BASE / 4; - find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE; - find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE; - - if (!broken_loop) - { - ep = find_edge (cont_bb, body_bb); - if (ep == NULL) - { - ep = BRANCH_EDGE (cont_bb); - gcc_assert (single_succ (ep->dest) == body_bb); - } - if (gimple_omp_for_combined_p (fd->for_stmt)) - { - remove_edge (ep); - ep = NULL; - } - else if (fd->collapse > 1) - { - remove_edge (ep); - ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE); - } - else - ep->flags = EDGE_TRUE_VALUE; - find_edge (cont_bb, fin_bb)->flags - = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU; - } - - set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb); - set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb); - set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb); - - set_immediate_dominator (CDI_DOMINATORS, body_bb, - recompute_dominator (CDI_DOMINATORS, body_bb)); - set_immediate_dominator (CDI_DOMINATORS, fin_bb, - recompute_dominator (CDI_DOMINATORS, fin_bb)); - - struct loop *loop = body_bb->loop_father; - if (loop != entry_bb->loop_father) - { - gcc_assert (broken_loop || loop->header == body_bb); - gcc_assert (broken_loop - || loop->latch == region->cont - || single_pred (loop->latch) == region->cont); - return; - } - - if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt)) - { - loop = alloc_loop (); - loop->header = body_bb; - if (collapse_bb == NULL) - loop->latch = cont_bb; - add_loop (loop, body_bb->loop_father); - } -} - -/* Return phi in E->DEST with ARG on edge E. 
*/ - -static gphi * -find_phi_with_arg_on_edge (tree arg, edge e) -{ - basic_block bb = e->dest; - - for (gphi_iterator gpi = gsi_start_phis (bb); - !gsi_end_p (gpi); - gsi_next (&gpi)) - { - gphi *phi = gpi.phi (); - if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg) - return phi; - } - - return NULL; -} - -/* A subroutine of expand_omp_for. Generate code for a parallel - loop with static schedule and a specified chunk size. Given - parameters: - - for (V = N1; V cond N2; V += STEP) BODY; - - where COND is "<" or ">", we generate pseudocode - - if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2; - if (cond is <) - adj = STEP - 1; - else - adj = STEP + 1; - if ((__typeof (V)) -1 > 0 && cond is >) - n = -(adj + N2 - N1) / -STEP; - else - n = (adj + N2 - N1) / STEP; - trip = 0; - V = threadid * CHUNK * STEP + N1; -- this extra definition of V is - here so that V is defined - if the loop is not entered - L0: - s0 = (trip * nthreads + threadid) * CHUNK; - e0 = min(s0 + CHUNK, n); - if (s0 < n) goto L1; else goto L4; - L1: - V = s0 * STEP + N1; - e = e0 * STEP + N1; - L2: - BODY; - V += STEP; - if (V cond e) goto L2; else goto L3; - L3: - trip += 1; - goto L0; - L4: -*/ - -static void -expand_omp_for_static_chunk (struct omp_region *region, - struct omp_for_data *fd, gimple *inner_stmt) -{ - tree n, s0, e0, e, t; - tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid; - tree type, itype, vmain, vback, vextra; - basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb; - basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb; - gimple_stmt_iterator gsi; - edge se; - bool broken_loop = region->cont == NULL; - tree *counts = NULL; - tree n1, n2, step; - - itype = type = TREE_TYPE (fd->loop.v); - if (POINTER_TYPE_P (type)) - itype = signed_type_for (type); - - entry_bb = region->entry; - se = split_block (entry_bb, last_stmt (entry_bb)); - entry_bb = se->src; - iter_part_bb = se->dest; - cont_bb = region->cont; - gcc_assert (EDGE_COUNT 
(iter_part_bb->succs) == 2); - fin_bb = BRANCH_EDGE (iter_part_bb)->dest; - gcc_assert (broken_loop - || fin_bb == FALLTHRU_EDGE (cont_bb)->dest); - seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb)); - body_bb = single_succ (seq_start_bb); - if (!broken_loop) - { - gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb - || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb); - gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); - trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb)); - } - exit_bb = region->exit; - - /* Trip and adjustment setup goes in ENTRY_BB. */ - gsi = gsi_last_bb (entry_bb); - gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); - - if (fd->collapse > 1) - { - int first_zero_iter = -1, dummy = -1; - basic_block l2_dom_bb = NULL, dummy_bb = NULL; - - counts = XALLOCAVEC (tree, fd->collapse); - expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, - fin_bb, first_zero_iter, - dummy_bb, dummy, l2_dom_bb); - t = NULL_TREE; - } - else if (gimple_omp_for_combined_into_p (fd->for_stmt)) - t = integer_one_node; - else - t = fold_binary (fd->loop.cond_code, boolean_type_node, - fold_convert (type, fd->loop.n1), - fold_convert (type, fd->loop.n2)); - if (fd->collapse == 1 - && TYPE_UNSIGNED (type) - && (t == NULL_TREE || !integer_onep (t))) - { - n1 = fold_convert (type, unshare_expr (fd->loop.n1)); - n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE, - true, GSI_SAME_STMT); - n2 = fold_convert (type, unshare_expr (fd->loop.n2)); - n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE, - true, GSI_SAME_STMT); - gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2, - NULL_TREE, NULL_TREE); - gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); - if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), - expand_omp_regimplify_p, NULL, NULL) - || walk_tree (gimple_cond_rhs_ptr (cond_stmt), - expand_omp_regimplify_p, NULL, NULL)) - { - gsi = gsi_for_stmt (cond_stmt); - gimple_regimplify_operands (cond_stmt, &gsi); - } - se = 
split_block (entry_bb, cond_stmt); - se->flags = EDGE_TRUE_VALUE; - entry_bb = se->dest; - se->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1); - se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE); - se->probability = REG_BR_PROB_BASE / 2000 - 1; - if (gimple_in_ssa_p (cfun)) - { - int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx; - for (gphi_iterator gpi = gsi_start_phis (fin_bb); - !gsi_end_p (gpi); gsi_next (&gpi)) - { - gphi *phi = gpi.phi (); - add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx), - se, UNKNOWN_LOCATION); - } - } - gsi = gsi_last_bb (entry_bb); - } - - switch (gimple_omp_for_kind (fd->for_stmt)) - { - case GF_OMP_FOR_KIND_FOR: - nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); - threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); - break; - case GF_OMP_FOR_KIND_DISTRIBUTE: - nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS); - threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM); - break; - default: - gcc_unreachable (); - } - nthreads = build_call_expr (nthreads, 0); - nthreads = fold_convert (itype, nthreads); - nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE, - true, GSI_SAME_STMT); - threadid = build_call_expr (threadid, 0); - threadid = fold_convert (itype, threadid); - threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE, - true, GSI_SAME_STMT); - - n1 = fd->loop.n1; - n2 = fd->loop.n2; - step = fd->loop.step; - if (gimple_omp_for_combined_into_p (fd->for_stmt)) - { - tree innerc = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - n1 = OMP_CLAUSE_DECL (innerc); - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - n2 = OMP_CLAUSE_DECL (innerc); - } - n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1), - true, NULL_TREE, true, GSI_SAME_STMT); - n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2), - true, 
NULL_TREE, true, GSI_SAME_STMT); - step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step), - true, NULL_TREE, true, GSI_SAME_STMT); - tree chunk_size = fold_convert (itype, fd->chunk_size); - chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule); - chunk_size - = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true, - GSI_SAME_STMT); - - t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1)); - t = fold_build2 (PLUS_EXPR, itype, step, t); - t = fold_build2 (PLUS_EXPR, itype, t, n2); - t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1)); - if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR) - t = fold_build2 (TRUNC_DIV_EXPR, itype, - fold_build1 (NEGATE_EXPR, itype, t), - fold_build1 (NEGATE_EXPR, itype, step)); - else - t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); - t = fold_convert (itype, t); - n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - true, GSI_SAME_STMT); - - trip_var = create_tmp_reg (itype, ".trip"); - if (gimple_in_ssa_p (cfun)) - { - trip_init = make_ssa_name (trip_var); - trip_main = make_ssa_name (trip_var); - trip_back = make_ssa_name (trip_var); - } - else - { - trip_init = trip_var; - trip_main = trip_var; - trip_back = trip_var; - } - - gassign *assign_stmt - = gimple_build_assign (trip_init, build_int_cst (itype, 0)); - gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); - - t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size); - t = fold_build2 (MULT_EXPR, itype, t, step); - if (POINTER_TYPE_P (type)) - t = fold_build_pointer_plus (n1, t); - else - t = fold_build2 (PLUS_EXPR, type, t, n1); - vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - true, GSI_SAME_STMT); - - /* Remove the GIMPLE_OMP_FOR. */ - gsi_remove (&gsi, true); - - gimple_stmt_iterator gsif = gsi; - - /* Iteration space partitioning goes in ITER_PART_BB. 
*/ - gsi = gsi_last_bb (iter_part_bb); - - t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads); - t = fold_build2 (PLUS_EXPR, itype, t, threadid); - t = fold_build2 (MULT_EXPR, itype, t, chunk_size); - s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - - t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size); - t = fold_build2 (MIN_EXPR, itype, t, n); - e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - - t = build2 (LT_EXPR, boolean_type_node, s0, n); - gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING); - - /* Setup code for sequential iteration goes in SEQ_START_BB. */ - gsi = gsi_start_bb (seq_start_bb); - - tree startvar = fd->loop.v; - tree endvar = NULL_TREE; - - if (gimple_omp_for_combined_p (fd->for_stmt)) - { - tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL - ? gimple_omp_parallel_clauses (inner_stmt) - : gimple_omp_for_clauses (inner_stmt); - tree innerc = find_omp_clause (clauses, OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - startvar = OMP_CLAUSE_DECL (innerc); - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - endvar = OMP_CLAUSE_DECL (innerc); - if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST - && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE) - { - int i; - for (i = 1; i < fd->collapse; i++) - { - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - } - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), - OMP_CLAUSE__LOOPTEMP_); - if (innerc) - { - /* If needed (distribute parallel for with lastprivate), - propagate down the total number of iterations. 
*/ - tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)), - fd->loop.n2); - t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false, - GSI_CONTINUE_LINKING); - assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - } - } - } - - t = fold_convert (itype, s0); - t = fold_build2 (MULT_EXPR, itype, t, step); - if (POINTER_TYPE_P (type)) - t = fold_build_pointer_plus (n1, t); - else - t = fold_build2 (PLUS_EXPR, type, t, n1); - t = fold_convert (TREE_TYPE (startvar), t); - t = force_gimple_operand_gsi (&gsi, t, - DECL_P (startvar) - && TREE_ADDRESSABLE (startvar), - NULL_TREE, false, GSI_CONTINUE_LINKING); - assign_stmt = gimple_build_assign (startvar, t); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - - t = fold_convert (itype, e0); - t = fold_build2 (MULT_EXPR, itype, t, step); - if (POINTER_TYPE_P (type)) - t = fold_build_pointer_plus (n1, t); - else - t = fold_build2 (PLUS_EXPR, type, t, n1); - t = fold_convert (TREE_TYPE (startvar), t); - e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - if (endvar) - { - assign_stmt = gimple_build_assign (endvar, e); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e))) - assign_stmt = gimple_build_assign (fd->loop.v, e); - else - assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - } - /* Handle linear clause adjustments. 
*/ - tree itercnt = NULL_TREE, itercntbias = NULL_TREE; - if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR) - for (tree c = gimple_omp_for_clauses (fd->for_stmt); - c; c = OMP_CLAUSE_CHAIN (c)) - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR - && !OMP_CLAUSE_LINEAR_NO_COPYIN (c)) - { - tree d = OMP_CLAUSE_DECL (c); - bool is_ref = is_reference (d); - tree t = d, a, dest; - if (is_ref) - t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t); - tree type = TREE_TYPE (t); - if (POINTER_TYPE_P (type)) - type = sizetype; - dest = unshare_expr (t); - tree v = create_tmp_var (TREE_TYPE (t), NULL); - expand_omp_build_assign (&gsif, v, t); - if (itercnt == NULL_TREE) - { - if (gimple_omp_for_combined_into_p (fd->for_stmt)) - { - itercntbias - = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1), - fold_convert (itype, fd->loop.n1)); - itercntbias = fold_build2 (EXACT_DIV_EXPR, itype, - itercntbias, step); - itercntbias - = force_gimple_operand_gsi (&gsif, itercntbias, true, - NULL_TREE, true, - GSI_SAME_STMT); - itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0); - itercnt = force_gimple_operand_gsi (&gsi, itercnt, true, - NULL_TREE, false, - GSI_CONTINUE_LINKING); - } - else - itercnt = s0; - } - a = fold_build2 (MULT_EXPR, type, - fold_convert (type, itercnt), - fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c))); - t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR - : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a); - t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - assign_stmt = gimple_build_assign (dest, t); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - } - if (fd->collapse > 1) - expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); - - if (!broken_loop) - { - /* The code controlling the sequential loop goes in CONT_BB, - replacing the GIMPLE_OMP_CONTINUE. 
*/ - gsi = gsi_last_bb (cont_bb); - gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); - vmain = gimple_omp_continue_control_use (cont_stmt); - vback = gimple_omp_continue_control_def (cont_stmt); - - if (!gimple_omp_for_combined_p (fd->for_stmt)) - { - if (POINTER_TYPE_P (type)) - t = fold_build_pointer_plus (vmain, step); - else - t = fold_build2 (PLUS_EXPR, type, vmain, step); - if (DECL_P (vback) && TREE_ADDRESSABLE (vback)) - t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - true, GSI_SAME_STMT); - assign_stmt = gimple_build_assign (vback, t); - gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); - - if (tree_int_cst_equal (fd->chunk_size, integer_one_node)) - t = build2 (EQ_EXPR, boolean_type_node, - build_int_cst (itype, 0), - build_int_cst (itype, 1)); - else - t = build2 (fd->loop.cond_code, boolean_type_node, - DECL_P (vback) && TREE_ADDRESSABLE (vback) - ? t : vback, e); - gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); - } - - /* Remove GIMPLE_OMP_CONTINUE. */ - gsi_remove (&gsi, true); - - if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) - collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb); - - /* Trip update code goes into TRIP_UPDATE_BB. */ - gsi = gsi_start_bb (trip_update_bb); - - t = build_int_cst (itype, 1); - t = build2 (PLUS_EXPR, itype, trip_main, t); - assign_stmt = gimple_build_assign (trip_back, t); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - } - - /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */ - gsi = gsi_last_bb (exit_bb); - if (!gimple_omp_return_nowait_p (gsi_stmt (gsi))) - { - t = gimple_omp_return_lhs (gsi_stmt (gsi)); - gsi_insert_after (&gsi, build_omp_barrier (t), GSI_SAME_STMT); - } - gsi_remove (&gsi, true); - - /* Connect the new blocks. 
*/ - find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE; - find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE; - - if (!broken_loop) - { - se = find_edge (cont_bb, body_bb); - if (se == NULL) - { - se = BRANCH_EDGE (cont_bb); - gcc_assert (single_succ (se->dest) == body_bb); - } - if (gimple_omp_for_combined_p (fd->for_stmt)) - { - remove_edge (se); - se = NULL; - } - else if (fd->collapse > 1) - { - remove_edge (se); - se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE); - } - else - se->flags = EDGE_TRUE_VALUE; - find_edge (cont_bb, trip_update_bb)->flags - = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU; - - redirect_edge_and_branch (single_succ_edge (trip_update_bb), iter_part_bb); - } - - if (gimple_in_ssa_p (cfun)) - { - gphi_iterator psi; - gphi *phi; - edge re, ene; - edge_var_map *vm; - size_t i; - - gcc_assert (fd->collapse == 1 && !broken_loop); - - /* When we redirect the edge from trip_update_bb to iter_part_bb, we - remove arguments of the phi nodes in fin_bb. We need to create - appropriate phi nodes in iter_part_bb instead. */ - se = find_edge (iter_part_bb, fin_bb); - re = single_succ_edge (trip_update_bb); - vec<edge_var_map> *head = redirect_edge_var_map_vector (re); - ene = single_succ_edge (entry_bb); - - psi = gsi_start_phis (fin_bb); - for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm); - gsi_next (&psi), ++i) - { - gphi *nphi; - source_location locus; - - phi = psi.phi (); - t = gimple_phi_result (phi); - gcc_assert (t == redirect_edge_var_map_result (vm)); - - if (!single_pred_p (fin_bb)) - t = copy_ssa_name (t, phi); - - nphi = create_phi_node (t, iter_part_bb); - - t = PHI_ARG_DEF_FROM_EDGE (phi, se); - locus = gimple_phi_arg_location_from_edge (phi, se); - - /* A special case -- fd->loop.v is not yet computed in - iter_part_bb, we need to use vextra instead. 
*/ - if (t == fd->loop.v) - t = vextra; - add_phi_arg (nphi, t, ene, locus); - locus = redirect_edge_var_map_location (vm); - tree back_arg = redirect_edge_var_map_def (vm); - add_phi_arg (nphi, back_arg, re, locus); - edge ce = find_edge (cont_bb, body_bb); - if (ce == NULL) - { - ce = BRANCH_EDGE (cont_bb); - gcc_assert (single_succ (ce->dest) == body_bb); - ce = single_succ_edge (ce->dest); - } - gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce); - gcc_assert (inner_loop_phi != NULL); - add_phi_arg (inner_loop_phi, gimple_phi_result (nphi), - find_edge (seq_start_bb, body_bb), locus); - - if (!single_pred_p (fin_bb)) - add_phi_arg (phi, gimple_phi_result (nphi), se, locus); - } - gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ())); - redirect_edge_var_map_clear (re); - if (single_pred_p (fin_bb)) - while (1) - { - psi = gsi_start_phis (fin_bb); - if (gsi_end_p (psi)) - break; - remove_phi_node (&psi, false); - } - - /* Make phi node for trip. */ - phi = create_phi_node (trip_main, iter_part_bb); - add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb), - UNKNOWN_LOCATION); - add_phi_arg (phi, trip_init, single_succ_edge (entry_bb), - UNKNOWN_LOCATION); - } - - if (!broken_loop) - set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb); - set_immediate_dominator (CDI_DOMINATORS, iter_part_bb, - recompute_dominator (CDI_DOMINATORS, iter_part_bb)); - set_immediate_dominator (CDI_DOMINATORS, fin_bb, - recompute_dominator (CDI_DOMINATORS, fin_bb)); - set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, - recompute_dominator (CDI_DOMINATORS, seq_start_bb)); - set_immediate_dominator (CDI_DOMINATORS, body_bb, - recompute_dominator (CDI_DOMINATORS, body_bb)); - - if (!broken_loop) - { - struct loop *loop = body_bb->loop_father; - struct loop *trip_loop = alloc_loop (); - trip_loop->header = iter_part_bb; - trip_loop->latch = trip_update_bb; - add_loop (trip_loop, iter_part_bb->loop_father); - - if (loop != 
entry_bb->loop_father) - { - gcc_assert (loop->header == body_bb); - gcc_assert (loop->latch == region->cont - || single_pred (loop->latch) == region->cont); - trip_loop->inner = loop; - return; - } - - if (!gimple_omp_for_combined_p (fd->for_stmt)) - { - loop = alloc_loop (); - loop->header = body_bb; - if (collapse_bb == NULL) - loop->latch = cont_bb; - add_loop (loop, trip_loop); - } - } -} - -/* A subroutine of expand_omp_for. Generate code for _Cilk_for loop. - Given parameters: - for (V = N1; V cond N2; V += STEP) BODY; - - where COND is "<" or ">" or "!=", we generate pseudocode - - for (ind_var = low; ind_var < high; ind_var++) - { - V = n1 + (ind_var * STEP) - - <BODY> - } - - In the above pseudocode, low and high are function parameters of the - child function. In the function below, we are inserting a temp. - variable that will be making a call to two OMP functions that will not be - found in the body of _Cilk_for (since OMP_FOR cannot be mixed - with _Cilk_for). These functions are replaced with low and high - by the function that handles taskreg. 
*/ - - -static void -expand_cilk_for (struct omp_region *region, struct omp_for_data *fd) -{ - bool broken_loop = region->cont == NULL; - basic_block entry_bb = region->entry; - basic_block cont_bb = region->cont; - - gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); - gcc_assert (broken_loop - || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest); - basic_block l0_bb = FALLTHRU_EDGE (entry_bb)->dest; - basic_block l1_bb, l2_bb; - - if (!broken_loop) - { - gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb); - gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); - l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest; - l2_bb = BRANCH_EDGE (entry_bb)->dest; - } - else - { - BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL; - l1_bb = split_edge (BRANCH_EDGE (entry_bb)); - l2_bb = single_succ (l1_bb); - } - basic_block exit_bb = region->exit; - basic_block l2_dom_bb = NULL; - - gimple_stmt_iterator gsi = gsi_last_bb (entry_bb); - - /* Below statements until the "tree high_val = ..." are pseudo statements - used to pass information to be used by expand_omp_taskreg. - low_val and high_val will be replaced by the __low and __high - parameter from the child function. - - The call_exprs part is a place-holder, it is mainly used - to distinctly identify to the top-level part that this is - where we should put low and high (reasoning given in header - comment). 
*/ - - tree child_fndecl - = gimple_omp_parallel_child_fn ( - as_a <gomp_parallel *> (last_stmt (region->outer->entry))); - tree t, low_val = NULL_TREE, high_val = NULL_TREE; - for (t = DECL_ARGUMENTS (child_fndecl); t; t = TREE_CHAIN (t)) - { - if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (t)), "__high")) - high_val = t; - else if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (t)), "__low")) - low_val = t; - } - gcc_assert (low_val && high_val); - - tree type = TREE_TYPE (low_val); - tree ind_var = create_tmp_reg (type, "__cilk_ind_var"); - gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); - - /* Not needed in SSA form right now. */ - gcc_assert (!gimple_in_ssa_p (cfun)); - if (l2_dom_bb == NULL) - l2_dom_bb = l1_bb; - - tree n1 = low_val; - tree n2 = high_val; - - gimple *stmt = gimple_build_assign (ind_var, n1); - - /* Replace the GIMPLE_OMP_FOR statement. */ - gsi_replace (&gsi, stmt, true); - - if (!broken_loop) - { - /* Code to control the increment goes in the CONT_BB. */ - gsi = gsi_last_bb (cont_bb); - stmt = gsi_stmt (gsi); - gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE); - stmt = gimple_build_assign (ind_var, PLUS_EXPR, ind_var, - build_one_cst (type)); - - /* Replace GIMPLE_OMP_CONTINUE. */ - gsi_replace (&gsi, stmt, true); - } - - /* Emit the condition in L1_BB. */ - gsi = gsi_after_labels (l1_bb); - t = fold_build2 (MULT_EXPR, TREE_TYPE (fd->loop.step), - fold_convert (TREE_TYPE (fd->loop.step), ind_var), - fd->loop.step); - if (POINTER_TYPE_P (TREE_TYPE (fd->loop.n1))) - t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (fd->loop.n1), - fd->loop.n1, fold_convert (sizetype, t)); - else - t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loop.n1), - fd->loop.n1, fold_convert (TREE_TYPE (fd->loop.n1), t)); - t = fold_convert (TREE_TYPE (fd->loop.v), t); - expand_omp_build_assign (&gsi, fd->loop.v, t); - - /* The condition is always '<' since the runtime will fill in the low - and high values. 
*/ - stmt = gimple_build_cond (LT_EXPR, ind_var, n2, NULL_TREE, NULL_TREE); - gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); - - /* Remove GIMPLE_OMP_RETURN. */ - gsi = gsi_last_bb (exit_bb); - gsi_remove (&gsi, true); - - /* Connect the new blocks. */ - remove_edge (FALLTHRU_EDGE (entry_bb)); - - edge e, ne; - if (!broken_loop) - { - remove_edge (BRANCH_EDGE (entry_bb)); - make_edge (entry_bb, l1_bb, EDGE_FALLTHRU); - - e = BRANCH_EDGE (l1_bb); - ne = FALLTHRU_EDGE (l1_bb); - e->flags = EDGE_TRUE_VALUE; - } - else - { - single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; - - ne = single_succ_edge (l1_bb); - e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE); - - } - ne->flags = EDGE_FALSE_VALUE; - e->probability = REG_BR_PROB_BASE * 7 / 8; - ne->probability = REG_BR_PROB_BASE / 8; - - set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb); - set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb); - set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb); - - if (!broken_loop) - { - struct loop *loop = alloc_loop (); - loop->header = l1_bb; - loop->latch = cont_bb; - add_loop (loop, l1_bb->loop_father); - loop->safelen = INT_MAX; - } - - /* Pick the correct library function based on the precision of the - induction variable type. */ - tree lib_fun = NULL_TREE; - if (TYPE_PRECISION (type) == 32) - lib_fun = cilk_for_32_fndecl; - else if (TYPE_PRECISION (type) == 64) - lib_fun = cilk_for_64_fndecl; - else - gcc_unreachable (); - - gcc_assert (fd->sched_kind == OMP_CLAUSE_SCHEDULE_CILKFOR); - - /* WS_ARGS contains the library function flavor to call: - __libcilkrts_cilk_for_64 or __libcilkrts_cilk_for_32), and the - user-defined grain value. If the user does not define one, then zero - is passed in by the parser. */ - vec_alloc (region->ws_args, 2); - region->ws_args->quick_push (lib_fun); - region->ws_args->quick_push (fd->chunk_size); -} - -/* A subroutine of expand_omp_for. Generate code for a simd non-worksharing - loop. 
Given parameters: - - for (V = N1; V cond N2; V += STEP) BODY; - - where COND is "<" or ">", we generate pseudocode - - V = N1; - goto L1; - L0: - BODY; - V += STEP; - L1: - if (V cond N2) goto L0; else goto L2; - L2: - - For collapsed loops, given parameters: - collapse(3) - for (V1 = N11; V1 cond1 N12; V1 += STEP1) - for (V2 = N21; V2 cond2 N22; V2 += STEP2) - for (V3 = N31; V3 cond3 N32; V3 += STEP3) - BODY; - - we generate pseudocode - - if (cond3 is <) - adj = STEP3 - 1; - else - adj = STEP3 + 1; - count3 = (adj + N32 - N31) / STEP3; - if (cond2 is <) - adj = STEP2 - 1; - else - adj = STEP2 + 1; - count2 = (adj + N22 - N21) / STEP2; - if (cond1 is <) - adj = STEP1 - 1; - else - adj = STEP1 + 1; - count1 = (adj + N12 - N11) / STEP1; - count = count1 * count2 * count3; - V = 0; - V1 = N11; - V2 = N21; - V3 = N31; - goto L1; - L0: - BODY; - V += 1; - V3 += STEP3; - V2 += (V3 cond3 N32) ? 0 : STEP2; - V3 = (V3 cond3 N32) ? V3 : N31; - V1 += (V2 cond2 N22) ? 0 : STEP1; - V2 = (V2 cond2 N22) ? 
V2 : N21; - L1: - if (V < count) goto L0; else goto L2; - L2: - - */ - -static void -expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) -{ - tree type, t; - basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb; - gimple_stmt_iterator gsi; - gimple *stmt; - gcond *cond_stmt; - bool broken_loop = region->cont == NULL; - edge e, ne; - tree *counts = NULL; - int i; - int safelen_int = INT_MAX; - tree safelen = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt), - OMP_CLAUSE_SAFELEN); - tree simduid = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt), - OMP_CLAUSE__SIMDUID_); - tree n1, n2; - - if (safelen) - { - safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen); - if (TREE_CODE (safelen) != INTEGER_CST) - safelen_int = 0; - else if (tree_fits_uhwi_p (safelen) && tree_to_uhwi (safelen) < INT_MAX) - safelen_int = tree_to_uhwi (safelen); - if (safelen_int == 1) - safelen_int = 0; - } - type = TREE_TYPE (fd->loop.v); - entry_bb = region->entry; - cont_bb = region->cont; - gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); - gcc_assert (broken_loop - || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest); - l0_bb = FALLTHRU_EDGE (entry_bb)->dest; - if (!broken_loop) - { - gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb); - gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); - l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest; - l2_bb = BRANCH_EDGE (entry_bb)->dest; - } - else - { - BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL; - l1_bb = split_edge (BRANCH_EDGE (entry_bb)); - l2_bb = single_succ (l1_bb); - } - exit_bb = region->exit; - l2_dom_bb = NULL; - - gsi = gsi_last_bb (entry_bb); - - gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); - /* Not needed in SSA form right now. 
*/ - gcc_assert (!gimple_in_ssa_p (cfun)); - if (fd->collapse > 1) - { - int first_zero_iter = -1, dummy = -1; - basic_block zero_iter_bb = l2_bb, dummy_bb = NULL; - - counts = XALLOCAVEC (tree, fd->collapse); - expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, - zero_iter_bb, first_zero_iter, - dummy_bb, dummy, l2_dom_bb); - } - if (l2_dom_bb == NULL) - l2_dom_bb = l1_bb; - - n1 = fd->loop.n1; - n2 = fd->loop.n2; - if (gimple_omp_for_combined_into_p (fd->for_stmt)) - { - tree innerc = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - n1 = OMP_CLAUSE_DECL (innerc); - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - n2 = OMP_CLAUSE_DECL (innerc); - } - tree step = fd->loop.step; - - bool is_simt = (safelen_int > 1 - && find_omp_clause (gimple_omp_for_clauses (fd->for_stmt), - OMP_CLAUSE__SIMT_)); - tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE; - if (is_simt) - { - cfun->curr_properties &= ~PROP_gimple_lomp_dev; - simt_lane = create_tmp_var (unsigned_type_node); - gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0); - gimple_call_set_lhs (g, simt_lane); - gsi_insert_before (&gsi, g, GSI_SAME_STMT); - tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, - fold_convert (TREE_TYPE (step), simt_lane)); - n1 = fold_convert (type, n1); - if (POINTER_TYPE_P (type)) - n1 = fold_build_pointer_plus (n1, offset); - else - n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset)); - - /* Collapsed loops not handled for SIMT yet: limit to one lane only. 
*/ - if (fd->collapse > 1) - simt_maxlane = build_one_cst (unsigned_type_node); - else if (safelen_int < omp_max_simt_vf ()) - simt_maxlane = build_int_cst (unsigned_type_node, safelen_int); - tree vf - = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF, - unsigned_type_node, 0); - if (simt_maxlane) - vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane); - vf = fold_convert (TREE_TYPE (step), vf); - step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf); - } - - expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1)); - if (fd->collapse > 1) - { - if (gimple_omp_for_combined_into_p (fd->for_stmt)) - { - gsi_prev (&gsi); - expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1); - gsi_next (&gsi); - } - else - for (i = 0; i < fd->collapse; i++) - { - tree itype = TREE_TYPE (fd->loops[i].v); - if (POINTER_TYPE_P (itype)) - itype = signed_type_for (itype); - t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1); - expand_omp_build_assign (&gsi, fd->loops[i].v, t); - } - } - - /* Remove the GIMPLE_OMP_FOR statement. */ - gsi_remove (&gsi, true); - - if (!broken_loop) - { - /* Code to control the increment goes in the CONT_BB. 
*/ - gsi = gsi_last_bb (cont_bb); - stmt = gsi_stmt (gsi); - gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE); - - if (POINTER_TYPE_P (type)) - t = fold_build_pointer_plus (fd->loop.v, step); - else - t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step); - expand_omp_build_assign (&gsi, fd->loop.v, t); - - if (fd->collapse > 1) - { - i = fd->collapse - 1; - if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))) - { - t = fold_convert (sizetype, fd->loops[i].step); - t = fold_build_pointer_plus (fd->loops[i].v, t); - } - else - { - t = fold_convert (TREE_TYPE (fd->loops[i].v), - fd->loops[i].step); - t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v), - fd->loops[i].v, t); - } - expand_omp_build_assign (&gsi, fd->loops[i].v, t); - - for (i = fd->collapse - 1; i > 0; i--) - { - tree itype = TREE_TYPE (fd->loops[i].v); - tree itype2 = TREE_TYPE (fd->loops[i - 1].v); - if (POINTER_TYPE_P (itype2)) - itype2 = signed_type_for (itype2); - t = build3 (COND_EXPR, itype2, - build2 (fd->loops[i].cond_code, boolean_type_node, - fd->loops[i].v, - fold_convert (itype, fd->loops[i].n2)), - build_int_cst (itype2, 0), - fold_convert (itype2, fd->loops[i - 1].step)); - if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v))) - t = fold_build_pointer_plus (fd->loops[i - 1].v, t); - else - t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t); - expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t); - - t = build3 (COND_EXPR, itype, - build2 (fd->loops[i].cond_code, boolean_type_node, - fd->loops[i].v, - fold_convert (itype, fd->loops[i].n2)), - fd->loops[i].v, - fold_convert (itype, fd->loops[i].n1)); - expand_omp_build_assign (&gsi, fd->loops[i].v, t); - } - } - - /* Remove GIMPLE_OMP_CONTINUE. */ - gsi_remove (&gsi, true); - } - - /* Emit the condition in L1_BB. 
*/ - gsi = gsi_start_bb (l1_bb); - - t = fold_convert (type, n2); - t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - tree v = fd->loop.v; - if (DECL_P (v) && TREE_ADDRESSABLE (v)) - v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - t = build2 (fd->loop.cond_code, boolean_type_node, v, t); - cond_stmt = gimple_build_cond_empty (t); - gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING); - if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p, - NULL, NULL) - || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p, - NULL, NULL)) - { - gsi = gsi_for_stmt (cond_stmt); - gimple_regimplify_operands (cond_stmt, &gsi); - } - - /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */ - if (is_simt) - { - gsi = gsi_start_bb (l2_bb); - step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step); - if (POINTER_TYPE_P (type)) - t = fold_build_pointer_plus (fd->loop.v, step); - else - t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step); - expand_omp_build_assign (&gsi, fd->loop.v, t); - } - - /* Remove GIMPLE_OMP_RETURN. */ - gsi = gsi_last_bb (exit_bb); - gsi_remove (&gsi, true); - - /* Connect the new blocks. 
*/ - remove_edge (FALLTHRU_EDGE (entry_bb)); - - if (!broken_loop) - { - remove_edge (BRANCH_EDGE (entry_bb)); - make_edge (entry_bb, l1_bb, EDGE_FALLTHRU); - - e = BRANCH_EDGE (l1_bb); - ne = FALLTHRU_EDGE (l1_bb); - e->flags = EDGE_TRUE_VALUE; - } - else - { - single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; - - ne = single_succ_edge (l1_bb); - e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE); - - } - ne->flags = EDGE_FALSE_VALUE; - e->probability = REG_BR_PROB_BASE * 7 / 8; - ne->probability = REG_BR_PROB_BASE / 8; - - set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb); - set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb); - - if (simt_maxlane) - { - cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane, - NULL_TREE, NULL_TREE); - gsi = gsi_last_bb (entry_bb); - gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT); - make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE); - FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE; - FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE * 7 / 8; - BRANCH_EDGE (entry_bb)->probability = REG_BR_PROB_BASE / 8; - l2_dom_bb = entry_bb; - } - set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb); - - if (!broken_loop) - { - struct loop *loop = alloc_loop (); - loop->header = l1_bb; - loop->latch = cont_bb; - add_loop (loop, l1_bb->loop_father); - loop->safelen = safelen_int; - if (simduid) - { - loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid); - cfun->has_simduid_loops = true; - } - /* If not -fno-tree-loop-vectorize, hint that we want to vectorize - the loop. 
*/ - if ((flag_tree_loop_vectorize - || (!global_options_set.x_flag_tree_loop_vectorize - && !global_options_set.x_flag_tree_vectorize)) - && flag_tree_loop_optimize - && loop->safelen > 1) - { - loop->force_vectorize = true; - cfun->has_force_vectorize_loops = true; - } - } - else if (simduid) - cfun->has_simduid_loops = true; -} - -/* Taskloop construct is represented after gimplification with - two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched - in between them. This routine expands the outer GIMPLE_OMP_FOR, - which should just compute all the needed loop temporaries - for GIMPLE_OMP_TASK. */ - -static void -expand_omp_taskloop_for_outer (struct omp_region *region, - struct omp_for_data *fd, - gimple *inner_stmt) -{ - tree type, bias = NULL_TREE; - basic_block entry_bb, cont_bb, exit_bb; - gimple_stmt_iterator gsi; - gassign *assign_stmt; - tree *counts = NULL; - int i; - - gcc_assert (inner_stmt); - gcc_assert (region->cont); - gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK - && gimple_omp_task_taskloop_p (inner_stmt)); - type = TREE_TYPE (fd->loop.v); - - /* See if we need to bias by LLONG_MIN. 
*/ - if (fd->iter_type == long_long_unsigned_type_node - && TREE_CODE (type) == INTEGER_TYPE - && !TYPE_UNSIGNED (type)) - { - tree n1, n2; - - if (fd->loop.cond_code == LT_EXPR) - { - n1 = fd->loop.n1; - n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step); - } - else - { - n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step); - n2 = fd->loop.n1; - } - if (TREE_CODE (n1) != INTEGER_CST - || TREE_CODE (n2) != INTEGER_CST - || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0))) - bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type)); - } - - entry_bb = region->entry; - cont_bb = region->cont; - gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); - gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest); - exit_bb = region->exit; - - gsi = gsi_last_bb (entry_bb); - gimple *for_stmt = gsi_stmt (gsi); - gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR); - if (fd->collapse > 1) - { - int first_zero_iter = -1, dummy = -1; - basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL; - - counts = XALLOCAVEC (tree, fd->collapse); - expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, - zero_iter_bb, first_zero_iter, - dummy_bb, dummy, l2_dom_bb); - - if (zero_iter_bb) - { - /* Some counts[i] vars might be uninitialized if - some loop has zero iterations. But the body shouldn't - be executed in that case, so just avoid uninit warnings. 
*/ - for (i = first_zero_iter; i < fd->collapse; i++) - if (SSA_VAR_P (counts[i])) - TREE_NO_WARNING (counts[i]) = 1; - gsi_prev (&gsi); - edge e = split_block (entry_bb, gsi_stmt (gsi)); - entry_bb = e->dest; - make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU); - gsi = gsi_last_bb (entry_bb); - set_immediate_dominator (CDI_DOMINATORS, entry_bb, - get_immediate_dominator (CDI_DOMINATORS, - zero_iter_bb)); - } - } - - tree t0, t1; - t1 = fd->loop.n2; - t0 = fd->loop.n1; - if (POINTER_TYPE_P (TREE_TYPE (t0)) - && TYPE_PRECISION (TREE_TYPE (t0)) - != TYPE_PRECISION (fd->iter_type)) - { - /* Avoid casting pointers to integer of a different size. */ - tree itype = signed_type_for (type); - t1 = fold_convert (fd->iter_type, fold_convert (itype, t1)); - t0 = fold_convert (fd->iter_type, fold_convert (itype, t0)); - } - else - { - t1 = fold_convert (fd->iter_type, t1); - t0 = fold_convert (fd->iter_type, t0); - } - if (bias) - { - t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias); - t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias); - } - - tree innerc = find_omp_clause (gimple_omp_task_clauses (inner_stmt), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - tree startvar = OMP_CLAUSE_DECL (innerc); - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - tree endvar = OMP_CLAUSE_DECL (innerc); - if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST) - { - gcc_assert (innerc); - for (i = 1; i < fd->collapse; i++) - { - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - } - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), - OMP_CLAUSE__LOOPTEMP_); - if (innerc) - { - /* If needed (inner taskloop has lastprivate clause), propagate - down the total number of iterations. 
*/ - tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false, - NULL_TREE, false, - GSI_CONTINUE_LINKING); - assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - } - } - - t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false, - GSI_CONTINUE_LINKING); - assign_stmt = gimple_build_assign (startvar, t0); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - - t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false, - GSI_CONTINUE_LINKING); - assign_stmt = gimple_build_assign (endvar, t1); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - if (fd->collapse > 1) - expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); - - /* Remove the GIMPLE_OMP_FOR statement. */ - gsi = gsi_for_stmt (for_stmt); - gsi_remove (&gsi, true); - - gsi = gsi_last_bb (cont_bb); - gsi_remove (&gsi, true); - - gsi = gsi_last_bb (exit_bb); - gsi_remove (&gsi, true); - - FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE; - remove_edge (BRANCH_EDGE (entry_bb)); - FALLTHRU_EDGE (cont_bb)->probability = REG_BR_PROB_BASE; - remove_edge (BRANCH_EDGE (cont_bb)); - set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb); - set_immediate_dominator (CDI_DOMINATORS, region->entry, - recompute_dominator (CDI_DOMINATORS, region->entry)); -} - -/* Taskloop construct is represented after gimplification with - two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched - in between them. This routine expands the inner GIMPLE_OMP_FOR. - GOMP_taskloop{,_ull} function arranges for each task to be given just - a single range of iterations. 
*/ - -static void -expand_omp_taskloop_for_inner (struct omp_region *region, - struct omp_for_data *fd, - gimple *inner_stmt) -{ - tree e, t, type, itype, vmain, vback, bias = NULL_TREE; - basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL; - basic_block fin_bb; - gimple_stmt_iterator gsi; - edge ep; - bool broken_loop = region->cont == NULL; - tree *counts = NULL; - tree n1, n2, step; - - itype = type = TREE_TYPE (fd->loop.v); - if (POINTER_TYPE_P (type)) - itype = signed_type_for (type); - - /* See if we need to bias by LLONG_MIN. */ - if (fd->iter_type == long_long_unsigned_type_node - && TREE_CODE (type) == INTEGER_TYPE - && !TYPE_UNSIGNED (type)) - { - tree n1, n2; - - if (fd->loop.cond_code == LT_EXPR) - { - n1 = fd->loop.n1; - n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step); - } - else - { - n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step); - n2 = fd->loop.n1; - } - if (TREE_CODE (n1) != INTEGER_CST - || TREE_CODE (n2) != INTEGER_CST - || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0))) - bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type)); - } - - entry_bb = region->entry; - cont_bb = region->cont; - gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); - fin_bb = BRANCH_EDGE (entry_bb)->dest; - gcc_assert (broken_loop - || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest)); - body_bb = FALLTHRU_EDGE (entry_bb)->dest; - if (!broken_loop) - { - gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb); - gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); - } - exit_bb = region->exit; - - /* Iteration space partitioning goes in ENTRY_BB. 
*/ - gsi = gsi_last_bb (entry_bb); - gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); - - if (fd->collapse > 1) - { - int first_zero_iter = -1, dummy = -1; - basic_block l2_dom_bb = NULL, dummy_bb = NULL; - - counts = XALLOCAVEC (tree, fd->collapse); - expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, - fin_bb, first_zero_iter, - dummy_bb, dummy, l2_dom_bb); - t = NULL_TREE; - } - else - t = integer_one_node; - - step = fd->loop.step; - tree innerc = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - n1 = OMP_CLAUSE_DECL (innerc); - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - n2 = OMP_CLAUSE_DECL (innerc); - if (bias) - { - n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias); - n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias); - } - n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1), - true, NULL_TREE, true, GSI_SAME_STMT); - n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2), - true, NULL_TREE, true, GSI_SAME_STMT); - step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step), - true, NULL_TREE, true, GSI_SAME_STMT); - - tree startvar = fd->loop.v; - tree endvar = NULL_TREE; - - if (gimple_omp_for_combined_p (fd->for_stmt)) - { - tree clauses = gimple_omp_for_clauses (inner_stmt); - tree innerc = find_omp_clause (clauses, OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - startvar = OMP_CLAUSE_DECL (innerc); - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - endvar = OMP_CLAUSE_DECL (innerc); - } - t = fold_convert (TREE_TYPE (startvar), n1); - t = force_gimple_operand_gsi (&gsi, t, - DECL_P (startvar) - && TREE_ADDRESSABLE (startvar), - NULL_TREE, false, GSI_CONTINUE_LINKING); - gimple *assign_stmt = gimple_build_assign (startvar, t); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - - t = fold_convert (TREE_TYPE (startvar), n2); - e 
= force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - if (endvar) - { - assign_stmt = gimple_build_assign (endvar, e); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e))) - assign_stmt = gimple_build_assign (fd->loop.v, e); - else - assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - } - if (fd->collapse > 1) - expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); - - if (!broken_loop) - { - /* The code controlling the sequential loop replaces the - GIMPLE_OMP_CONTINUE. */ - gsi = gsi_last_bb (cont_bb); - gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); - gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE); - vmain = gimple_omp_continue_control_use (cont_stmt); - vback = gimple_omp_continue_control_def (cont_stmt); - - if (!gimple_omp_for_combined_p (fd->for_stmt)) - { - if (POINTER_TYPE_P (type)) - t = fold_build_pointer_plus (vmain, step); - else - t = fold_build2 (PLUS_EXPR, type, vmain, step); - t = force_gimple_operand_gsi (&gsi, t, - DECL_P (vback) - && TREE_ADDRESSABLE (vback), - NULL_TREE, true, GSI_SAME_STMT); - assign_stmt = gimple_build_assign (vback, t); - gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); - - t = build2 (fd->loop.cond_code, boolean_type_node, - DECL_P (vback) && TREE_ADDRESSABLE (vback) - ? t : vback, e); - gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); - } - - /* Remove the GIMPLE_OMP_CONTINUE statement. */ - gsi_remove (&gsi, true); - - if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) - collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb); - } - - /* Remove the GIMPLE_OMP_FOR statement. */ - gsi = gsi_for_stmt (fd->for_stmt); - gsi_remove (&gsi, true); - - /* Remove the GIMPLE_OMP_RETURN statement. 
*/ - gsi = gsi_last_bb (exit_bb); - gsi_remove (&gsi, true); - - FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE; - if (!broken_loop) - remove_edge (BRANCH_EDGE (entry_bb)); - else - { - remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb)); - region->outer->cont = NULL; - } - - /* Connect all the blocks. */ - if (!broken_loop) - { - ep = find_edge (cont_bb, body_bb); - if (gimple_omp_for_combined_p (fd->for_stmt)) - { - remove_edge (ep); - ep = NULL; - } - else if (fd->collapse > 1) - { - remove_edge (ep); - ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE); - } - else - ep->flags = EDGE_TRUE_VALUE; - find_edge (cont_bb, fin_bb)->flags - = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU; - } - - set_immediate_dominator (CDI_DOMINATORS, body_bb, - recompute_dominator (CDI_DOMINATORS, body_bb)); - if (!broken_loop) - set_immediate_dominator (CDI_DOMINATORS, fin_bb, - recompute_dominator (CDI_DOMINATORS, fin_bb)); - - if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt)) - { - struct loop *loop = alloc_loop (); - loop->header = body_bb; - if (collapse_bb == NULL) - loop->latch = cont_bb; - add_loop (loop, body_bb->loop_father); - } -} - -/* A subroutine of expand_omp_for. Generate code for an OpenACC - partitioned loop. The lowering here is abstracted, in that the - loop parameters are passed through internal functions, which are - further lowered by oacc_device_lower, once we get to the target - compiler. The loop is of the form: - - for (V = B; V LTGT E; V += S) {BODY} - - where LTGT is < or >. We may have a specified chunking size, CHUNKING - (constant 0 for no chunking) and we will have a GWV partitioning - mask, specifying dimensions over which the loop is to be - partitioned (see note below). We generate code that looks like: - - <entry_bb> [incoming FALL->body, BRANCH->exit] - typedef signedintify (typeof (V)) T; // underlying signed integral type - T range = E - B; - T chunk_no = 0; - T DIR = LTGT == '<' ? 
+1 : -1; - T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV); - T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV); - - <head_bb> [created by splitting end of entry_bb] - T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no); - T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset); - if (!(offset LTGT bound)) goto bottom_bb; - - <body_bb> [incoming] - V = B + offset; - {BODY} - - <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb] - offset += step; - if (offset LTGT bound) goto body_bb; [*] - - <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb - chunk_no++; - if (chunk < chunk_max) goto head_bb; - - <exit_bb> [incoming] - V = B + ((range -/+ 1) / S +/- 1) * S [*] - - [*] Needed if V live at end of loop - - Note: CHUNKING & GWV mask are specified explicitly here. This is a - transition, and will be specified by a more general mechanism shortly. - */ - -static void -expand_oacc_for (struct omp_region *region, struct omp_for_data *fd) -{ - tree v = fd->loop.v; - enum tree_code cond_code = fd->loop.cond_code; - enum tree_code plus_code = PLUS_EXPR; - - tree chunk_size = integer_minus_one_node; - tree gwv = integer_zero_node; - tree iter_type = TREE_TYPE (v); - tree diff_type = iter_type; - tree plus_type = iter_type; - struct oacc_collapse *counts = NULL; - - gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt) - == GF_OMP_FOR_KIND_OACC_LOOP); - gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt)); - gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR); - - if (POINTER_TYPE_P (iter_type)) - { - plus_code = POINTER_PLUS_EXPR; - plus_type = sizetype; - } - if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type)) - diff_type = signed_type_for (diff_type); - - basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */ - basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */ - basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */ - 
basic_block bottom_bb = NULL; - - /* entry_bb has two sucessors; the branch edge is to the exit - block, fallthrough edge to body. */ - gcc_assert (EDGE_COUNT (entry_bb->succs) == 2 - && BRANCH_EDGE (entry_bb)->dest == exit_bb); - - /* If cont_bb non-NULL, it has 2 successors. The branch successor is - body_bb, or to a block whose only successor is the body_bb. Its - fallthrough successor is the final block (same as the branch - successor of the entry_bb). */ - if (cont_bb) - { - basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest; - basic_block bed = BRANCH_EDGE (cont_bb)->dest; - - gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb); - gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb); - } - else - gcc_assert (!gimple_in_ssa_p (cfun)); - - /* The exit block only has entry_bb and cont_bb as predecessors. */ - gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL)); - - tree chunk_no; - tree chunk_max = NULL_TREE; - tree bound, offset; - tree step = create_tmp_var (diff_type, ".step"); - bool up = cond_code == LT_EXPR; - tree dir = build_int_cst (diff_type, up ? +1 : -1); - bool chunking = !gimple_in_ssa_p (cfun);; - bool negating; - - /* SSA instances. */ - tree offset_incr = NULL_TREE; - tree offset_init = NULL_TREE; - - gimple_stmt_iterator gsi; - gassign *ass; - gcall *call; - gimple *stmt; - tree expr; - location_t loc; - edge split, be, fte; - - /* Split the end of entry_bb to create head_bb. */ - split = split_block (entry_bb, last_stmt (entry_bb)); - basic_block head_bb = split->dest; - entry_bb = split->src; - - /* Chunk setup goes at end of entry_bb, replacing the omp_for. */ - gsi = gsi_last_bb (entry_bb); - gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi)); - loc = gimple_location (for_stmt); - - if (gimple_in_ssa_p (cfun)) - { - offset_init = gimple_omp_for_index (for_stmt, 0); - gcc_assert (integer_zerop (fd->loop.n1)); - /* The SSA parallelizer does gang parallelism. 
*/ - gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG)); - } - - if (fd->collapse > 1) - { - counts = XALLOCAVEC (struct oacc_collapse, fd->collapse); - tree total = expand_oacc_collapse_init (fd, &gsi, counts, - TREE_TYPE (fd->loop.n2)); - - if (SSA_VAR_P (fd->loop.n2)) - { - total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE, - true, GSI_SAME_STMT); - ass = gimple_build_assign (fd->loop.n2, total); - gsi_insert_before (&gsi, ass, GSI_SAME_STMT); - } - - } - - tree b = fd->loop.n1; - tree e = fd->loop.n2; - tree s = fd->loop.step; - - b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT); - e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT); - - /* Convert the step, avoiding possible unsigned->signed overflow. */ - negating = !up && TYPE_UNSIGNED (TREE_TYPE (s)); - if (negating) - s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s); - s = fold_convert (diff_type, s); - if (negating) - s = fold_build1 (NEGATE_EXPR, diff_type, s); - s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT); - - if (!chunking) - chunk_size = integer_zero_node; - expr = fold_convert (diff_type, chunk_size); - chunk_size = force_gimple_operand_gsi (&gsi, expr, true, - NULL_TREE, true, GSI_SAME_STMT); - /* Determine the range, avoiding possible unsigned->signed overflow. */ - negating = !up && TYPE_UNSIGNED (iter_type); - expr = fold_build2 (MINUS_EXPR, plus_type, - fold_convert (plus_type, negating ? b : e), - fold_convert (plus_type, negating ? 
e : b)); - expr = fold_convert (diff_type, expr); - if (negating) - expr = fold_build1 (NEGATE_EXPR, diff_type, expr); - tree range = force_gimple_operand_gsi (&gsi, expr, true, - NULL_TREE, true, GSI_SAME_STMT); - - chunk_no = build_int_cst (diff_type, 0); - if (chunking) - { - gcc_assert (!gimple_in_ssa_p (cfun)); - - expr = chunk_no; - chunk_max = create_tmp_var (diff_type, ".chunk_max"); - chunk_no = create_tmp_var (diff_type, ".chunk_no"); - - ass = gimple_build_assign (chunk_no, expr); - gsi_insert_before (&gsi, ass, GSI_SAME_STMT); - - call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, - build_int_cst (integer_type_node, - IFN_GOACC_LOOP_CHUNKS), - dir, range, s, chunk_size, gwv); - gimple_call_set_lhs (call, chunk_max); - gimple_set_location (call, loc); - gsi_insert_before (&gsi, call, GSI_SAME_STMT); - } - else - chunk_size = chunk_no; - - call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, - build_int_cst (integer_type_node, - IFN_GOACC_LOOP_STEP), - dir, range, s, chunk_size, gwv); - gimple_call_set_lhs (call, step); - gimple_set_location (call, loc); - gsi_insert_before (&gsi, call, GSI_SAME_STMT); - - /* Remove the GIMPLE_OMP_FOR. */ - gsi_remove (&gsi, true); - - /* Fixup edges from head_bb */ - be = BRANCH_EDGE (head_bb); - fte = FALLTHRU_EDGE (head_bb); - be->flags |= EDGE_FALSE_VALUE; - fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE; - - basic_block body_bb = fte->dest; - - if (gimple_in_ssa_p (cfun)) - { - gsi = gsi_last_bb (cont_bb); - gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); - - offset = gimple_omp_continue_control_use (cont_stmt); - offset_incr = gimple_omp_continue_control_def (cont_stmt); - } - else - { - offset = create_tmp_var (diff_type, ".offset"); - offset_init = offset_incr = offset; - } - bound = create_tmp_var (TREE_TYPE (offset), ".bound"); - - /* Loop offset & bound go into head_bb. 
*/ - gsi = gsi_start_bb (head_bb); - - call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, - build_int_cst (integer_type_node, - IFN_GOACC_LOOP_OFFSET), - dir, range, s, - chunk_size, gwv, chunk_no); - gimple_call_set_lhs (call, offset_init); - gimple_set_location (call, loc); - gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING); - - call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, - build_int_cst (integer_type_node, - IFN_GOACC_LOOP_BOUND), - dir, range, s, - chunk_size, gwv, offset_init); - gimple_call_set_lhs (call, bound); - gimple_set_location (call, loc); - gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING); - - expr = build2 (cond_code, boolean_type_node, offset_init, bound); - gsi_insert_after (&gsi, gimple_build_cond_empty (expr), - GSI_CONTINUE_LINKING); - - /* V assignment goes into body_bb. */ - if (!gimple_in_ssa_p (cfun)) - { - gsi = gsi_start_bb (body_bb); - - expr = build2 (plus_code, iter_type, b, - fold_convert (plus_type, offset)); - expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE, - true, GSI_SAME_STMT); - ass = gimple_build_assign (v, expr); - gsi_insert_before (&gsi, ass, GSI_SAME_STMT); - if (fd->collapse > 1) - expand_oacc_collapse_vars (fd, &gsi, counts, v); - } - - /* Loop increment goes into cont_bb. If this is not a loop, we - will have spawned threads as if it was, and each one will - execute one iteration. The specification is not explicit about - whether such constructs are ill-formed or not, and they can - occur, especially when noreturn routines are involved. */ - if (cont_bb) - { - gsi = gsi_last_bb (cont_bb); - gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); - loc = gimple_location (cont_stmt); - - /* Increment offset. 
*/ - if (gimple_in_ssa_p (cfun)) - expr= build2 (plus_code, iter_type, offset, - fold_convert (plus_type, step)); - else - expr = build2 (PLUS_EXPR, diff_type, offset, step); - expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE, - true, GSI_SAME_STMT); - ass = gimple_build_assign (offset_incr, expr); - gsi_insert_before (&gsi, ass, GSI_SAME_STMT); - expr = build2 (cond_code, boolean_type_node, offset_incr, bound); - gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT); - - /* Remove the GIMPLE_OMP_CONTINUE. */ - gsi_remove (&gsi, true); - - /* Fixup edges from cont_bb */ - be = BRANCH_EDGE (cont_bb); - fte = FALLTHRU_EDGE (cont_bb); - be->flags |= EDGE_TRUE_VALUE; - fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE; - - if (chunking) - { - /* Split the beginning of exit_bb to make bottom_bb. We - need to insert a nop at the start, because splitting is - after a stmt, not before. */ - gsi = gsi_start_bb (exit_bb); - stmt = gimple_build_nop (); - gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); - split = split_block (exit_bb, stmt); - bottom_bb = split->src; - exit_bb = split->dest; - gsi = gsi_last_bb (bottom_bb); - - /* Chunk increment and test goes into bottom_bb. */ - expr = build2 (PLUS_EXPR, diff_type, chunk_no, - build_int_cst (diff_type, 1)); - ass = gimple_build_assign (chunk_no, expr); - gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING); - - /* Chunk test at end of bottom_bb. */ - expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max); - gsi_insert_after (&gsi, gimple_build_cond_empty (expr), - GSI_CONTINUE_LINKING); - - /* Fixup edges from bottom_bb. */ - split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE; - make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE); - } - } - - gsi = gsi_last_bb (exit_bb); - gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); - loc = gimple_location (gsi_stmt (gsi)); - - if (!gimple_in_ssa_p (cfun)) - { - /* Insert the final value of V, in case it is live. 
This is the - value for the only thread that survives past the join. */ - expr = fold_build2 (MINUS_EXPR, diff_type, range, dir); - expr = fold_build2 (PLUS_EXPR, diff_type, expr, s); - expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s); - expr = fold_build2 (MULT_EXPR, diff_type, expr, s); - expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr)); - expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE, - true, GSI_SAME_STMT); - ass = gimple_build_assign (v, expr); - gsi_insert_before (&gsi, ass, GSI_SAME_STMT); - } - - /* Remove the OMP_RETURN. */ - gsi_remove (&gsi, true); - - if (cont_bb) - { - /* We now have one or two nested loops. Update the loop - structures. */ - struct loop *parent = entry_bb->loop_father; - struct loop *body = body_bb->loop_father; - - if (chunking) - { - struct loop *chunk_loop = alloc_loop (); - chunk_loop->header = head_bb; - chunk_loop->latch = bottom_bb; - add_loop (chunk_loop, parent); - parent = chunk_loop; - } - else if (parent != body) - { - gcc_assert (body->header == body_bb); - gcc_assert (body->latch == cont_bb - || single_pred (body->latch) == cont_bb); - parent = NULL; - } - - if (parent) - { - struct loop *body_loop = alloc_loop (); - body_loop->header = body_bb; - body_loop->latch = cont_bb; - add_loop (body_loop, parent); - } - } -} - -/* Expand the OMP loop defined by REGION. 
*/

static void
expand_omp_for (struct omp_region *region, gimple *inner_stmt)
{
  struct omp_for_data fd;
  struct omp_for_data_loop *loops;

  /* One omp_for_data_loop per collapsed dimension of the loop nest.  */
  loops
    = (struct omp_for_data_loop *)
      alloca (gimple_omp_for_collapse (last_stmt (region->entry))
	      * sizeof (struct omp_for_data_loop));
  extract_omp_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
			&fd, loops);
  region->sched_kind = fd.sched_kind;
  region->sched_modifiers = fd.sched_modifiers;

  gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
  BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
  FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
  if (region->cont)
    {
      gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
      BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
      FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
    }
  else
    /* If there isn't a continue then this is a degenerate case where
       the introduction of abnormal edges during lowering will prevent
       original loops from being detected.  Fix that up.  */
    loops_state_set (LOOPS_NEED_FIXUP);

  /* Dispatch to the specialized expander for the loop kind and, for
     plain OpenMP loops, the schedule kind.  */
  if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
    expand_omp_simd (region, &fd);
  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_CILKFOR)
    expand_cilk_for (region, &fd);
  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
    {
      gcc_assert (!inner_stmt);
      expand_oacc_for (region, &fd);
    }
  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
    {
      if (gimple_omp_for_combined_into_p (fd.for_stmt))
	expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
      else
	expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
    }
  else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
	   && !fd.have_ordered)
    {
      if (fd.chunk_size == NULL)
	expand_omp_for_static_nochunk (region, &fd, inner_stmt);
      else
	expand_omp_for_static_chunk (region, &fd, inner_stmt);
    }
  else
    {
      /* Everything else goes through the generic libgomp-driven path;
	 compute the GOMP_loop_*_start/next builtin pair to call.  */
      int fn_index, start_ix, next_ix;

      gcc_assert (gimple_omp_for_kind (fd.for_stmt)
		  == GF_OMP_FOR_KIND_FOR);
      if (fd.chunk_size == NULL
	  && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
	fd.chunk_size = integer_zero_node;
      gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
      switch (fd.sched_kind)
	{
	case OMP_CLAUSE_SCHEDULE_RUNTIME:
	  fn_index = 3;
	  break;
	case OMP_CLAUSE_SCHEDULE_DYNAMIC:
	case OMP_CLAUSE_SCHEDULE_GUIDED:
	  if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
	      && !fd.ordered
	      && !fd.have_ordered)
	    {
	      /* Nonmonotonic dynamic/guided map to a separate builtin
		 range, offset by 3 from the monotonic variants.  */
	      fn_index = 3 + fd.sched_kind;
	      break;
	    }
	  /* FALLTHRU */
	default:
	  fn_index = fd.sched_kind;
	  break;
	}
      if (!fd.ordered)
	fn_index += fd.have_ordered * 6;
      if (fd.ordered)
	start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
      else
	start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
      next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
      if (fd.iter_type == long_long_unsigned_type_node)
	{
	  /* Shift into the _ull variants of the builtins for 64-bit
	     unsigned iteration spaces.  */
	  start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
		       - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
	  next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
		      - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
	}
      expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
			      (enum built_in_function) next_ix, inner_stmt);
    }

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_only_virtuals);
}


/* Expand code for an OpenMP sections directive.  In pseudo code, we generate

	v = GOMP_sections_start (n);
    L0:
	switch (v)
	  {
	  case 0:
	    goto L2;
	  case 1:
	    section 1;
	    goto L1;
	  case 2:
	    ...
	  case n:
	    ...
	  default:
	    abort ();
	  }
    L1:
	v = GOMP_sections_next ();
	goto L0;
    L2:
	reduction;

    If this is a combined parallel sections, replace the call to
    GOMP_sections_start with call to GOMP_sections_next.  */

static void
expand_omp_sections (struct omp_region *region)
{
  tree t, u, vin = NULL, vmain, vnext, l2;
  unsigned len;
  basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
  gimple_stmt_iterator si, switch_si;
  gomp_sections *sections_stmt;
  gimple *stmt;
  gomp_continue *cont;
  edge_iterator ei;
  edge e;
  struct omp_region *inner;
  unsigned i, casei;
  bool exit_reachable = region->cont != NULL;

  gcc_assert (region->exit != NULL);
  entry_bb = region->entry;
  l0_bb = single_succ (entry_bb);
  l1_bb = region->cont;
  l2_bb = region->exit;
  if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
    l2 = gimple_block_label (l2_bb);
  else
    {
      /* This can happen if there are reductions.  Pick the first
	 successor of L0 that is not itself a section as the L2
	 target.  */
      len = EDGE_COUNT (l0_bb->succs);
      gcc_assert (len > 0);
      e = EDGE_SUCC (l0_bb, len - 1);
      si = gsi_last_bb (e->dest);
      l2 = NULL_TREE;
      if (gsi_end_p (si)
	  || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
	l2 = gimple_block_label (e->dest);
      else
	FOR_EACH_EDGE (e, ei, l0_bb->succs)
	  {
	    si = gsi_last_bb (e->dest);
	    if (gsi_end_p (si)
		|| gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
	      {
		l2 = gimple_block_label (e->dest);
		break;
	      }
	  }
    }
  if (exit_reachable)
    default_bb = create_empty_bb (l1_bb->prev_bb);
  else
    default_bb = create_empty_bb (l0_bb);

  /* We will build a switch() with enough cases for all the
     GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
     and a default case to abort if something goes wrong.  */
  len = EDGE_COUNT (l0_bb->succs);

  /* Use vec::quick_push on label_vec throughout, since we know the size
     in advance.  */
  auto_vec<tree> label_vec (len);

  /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
     GIMPLE_OMP_SECTIONS statement.  */
  si = gsi_last_bb (entry_bb);
  sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
  gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
  vin = gimple_omp_sections_control (sections_stmt);
  if (!is_combined_parallel (region))
    {
      /* If we are not inside a combined parallel+sections region,
	 call GOMP_sections_start.  */
      t = build_int_cst (unsigned_type_node, len - 1);
      u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
      stmt = gimple_build_call (u, 1, t);
    }
  else
    {
      /* Otherwise, call GOMP_sections_next.  */
      u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
      stmt = gimple_build_call (u, 0);
    }
  gimple_call_set_lhs (stmt, vin);
  gsi_insert_after (&si, stmt, GSI_SAME_STMT);
  gsi_remove (&si, true);

  /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
     L0_BB.  */
  switch_si = gsi_last_bb (l0_bb);
  gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
  if (exit_reachable)
    {
      cont = as_a <gomp_continue *> (last_stmt (l1_bb));
      gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
      vmain = gimple_omp_continue_control_use (cont);
      vnext = gimple_omp_continue_control_def (cont);
    }
  else
    {
      vmain = vin;
      vnext = NULL_TREE;
    }

  /* Case 0 means "no more work"; it jumps straight to L2.  */
  t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
  label_vec.quick_push (t);
  i = 1;

  /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR.  */
  for (inner = region->inner, casei = 1;
       inner;
       inner = inner->next, i++, casei++)
    {
      basic_block s_entry_bb, s_exit_bb;

      /* Skip optional reduction region.  */
      if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
	{
	  --i;
	  --casei;
	  continue;
	}

      s_entry_bb = inner->entry;
      s_exit_bb = inner->exit;

      t = gimple_block_label (s_entry_bb);
      u = build_int_cst (unsigned_type_node, casei);
      u = build_case_label (u, NULL, t);
      label_vec.quick_push (u);

      si = gsi_last_bb (s_entry_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
      gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
      gsi_remove (&si, true);
      single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;

      if (s_exit_bb == NULL)
	continue;

      si = gsi_last_bb (s_exit_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
      gsi_remove (&si, true);

      single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
    }

  /* Error handling code goes in DEFAULT_BB.  */
  t = gimple_block_label (default_bb);
  u = build_case_label (NULL, NULL, t);
  make_edge (l0_bb, default_bb, 0);
  add_bb_to_loop (default_bb, current_loops->tree_root);

  stmt = gimple_build_switch (vmain, u, label_vec);
  gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
  gsi_remove (&switch_si, true);

  si = gsi_start_bb (default_bb);
  stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
  gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);

  if (exit_reachable)
    {
      tree bfn_decl;

      /* Code to get the next section goes in L1_BB.  */
      si = gsi_last_bb (l1_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);

      bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
      stmt = gimple_build_call (bfn_decl, 0);
      gimple_call_set_lhs (stmt, vnext);
      gsi_insert_after (&si, stmt, GSI_SAME_STMT);
      gsi_remove (&si, true);

      single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
    }

  /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB.  */
  si = gsi_last_bb (l2_bb);
  if (gimple_omp_return_nowait_p (gsi_stmt (si)))
    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
  else if (gimple_omp_return_lhs (gsi_stmt (si)))
    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
  else
    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
  stmt = gimple_build_call (t, 0);
  if (gimple_omp_return_lhs (gsi_stmt (si)))
    gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
  gsi_insert_after (&si, stmt, GSI_SAME_STMT);
  gsi_remove (&si, true);

  set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
}


/* Expand code for an OpenMP single directive.  We've already expanded
   much of the code, here we simply place the GOMP_barrier call.
*/ - -static void -expand_omp_single (struct omp_region *region) -{ - basic_block entry_bb, exit_bb; - gimple_stmt_iterator si; - - entry_bb = region->entry; - exit_bb = region->exit; - - si = gsi_last_bb (entry_bb); - gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE); - gsi_remove (&si, true); - single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; - - si = gsi_last_bb (exit_bb); - if (!gimple_omp_return_nowait_p (gsi_stmt (si))) - { - tree t = gimple_omp_return_lhs (gsi_stmt (si)); - gsi_insert_after (&si, build_omp_barrier (t), GSI_SAME_STMT); - } - gsi_remove (&si, true); - single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU; -} - - -/* Generic expansion for OpenMP synchronization directives: master, - ordered and critical. All we need to do here is remove the entry - and exit markers for REGION. */ - -static void -expand_omp_synch (struct omp_region *region) -{ - basic_block entry_bb, exit_bb; - gimple_stmt_iterator si; - - entry_bb = region->entry; - exit_bb = region->exit; - - si = gsi_last_bb (entry_bb); - gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE - || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER - || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP - || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED - || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL - || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS); - gsi_remove (&si, true); - single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; - - if (exit_bb) - { - si = gsi_last_bb (exit_bb); - gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN); - gsi_remove (&si, true); - single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU; - } -} - -/* A subroutine of expand_omp_atomic. Attempt to implement the atomic - operation as a normal volatile load. 
*/ - -static bool -expand_omp_atomic_load (basic_block load_bb, tree addr, - tree loaded_val, int index) -{ - enum built_in_function tmpbase; - gimple_stmt_iterator gsi; - basic_block store_bb; - location_t loc; - gimple *stmt; - tree decl, call, type, itype; - - gsi = gsi_last_bb (load_bb); - stmt = gsi_stmt (gsi); - gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD); - loc = gimple_location (stmt); - - /* ??? If the target does not implement atomic_load_optab[mode], and mode - is smaller than word size, then expand_atomic_load assumes that the load - is atomic. We could avoid the builtin entirely in this case. */ - - tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1); - decl = builtin_decl_explicit (tmpbase); - if (decl == NULL_TREE) - return false; - - type = TREE_TYPE (loaded_val); - itype = TREE_TYPE (TREE_TYPE (decl)); - - call = build_call_expr_loc (loc, decl, 2, addr, - build_int_cst (NULL, - gimple_omp_atomic_seq_cst_p (stmt) - ? MEMMODEL_SEQ_CST - : MEMMODEL_RELAXED)); - if (!useless_type_conversion_p (type, itype)) - call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call); - call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call); - - force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT); - gsi_remove (&gsi, true); - - store_bb = single_succ (load_bb); - gsi = gsi_last_bb (store_bb); - gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE); - gsi_remove (&gsi, true); - - if (gimple_in_ssa_p (cfun)) - update_ssa (TODO_update_ssa_no_phi); - - return true; -} - -/* A subroutine of expand_omp_atomic. Attempt to implement the atomic - operation as a normal volatile store. 
*/

static bool
expand_omp_atomic_store (basic_block load_bb, tree addr,
			 tree loaded_val, tree stored_val, int index)
{
  enum built_in_function tmpbase;
  gimple_stmt_iterator gsi;
  basic_block store_bb = single_succ (load_bb);
  location_t loc;
  gimple *stmt;
  tree decl, call, type, itype;
  machine_mode imode;
  bool exchange;

  gsi = gsi_last_bb (load_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);

  /* If the load value is needed, then this isn't a store but an exchange.  */
  exchange = gimple_omp_atomic_need_value_p (stmt);

  gsi = gsi_last_bb (store_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
  loc = gimple_location (stmt);

  /* ??? If the target does not implement atomic_store_optab[mode], and mode
     is smaller than word size, then expand_atomic_store assumes that the store
     is atomic.  We could avoid the builtin entirely in this case.  */

  /* INDEX is log2 of the access size; +1 maps onto the sized builtin.  */
  tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
  tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;

  type = TREE_TYPE (stored_val);

  /* Dig out the type of the function's second argument.  */
  itype = TREE_TYPE (decl);
  itype = TYPE_ARG_TYPES (itype);
  itype = TREE_CHAIN (itype);
  itype = TREE_VALUE (itype);
  imode = TYPE_MODE (itype);

  if (exchange && !can_atomic_exchange_p (imode, true))
    return false;

  if (!useless_type_conversion_p (itype, type))
    stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
  call = build_call_expr_loc (loc, decl, 3, addr, stored_val,
			      build_int_cst (NULL,
					     gimple_omp_atomic_seq_cst_p (stmt)
					     ? MEMMODEL_SEQ_CST
					     : MEMMODEL_RELAXED));
  if (exchange)
    {
      /* An exchange also produces the old value; assign it to
	 LOADED_VAL.  */
      if (!useless_type_conversion_p (type, itype))
	call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
      call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
    }

  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above.  */
  gsi = gsi_last_bb (load_bb);
  gsi_remove (&gsi, true);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);

  return true;
}

/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   operation as a __atomic_fetch_op builtin.  INDEX is log2 of the
   size of the data type, and thus usable to find the index of the builtin
   decl.  Returns false if the expression is not of the proper form.  */

static bool
expand_omp_atomic_fetch_op (basic_block load_bb,
			    tree addr, tree loaded_val,
			    tree stored_val, int index)
{
  enum built_in_function oldbase, newbase, tmpbase;
  tree decl, itype, call;
  tree lhs, rhs;
  basic_block store_bb = single_succ (load_bb);
  gimple_stmt_iterator gsi;
  gimple *stmt;
  location_t loc;
  enum tree_code code;
  bool need_old, need_new;
  machine_mode imode;
  bool seq_cst;

  /* We expect to find the following sequences:

   load_bb:
       GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)

   store_bb:
       val = tmp OP something; (or: something OP tmp)
       GIMPLE_OMP_STORE (val)

  ???FIXME: Allow a more flexible sequence.
  Perhaps use data flow to pick the statements.

  */

  gsi = gsi_after_labels (store_bb);
  stmt = gsi_stmt (gsi);
  loc = gimple_location (stmt);
  if (!is_gimple_assign (stmt))
    return false;
  gsi_next (&gsi);
  if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
    return false;
  need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
  need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
  seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb));
  gcc_checking_assert (!need_old || !need_new);

  if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
    return false;

  /* Check for one of the supported fetch-op operations.  */
  code = gimple_assign_rhs_code (stmt);
  switch (code)
    {
    case PLUS_EXPR:
    case POINTER_PLUS_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
      newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
      break;
    case MINUS_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
      newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
      break;
    case BIT_AND_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
      newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
      break;
    case BIT_IOR_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
      newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
      break;
    case BIT_XOR_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
      newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
      break;
    default:
      return false;
    }

  /* Make sure the expression is of the proper form.  The loaded value
     may appear on either side of a commutative operation.  */
  if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
    rhs = gimple_assign_rhs2 (stmt);
  else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
	   && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
    rhs = gimple_assign_rhs1 (stmt);
  else
    return false;

  /* Pick the fetch-and-op (old value) or op-and-fetch (new value)
     sized builtin depending on which value the caller needs.  */
  tmpbase = ((enum built_in_function)
	     ((need_new ? newbase : oldbase) + index + 1));
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;
  itype = TREE_TYPE (TREE_TYPE (decl));
  imode = TYPE_MODE (itype);

  /* We could test all of the various optabs involved, but the fact of the
     matter is that (with the exception of i486 vs i586 and xadd) all targets
     that support any atomic operaton optab also implements compare-and-swap.
     Let optabs.c take care of expanding any compare-and-swap loop.  */
  if (!can_compare_and_swap_p (imode, true))
    return false;

  gsi = gsi_last_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);

  /* OpenMP does not imply any barrier-like semantics on its atomic ops.
     It only requires that the operation happen atomically.  Thus we can
     use the RELAXED memory model.  */
  call = build_call_expr_loc (loc, decl, 3, addr,
			      fold_convert_loc (loc, itype, rhs),
			      build_int_cst (NULL,
					     seq_cst ? MEMMODEL_SEQ_CST
						     : MEMMODEL_RELAXED));

  if (need_old || need_new)
    {
      lhs = need_old ? loaded_val : stored_val;
      call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
      call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
    }
  else
    call = fold_convert_loc (loc, void_type_node, call);
  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_ATOMIC_STORE and then the feeding assignment
     that is now dead (its effect is subsumed by the builtin call).  */
  gsi = gsi_last_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
  gsi_remove (&gsi, true);
  gsi = gsi_last_bb (store_bb);
  stmt = gsi_stmt (gsi);
  gsi_remove (&gsi, true);

  if (gimple_in_ssa_p (cfun))
    {
      release_defs (stmt);
      update_ssa (TODO_update_ssa_no_phi);
    }

  return true;
}

/* A subroutine of expand_omp_atomic.
Implement the atomic operation as: - - oldval = *addr; - repeat: - newval = rhs; // with oldval replacing *addr in rhs - oldval = __sync_val_compare_and_swap (addr, oldval, newval); - if (oldval != newval) - goto repeat; - - INDEX is log2 of the size of the data type, and thus usable to find the - index of the builtin decl. */ - -static bool -expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb, - tree addr, tree loaded_val, tree stored_val, - int index) -{ - tree loadedi, storedi, initial, new_storedi, old_vali; - tree type, itype, cmpxchg, iaddr; - gimple_stmt_iterator si; - basic_block loop_header = single_succ (load_bb); - gimple *phi, *stmt; - edge e; - enum built_in_function fncode; - - /* ??? We need a non-pointer interface to __atomic_compare_exchange in - order to use the RELAXED memory model effectively. */ - fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N - + index + 1); - cmpxchg = builtin_decl_explicit (fncode); - if (cmpxchg == NULL_TREE) - return false; - type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr))); - itype = TREE_TYPE (TREE_TYPE (cmpxchg)); - - if (!can_compare_and_swap_p (TYPE_MODE (itype), true)) - return false; - - /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */ - si = gsi_last_bb (load_bb); - gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD); - - /* For floating-point values, we'll need to view-convert them to integers - so that we can perform the atomic compare and swap. Simplify the - following code by always setting up the "i"ntegral variables. 
*/ - if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type)) - { - tree iaddr_val; - - iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode, - true)); - iaddr_val - = force_gimple_operand_gsi (&si, - fold_convert (TREE_TYPE (iaddr), addr), - false, NULL_TREE, true, GSI_SAME_STMT); - stmt = gimple_build_assign (iaddr, iaddr_val); - gsi_insert_before (&si, stmt, GSI_SAME_STMT); - loadedi = create_tmp_var (itype); - if (gimple_in_ssa_p (cfun)) - loadedi = make_ssa_name (loadedi); - } - else - { - iaddr = addr; - loadedi = loaded_val; - } - - fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1); - tree loaddecl = builtin_decl_explicit (fncode); - if (loaddecl) - initial - = fold_convert (TREE_TYPE (TREE_TYPE (iaddr)), - build_call_expr (loaddecl, 2, iaddr, - build_int_cst (NULL_TREE, - MEMMODEL_RELAXED))); - else - initial = build2 (MEM_REF, TREE_TYPE (TREE_TYPE (iaddr)), iaddr, - build_int_cst (TREE_TYPE (iaddr), 0)); - - initial - = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true, - GSI_SAME_STMT); - - /* Move the value to the LOADEDI temporary. 
*/ - if (gimple_in_ssa_p (cfun)) - { - gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header))); - phi = create_phi_node (loadedi, loop_header); - SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)), - initial); - } - else - gsi_insert_before (&si, - gimple_build_assign (loadedi, initial), - GSI_SAME_STMT); - if (loadedi != loaded_val) - { - gimple_stmt_iterator gsi2; - tree x; - - x = build1 (VIEW_CONVERT_EXPR, type, loadedi); - gsi2 = gsi_start_bb (loop_header); - if (gimple_in_ssa_p (cfun)) - { - gassign *stmt; - x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE, - true, GSI_SAME_STMT); - stmt = gimple_build_assign (loaded_val, x); - gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT); - } - else - { - x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x); - force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE, - true, GSI_SAME_STMT); - } - } - gsi_remove (&si, true); - - si = gsi_last_bb (store_bb); - gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE); - - if (iaddr == addr) - storedi = stored_val; - else - storedi = - force_gimple_operand_gsi (&si, - build1 (VIEW_CONVERT_EXPR, itype, - stored_val), true, NULL_TREE, true, - GSI_SAME_STMT); - - /* Build the compare&swap statement. */ - new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi); - new_storedi = force_gimple_operand_gsi (&si, - fold_convert (TREE_TYPE (loadedi), - new_storedi), - true, NULL_TREE, - true, GSI_SAME_STMT); - - if (gimple_in_ssa_p (cfun)) - old_vali = loadedi; - else - { - old_vali = create_tmp_var (TREE_TYPE (loadedi)); - stmt = gimple_build_assign (old_vali, loadedi); - gsi_insert_before (&si, stmt, GSI_SAME_STMT); - - stmt = gimple_build_assign (loadedi, new_storedi); - gsi_insert_before (&si, stmt, GSI_SAME_STMT); - } - - /* Note that we always perform the comparison as an integer, even for - floating point. This allows the atomic operation to properly - succeed even with NaNs and -0.0. 
*/ - stmt = gimple_build_cond_empty - (build2 (NE_EXPR, boolean_type_node, - new_storedi, old_vali)); - gsi_insert_before (&si, stmt, GSI_SAME_STMT); - - /* Update cfg. */ - e = single_succ_edge (store_bb); - e->flags &= ~EDGE_FALLTHRU; - e->flags |= EDGE_FALSE_VALUE; - - e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE); - - /* Copy the new value to loadedi (we already did that before the condition - if we are not in SSA). */ - if (gimple_in_ssa_p (cfun)) - { - phi = gimple_seq_first_stmt (phi_nodes (loop_header)); - SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi); - } - - /* Remove GIMPLE_OMP_ATOMIC_STORE. */ - gsi_remove (&si, true); - - struct loop *loop = alloc_loop (); - loop->header = loop_header; - loop->latch = store_bb; - add_loop (loop, loop_header->loop_father); - - if (gimple_in_ssa_p (cfun)) - update_ssa (TODO_update_ssa_no_phi); - - return true; -} - -/* A subroutine of expand_omp_atomic. Implement the atomic operation as: - - GOMP_atomic_start (); - *addr = rhs; - GOMP_atomic_end (); - - The result is not globally atomic, but works so long as all parallel - references are within #pragma omp atomic directives. According to - responses received from omp@openmp.org, appears to be within spec. - Which makes sense, since that's how several other compilers handle - this situation as well. - LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're - expanding. STORED_VAL is the operand of the matching - GIMPLE_OMP_ATOMIC_STORE. 
- - We replace - GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with - loaded_val = *addr; - - and replace - GIMPLE_OMP_ATOMIC_STORE (stored_val) with - *addr = stored_val; -*/ - -static bool -expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb, - tree addr, tree loaded_val, tree stored_val) -{ - gimple_stmt_iterator si; - gassign *stmt; - tree t; - - si = gsi_last_bb (load_bb); - gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD); - - t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START); - t = build_call_expr (t, 0); - force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT); - - stmt = gimple_build_assign (loaded_val, build_simple_mem_ref (addr)); - gsi_insert_before (&si, stmt, GSI_SAME_STMT); - gsi_remove (&si, true); - - si = gsi_last_bb (store_bb); - gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE); - - stmt = gimple_build_assign (build_simple_mem_ref (unshare_expr (addr)), - stored_val); - gsi_insert_before (&si, stmt, GSI_SAME_STMT); - - t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END); - t = build_call_expr (t, 0); - force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT); - gsi_remove (&si, true); - - if (gimple_in_ssa_p (cfun)) - update_ssa (TODO_update_ssa_no_phi); - return true; -} - -/* Expand an GIMPLE_OMP_ATOMIC statement. We try to expand - using expand_omp_atomic_fetch_op. If it failed, we try to - call expand_omp_atomic_pipeline, and if it fails too, the - ultimate fallback is wrapping the operation in a mutex - (expand_omp_atomic_mutex). REGION is the atomic region built - by build_omp_regions_1(). 
*/ - -static void -expand_omp_atomic (struct omp_region *region) -{ - basic_block load_bb = region->entry, store_bb = region->exit; - gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb)); - gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb)); - tree loaded_val = gimple_omp_atomic_load_lhs (load); - tree addr = gimple_omp_atomic_load_rhs (load); - tree stored_val = gimple_omp_atomic_store_val (store); - tree type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr))); - HOST_WIDE_INT index; - - /* Make sure the type is one of the supported sizes. */ - index = tree_to_uhwi (TYPE_SIZE_UNIT (type)); - index = exact_log2 (index); - if (index >= 0 && index <= 4) - { - unsigned int align = TYPE_ALIGN_UNIT (type); - - /* __sync builtins require strict data alignment. */ - if (exact_log2 (align) >= index) - { - /* Atomic load. */ - if (loaded_val == stored_val - && (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT - || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT) - && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD - && expand_omp_atomic_load (load_bb, addr, loaded_val, index)) - return; - - /* Atomic store. */ - if ((GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT - || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT) - && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD - && store_bb == single_succ (load_bb) - && first_stmt (store_bb) == store - && expand_omp_atomic_store (load_bb, addr, loaded_val, - stored_val, index)) - return; - - /* When possible, use specialized atomic update functions. */ - if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type)) - && store_bb == single_succ (load_bb) - && expand_omp_atomic_fetch_op (load_bb, addr, - loaded_val, stored_val, index)) - return; - - /* If we don't have specialized __sync builtins, try and implement - as a compare and swap loop. 
*/ - if (expand_omp_atomic_pipeline (load_bb, store_bb, addr, - loaded_val, stored_val, index)) - return; - } - } - - /* The ultimate fallback is wrapping the operation in a mutex. */ - expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val); -} - - -/* Encode an oacc launch argument. This matches the GOMP_LAUNCH_PACK - macro on gomp-constants.h. We do not check for overflow. */ - -static tree -oacc_launch_pack (unsigned code, tree device, unsigned op) -{ - tree res; - - res = build_int_cst (unsigned_type_node, GOMP_LAUNCH_PACK (code, 0, op)); - if (device) - { - device = fold_build2 (LSHIFT_EXPR, unsigned_type_node, - device, build_int_cst (unsigned_type_node, - GOMP_LAUNCH_DEVICE_SHIFT)); - res = fold_build2 (BIT_IOR_EXPR, unsigned_type_node, res, device); - } - return res; -} - -/* Look for compute grid dimension clauses and convert to an attribute - attached to FN. This permits the target-side code to (a) massage - the dimensions, (b) emit that data and (c) optimize. Non-constant - dimensions are pushed onto ARGS. - - The attribute value is a TREE_LIST. A set of dimensions is - represented as a list of INTEGER_CST. Those that are runtime - exprs are represented as an INTEGER_CST of zero. - - TOOO. Normally the attribute will just contain a single such list. If - however it contains a list of lists, this will represent the use of - device_type. Each member of the outer list is an assoc list of - dimensions, keyed by the device type. The first entry will be the - default. Well, that's the plan. */ - -#define OACC_FN_ATTRIB "oacc function" - -/* Replace any existing oacc fn attribute with updated dimensions. */ - -void -replace_oacc_fn_attrib (tree fn, tree dims) -{ - tree ident = get_identifier (OACC_FN_ATTRIB); - tree attribs = DECL_ATTRIBUTES (fn); - - /* If we happen to be present as the first attrib, drop it. 
*/ - if (attribs && TREE_PURPOSE (attribs) == ident) - attribs = TREE_CHAIN (attribs); - DECL_ATTRIBUTES (fn) = tree_cons (ident, dims, attribs); -} - -/* Scan CLAUSES for launch dimensions and attach them to the oacc - function attribute. Push any that are non-constant onto the ARGS - list, along with an appropriate GOMP_LAUNCH_DIM tag. IS_KERNEL is - true, if these are for a kernels region offload function. */ - -void -set_oacc_fn_attrib (tree fn, tree clauses, bool is_kernel, vec<tree> *args) -{ - /* Must match GOMP_DIM ordering. */ - static const omp_clause_code ids[] - = { OMP_CLAUSE_NUM_GANGS, OMP_CLAUSE_NUM_WORKERS, - OMP_CLAUSE_VECTOR_LENGTH }; - unsigned ix; - tree dims[GOMP_DIM_MAX]; - tree attr = NULL_TREE; - unsigned non_const = 0; - - for (ix = GOMP_DIM_MAX; ix--;) - { - tree clause = find_omp_clause (clauses, ids[ix]); - tree dim = NULL_TREE; - - if (clause) - dim = OMP_CLAUSE_EXPR (clause, ids[ix]); - dims[ix] = dim; - if (dim && TREE_CODE (dim) != INTEGER_CST) - { - dim = integer_zero_node; - non_const |= GOMP_DIM_MASK (ix); - } - attr = tree_cons (NULL_TREE, dim, attr); - /* Note kernelness with TREE_PUBLIC. */ - if (is_kernel) - TREE_PUBLIC (attr) = 1; - } - - replace_oacc_fn_attrib (fn, attr); - - if (non_const) - { - /* Push a dynamic argument set. */ - args->safe_push (oacc_launch_pack (GOMP_LAUNCH_DIM, - NULL_TREE, non_const)); - for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++) - if (non_const & GOMP_DIM_MASK (ix)) - args->safe_push (dims[ix]); - } -} - -/* Process the routine's dimension clauess to generate an attribute - value. Issue diagnostics as appropriate. We default to SEQ - (OpenACC 2.5 clarifies this). All dimensions have a size of zero - (dynamic). TREE_PURPOSE is set to indicate whether that dimension - can have a loop partitioned on it. non-zero indicates - yes, zero indicates no. By construction once a non-zero has been - reached, further inner dimensions must also be non-zero. 
We set - TREE_VALUE to zero for the dimensions that may be partitioned and - 1 for the other ones -- if a loop is (erroneously) spawned at - an outer level, we don't want to try and partition it. */ - -tree -build_oacc_routine_dims (tree clauses) -{ - /* Must match GOMP_DIM ordering. */ - static const omp_clause_code ids[] = - {OMP_CLAUSE_GANG, OMP_CLAUSE_WORKER, OMP_CLAUSE_VECTOR, OMP_CLAUSE_SEQ}; - int ix; - int level = -1; - - for (; clauses; clauses = OMP_CLAUSE_CHAIN (clauses)) - for (ix = GOMP_DIM_MAX + 1; ix--;) - if (OMP_CLAUSE_CODE (clauses) == ids[ix]) - { - if (level >= 0) - error_at (OMP_CLAUSE_LOCATION (clauses), - "multiple loop axes specified for routine"); - level = ix; - break; - } - - /* Default to SEQ. */ - if (level < 0) - level = GOMP_DIM_MAX; - - tree dims = NULL_TREE; - - for (ix = GOMP_DIM_MAX; ix--;) - dims = tree_cons (build_int_cst (boolean_type_node, ix >= level), - build_int_cst (integer_type_node, ix < level), dims); - - return dims; -} - -/* Retrieve the oacc function attrib and return it. Non-oacc - functions will return NULL. */ - -tree -get_oacc_fn_attrib (tree fn) -{ - return lookup_attribute (OACC_FN_ATTRIB, DECL_ATTRIBUTES (fn)); -} - -/* Return true if this oacc fn attrib is for a kernels offload - region. We use the TREE_PUBLIC flag of each dimension -- only - need to check the first one. */ - -bool -oacc_fn_attrib_kernels_p (tree attr) -{ - return TREE_PUBLIC (TREE_VALUE (attr)); -} - -/* Return level at which oacc routine may spawn a partitioned loop, or - -1 if it is not a routine (i.e. is an offload fn). */ - -static int -oacc_fn_attrib_level (tree attr) -{ - tree pos = TREE_VALUE (attr); - - if (!TREE_PURPOSE (pos)) - return -1; - - int ix = 0; - for (ix = 0; ix != GOMP_DIM_MAX; - ix++, pos = TREE_CHAIN (pos)) - if (!integer_zerop (TREE_PURPOSE (pos))) - break; - - return ix; -} - -/* Extract an oacc execution dimension from FN. 
FN must be an - offloaded function or routine that has already had its execution - dimensions lowered to the target-specific values. */ - -int -get_oacc_fn_dim_size (tree fn, int axis) -{ - tree attrs = get_oacc_fn_attrib (fn); - - gcc_assert (axis < GOMP_DIM_MAX); - - tree dims = TREE_VALUE (attrs); - while (axis--) - dims = TREE_CHAIN (dims); - - int size = TREE_INT_CST_LOW (TREE_VALUE (dims)); - - return size; -} - -/* Extract the dimension axis from an IFN_GOACC_DIM_POS or - IFN_GOACC_DIM_SIZE call. */ - -int -get_oacc_ifn_dim_arg (const gimple *stmt) -{ - gcc_checking_assert (gimple_call_internal_fn (stmt) == IFN_GOACC_DIM_SIZE - || gimple_call_internal_fn (stmt) == IFN_GOACC_DIM_POS); - tree arg = gimple_call_arg (stmt, 0); - HOST_WIDE_INT axis = TREE_INT_CST_LOW (arg); - - gcc_checking_assert (axis >= 0 && axis < GOMP_DIM_MAX); - return (int) axis; -} - -/* Mark the loops inside the kernels region starting at REGION_ENTRY and ending - at REGION_EXIT. */ - -static void -mark_loops_in_oacc_kernels_region (basic_block region_entry, - basic_block region_exit) -{ - struct loop *outer = region_entry->loop_father; - gcc_assert (region_exit == NULL || outer == region_exit->loop_father); - - /* Don't parallelize the kernels region if it contains more than one outer - loop. */ - unsigned int nr_outer_loops = 0; - struct loop *single_outer = NULL; - for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next) - { - gcc_assert (loop_outer (loop) == outer); - - if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry)) - continue; - - if (region_exit != NULL - && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit)) - continue; - - nr_outer_loops++; - single_outer = loop; - } - if (nr_outer_loops != 1) - return; - - for (struct loop *loop = single_outer->inner; loop != NULL; loop = loop->inner) - if (loop->next) - return; - - /* Mark the loops in the region. 
*/ - for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner) - loop->in_oacc_kernels_region = true; -} - -/* Types used to pass grid and wortkgroup sizes to kernel invocation. */ - -struct GTY(()) grid_launch_attributes_trees -{ - tree kernel_dim_array_type; - tree kernel_lattrs_dimnum_decl; - tree kernel_lattrs_grid_decl; - tree kernel_lattrs_group_decl; - tree kernel_launch_attributes_type; -}; - -static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees; - -/* Create types used to pass kernel launch attributes to target. */ - -static void -grid_create_kernel_launch_attr_types (void) -{ - if (grid_attr_trees) - return; - grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> (); - - tree dim_arr_index_type - = build_index_type (build_int_cst (integer_type_node, 2)); - grid_attr_trees->kernel_dim_array_type - = build_array_type (uint32_type_node, dim_arr_index_type); - - grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE); - grid_attr_trees->kernel_lattrs_dimnum_decl - = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"), - uint32_type_node); - DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE; - - grid_attr_trees->kernel_lattrs_grid_decl - = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"), - grid_attr_trees->kernel_dim_array_type); - DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl) - = grid_attr_trees->kernel_lattrs_dimnum_decl; - grid_attr_trees->kernel_lattrs_group_decl - = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"), - grid_attr_trees->kernel_dim_array_type); - DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl) - = grid_attr_trees->kernel_lattrs_grid_decl; - finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type, - "__gomp_kernel_launch_attributes", - grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE); -} - -/* Insert before the current statement in GSI a store of VALUE to INDEX of - array (of 
type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be - of type uint32_type_node. */ - -static void -grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var, - tree fld_decl, int index, tree value) -{ - tree ref = build4 (ARRAY_REF, uint32_type_node, - build3 (COMPONENT_REF, - grid_attr_trees->kernel_dim_array_type, - range_var, fld_decl, NULL_TREE), - build_int_cst (integer_type_node, index), - NULL_TREE, NULL_TREE); - gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT); -} - -/* Return a tree representation of a pointer to a structure with grid and - work-group size information. Statements filling that information will be - inserted before GSI, TGT_STMT is the target statement which has the - necessary information in it. */ - -static tree -grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi, - gomp_target *tgt_stmt) -{ - grid_create_kernel_launch_attr_types (); - tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type, - "__kernel_launch_attrs"); - - unsigned max_dim = 0; - for (tree clause = gimple_omp_target_clauses (tgt_stmt); - clause; - clause = OMP_CLAUSE_CHAIN (clause)) - { - if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_) - continue; - - unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause); - max_dim = MAX (dim, max_dim); - - grid_insert_store_range_dim (gsi, lattrs, - grid_attr_trees->kernel_lattrs_grid_decl, - dim, OMP_CLAUSE__GRIDDIM__SIZE (clause)); - grid_insert_store_range_dim (gsi, lattrs, - grid_attr_trees->kernel_lattrs_group_decl, - dim, OMP_CLAUSE__GRIDDIM__GROUP (clause)); - } - - tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs, - grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE); - gcc_checking_assert (max_dim <= 2); - tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1); - gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions), - GSI_SAME_STMT); - TREE_ADDRESSABLE (lattrs) = 1; - return build_fold_addr_expr (lattrs); 
-} - -/* Build target argument identifier from the DEVICE identifier, value - identifier ID and whether the element also has a SUBSEQUENT_PARAM. */ - -static tree -get_target_argument_identifier_1 (int device, bool subseqent_param, int id) -{ - tree t = build_int_cst (integer_type_node, device); - if (subseqent_param) - t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t, - build_int_cst (integer_type_node, - GOMP_TARGET_ARG_SUBSEQUENT_PARAM)); - t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t, - build_int_cst (integer_type_node, id)); - return t; -} - -/* Like above but return it in type that can be directly stored as an element - of the argument array. */ - -static tree -get_target_argument_identifier (int device, bool subseqent_param, int id) -{ - tree t = get_target_argument_identifier_1 (device, subseqent_param, id); - return fold_convert (ptr_type_node, t); -} - -/* Return a target argument consisting of DEVICE identifier, value identifier - ID, and the actual VALUE. */ - -static tree -get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id, - tree value) -{ - tree t = fold_build2 (LSHIFT_EXPR, integer_type_node, - fold_convert (integer_type_node, value), - build_int_cst (unsigned_type_node, - GOMP_TARGET_ARG_VALUE_SHIFT)); - t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t, - get_target_argument_identifier_1 (device, false, id)); - t = fold_convert (ptr_type_node, t); - return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT); -} - -/* If VALUE is an integer constant greater than -2^15 and smaller than 2^15, - push one argument to ARGS with both the DEVICE, ID and VALUE embedded in it, - otherwise push an identifier (with DEVICE and ID) and the VALUE in two - arguments. 
*/ - -static void -push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device, - int id, tree value, vec <tree> *args) -{ - if (tree_fits_shwi_p (value) - && tree_to_shwi (value) > -(1 << 15) - && tree_to_shwi (value) < (1 << 15)) - args->quick_push (get_target_argument_value (gsi, device, id, value)); - else - { - args->quick_push (get_target_argument_identifier (device, true, id)); - value = fold_convert (ptr_type_node, value); - value = force_gimple_operand_gsi (gsi, value, true, NULL, true, - GSI_SAME_STMT); - args->quick_push (value); - } -} - -/* Create an array of arguments that is then passed to GOMP_target. */ - -static tree -get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt) -{ - auto_vec <tree, 6> args; - tree clauses = gimple_omp_target_clauses (tgt_stmt); - tree t, c = find_omp_clause (clauses, OMP_CLAUSE_NUM_TEAMS); - if (c) - t = OMP_CLAUSE_NUM_TEAMS_EXPR (c); - else - t = integer_minus_one_node; - push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL, - GOMP_TARGET_ARG_NUM_TEAMS, t, &args); - - c = find_omp_clause (clauses, OMP_CLAUSE_THREAD_LIMIT); - if (c) - t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c); - else - t = integer_minus_one_node; - push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL, - GOMP_TARGET_ARG_THREAD_LIMIT, t, - &args); - - /* Add HSA-specific grid sizes, if available. */ - if (find_omp_clause (gimple_omp_target_clauses (tgt_stmt), - OMP_CLAUSE__GRIDDIM_)) - { - t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, - GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES); - args.quick_push (t); - args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt)); - } - - /* Produce more, perhaps device specific, arguments here. 
*/ - - tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node, - args.length () + 1), - ".omp_target_args"); - for (unsigned i = 0; i < args.length (); i++) - { - tree ref = build4 (ARRAY_REF, ptr_type_node, argarray, - build_int_cst (integer_type_node, i), - NULL_TREE, NULL_TREE); - gsi_insert_before (gsi, gimple_build_assign (ref, args[i]), - GSI_SAME_STMT); - } - tree ref = build4 (ARRAY_REF, ptr_type_node, argarray, - build_int_cst (integer_type_node, args.length ()), - NULL_TREE, NULL_TREE); - gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node), - GSI_SAME_STMT); - TREE_ADDRESSABLE (argarray) = 1; - return build_fold_addr_expr (argarray); -} - -/* Expand the GIMPLE_OMP_TARGET starting at REGION. */ - -static void -expand_omp_target (struct omp_region *region) -{ - basic_block entry_bb, exit_bb, new_bb; - struct function *child_cfun; - tree child_fn, block, t; - gimple_stmt_iterator gsi; - gomp_target *entry_stmt; - gimple *stmt; - edge e; - bool offloaded, data_region; - - entry_stmt = as_a <gomp_target *> (last_stmt (region->entry)); - new_bb = region->entry; - - offloaded = is_gimple_omp_offloaded (entry_stmt); - switch (gimple_omp_target_kind (entry_stmt)) - { - case GF_OMP_TARGET_KIND_REGION: - case GF_OMP_TARGET_KIND_UPDATE: - case GF_OMP_TARGET_KIND_ENTER_DATA: - case GF_OMP_TARGET_KIND_EXIT_DATA: - case GF_OMP_TARGET_KIND_OACC_PARALLEL: - case GF_OMP_TARGET_KIND_OACC_KERNELS: - case GF_OMP_TARGET_KIND_OACC_UPDATE: - case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: - case GF_OMP_TARGET_KIND_OACC_DECLARE: - data_region = false; - break; - case GF_OMP_TARGET_KIND_DATA: - case GF_OMP_TARGET_KIND_OACC_DATA: - case GF_OMP_TARGET_KIND_OACC_HOST_DATA: - data_region = true; - break; - default: - gcc_unreachable (); - } - - child_fn = NULL_TREE; - child_cfun = NULL; - if (offloaded) - { - child_fn = gimple_omp_target_child_fn (entry_stmt); - child_cfun = DECL_STRUCT_FUNCTION (child_fn); - } - - /* Supported by expand_omp_taskreg, but 
not here. */ - if (child_cfun != NULL) - gcc_checking_assert (!child_cfun->cfg); - gcc_checking_assert (!gimple_in_ssa_p (cfun)); - - entry_bb = region->entry; - exit_bb = region->exit; - - if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS) - mark_loops_in_oacc_kernels_region (region->entry, region->exit); - - if (offloaded) - { - unsigned srcidx, dstidx, num; - - /* If the offloading region needs data sent from the parent - function, then the very first statement (except possible - tree profile counter updates) of the offloading body - is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since - &.OMP_DATA_O is passed as an argument to the child function, - we need to replace it with the argument as seen by the child - function. - - In most cases, this will end up being the identity assignment - .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had - a function call that has been inlined, the original PARM_DECL - .OMP_DATA_I may have been converted into a different local - variable. In which case, we need to keep the assignment. */ - tree data_arg = gimple_omp_target_data_arg (entry_stmt); - if (data_arg) - { - basic_block entry_succ_bb = single_succ (entry_bb); - gimple_stmt_iterator gsi; - tree arg; - gimple *tgtcopy_stmt = NULL; - tree sender = TREE_VEC_ELT (data_arg, 0); - - for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi)) - { - gcc_assert (!gsi_end_p (gsi)); - stmt = gsi_stmt (gsi); - if (gimple_code (stmt) != GIMPLE_ASSIGN) - continue; - - if (gimple_num_ops (stmt) == 2) - { - tree arg = gimple_assign_rhs1 (stmt); - - /* We're ignoring the subcode because we're - effectively doing a STRIP_NOPS. 
*/ - - if (TREE_CODE (arg) == ADDR_EXPR - && TREE_OPERAND (arg, 0) == sender) - { - tgtcopy_stmt = stmt; - break; - } - } - } - - gcc_assert (tgtcopy_stmt != NULL); - arg = DECL_ARGUMENTS (child_fn); - - gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg); - gsi_remove (&gsi, true); - } - - /* Declare local variables needed in CHILD_CFUN. */ - block = DECL_INITIAL (child_fn); - BLOCK_VARS (block) = vec2chain (child_cfun->local_decls); - /* The gimplifier could record temporaries in the offloading block - rather than in containing function's local_decls chain, - which would mean cgraph missed finalizing them. Do it now. */ - for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t)) - if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t)) - varpool_node::finalize_decl (t); - DECL_SAVED_TREE (child_fn) = NULL; - /* We'll create a CFG for child_fn, so no gimple body is needed. */ - gimple_set_body (child_fn, NULL); - TREE_USED (block) = 1; - - /* Reset DECL_CONTEXT on function arguments. */ - for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t)) - DECL_CONTEXT (t) = child_fn; - - /* Split ENTRY_BB at GIMPLE_*, - so that it can be moved to the child function. */ - gsi = gsi_last_bb (entry_bb); - stmt = gsi_stmt (gsi); - gcc_assert (stmt - && gimple_code (stmt) == gimple_code (entry_stmt)); - e = split_block (entry_bb, stmt); - gsi_remove (&gsi, true); - entry_bb = e->dest; - single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; - - /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */ - if (exit_bb) - { - gsi = gsi_last_bb (exit_bb); - gcc_assert (!gsi_end_p (gsi) - && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); - stmt = gimple_build_return (NULL); - gsi_insert_after (&gsi, stmt, GSI_SAME_STMT); - gsi_remove (&gsi, true); - } - - /* Move the offloading region into CHILD_CFUN. 
*/ - - block = gimple_block (entry_stmt); - - new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block); - if (exit_bb) - single_succ_edge (new_bb)->flags = EDGE_FALLTHRU; - /* When the OMP expansion process cannot guarantee an up-to-date - loop tree arrange for the child function to fixup loops. */ - if (loops_state_satisfies_p (LOOPS_NEED_FIXUP)) - child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP; - - /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */ - num = vec_safe_length (child_cfun->local_decls); - for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++) - { - t = (*child_cfun->local_decls)[srcidx]; - if (DECL_CONTEXT (t) == cfun->decl) - continue; - if (srcidx != dstidx) - (*child_cfun->local_decls)[dstidx] = t; - dstidx++; - } - if (dstidx != num) - vec_safe_truncate (child_cfun->local_decls, dstidx); - - /* Inform the callgraph about the new function. */ - child_cfun->curr_properties = cfun->curr_properties; - child_cfun->has_simduid_loops |= cfun->has_simduid_loops; - child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops; - cgraph_node *node = cgraph_node::get_create (child_fn); - node->parallelized_function = 1; - cgraph_node::add_new_function (child_fn, true); - - /* Add the new function to the offload table. */ - if (ENABLE_OFFLOADING) - vec_safe_push (offload_funcs, child_fn); - - bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl) - && !DECL_ASSEMBLER_NAME_SET_P (child_fn); - - /* Fix the callgraph edges for child_cfun. Those for cfun will be - fixed in a following pass. */ - push_cfun (child_cfun); - if (need_asm) - assign_assembler_name_if_neeeded (child_fn); - cgraph_edge::rebuild_edges (); - - /* Some EH regions might become dead, see PR34608. If - pass_cleanup_cfg isn't the first pass to happen with the - new child, these dead EH edges might cause problems. - Clean them up now. 
*/ - if (flag_exceptions) - { - basic_block bb; - bool changed = false; - - FOR_EACH_BB_FN (bb, cfun) - changed |= gimple_purge_dead_eh_edges (bb); - if (changed) - cleanup_tree_cfg (); - } - if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP)) - verify_loop_structure (); - pop_cfun (); - - if (dump_file && !gimple_in_ssa_p (cfun)) - { - omp_any_child_fn_dumped = true; - dump_function_header (dump_file, child_fn, dump_flags); - dump_function_to_file (child_fn, dump_file, dump_flags); - } - } - - /* Emit a library call to launch the offloading region, or do data - transfers. */ - tree t1, t2, t3, t4, device, cond, depend, c, clauses; - enum built_in_function start_ix; - location_t clause_loc; - unsigned int flags_i = 0; - bool oacc_kernels_p = false; - - switch (gimple_omp_target_kind (entry_stmt)) - { - case GF_OMP_TARGET_KIND_REGION: - start_ix = BUILT_IN_GOMP_TARGET; - break; - case GF_OMP_TARGET_KIND_DATA: - start_ix = BUILT_IN_GOMP_TARGET_DATA; - break; - case GF_OMP_TARGET_KIND_UPDATE: - start_ix = BUILT_IN_GOMP_TARGET_UPDATE; - break; - case GF_OMP_TARGET_KIND_ENTER_DATA: - start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA; - break; - case GF_OMP_TARGET_KIND_EXIT_DATA: - start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA; - flags_i |= GOMP_TARGET_FLAG_EXIT_DATA; - break; - case GF_OMP_TARGET_KIND_OACC_KERNELS: - oacc_kernels_p = true; - /* FALLTHROUGH */ - case GF_OMP_TARGET_KIND_OACC_PARALLEL: - start_ix = BUILT_IN_GOACC_PARALLEL; - break; - case GF_OMP_TARGET_KIND_OACC_DATA: - case GF_OMP_TARGET_KIND_OACC_HOST_DATA: - start_ix = BUILT_IN_GOACC_DATA_START; - break; - case GF_OMP_TARGET_KIND_OACC_UPDATE: - start_ix = BUILT_IN_GOACC_UPDATE; - break; - case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: - start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA; - break; - case GF_OMP_TARGET_KIND_OACC_DECLARE: - start_ix = BUILT_IN_GOACC_DECLARE; - break; - default: - gcc_unreachable (); - } - - clauses = gimple_omp_target_clauses (entry_stmt); - - /* By default, the value 
of DEVICE is GOMP_DEVICE_ICV (let runtime - library choose) and there is no conditional. */ - cond = NULL_TREE; - device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV); - - c = find_omp_clause (clauses, OMP_CLAUSE_IF); - if (c) - cond = OMP_CLAUSE_IF_EXPR (c); - - c = find_omp_clause (clauses, OMP_CLAUSE_DEVICE); - if (c) - { - /* Even if we pass it to all library function calls, it is currently only - defined/used for the OpenMP target ones. */ - gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET - || start_ix == BUILT_IN_GOMP_TARGET_DATA - || start_ix == BUILT_IN_GOMP_TARGET_UPDATE - || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA); - - device = OMP_CLAUSE_DEVICE_ID (c); - clause_loc = OMP_CLAUSE_LOCATION (c); - } - else - clause_loc = gimple_location (entry_stmt); - - c = find_omp_clause (clauses, OMP_CLAUSE_NOWAIT); - if (c) - flags_i |= GOMP_TARGET_FLAG_NOWAIT; - - /* Ensure 'device' is of the correct type. */ - device = fold_convert_loc (clause_loc, integer_type_node, device); - - /* If we found the clause 'if (cond)', build - (cond ? device : GOMP_DEVICE_HOST_FALLBACK). 
*/ - if (cond) - { - cond = gimple_boolify (cond); - - basic_block cond_bb, then_bb, else_bb; - edge e; - tree tmp_var; - - tmp_var = create_tmp_var (TREE_TYPE (device)); - if (offloaded) - e = split_block_after_labels (new_bb); - else - { - gsi = gsi_last_bb (new_bb); - gsi_prev (&gsi); - e = split_block (new_bb, gsi_stmt (gsi)); - } - cond_bb = e->src; - new_bb = e->dest; - remove_edge (e); - - then_bb = create_empty_bb (cond_bb); - else_bb = create_empty_bb (then_bb); - set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb); - set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb); - - stmt = gimple_build_cond_empty (cond); - gsi = gsi_last_bb (cond_bb); - gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); - - gsi = gsi_start_bb (then_bb); - stmt = gimple_build_assign (tmp_var, device); - gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); - - gsi = gsi_start_bb (else_bb); - stmt = gimple_build_assign (tmp_var, - build_int_cst (integer_type_node, - GOMP_DEVICE_HOST_FALLBACK)); - gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); - - make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE); - make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE); - add_bb_to_loop (then_bb, cond_bb->loop_father); - add_bb_to_loop (else_bb, cond_bb->loop_father); - make_edge (then_bb, new_bb, EDGE_FALLTHRU); - make_edge (else_bb, new_bb, EDGE_FALLTHRU); - - device = tmp_var; - gsi = gsi_last_bb (new_bb); - } - else - { - gsi = gsi_last_bb (new_bb); - device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE, - true, GSI_SAME_STMT); - } - - t = gimple_omp_target_data_arg (entry_stmt); - if (t == NULL) - { - t1 = size_zero_node; - t2 = build_zero_cst (ptr_type_node); - t3 = t2; - t4 = t2; - } - else - { - t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1)))); - t1 = size_binop (PLUS_EXPR, t1, size_int (1)); - t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0)); - t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1)); - t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2)); - } - - 
gimple *g; - bool tagging = false; - /* The maximum number used by any start_ix, without varargs. */ - auto_vec<tree, 11> args; - args.quick_push (device); - if (offloaded) - args.quick_push (build_fold_addr_expr (child_fn)); - args.quick_push (t1); - args.quick_push (t2); - args.quick_push (t3); - args.quick_push (t4); - switch (start_ix) - { - case BUILT_IN_GOACC_DATA_START: - case BUILT_IN_GOACC_DECLARE: - case BUILT_IN_GOMP_TARGET_DATA: - break; - case BUILT_IN_GOMP_TARGET: - case BUILT_IN_GOMP_TARGET_UPDATE: - case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA: - args.quick_push (build_int_cst (unsigned_type_node, flags_i)); - c = find_omp_clause (clauses, OMP_CLAUSE_DEPEND); - if (c) - depend = OMP_CLAUSE_DECL (c); - else - depend = build_int_cst (ptr_type_node, 0); - args.quick_push (depend); - if (start_ix == BUILT_IN_GOMP_TARGET) - args.quick_push (get_target_arguments (&gsi, entry_stmt)); - break; - case BUILT_IN_GOACC_PARALLEL: - { - set_oacc_fn_attrib (child_fn, clauses, oacc_kernels_p, &args); - tagging = true; - } - /* FALLTHRU */ - case BUILT_IN_GOACC_ENTER_EXIT_DATA: - case BUILT_IN_GOACC_UPDATE: - { - tree t_async = NULL_TREE; - - /* If present, use the value specified by the respective - clause, making sure that is of the correct type. */ - c = find_omp_clause (clauses, OMP_CLAUSE_ASYNC); - if (c) - t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c), - integer_type_node, - OMP_CLAUSE_ASYNC_EXPR (c)); - else if (!tagging) - /* Default values for t_async. */ - t_async = fold_convert_loc (gimple_location (entry_stmt), - integer_type_node, - build_int_cst (integer_type_node, - GOMP_ASYNC_SYNC)); - if (tagging && t_async) - { - unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX; - - if (TREE_CODE (t_async) == INTEGER_CST) - { - /* See if we can pack the async arg in to the tag's - operand. 
*/ - i_async = TREE_INT_CST_LOW (t_async); - if (i_async < GOMP_LAUNCH_OP_MAX) - t_async = NULL_TREE; - else - i_async = GOMP_LAUNCH_OP_MAX; - } - args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE, - i_async)); - } - if (t_async) - args.safe_push (t_async); - - /* Save the argument index, and ... */ - unsigned t_wait_idx = args.length (); - unsigned num_waits = 0; - c = find_omp_clause (clauses, OMP_CLAUSE_WAIT); - if (!tagging || c) - /* ... push a placeholder. */ - args.safe_push (integer_zero_node); - - for (; c; c = OMP_CLAUSE_CHAIN (c)) - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT) - { - args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c), - integer_type_node, - OMP_CLAUSE_WAIT_EXPR (c))); - num_waits++; - } - - if (!tagging || num_waits) - { - tree len; - - /* Now that we know the number, update the placeholder. */ - if (tagging) - len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits); - else - len = build_int_cst (integer_type_node, num_waits); - len = fold_convert_loc (gimple_location (entry_stmt), - unsigned_type_node, len); - args[t_wait_idx] = len; - } - } - break; - default: - gcc_unreachable (); - } - if (tagging) - /* Push terminal marker - zero. */ - args.safe_push (oacc_launch_pack (0, NULL_TREE, 0)); - - g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args); - gimple_set_location (g, gimple_location (entry_stmt)); - gsi_insert_before (&gsi, g, GSI_SAME_STMT); - if (!offloaded) - { - g = gsi_stmt (gsi); - gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET); - gsi_remove (&gsi, true); - } - if (data_region && region->exit) - { - gsi = gsi_last_bb (region->exit); - g = gsi_stmt (gsi); - gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN); - gsi_remove (&gsi, true); - } -} - -/* Expand KFOR loop as a HSA grifidied kernel, i.e. as a body only with - iteration variable derived from the thread number. 
INTRA_GROUP means this - is an expansion of a loop iterating over work-items within a separate - iteration over groups. */ - -static void -grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group) -{ - gimple_stmt_iterator gsi; - gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry)); - gcc_checking_assert (gimple_omp_for_kind (for_stmt) - == GF_OMP_FOR_KIND_GRID_LOOP); - size_t collapse = gimple_omp_for_collapse (for_stmt); - struct omp_for_data_loop *loops - = XALLOCAVEC (struct omp_for_data_loop, - gimple_omp_for_collapse (for_stmt)); - struct omp_for_data fd; - - remove_edge (BRANCH_EDGE (kfor->entry)); - basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest; - - gcc_assert (kfor->cont); - extract_omp_for_data (for_stmt, &fd, loops); - - gsi = gsi_start_bb (body_bb); - - for (size_t dim = 0; dim < collapse; dim++) - { - tree type, itype; - itype = type = TREE_TYPE (fd.loops[dim].v); - if (POINTER_TYPE_P (type)) - itype = signed_type_for (type); - - tree n1 = fd.loops[dim].n1; - tree step = fd.loops[dim].step; - n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1), - true, NULL_TREE, true, GSI_SAME_STMT); - step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step), - true, NULL_TREE, true, GSI_SAME_STMT); - tree threadid; - if (gimple_omp_for_grid_group_iter (for_stmt)) - { - gcc_checking_assert (!intra_group); - threadid = build_call_expr (builtin_decl_explicit - (BUILT_IN_HSA_WORKGROUPID), 1, - build_int_cstu (unsigned_type_node, dim)); - } - else if (intra_group) - threadid = build_call_expr (builtin_decl_explicit - (BUILT_IN_HSA_WORKITEMID), 1, - build_int_cstu (unsigned_type_node, dim)); - else - threadid = build_call_expr (builtin_decl_explicit - (BUILT_IN_HSA_WORKITEMABSID), 1, - build_int_cstu (unsigned_type_node, dim)); - threadid = fold_convert (itype, threadid); - threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE, - true, GSI_SAME_STMT); - - tree startvar = fd.loops[dim].v; - tree t = 
fold_build2 (MULT_EXPR, itype, threadid, step); - if (POINTER_TYPE_P (type)) - t = fold_build_pointer_plus (n1, t); - else - t = fold_build2 (PLUS_EXPR, type, t, n1); - t = fold_convert (type, t); - t = force_gimple_operand_gsi (&gsi, t, - DECL_P (startvar) - && TREE_ADDRESSABLE (startvar), - NULL_TREE, true, GSI_SAME_STMT); - gassign *assign_stmt = gimple_build_assign (startvar, t); - gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); - } - /* Remove the omp for statement */ - gsi = gsi_last_bb (kfor->entry); - gsi_remove (&gsi, true); - - /* Remove the GIMPLE_OMP_CONTINUE statement. */ - gsi = gsi_last_bb (kfor->cont); - gcc_assert (!gsi_end_p (gsi) - && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE); - gsi_remove (&gsi, true); - - /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */ - gsi = gsi_last_bb (kfor->exit); - gcc_assert (!gsi_end_p (gsi) - && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); - if (intra_group) - gsi_insert_before (&gsi, build_omp_barrier (NULL_TREE), GSI_SAME_STMT); - gsi_remove (&gsi, true); - - /* Fixup the much simpler CFG. */ - remove_edge (find_edge (kfor->cont, body_bb)); - - if (kfor->cont != body_bb) - set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb); - set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont); -} - -/* Structure passed to grid_remap_kernel_arg_accesses so that it can remap - argument_decls. */ - -struct grid_arg_decl_map -{ - tree old_arg; - tree new_arg; -}; - -/* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones - pertaining to kernel function. 
*/ - -static tree -grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data) -{ - struct walk_stmt_info *wi = (struct walk_stmt_info *) data; - struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info; - tree t = *tp; - - if (t == adm->old_arg) - *tp = adm->new_arg; - *walk_subtrees = !TYPE_P (t) && !DECL_P (t); - return NULL_TREE; -} - -static void expand_omp (struct omp_region *region); - -/* If TARGET region contains a kernel body for loop, remove its region from the - TARGET and expand it in HSA gridified kernel fashion. */ - -static void -grid_expand_target_grid_body (struct omp_region *target) -{ - if (!hsa_gen_requested_p ()) - return; - - gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry)); - struct omp_region **pp; - - for (pp = &target->inner; *pp; pp = &(*pp)->next) - if ((*pp)->type == GIMPLE_OMP_GRID_BODY) - break; - - struct omp_region *gpukernel = *pp; - - tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt); - if (!gpukernel) - { - /* HSA cannot handle OACC stuff. 
*/ - if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION) - return; - gcc_checking_assert (orig_child_fndecl); - gcc_assert (!find_omp_clause (gimple_omp_target_clauses (tgt_stmt), - OMP_CLAUSE__GRIDDIM_)); - cgraph_node *n = cgraph_node::get (orig_child_fndecl); - - hsa_register_kernel (n); - return; - } - - gcc_assert (find_omp_clause (gimple_omp_target_clauses (tgt_stmt), - OMP_CLAUSE__GRIDDIM_)); - tree inside_block = gimple_block (first_stmt (single_succ (gpukernel->entry))); - *pp = gpukernel->next; - for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next) - if ((*pp)->type == GIMPLE_OMP_FOR) - break; - - struct omp_region *kfor = *pp; - gcc_assert (kfor); - gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry)); - gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP); - *pp = kfor->next; - if (kfor->inner) - { - if (gimple_omp_for_grid_group_iter (for_stmt)) - { - struct omp_region **next_pp; - for (pp = &kfor->inner; *pp; pp = next_pp) - { - next_pp = &(*pp)->next; - if ((*pp)->type != GIMPLE_OMP_FOR) - continue; - gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry)); - gcc_assert (gimple_omp_for_kind (inner) - == GF_OMP_FOR_KIND_GRID_LOOP); - grid_expand_omp_for_loop (*pp, true); - *pp = (*pp)->next; - next_pp = pp; - } - } - expand_omp (kfor->inner); - } - if (gpukernel->inner) - expand_omp (gpukernel->inner); - - tree kern_fndecl = copy_node (orig_child_fndecl); - DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel"); - SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl)); - tree tgtblock = gimple_block (tgt_stmt); - tree fniniblock = make_node (BLOCK); - BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock; - BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock); - BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock); - BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl; - DECL_INITIAL (kern_fndecl) = fniniblock; - push_struct_function 
(kern_fndecl); - cfun->function_end_locus = gimple_location (tgt_stmt); - init_tree_ssa (cfun); - pop_cfun (); - - tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl); - gcc_assert (!DECL_CHAIN (old_parm_decl)); - tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl)); - DECL_CONTEXT (new_parm_decl) = kern_fndecl; - DECL_ARGUMENTS (kern_fndecl) = new_parm_decl; - gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl)))); - DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl)); - DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl; - struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl); - kern_cfun->curr_properties = cfun->curr_properties; - - grid_expand_omp_for_loop (kfor, false); - - /* Remove the omp for statement */ - gimple_stmt_iterator gsi = gsi_last_bb (gpukernel->entry); - gsi_remove (&gsi, true); - /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real - return. */ - gsi = gsi_last_bb (gpukernel->exit); - gcc_assert (!gsi_end_p (gsi) - && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); - gimple *ret_stmt = gimple_build_return (NULL); - gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT); - gsi_remove (&gsi, true); - - /* Statements in the first BB in the target construct have been produced by - target lowering and must be copied inside the GPUKERNEL, with the two - exceptions of the first OMP statement and the OMP_DATA assignment - statement. */ - gsi = gsi_start_bb (single_succ (gpukernel->entry)); - tree data_arg = gimple_omp_target_data_arg (tgt_stmt); - tree sender = data_arg ? 
TREE_VEC_ELT (data_arg, 0) : NULL; - for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry)); - !gsi_end_p (tsi); gsi_next (&tsi)) - { - gimple *stmt = gsi_stmt (tsi); - if (is_gimple_omp (stmt)) - break; - if (sender - && is_gimple_assign (stmt) - && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR - && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender) - continue; - gimple *copy = gimple_copy (stmt); - gsi_insert_before (&gsi, copy, GSI_SAME_STMT); - gimple_set_block (copy, fniniblock); - } - - move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry), - gpukernel->exit, inside_block); - - cgraph_node *kcn = cgraph_node::get_create (kern_fndecl); - kcn->mark_force_output (); - cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl); - - hsa_register_kernel (kcn, orig_child); - - cgraph_node::add_new_function (kern_fndecl, true); - push_cfun (kern_cfun); - cgraph_edge::rebuild_edges (); - - /* Re-map any mention of the PARM_DECL of the original function to the - PARM_DECL of the new one. - - TODO: It would be great if lowering produced references into the GPU - kernel decl straight away and we did not have to do this. */ - struct grid_arg_decl_map adm; - adm.old_arg = old_parm_decl; - adm.new_arg = new_parm_decl; - basic_block bb; - FOR_EACH_BB_FN (bb, kern_cfun) - { - for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) - { - gimple *stmt = gsi_stmt (gsi); - struct walk_stmt_info wi; - memset (&wi, 0, sizeof (wi)); - wi.info = &adm; - walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi); - } - } - pop_cfun (); - - return; -} - -/* Expand the parallel region tree rooted at REGION. Expansion - proceeds in depth-first order. Innermost regions are expanded - first. This way, parallel regions that require a new function to - be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any - internal dependencies in their body. 
*/ - -static void -expand_omp (struct omp_region *region) -{ - omp_any_child_fn_dumped = false; - while (region) - { - location_t saved_location; - gimple *inner_stmt = NULL; - - /* First, determine whether this is a combined parallel+workshare - region. */ - if (region->type == GIMPLE_OMP_PARALLEL) - determine_parallel_type (region); - else if (region->type == GIMPLE_OMP_TARGET) - grid_expand_target_grid_body (region); - - if (region->type == GIMPLE_OMP_FOR - && gimple_omp_for_combined_p (last_stmt (region->entry))) - inner_stmt = last_stmt (region->inner->entry); - - if (region->inner) - expand_omp (region->inner); - - saved_location = input_location; - if (gimple_has_location (last_stmt (region->entry))) - input_location = gimple_location (last_stmt (region->entry)); - - switch (region->type) - { - case GIMPLE_OMP_PARALLEL: - case GIMPLE_OMP_TASK: - expand_omp_taskreg (region); - break; - - case GIMPLE_OMP_FOR: - expand_omp_for (region, inner_stmt); - break; - - case GIMPLE_OMP_SECTIONS: - expand_omp_sections (region); - break; - - case GIMPLE_OMP_SECTION: - /* Individual omp sections are handled together with their - parent GIMPLE_OMP_SECTIONS region. */ - break; - - case GIMPLE_OMP_SINGLE: - expand_omp_single (region); - break; - - case GIMPLE_OMP_ORDERED: - { - gomp_ordered *ord_stmt - = as_a <gomp_ordered *> (last_stmt (region->entry)); - if (find_omp_clause (gimple_omp_ordered_clauses (ord_stmt), - OMP_CLAUSE_DEPEND)) - { - /* We'll expand these when expanding corresponding - worksharing region with ordered(n) clause. 
*/ - gcc_assert (region->outer - && region->outer->type == GIMPLE_OMP_FOR); - region->ord_stmt = ord_stmt; - break; - } - } - /* FALLTHRU */ - case GIMPLE_OMP_MASTER: - case GIMPLE_OMP_TASKGROUP: - case GIMPLE_OMP_CRITICAL: - case GIMPLE_OMP_TEAMS: - expand_omp_synch (region); - break; - - case GIMPLE_OMP_ATOMIC_LOAD: - expand_omp_atomic (region); - break; - - case GIMPLE_OMP_TARGET: - expand_omp_target (region); - break; - - default: - gcc_unreachable (); - } - - input_location = saved_location; - region = region->next; - } - if (omp_any_child_fn_dumped) - { - if (dump_file) - dump_function_header (dump_file, current_function_decl, dump_flags); - omp_any_child_fn_dumped = false; - } -} - -/* Helper for build_omp_regions. Scan the dominator tree starting at - block BB. PARENT is the region that contains BB. If SINGLE_TREE is - true, the function ends once a single tree is built (otherwise, whole - forest of OMP constructs may be built). */ - -static void -build_omp_regions_1 (basic_block bb, struct omp_region *parent, - bool single_tree) -{ - gimple_stmt_iterator gsi; - gimple *stmt; - basic_block son; - - gsi = gsi_last_bb (bb); - if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi))) - { - struct omp_region *region; - enum gimple_code code; - - stmt = gsi_stmt (gsi); - code = gimple_code (stmt); - if (code == GIMPLE_OMP_RETURN) - { - /* STMT is the return point out of region PARENT. Mark it - as the exit point and make PARENT the immediately - enclosing region. */ - gcc_assert (parent); - region = parent; - region->exit = bb; - parent = parent->outer; - } - else if (code == GIMPLE_OMP_ATOMIC_STORE) - { - /* GIMPLE_OMP_ATOMIC_STORE is analoguous to - GIMPLE_OMP_RETURN, but matches with - GIMPLE_OMP_ATOMIC_LOAD. 
*/ - gcc_assert (parent); - gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD); - region = parent; - region->exit = bb; - parent = parent->outer; - } - else if (code == GIMPLE_OMP_CONTINUE) - { - gcc_assert (parent); - parent->cont = bb; - } - else if (code == GIMPLE_OMP_SECTIONS_SWITCH) - { - /* GIMPLE_OMP_SECTIONS_SWITCH is part of - GIMPLE_OMP_SECTIONS, and we do nothing for it. */ - } - else - { - region = new_omp_region (bb, code, parent); - /* Otherwise... */ - if (code == GIMPLE_OMP_TARGET) - { - switch (gimple_omp_target_kind (stmt)) - { - case GF_OMP_TARGET_KIND_REGION: - case GF_OMP_TARGET_KIND_DATA: - case GF_OMP_TARGET_KIND_OACC_PARALLEL: - case GF_OMP_TARGET_KIND_OACC_KERNELS: - case GF_OMP_TARGET_KIND_OACC_DATA: - case GF_OMP_TARGET_KIND_OACC_HOST_DATA: - break; - case GF_OMP_TARGET_KIND_UPDATE: - case GF_OMP_TARGET_KIND_ENTER_DATA: - case GF_OMP_TARGET_KIND_EXIT_DATA: - case GF_OMP_TARGET_KIND_OACC_UPDATE: - case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: - case GF_OMP_TARGET_KIND_OACC_DECLARE: - /* ..., other than for those stand-alone directives... */ - region = NULL; - break; - default: - gcc_unreachable (); - } - } - else if (code == GIMPLE_OMP_ORDERED - && find_omp_clause (gimple_omp_ordered_clauses - (as_a <gomp_ordered *> (stmt)), - OMP_CLAUSE_DEPEND)) - /* #pragma omp ordered depend is also just a stand-alone - directive. */ - region = NULL; - /* ..., this directive becomes the parent for a new region. */ - if (region) - parent = region; - } - } - - if (single_tree && !parent) - return; - - for (son = first_dom_son (CDI_DOMINATORS, bb); - son; - son = next_dom_son (CDI_DOMINATORS, son)) - build_omp_regions_1 (son, parent, single_tree); -} - -/* Builds the tree of OMP regions rooted at ROOT, storing it to - root_omp_region. 
*/ - -static void -build_omp_regions_root (basic_block root) -{ - gcc_assert (root_omp_region == NULL); - build_omp_regions_1 (root, NULL, true); - gcc_assert (root_omp_region != NULL); -} - -/* Expands omp construct (and its subconstructs) starting in HEAD. */ - -void -omp_expand_local (basic_block head) -{ - build_omp_regions_root (head); - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "\nOMP region tree\n\n"); - dump_omp_region (dump_file, root_omp_region, 0); - fprintf (dump_file, "\n"); - } - - remove_exit_barriers (root_omp_region); - expand_omp (root_omp_region); - - free_omp_regions (); -} - -/* Scan the CFG and build a tree of OMP regions. Return the root of - the OMP region tree. */ - -static void -build_omp_regions (void) -{ - gcc_assert (root_omp_region == NULL); - calculate_dominance_info (CDI_DOMINATORS); - build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false); -} - -/* Main entry point for expanding OMP-GIMPLE into runtime calls. */ - -static unsigned int -execute_expand_omp (void) -{ - build_omp_regions (); - - if (!root_omp_region) - return 0; - - if (dump_file) - { - fprintf (dump_file, "\nOMP region tree\n\n"); - dump_omp_region (dump_file, root_omp_region, 0); - fprintf (dump_file, "\n"); - } - - remove_exit_barriers (root_omp_region); - - expand_omp (root_omp_region); - - if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP)) - verify_loop_structure (); - cleanup_tree_cfg (); - - free_omp_regions (); - - return 0; -} - -/* OMP expansion -- the default pass, run before creation of SSA form. 
*/ - -namespace { - -const pass_data pass_data_expand_omp = -{ - GIMPLE_PASS, /* type */ - "ompexp", /* name */ - OPTGROUP_OPENMP, /* optinfo_flags */ - TV_NONE, /* tv_id */ - PROP_gimple_any, /* properties_required */ - PROP_gimple_eomp, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - 0, /* todo_flags_finish */ -}; - -class pass_expand_omp : public gimple_opt_pass -{ -public: - pass_expand_omp (gcc::context *ctxt) - : gimple_opt_pass (pass_data_expand_omp, ctxt) - {} - - /* opt_pass methods: */ - virtual unsigned int execute (function *) - { - bool gate = ((flag_cilkplus != 0 || flag_openacc != 0 || flag_openmp != 0 - || flag_openmp_simd != 0) - && !seen_error ()); - - /* This pass always runs, to provide PROP_gimple_eomp. - But often, there is nothing to do. */ - if (!gate) - return 0; - - return execute_expand_omp (); - } - -}; // class pass_expand_omp - -} // anon namespace - -gimple_opt_pass * -make_pass_expand_omp (gcc::context *ctxt) -{ - return new pass_expand_omp (ctxt); -} - -namespace { - -const pass_data pass_data_expand_omp_ssa = -{ - GIMPLE_PASS, /* type */ - "ompexpssa", /* name */ - OPTGROUP_OPENMP, /* optinfo_flags */ - TV_NONE, /* tv_id */ - PROP_cfg | PROP_ssa, /* properties_required */ - PROP_gimple_eomp, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */ -}; - -class pass_expand_omp_ssa : public gimple_opt_pass -{ -public: - pass_expand_omp_ssa (gcc::context *ctxt) - : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt) - {} - - /* opt_pass methods: */ - virtual bool gate (function *fun) - { - return !(fun->curr_properties & PROP_gimple_eomp); - } - virtual unsigned int execute (function *) { return execute_expand_omp (); } - opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); } - -}; // class pass_expand_omp_ssa - -} // anon namespace - -gimple_opt_pass * -make_pass_expand_omp_ssa (gcc::context 
*ctxt) -{ - return new pass_expand_omp_ssa (ctxt); -} /* Routines to lower OMP directives into OMP-GIMPLE. */ @@ -14606,7 +5860,7 @@ lower_omp_sections (gimple_stmt_iterator *gsi_p, omp_context *ctx) new_body = maybe_catch_exception (new_body); t = gimple_build_omp_return - (!!find_omp_clause (gimple_omp_sections_clauses (stmt), + (!!omp_find_clause (gimple_omp_sections_clauses (stmt), OMP_CLAUSE_NOWAIT)); gimple_seq_add_stmt (&new_body, t); maybe_add_implicit_barrier_cancel (ctx, &new_body); @@ -14769,7 +6023,7 @@ lower_omp_single (gimple_stmt_iterator *gsi_p, omp_context *ctx) bind_body = maybe_catch_exception (bind_body); t = gimple_build_omp_return - (!!find_omp_clause (gimple_omp_single_clauses (single_stmt), + (!!omp_find_clause (gimple_omp_single_clauses (single_stmt), OMP_CLAUSE_NOWAIT)); gimple_seq_add_stmt (&bind_body_tail, t); maybe_add_implicit_barrier_cancel (ctx, &bind_body_tail); @@ -14876,7 +6130,7 @@ lower_omp_ordered_clauses (gimple_stmt_iterator *gsi_p, gomp_ordered *ord_stmt, unsigned int len = gimple_omp_for_collapse (ctx->outer->stmt); struct omp_for_data_loop *loops = XALLOCAVEC (struct omp_for_data_loop, len); - extract_omp_for_data (as_a <gomp_for *> (ctx->outer->stmt), &fd, loops); + omp_extract_for_data (as_a <gomp_for *> (ctx->outer->stmt), &fd, loops); if (!fd.ordered) return; @@ -14980,7 +6234,7 @@ lower_omp_ordered_clauses (gimple_stmt_iterator *gsi_p, gomp_ordered *ord_stmt, { gcc_assert (i < len); - /* extract_omp_for_data has canonicalized the condition. */ + /* omp_extract_for_data has canonicalized the condition. 
*/ gcc_assert (fd.loops[i].cond_code == LT_EXPR || fd.loops[i].cond_code == GT_EXPR); bool forward = fd.loops[i].cond_code == LT_EXPR; @@ -15111,16 +6365,16 @@ lower_omp_ordered (gimple_stmt_iterator *gsi_p, omp_context *ctx) gomp_ordered *ord_stmt = as_a <gomp_ordered *> (stmt); gcall *x; gbind *bind; - bool simd = find_omp_clause (gimple_omp_ordered_clauses (ord_stmt), + bool simd = omp_find_clause (gimple_omp_ordered_clauses (ord_stmt), OMP_CLAUSE_SIMD); /* FIXME: this should check presence of OMP_CLAUSE__SIMT_ on the enclosing loop. */ bool maybe_simt = simd && omp_maybe_offloaded_ctx (ctx) && omp_max_simt_vf () > 1; - bool threads = find_omp_clause (gimple_omp_ordered_clauses (ord_stmt), + bool threads = omp_find_clause (gimple_omp_ordered_clauses (ord_stmt), OMP_CLAUSE_THREADS); - if (find_omp_clause (gimple_omp_ordered_clauses (ord_stmt), + if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt), OMP_CLAUSE_DEPEND)) { /* FIXME: This is needs to be moved to the expansion to verify various @@ -15322,47 +6576,6 @@ lower_omp_critical (gimple_stmt_iterator *gsi_p, omp_context *ctx) BLOCK_VARS (block) = gimple_bind_vars (bind); } -/* Return the lastprivate predicate for a given gridified loop described by FD). - TODO: When grid stuff is moved to a separate file, move this too. */ - -static tree -grid_lastprivate_predicate (struct omp_for_data *fd) -{ - /* When dealing with a gridified loop, we need to check up to three collapsed - iteration variables but they are not actually captured in this fd. - Fortunately, we can easily rely on HSA builtins to get this - information. 
*/ - - tree id, size; - if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP - && gimple_omp_for_grid_intra_group (fd->for_stmt)) - { - id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMID); - size = builtin_decl_explicit (BUILT_IN_HSA_CURRENTWORKGROUPSIZE); - } - else - { - id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMABSID); - size = builtin_decl_explicit (BUILT_IN_HSA_GRIDSIZE); - } - tree cond = NULL; - for (int dim = 0; dim < fd->collapse; dim++) - { - tree dim_tree = build_int_cstu (unsigned_type_node, dim); - tree u1 = build_int_cstu (unsigned_type_node, 1); - tree c2 - = build2 (EQ_EXPR, boolean_type_node, - build2 (PLUS_EXPR, unsigned_type_node, - build_call_expr (id, 1, dim_tree), u1), - build_call_expr (size, 1, dim_tree)); - if (cond) - cond = build2 (TRUTH_AND_EXPR, boolean_type_node, cond, c2); - else - cond = c2; - } - return cond; -} - /* A subroutine of lower_omp_for. Generate code to emit the predicate for a lastprivate clause. Given a loop control predicate of (V cond N2), we gate the clause on (!(V cond N2)). 
The lowered form @@ -15391,7 +6604,7 @@ lower_omp_for_lastprivate (struct omp_for_data *fd, gimple_seq *body_p, if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP || gimple_omp_for_grid_phony (fd->for_stmt)) - cond = grid_lastprivate_predicate (fd); + cond = omp_grid_lastprivate_predicate (fd); else { tree n2 = fd->loop.n2; @@ -15415,7 +6628,7 @@ lower_omp_for_lastprivate (struct omp_for_data *fd, gimple_seq *body_p, else { struct omp_for_data outer_fd; - extract_omp_for_data (gfor, &outer_fd, NULL); + omp_extract_for_data (gfor, &outer_fd, NULL); n2 = fold_convert (TREE_TYPE (n2), outer_fd.loop.n2); } } @@ -15429,16 +6642,16 @@ lower_omp_for_lastprivate (struct omp_for_data *fd, gimple_seq *body_p, int i; tree taskreg_clauses = gimple_omp_taskreg_clauses (taskreg_ctx->stmt); - tree innerc = find_omp_clause (taskreg_clauses, + tree innerc = omp_find_clause (taskreg_clauses, OMP_CLAUSE__LOOPTEMP_); gcc_assert (innerc); for (i = 0; i < fd->collapse; i++) { - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_); gcc_assert (innerc); } - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_); if (innerc) n2 = fold_convert (TREE_TYPE (n2), @@ -15516,7 +6729,7 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx) if (gimple_omp_for_combined_into_p (stmt)) { - extract_omp_for_data (stmt, &fd, NULL); + omp_extract_for_data (stmt, &fd, NULL); fdp = &fd; /* We need two temporaries with fd.loop.v type (istart/iend) @@ -15534,7 +6747,7 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx) tree clauses = *pc; if (taskreg_for) outerc - = find_omp_clause (gimple_omp_taskreg_clauses (ctx->outer->stmt), + = omp_find_clause (gimple_omp_taskreg_clauses (ctx->outer->stmt), OMP_CLAUSE__LOOPTEMP_); for (i = 0; i < count; i++) { @@ -15543,7 +6756,7 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, 
omp_context *ctx) { gcc_assert (outerc); temp = lookup_decl (OMP_CLAUSE_DECL (outerc), ctx->outer); - outerc = find_omp_clause (OMP_CLAUSE_CHAIN (outerc), + outerc = omp_find_clause (OMP_CLAUSE_CHAIN (outerc), OMP_CLAUSE__LOOPTEMP_); } else @@ -15590,7 +6803,7 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx) } /* Once lowered, extract the bounds and clauses. */ - extract_omp_for_data (stmt, &fd, NULL); + omp_extract_for_data (stmt, &fd, NULL); if (is_gimple_omp_oacc (ctx->stmt) && !ctx_in_oacc_kernels_region (ctx)) @@ -15879,7 +7092,7 @@ create_task_copyfn (gomp_task *task_stmt, omp_context *ctx) sf = *tcctx.cb.decl_map->get (sf); src = build_simple_mem_ref_loc (loc, sarg); src = omp_build_component_ref (src, sf); - if (use_pointer_for_field (decl, NULL) || is_reference (decl)) + if (use_pointer_for_field (decl, NULL) || omp_is_reference (decl)) src = build_simple_mem_ref_loc (loc, src); } else @@ -15977,7 +7190,7 @@ lower_depend_clauses (tree *pclauses, gimple_seq *iseq, gimple_seq *oseq) gimple *g; size_t n_in = 0, n_out = 0, idx = 2, i; - clauses = find_omp_clause (*pclauses, OMP_CLAUSE_DEPEND); + clauses = omp_find_clause (*pclauses, OMP_CLAUSE_DEPEND); gcc_assert (clauses); for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND) @@ -16068,7 +7281,7 @@ lower_omp_taskreg (gimple_stmt_iterator *gsi_p, omp_context *ctx) gimple_seq dep_ilist = NULL; gimple_seq dep_olist = NULL; if (gimple_code (stmt) == GIMPLE_OMP_TASK - && find_omp_clause (clauses, OMP_CLAUSE_DEPEND)) + && omp_find_clause (clauses, OMP_CLAUSE_DEPEND)) { push_gimplify_context (); dep_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK)); @@ -16218,7 +7431,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) gimple_seq dep_ilist = NULL; gimple_seq dep_olist = NULL; - if (find_omp_clause (clauses, OMP_CLAUSE_DEPEND)) + if (omp_find_clause (clauses, OMP_CLAUSE_DEPEND)) { push_gimplify_context (); dep_bind = gimple_build_bind 
(NULL, NULL, make_node (BLOCK)); @@ -16348,7 +7561,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE) { gcc_assert (is_gimple_omp_oacc (ctx->stmt)); - if (is_reference (new_var)) + if (omp_is_reference (new_var)) { /* Create a local object to hold the instance value. */ @@ -16376,7 +7589,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) goto oacc_firstprivate; map_cnt++; var = OMP_CLAUSE_DECL (c); - if (!is_reference (var) + if (!omp_is_reference (var) && !is_gimple_reg_type (TREE_TYPE (var))) { tree new_var = lookup_decl (var, ctx); @@ -16582,7 +7795,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE) { gcc_assert (is_gimple_omp_oacc (ctx->stmt)); - if (!is_reference (var)) + if (!omp_is_reference (var)) { if (is_gimple_reg (var) && OMP_CLAUSE_FIRSTPRIVATE_IMPLICIT (c)) @@ -16732,7 +7945,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) if (is_oacc_parallel (ctx)) goto oacc_firstprivate_map; ovar = OMP_CLAUSE_DECL (c); - if (is_reference (ovar)) + if (omp_is_reference (ovar)) talign = TYPE_ALIGN_UNIT (TREE_TYPE (TREE_TYPE (ovar))); else talign = DECL_ALIGN_UNIT (ovar); @@ -16740,7 +7953,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) x = build_sender_ref (ovar, ctx); tkind = GOMP_MAP_FIRSTPRIVATE; type = TREE_TYPE (ovar); - if (is_reference (ovar)) + if (omp_is_reference (ovar)) type = TREE_TYPE (type); if ((INTEGRAL_TYPE_P (type) && TYPE_PRECISION (type) <= POINTER_SIZE) @@ -16748,7 +7961,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) { tkind = GOMP_MAP_FIRSTPRIVATE_INT; tree t = var; - if (is_reference (var)) + if (omp_is_reference (var)) t = build_simple_mem_ref (var); else if (OMP_CLAUSE_FIRSTPRIVATE_IMPLICIT (c)) TREE_NO_WARNING (var) = 1; @@ -16757,7 +7970,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) t = 
fold_convert (TREE_TYPE (x), t); gimplify_assign (x, t, &ilist); } - else if (is_reference (var)) + else if (omp_is_reference (var)) gimplify_assign (x, var, &ilist); else if (is_gimple_reg (var)) { @@ -16776,7 +7989,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) } if (tkind == GOMP_MAP_FIRSTPRIVATE_INT) s = size_int (0); - else if (is_reference (ovar)) + else if (omp_is_reference (ovar)) s = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (ovar))); else s = TYPE_SIZE_UNIT (TREE_TYPE (ovar)); @@ -16810,7 +8023,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) var = build_fold_addr_expr (var); else { - if (is_reference (ovar)) + if (omp_is_reference (ovar)) { type = TREE_TYPE (type); if (TREE_CODE (type) != ARRAY_TYPE) @@ -16889,13 +8102,13 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) if (is_gimple_omp_oacc (ctx->stmt)) break; var = OMP_CLAUSE_DECL (c); - if (is_reference (var) + if (omp_is_reference (var) || is_gimple_reg_type (TREE_TYPE (var))) { tree new_var = lookup_decl (var, ctx); tree type; type = TREE_TYPE (var); - if (is_reference (var)) + if (omp_is_reference (var)) type = TREE_TYPE (type); if ((INTEGRAL_TYPE_P (type) && TYPE_PRECISION (type) <= POINTER_SIZE) @@ -16907,7 +8120,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) x = fold_convert (type, x); gimplify_expr (&x, &new_body, NULL, is_gimple_val, fb_rvalue); - if (is_reference (var)) + if (omp_is_reference (var)) { tree v = create_tmp_var_raw (type, get_name (var)); gimple_add_tmp_var (v); @@ -16921,7 +8134,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) } else { - x = build_receiver_ref (var, !is_reference (var), ctx); + x = build_receiver_ref (var, !omp_is_reference (var), ctx); gimplify_expr (&x, &new_body, NULL, is_gimple_val, fb_rvalue); gimple_seq_add_stmt (&new_body, @@ -16945,7 +8158,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) if (is_gimple_omp_oacc (ctx->stmt)) break; 
var = OMP_CLAUSE_DECL (c); - if (is_reference (var)) + if (omp_is_reference (var)) { location_t clause_loc = OMP_CLAUSE_LOCATION (c); tree new_var = lookup_decl (var, ctx); @@ -17000,7 +8213,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) { tree type = TREE_TYPE (var); tree new_var = lookup_decl (var, ctx); - if (is_reference (var)) + if (omp_is_reference (var)) { type = TREE_TYPE (type); if (TREE_CODE (type) != ARRAY_TYPE) @@ -17089,7 +8302,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) offset)); } else - is_ref = is_reference (var); + is_ref = omp_is_reference (var); if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_REFERENCE) is_ref = false; bool ref_to_array = false; @@ -17169,7 +8382,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) gimple_seq_add_stmt (&new_body, gimple_build_assign (new_pvar, x)); } - else if (is_reference (var) && !is_gimple_omp_oacc (ctx->stmt)) + else if (omp_is_reference (var) && !is_gimple_omp_oacc (ctx->stmt)) { location_t clause_loc = OMP_CLAUSE_LOCATION (c); tree new_var = lookup_decl (var, ctx); @@ -17251,7 +8464,7 @@ lower_omp_teams (gimple_stmt_iterator *gsi_p, omp_context *ctx) gimple_seq dlist = NULL; gimple_seq olist = NULL; - tree num_teams = find_omp_clause (gimple_omp_teams_clauses (teams_stmt), + tree num_teams = omp_find_clause (gimple_omp_teams_clauses (teams_stmt), OMP_CLAUSE_NUM_TEAMS); if (num_teams == NULL_TREE) num_teams = build_int_cst (unsigned_type_node, 0); @@ -17261,7 +8474,7 @@ lower_omp_teams (gimple_stmt_iterator *gsi_p, omp_context *ctx) num_teams = fold_convert (unsigned_type_node, num_teams); gimplify_expr (&num_teams, &bind_body, NULL, is_gimple_val, fb_rvalue); } - tree thread_limit = find_omp_clause (gimple_omp_teams_clauses (teams_stmt), + tree thread_limit = omp_find_clause (gimple_omp_teams_clauses (teams_stmt), OMP_CLAUSE_THREAD_LIMIT); if (thread_limit == NULL_TREE) thread_limit = build_int_cst (unsigned_type_node, 0); @@ 
-17620,1333 +8833,6 @@ lower_omp (gimple_seq *body, omp_context *ctx) input_location = saved_location; } -/* Structure describing the basic properties of the loop we ara analyzing - whether it can be gridified and when it is gridified. */ - -struct grid_prop -{ - /* True when we are doing tiling gridification, i.e. when there is a distinct - distribute loop over groups and a loop construct over work-items. False - when distribute and parallel for loops form a combined construct. */ - bool tiling; - /* Location of the target construct for optimization information - messages. */ - location_t target_loc; - /* The collapse clause of the involved loops. Collapse value of all of them - must be the same for gridification to take place. */ - size_t collapse; - /* Group sizes, if requested by the user or NULL if not requested. */ - tree group_sizes[3]; -}; - -#define GRID_MISSED_MSG_PREFIX "Will not turn target construct into a " \ - "gridified HSA kernel because " - -/* Return true if STMT is an assignment of a register-type into a local - VAR_DECL. If GRID is non-NULL, the assignment additionally must not be to - any of the trees specifying group sizes there. */ - -static bool -grid_safe_assignment_p (gimple *stmt, grid_prop *grid) -{ - gassign *assign = dyn_cast <gassign *> (stmt); - if (!assign) - return false; - if (gimple_clobber_p (assign)) - return true; - tree lhs = gimple_assign_lhs (assign); - if (!VAR_P (lhs) - || !is_gimple_reg_type (TREE_TYPE (lhs)) - || is_global_var (lhs)) - return false; - if (grid) - for (unsigned i = 0; i < grid->collapse; i++) - if (lhs == grid->group_sizes[i]) - return false; - return true; -} - -/* Return true if all statements in SEQ are assignments to local register-type - variables that do not hold group size information. 
*/ - -static bool -grid_seq_only_contains_local_assignments (gimple_seq seq, grid_prop *grid) -{ - if (!seq) - return true; - - gimple_stmt_iterator gsi; - for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi)) - if (!grid_safe_assignment_p (gsi_stmt (gsi), grid)) - return false; - return true; -} - -/* Scan statements in SEQ and call itself recursively on any bind. GRID - describes hitherto discovered properties of the loop that is evaluated for - possible gridification. If during whole search only assignments to - register-type local variables (that do not overwrite group size information) - and one single OMP statement is encountered, return true, otherwise return - false. RET is where we store any OMP statement encountered. */ - -static bool -grid_find_single_omp_among_assignments_1 (gimple_seq seq, grid_prop *grid, - const char *name, gimple **ret) -{ - gimple_stmt_iterator gsi; - for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi)) - { - gimple *stmt = gsi_stmt (gsi); - - if (grid_safe_assignment_p (stmt, grid)) - continue; - if (gbind *bind = dyn_cast <gbind *> (stmt)) - { - if (!grid_find_single_omp_among_assignments_1 (gimple_bind_body (bind), - grid, name, ret)) - return false; - } - else if (is_gimple_omp (stmt)) - { - if (*ret) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "%s construct " - "contains multiple OpenMP constructs\n", - name); - dump_printf_loc (MSG_NOTE, gimple_location (*ret), - "The first OpenMP construct within " - "a parallel\n"); - dump_printf_loc (MSG_NOTE, gimple_location (stmt), - "The second OpenMP construct within " - "a parallel\n"); - } - return false; - } - *ret = stmt; - } - else - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "%s construct contains " - "a complex statement\n", name); - dump_printf_loc (MSG_NOTE, gimple_location (stmt), - "This statement cannot be 
analyzed for " - "gridification\n"); - } - return false; - } - } - return true; -} - -/* Scan statements in SEQ and make sure that it and any binds in it contain - only assignments to local register-type variables (that do not overwrite - group size information) and one OMP construct. If so, return that - construct, otherwise return NULL. GRID describes hitherto discovered - properties of the loop that is evaluated for possible gridification. If - dumping is enabled and function fails, use NAME to dump a note with the - reason for failure. */ - -static gimple * -grid_find_single_omp_among_assignments (gimple_seq seq, grid_prop *grid, - const char *name) -{ - if (!seq) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "%s construct has empty body\n", - name); - return NULL; - } - - gimple *ret = NULL; - if (grid_find_single_omp_among_assignments_1 (seq, grid, name, &ret)) - { - if (!ret && dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "%s construct does not contain" - "any other OpenMP construct\n", name); - return ret; - } - else - return NULL; -} - -/* Walker function looking for statements there is no point gridifying (and for - noreturn function calls which we cannot do). Return non-NULL if such a - function is found. */ - -static tree -grid_find_ungridifiable_statement (gimple_stmt_iterator *gsi, - bool *handled_ops_p, - struct walk_stmt_info *wi) -{ - *handled_ops_p = false; - gimple *stmt = gsi_stmt (*gsi); - switch (gimple_code (stmt)) - { - case GIMPLE_CALL: - if (gimple_call_noreturn_p (as_a <gcall *> (stmt))) - { - *handled_ops_p = true; - wi->info = stmt; - return error_mark_node; - } - break; - - /* We may reduce the following list if we find a way to implement the - clauses, but now there is no point trying further. 
*/ - case GIMPLE_OMP_CRITICAL: - case GIMPLE_OMP_TASKGROUP: - case GIMPLE_OMP_TASK: - case GIMPLE_OMP_SECTION: - case GIMPLE_OMP_SECTIONS: - case GIMPLE_OMP_SECTIONS_SWITCH: - case GIMPLE_OMP_TARGET: - case GIMPLE_OMP_ORDERED: - *handled_ops_p = true; - wi->info = stmt; - return error_mark_node; - default: - break; - } - return NULL; -} - -/* Examine clauses of omp parallel statement PAR and if any prevents - gridification, issue a missed-optimization diagnostics and return false, - otherwise return true. GRID describes hitherto discovered properties of the - loop that is evaluated for possible gridification. */ - -static bool -grid_parallel_clauses_gridifiable (gomp_parallel *par, location_t tloc) -{ - tree clauses = gimple_omp_parallel_clauses (par); - while (clauses) - { - switch (OMP_CLAUSE_CODE (clauses)) - { - case OMP_CLAUSE_NUM_THREADS: - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, - GRID_MISSED_MSG_PREFIX "because there is " - "a num_threads clause of the parallel " - "construct\n"); - dump_printf_loc (MSG_NOTE, gimple_location (par), - "Parallel construct has a num_threads clause\n"); - } - return false; - - case OMP_CLAUSE_REDUCTION: - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, - GRID_MISSED_MSG_PREFIX "a reduction clause" - "is present\n "); - dump_printf_loc (MSG_NOTE, gimple_location (par), - "Parallel construct has a reduction clause\n"); - } - return false; - - default: - break; - } - clauses = OMP_CLAUSE_CHAIN (clauses); - } - return true; -} - -/* Examine clauses and the body of omp loop statement GFOR and if something - prevents gridification, issue a missed-optimization diagnostics and return - false, otherwise return true. GRID describes hitherto discovered properties - of the loop that is evaluated for possible gridification. 
*/ - -static bool -grid_inner_loop_gridifiable_p (gomp_for *gfor, grid_prop *grid) -{ - if (!grid_seq_only_contains_local_assignments (gimple_omp_for_pre_body (gfor), - grid)) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "the inner loop " - "loop bounds computation contains a complex " - "statement\n"); - dump_printf_loc (MSG_NOTE, gimple_location (gfor), - "Loop construct cannot be analyzed for " - "gridification\n"); - } - return false; - } - - tree clauses = gimple_omp_for_clauses (gfor); - while (clauses) - { - switch (OMP_CLAUSE_CODE (clauses)) - { - case OMP_CLAUSE_SCHEDULE: - if (OMP_CLAUSE_SCHEDULE_KIND (clauses) != OMP_CLAUSE_SCHEDULE_AUTO) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "the inner loop " - "has a non-automatic schedule clause\n"); - dump_printf_loc (MSG_NOTE, gimple_location (gfor), - "Loop construct has a non automatic " - "schedule clause\n"); - } - return false; - } - break; - - case OMP_CLAUSE_REDUCTION: - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "a reduction " - "clause is present\n "); - dump_printf_loc (MSG_NOTE, gimple_location (gfor), - "Loop construct has a reduction schedule " - "clause\n"); - } - return false; - - default: - break; - } - clauses = OMP_CLAUSE_CHAIN (clauses); - } - struct walk_stmt_info wi; - memset (&wi, 0, sizeof (wi)); - if (walk_gimple_seq (gimple_omp_body (gfor), - grid_find_ungridifiable_statement, - NULL, &wi)) - { - gimple *bad = (gimple *) wi.info; - if (dump_enabled_p ()) - { - if (is_gimple_call (bad)) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "the inner loop contains " - "call to a noreturn function\n"); - else - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "the inner loop contains " - "statement %s 
which cannot be transformed\n", - gimple_code_name[(int) gimple_code (bad)]); - dump_printf_loc (MSG_NOTE, gimple_location (bad), - "This statement cannot be analyzed for " - "gridification\n"); - } - return false; - } - return true; -} - -/* Given distribute omp construct represented by DIST, which in the original - source forms a compound construct with a looping construct, return true if it - can be turned into a gridified HSA kernel. Otherwise return false. GRID - describes hitherto discovered properties of the loop that is evaluated for - possible gridification. */ - -static bool -grid_dist_follows_simple_pattern (gomp_for *dist, grid_prop *grid) -{ - location_t tloc = grid->target_loc; - gimple *stmt = grid_find_single_omp_among_assignments (gimple_omp_body (dist), - grid, "distribute"); - gomp_parallel *par; - if (!stmt - || !(par = dyn_cast <gomp_parallel *> (stmt)) - || !grid_parallel_clauses_gridifiable (par, tloc)) - return false; - - stmt = grid_find_single_omp_among_assignments (gimple_omp_body (par), grid, - "parallel"); - gomp_for *gfor; - if (!stmt || !(gfor = dyn_cast <gomp_for *> (stmt))) - return false; - - if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, - GRID_MISSED_MSG_PREFIX "the inner loop is not " - "a simple for loop\n"); - return false; - } - gcc_assert (gimple_omp_for_collapse (gfor) == grid->collapse); - - if (!grid_inner_loop_gridifiable_p (gfor, grid)) - return false; - - return true; -} - -/* Given an omp loop statement GFOR, return true if it can participate in - tiling gridification, i.e. in one where the distribute and parallel for - loops do not form a compound statement. GRID describes hitherto discovered - properties of the loop that is evaluated for possible gridification. 
*/ - -static bool -grid_gfor_follows_tiling_pattern (gomp_for *gfor, grid_prop *grid) -{ - if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "an inner loop is not " - "a simple for loop\n"); - dump_printf_loc (MSG_NOTE, gimple_location (gfor), - "This statement is not a simple for loop\n"); - } - return false; - } - - if (!grid_inner_loop_gridifiable_p (gfor, grid)) - return false; - - if (gimple_omp_for_collapse (gfor) != grid->collapse) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "an inner loop does not " - "have use the same collapse clause\n"); - dump_printf_loc (MSG_NOTE, gimple_location (gfor), - "Loop construct uses a different collapse clause\n"); - } - return false; - } - - struct omp_for_data fd; - struct omp_for_data_loop *loops - = (struct omp_for_data_loop *)alloca (grid->collapse - * sizeof (struct omp_for_data_loop)); - extract_omp_for_data (gfor, &fd, loops); - for (unsigned i = 0; i < grid->collapse; i++) - { - tree itype, type = TREE_TYPE (fd.loops[i].v); - if (POINTER_TYPE_P (type)) - itype = signed_type_for (type); - else - itype = type; - - tree n1 = fold_convert (itype, fd.loops[i].n1); - tree n2 = fold_convert (itype, fd.loops[i].n2); - tree t = build_int_cst (itype, - (fd.loops[i].cond_code == LT_EXPR ? 
-1 : 1)); - t = fold_build2 (PLUS_EXPR, itype, fd.loops[i].step, t); - t = fold_build2 (PLUS_EXPR, itype, t, n2); - t = fold_build2 (MINUS_EXPR, itype, t, n1); - if (TYPE_UNSIGNED (itype) && fd.loops[i].cond_code == GT_EXPR) - t = fold_build2 (TRUNC_DIV_EXPR, itype, - fold_build1 (NEGATE_EXPR, itype, t), - fold_build1 (NEGATE_EXPR, itype, fd.loops[i].step)); - else - t = fold_build2 (TRUNC_DIV_EXPR, itype, t, fd.loops[i].step); - - if (!operand_equal_p (grid->group_sizes[i], t, 0)) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "the distribute and " - "an internal loop do not agree on tile size\n"); - dump_printf_loc (MSG_NOTE, gimple_location (gfor), - "Loop construct does not seem to loop over " - "a tile size\n"); - } - return false; - } - } - return true; -} - -/* Facing a call to FNDECL in the body of a distribute construct, return true - if we can handle it or false if it precludes gridification. */ - -static bool -grid_call_permissible_in_distribute_p (tree fndecl) -{ - if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl)) - return true; - - const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl)); - if (strstr (name, "omp_") != name) - return false; - - if ((strcmp (name, "omp_get_thread_num") == 0) - || (strcmp (name, "omp_get_num_threads") == 0) - || (strcmp (name, "omp_get_num_teams") == 0) - || (strcmp (name, "omp_get_team_num") == 0) - || (strcmp (name, "omp_get_level") == 0) - || (strcmp (name, "omp_get_active_level") == 0) - || (strcmp (name, "omp_in_parallel") == 0)) - return true; - - return false; -} - -/* Facing a call satisfying grid_call_permissible_in_distribute_p in the body - of a distribute construct that is pointed at by GSI, modify it as necessary - for gridification. If the statement itself got removed, return true. 
*/ - -static bool -grid_handle_call_in_distribute (gimple_stmt_iterator *gsi) -{ - gimple *stmt = gsi_stmt (*gsi); - tree fndecl = gimple_call_fndecl (stmt); - gcc_checking_assert (stmt); - if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl)) - return false; - - const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl)); - if ((strcmp (name, "omp_get_thread_num") == 0) - || (strcmp (name, "omp_get_level") == 0) - || (strcmp (name, "omp_get_active_level") == 0) - || (strcmp (name, "omp_in_parallel") == 0)) - { - tree lhs = gimple_call_lhs (stmt); - if (lhs) - { - gassign *assign - = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs))); - gsi_insert_before (gsi, assign, GSI_SAME_STMT); - } - gsi_remove (gsi, true); - return true; - } - - /* The rest of the omp functions can stay as they are, HSA back-end will - handle them correctly. */ - gcc_checking_assert ((strcmp (name, "omp_get_num_threads") == 0) - || (strcmp (name, "omp_get_num_teams") == 0) - || (strcmp (name, "omp_get_team_num") == 0)); - return false; -} - -/* Given a sequence of statements within a distribute omp construct or a - parallel construct, which in the original source does not form a compound - construct with a looping construct, return true if it does not prevent us - from turning it into a gridified HSA kernel. Otherwise return false. GRID - describes hitherto discovered properties of the loop that is evaluated for - possible gridification. IN_PARALLEL must be true if seq is within a - parallel construct and flase if it is only within a distribute - construct. 
*/ - -static bool -grid_dist_follows_tiling_pattern (gimple_seq seq, grid_prop *grid, - bool in_parallel) -{ - gimple_stmt_iterator gsi; - for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi)) - { - gimple *stmt = gsi_stmt (gsi); - - if (grid_safe_assignment_p (stmt, grid) - || gimple_code (stmt) == GIMPLE_GOTO - || gimple_code (stmt) == GIMPLE_LABEL - || gimple_code (stmt) == GIMPLE_COND) - continue; - else if (gbind *bind = dyn_cast <gbind *> (stmt)) - { - if (!grid_dist_follows_tiling_pattern (gimple_bind_body (bind), - grid, in_parallel)) - return false; - continue; - } - else if (gtry *try_stmt = dyn_cast <gtry *> (stmt)) - { - if (gimple_try_kind (try_stmt) == GIMPLE_TRY_CATCH) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "the distribute " - "construct contains a try..catch region\n"); - dump_printf_loc (MSG_NOTE, gimple_location (try_stmt), - "This statement cannot be analyzed for " - "tiled gridification\n"); - } - return false; - } - if (!grid_dist_follows_tiling_pattern (gimple_try_eval (try_stmt), - grid, in_parallel)) - return false; - if (!grid_dist_follows_tiling_pattern (gimple_try_cleanup (try_stmt), - grid, in_parallel)) - return false; - continue; - } - else if (is_gimple_call (stmt)) - { - tree fndecl = gimple_call_fndecl (stmt); - if (fndecl && grid_call_permissible_in_distribute_p (fndecl)) - continue; - - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "the distribute " - "construct contains a call\n"); - dump_printf_loc (MSG_NOTE, gimple_location (stmt), - "This statement cannot be analyzed for " - "tiled gridification\n"); - } - return false; - } - else if (gomp_parallel *par = dyn_cast <gomp_parallel *> (stmt)) - { - if (in_parallel) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "a parallel " - "construct contains 
another parallel " - "construct\n"); - dump_printf_loc (MSG_NOTE, gimple_location (stmt), - "This parallel construct is nested in " - "another one\n"); - } - return false; - } - if (!grid_parallel_clauses_gridifiable (par, grid->target_loc) - || !grid_dist_follows_tiling_pattern (gimple_omp_body (par), - grid, true)) - return false; - } - else if (gomp_for *gfor = dyn_cast <gomp_for *> (stmt)) - { - if (!in_parallel) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "a loop " - "construct is not nested within a parallel " - "construct\n"); - dump_printf_loc (MSG_NOTE, gimple_location (stmt), - "This loop construct is not nested in " - "a parallel construct\n"); - } - return false; - } - if (!grid_gfor_follows_tiling_pattern (gfor, grid)) - return false; - } - else - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "the distribute " - "construct contains a complex statement\n"); - dump_printf_loc (MSG_NOTE, gimple_location (stmt), - "This statement cannot be analyzed for " - "tiled gridification\n"); - } - return false; - } - } - return true; -} - -/* If TARGET follows a pattern that can be turned into a gridified HSA kernel, - return true, otherwise return false. In the case of success, also fill in - GRID with information describing the kernel grid. 
*/ - -static bool -grid_target_follows_gridifiable_pattern (gomp_target *target, grid_prop *grid) -{ - if (gimple_omp_target_kind (target) != GF_OMP_TARGET_KIND_REGION) - return false; - - location_t tloc = gimple_location (target); - grid->target_loc = tloc; - gimple *stmt - = grid_find_single_omp_among_assignments (gimple_omp_body (target), - grid, "target"); - if (!stmt) - return false; - gomp_teams *teams = dyn_cast <gomp_teams *> (stmt); - tree group_size = NULL; - if (!teams) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, - GRID_MISSED_MSG_PREFIX "it does not have a sole teams " - "construct in it.\n"); - return false; - } - - tree clauses = gimple_omp_teams_clauses (teams); - while (clauses) - { - switch (OMP_CLAUSE_CODE (clauses)) - { - case OMP_CLAUSE_NUM_TEAMS: - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, - GRID_MISSED_MSG_PREFIX "the teams construct " - "contains a num_teams clause\n "); - return false; - - case OMP_CLAUSE_REDUCTION: - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, - GRID_MISSED_MSG_PREFIX "a reduction " - "clause is present\n "); - return false; - - case OMP_CLAUSE_THREAD_LIMIT: - if (!integer_zerop (OMP_CLAUSE_OPERAND (clauses, 0))) - group_size = OMP_CLAUSE_OPERAND (clauses, 0); - break; - - default: - break; - } - clauses = OMP_CLAUSE_CHAIN (clauses); - } - - stmt = grid_find_single_omp_among_assignments (gimple_omp_body (teams), grid, - "teams"); - if (!stmt) - return false; - gomp_for *dist = dyn_cast <gomp_for *> (stmt); - if (!dist) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, - GRID_MISSED_MSG_PREFIX "the teams construct does not " - "have a single distribute construct in it.\n"); - return false; - } - - gcc_assert (gimple_omp_for_kind (dist) == GF_OMP_FOR_KIND_DISTRIBUTE); - - grid->collapse = gimple_omp_for_collapse (dist); - if (grid->collapse > 3) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, - GRID_MISSED_MSG_PREFIX "the 
distribute construct " - "contains collapse clause with parameter greater " - "than 3\n"); - return false; - } - - struct omp_for_data fd; - struct omp_for_data_loop *dist_loops - = (struct omp_for_data_loop *)alloca (grid->collapse - * sizeof (struct omp_for_data_loop)); - extract_omp_for_data (dist, &fd, dist_loops); - if (fd.chunk_size) - { - if (group_size && !operand_equal_p (group_size, fd.chunk_size, 0)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, - GRID_MISSED_MSG_PREFIX "the teams " - "thread limit is different from distribute " - "schedule chunk\n"); - return false; - } - group_size = fd.chunk_size; - } - if (group_size && grid->collapse > 1) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, - GRID_MISSED_MSG_PREFIX "group size cannot be " - "set using thread_limit or schedule clauses " - "when also using a collapse clause greater than 1\n"); - return false; - } - - if (gimple_omp_for_combined_p (dist)) - { - grid->tiling = false; - grid->group_sizes[0] = group_size; - for (unsigned i = 1; i < grid->collapse; i++) - grid->group_sizes[i] = NULL; - return grid_dist_follows_simple_pattern (dist, grid); - } - else - { - grid->tiling = true; - if (group_size) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, - GRID_MISSED_MSG_PREFIX "group size cannot be set " - "using thread_limit or schedule clauses when " - "distribute and loop constructs do not form " - "one combined construct\n"); - return false; - } - for (unsigned i = 0; i < grid->collapse; i++) - { - if (fd.loops[i].cond_code == GT_EXPR) - grid->group_sizes[i] = fold_build1 (NEGATE_EXPR, - TREE_TYPE (fd.loops[i].step), - fd.loops[i].step); - else - grid->group_sizes[i] = fd.loops[i].step; - } - return grid_dist_follows_tiling_pattern (gimple_omp_body (dist), grid, - false); - } -} - -/* Operand walker, used to remap pre-body declarations according to a hash map - provided in DATA. 
*/ - -static tree -grid_remap_prebody_decls (tree *tp, int *walk_subtrees, void *data) -{ - tree t = *tp; - - if (DECL_P (t) || TYPE_P (t)) - *walk_subtrees = 0; - else - *walk_subtrees = 1; - - if (VAR_P (t)) - { - struct walk_stmt_info *wi = (struct walk_stmt_info *) data; - hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info; - tree *repl = declmap->get (t); - if (repl) - *tp = *repl; - } - return NULL_TREE; -} - -/* Identifiers of segments into which a particular variable should be places - when gridifying. */ - -enum grid_var_segment {GRID_SEGMENT_PRIVATE, GRID_SEGMENT_GROUP, - GRID_SEGMENT_GLOBAL}; - -/* Mark VAR so that it is eventually placed into SEGMENT. Place an artificial - builtin call into SEQ that will make sure the variable is always considered - address taken. */ - -static void -grid_mark_variable_segment (tree var, enum grid_var_segment segment) -{ - /* Making a non-addressable variables would require that we re-gimplify all - their uses. Fortunately, we do not have to do this because if they are - not addressable, it means they are not used in atomic or parallel - statements and so relaxed GPU consistency rules mean we can just keep them - private. */ - if (!TREE_ADDRESSABLE (var)) - return; - - switch (segment) - { - case GRID_SEGMENT_GROUP: - DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_group_segment"), - NULL, DECL_ATTRIBUTES (var)); - break; - case GRID_SEGMENT_GLOBAL: - DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_global_segment"), - NULL, DECL_ATTRIBUTES (var)); - break; - default: - gcc_unreachable (); - } - - if (!TREE_STATIC (var)) - { - TREE_STATIC (var) = 1; - varpool_node::finalize_decl (var); - } - -} - -/* Copy leading register-type assignments to local variables in SRC to just - before DST, Creating temporaries, adjusting mapping of operands in WI and - remapping operands as necessary. Add any new temporaries to TGT_BIND. 
- Return the first statement that does not conform to grid_safe_assignment_p - or NULL. If VAR_SEGMENT is not GRID_SEGMENT_PRIVATE, also mark all - variables in traversed bind statements so that they are put into the - appropriate segment. */ - -static gimple * -grid_copy_leading_local_assignments (gimple_seq src, gimple_stmt_iterator *dst, - gbind *tgt_bind, - enum grid_var_segment var_segment, - struct walk_stmt_info *wi) -{ - hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info; - gimple_stmt_iterator gsi; - for (gsi = gsi_start (src); !gsi_end_p (gsi); gsi_next (&gsi)) - { - gimple *stmt = gsi_stmt (gsi); - if (gbind *bind = dyn_cast <gbind *> (stmt)) - { - gimple *r = grid_copy_leading_local_assignments - (gimple_bind_body (bind), dst, tgt_bind, var_segment, wi); - - if (var_segment != GRID_SEGMENT_PRIVATE) - for (tree var = gimple_bind_vars (bind); var; var = DECL_CHAIN (var)) - grid_mark_variable_segment (var, var_segment); - if (r) - return r; - else - continue; - } - if (!grid_safe_assignment_p (stmt, NULL)) - return stmt; - tree lhs = gimple_assign_lhs (as_a <gassign *> (stmt)); - tree repl = copy_var_decl (lhs, create_tmp_var_name (NULL), - TREE_TYPE (lhs)); - DECL_CONTEXT (repl) = current_function_decl; - gimple_bind_append_vars (tgt_bind, repl); - - declmap->put (lhs, repl); - gassign *copy = as_a <gassign *> (gimple_copy (stmt)); - walk_gimple_op (copy, grid_remap_prebody_decls, wi); - gsi_insert_before (dst, copy, GSI_SAME_STMT); - } - return NULL; -} - -/* Statement walker function to make adjustments to statements within the - gridifed kernel copy. 
*/ - -static tree -grid_process_grid_body (gimple_stmt_iterator *gsi, bool *handled_ops_p, - struct walk_stmt_info *) -{ - *handled_ops_p = false; - gimple *stmt = gsi_stmt (*gsi); - if (gimple_code (stmt) == GIMPLE_OMP_FOR - && (gimple_omp_for_kind (stmt) & GF_OMP_FOR_SIMD)) - { - gomp_for *loop = as_a <gomp_for *> (stmt); - tree clauses = gimple_omp_for_clauses (loop); - tree cl = find_omp_clause (clauses, OMP_CLAUSE_SAFELEN); - if (cl) - OMP_CLAUSE_SAFELEN_EXPR (cl) = integer_one_node; - else - { - tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_SAFELEN); - OMP_CLAUSE_SAFELEN_EXPR (c) = integer_one_node; - OMP_CLAUSE_CHAIN (c) = clauses; - gimple_omp_for_set_clauses (loop, c); - } - } - return NULL_TREE; -} - -/* Given a PARLOOP that is a normal for looping construct but also a part of a - combined construct with a simd loop, eliminate the simd loop. */ - -static void -grid_eliminate_combined_simd_part (gomp_for *parloop) -{ - struct walk_stmt_info wi; - - memset (&wi, 0, sizeof (wi)); - wi.val_only = true; - enum gf_mask msk = GF_OMP_FOR_SIMD; - wi.info = (void *) &msk; - walk_gimple_seq (gimple_omp_body (parloop), find_combined_for, NULL, &wi); - gimple *stmt = (gimple *) wi.info; - /* We expect that the SIMD id the only statement in the parallel loop. */ - gcc_assert (stmt - && gimple_code (stmt) == GIMPLE_OMP_FOR - && (gimple_omp_for_kind (stmt) == GF_OMP_FOR_SIMD) - && gimple_omp_for_combined_into_p (stmt) - && !gimple_omp_for_combined_p (stmt)); - gomp_for *simd = as_a <gomp_for *> (stmt); - - /* Copy over the iteration properties because the body refers to the index in - the bottmom-most loop. 
*/ - unsigned i, collapse = gimple_omp_for_collapse (parloop); - gcc_checking_assert (collapse == gimple_omp_for_collapse (simd)); - for (i = 0; i < collapse; i++) - { - gimple_omp_for_set_index (parloop, i, gimple_omp_for_index (simd, i)); - gimple_omp_for_set_initial (parloop, i, gimple_omp_for_initial (simd, i)); - gimple_omp_for_set_final (parloop, i, gimple_omp_for_final (simd, i)); - gimple_omp_for_set_incr (parloop, i, gimple_omp_for_incr (simd, i)); - } - - tree *tgt= gimple_omp_for_clauses_ptr (parloop); - while (*tgt) - tgt = &OMP_CLAUSE_CHAIN (*tgt); - - /* Copy over all clauses, except for linaer clauses, which are turned into - private clauses, and all other simd-specificl clauses, which are - ignored. */ - tree *pc = gimple_omp_for_clauses_ptr (simd); - while (*pc) - { - tree c = *pc; - switch (TREE_CODE (c)) - { - case OMP_CLAUSE_LINEAR: - { - tree priv = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_PRIVATE); - OMP_CLAUSE_DECL (priv) = OMP_CLAUSE_DECL (c); - OMP_CLAUSE_CHAIN (priv) = NULL; - *tgt = priv; - tgt = &OMP_CLAUSE_CHAIN (priv); - pc = &OMP_CLAUSE_CHAIN (c); - break; - } - - case OMP_CLAUSE_SAFELEN: - case OMP_CLAUSE_SIMDLEN: - case OMP_CLAUSE_ALIGNED: - pc = &OMP_CLAUSE_CHAIN (c); - break; - - default: - *pc = OMP_CLAUSE_CHAIN (c); - OMP_CLAUSE_CHAIN (c) = NULL; - *tgt = c; - tgt = &OMP_CLAUSE_CHAIN(c); - break; - } - } - - /* Finally, throw away the simd and mark the parallel loop as not - combined. */ - gimple_omp_set_body (parloop, gimple_omp_body (simd)); - gimple_omp_for_set_combined_p (parloop, false); -} - -/* Statement walker function marking all parallels as grid_phony and loops as - grid ones representing threads of a particular thread group. 
*/ - -static tree -grid_mark_tiling_loops (gimple_stmt_iterator *gsi, bool *handled_ops_p, - struct walk_stmt_info *wi_in) -{ - *handled_ops_p = false; - if (gomp_for *loop = dyn_cast <gomp_for *> (gsi_stmt (*gsi))) - { - *handled_ops_p = true; - gimple_omp_for_set_kind (loop, GF_OMP_FOR_KIND_GRID_LOOP); - gimple_omp_for_set_grid_intra_group (loop, true); - if (gimple_omp_for_combined_p (loop)) - grid_eliminate_combined_simd_part (loop); - - struct walk_stmt_info body_wi; - memset (&body_wi, 0, sizeof (body_wi)); - walk_gimple_seq_mod (gimple_omp_body_ptr (loop), - grid_process_grid_body, NULL, &body_wi); - - gbind *bind = (gbind *) wi_in->info; - tree c; - for (c = gimple_omp_for_clauses (loop); c; c = OMP_CLAUSE_CHAIN (c)) - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE) - { - push_gimplify_context (); - tree ov = OMP_CLAUSE_DECL (c); - tree gv = copy_var_decl (ov, create_tmp_var_name (NULL), - TREE_TYPE (ov)); - - grid_mark_variable_segment (gv, GRID_SEGMENT_GROUP); - DECL_CONTEXT (gv) = current_function_decl; - gimple_bind_append_vars (bind, gv); - tree x = lang_hooks.decls.omp_clause_assign_op (c, gv, ov); - gimplify_and_add (x, &OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c)); - x = lang_hooks.decls.omp_clause_copy_ctor (c, ov, gv); - gimple_seq l = NULL; - gimplify_and_add (x, &l); - gsi_insert_seq_after (gsi, l, GSI_SAME_STMT); - pop_gimplify_context (bind); - } - } - return NULL_TREE; -} - -/* Statement walker function marking all parallels as grid_phony and loops as - grid ones representing threads of a particular thread group. 
*/ - -static tree -grid_mark_tiling_parallels_and_loops (gimple_stmt_iterator *gsi, - bool *handled_ops_p, - struct walk_stmt_info *wi_in) -{ - *handled_ops_p = false; - wi_in->removed_stmt = false; - gimple *stmt = gsi_stmt (*gsi); - if (gbind *bind = dyn_cast <gbind *> (stmt)) - { - for (tree var = gimple_bind_vars (bind); var; var = DECL_CHAIN (var)) - grid_mark_variable_segment (var, GRID_SEGMENT_GROUP); - } - else if (gomp_parallel *parallel = dyn_cast <gomp_parallel *> (stmt)) - { - *handled_ops_p = true; - gimple_omp_parallel_set_grid_phony (parallel, true); - - gbind *new_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK)); - gimple_bind_set_body (new_bind, gimple_omp_body (parallel)); - gimple_seq s = NULL; - gimple_seq_add_stmt (&s, new_bind); - gimple_omp_set_body (parallel, s); - - struct walk_stmt_info wi_par; - memset (&wi_par, 0, sizeof (wi_par)); - wi_par.info = new_bind; - walk_gimple_seq_mod (gimple_bind_body_ptr (new_bind), - grid_mark_tiling_loops, NULL, &wi_par); - } - else if (is_a <gcall *> (stmt)) - wi_in->removed_stmt = grid_handle_call_in_distribute (gsi); - return NULL_TREE; -} - -/* Given freshly copied top level kernel SEQ, identify the individual OMP - components, mark them as part of kernel, copy assignment leading to them - just before DST, remapping them using WI and adding new temporaries to - TGT_BIND, and and return the loop that will be used for kernel dispatch. 
*/ - -static gomp_for * -grid_process_kernel_body_copy (grid_prop *grid, gimple_seq seq, - gimple_stmt_iterator *dst, - gbind *tgt_bind, struct walk_stmt_info *wi) -{ - gimple *stmt = grid_copy_leading_local_assignments (seq, dst, tgt_bind, - GRID_SEGMENT_GLOBAL, wi); - gomp_teams *teams = dyn_cast <gomp_teams *> (stmt); - gcc_assert (teams); - gimple_omp_teams_set_grid_phony (teams, true); - stmt = grid_copy_leading_local_assignments (gimple_omp_body (teams), dst, - tgt_bind, GRID_SEGMENT_GLOBAL, wi); - gcc_checking_assert (stmt); - gomp_for *dist = dyn_cast <gomp_for *> (stmt); - gcc_assert (dist); - gimple_seq prebody = gimple_omp_for_pre_body (dist); - if (prebody) - grid_copy_leading_local_assignments (prebody, dst, tgt_bind, - GRID_SEGMENT_GROUP, wi); - - if (grid->tiling) - { - gimple_omp_for_set_kind (dist, GF_OMP_FOR_KIND_GRID_LOOP); - gimple_omp_for_set_grid_group_iter (dist, true); - - struct walk_stmt_info wi_tiled; - memset (&wi_tiled, 0, sizeof (wi_tiled)); - walk_gimple_seq_mod (gimple_omp_body_ptr (dist), - grid_mark_tiling_parallels_and_loops, NULL, - &wi_tiled); - return dist; - } - else - { - gimple_omp_for_set_grid_phony (dist, true); - stmt = grid_copy_leading_local_assignments (gimple_omp_body (dist), dst, - tgt_bind, - GRID_SEGMENT_PRIVATE, wi); - gcc_checking_assert (stmt); - gomp_parallel *parallel = as_a <gomp_parallel *> (stmt); - gimple_omp_parallel_set_grid_phony (parallel, true); - stmt = grid_copy_leading_local_assignments (gimple_omp_body (parallel), - dst, tgt_bind, - GRID_SEGMENT_PRIVATE, wi); - gomp_for *inner_loop = as_a <gomp_for *> (stmt); - gimple_omp_for_set_kind (inner_loop, GF_OMP_FOR_KIND_GRID_LOOP); - prebody = gimple_omp_for_pre_body (inner_loop); - if (prebody) - grid_copy_leading_local_assignments (prebody, dst, tgt_bind, - GRID_SEGMENT_PRIVATE, wi); - - if (gimple_omp_for_combined_p (inner_loop)) - grid_eliminate_combined_simd_part (inner_loop); - struct walk_stmt_info body_wi;; - memset (&body_wi, 0, sizeof 
(body_wi)); - walk_gimple_seq_mod (gimple_omp_body_ptr (inner_loop), - grid_process_grid_body, NULL, &body_wi); - - return inner_loop; - } -} - -/* If TARGET points to a GOMP_TARGET which follows a gridifiable pattern, - create a GPU kernel for it. GSI must point to the same statement, TGT_BIND - is the bind into which temporaries inserted before TARGET should be - added. */ - -static void -grid_attempt_target_gridification (gomp_target *target, - gimple_stmt_iterator *gsi, - gbind *tgt_bind) -{ - /* removed group_size */ - grid_prop grid; - memset (&grid, 0, sizeof (grid)); - if (!target || !grid_target_follows_gridifiable_pattern (target, &grid)) - return; - - location_t loc = gimple_location (target); - if (dump_enabled_p ()) - dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, - "Target construct will be turned into a gridified HSA " - "kernel\n"); - - /* Copy target body to a GPUKERNEL construct: */ - gimple_seq kernel_seq = copy_gimple_seq_and_replace_locals - (gimple_omp_body (target)); - - hash_map<tree, tree> *declmap = new hash_map<tree, tree>; - struct walk_stmt_info wi; - memset (&wi, 0, sizeof (struct walk_stmt_info)); - wi.info = declmap; - - /* Copy assignments in between OMP statements before target, mark OMP - statements within copy appropriately. 
*/ - gomp_for *inner_loop = grid_process_kernel_body_copy (&grid, kernel_seq, gsi, - tgt_bind, &wi); - - gbind *old_bind = as_a <gbind *> (gimple_seq_first (gimple_omp_body (target))); - gbind *new_bind = as_a <gbind *> (gimple_seq_first (kernel_seq)); - tree new_block = gimple_bind_block (new_bind); - tree enc_block = BLOCK_SUPERCONTEXT (gimple_bind_block (old_bind)); - BLOCK_CHAIN (new_block) = BLOCK_SUBBLOCKS (enc_block); - BLOCK_SUBBLOCKS (enc_block) = new_block; - BLOCK_SUPERCONTEXT (new_block) = enc_block; - gimple *gpukernel = gimple_build_omp_grid_body (kernel_seq); - gimple_seq_add_stmt - (gimple_bind_body_ptr (as_a <gbind *> (gimple_omp_body (target))), - gpukernel); - - for (size_t i = 0; i < grid.collapse; i++) - walk_tree (&grid.group_sizes[i], grid_remap_prebody_decls, &wi, NULL); - push_gimplify_context (); - for (size_t i = 0; i < grid.collapse; i++) - { - tree itype, type = TREE_TYPE (gimple_omp_for_index (inner_loop, i)); - if (POINTER_TYPE_P (type)) - itype = signed_type_for (type); - else - itype = type; - - enum tree_code cond_code = gimple_omp_for_cond (inner_loop, i); - tree n1 = unshare_expr (gimple_omp_for_initial (inner_loop, i)); - walk_tree (&n1, grid_remap_prebody_decls, &wi, NULL); - tree n2 = unshare_expr (gimple_omp_for_final (inner_loop, i)); - walk_tree (&n2, grid_remap_prebody_decls, &wi, NULL); - adjust_for_condition (loc, &cond_code, &n2); - n1 = fold_convert (itype, n1); - n2 = fold_convert (itype, n2); - - tree step - = get_omp_for_step_from_incr (loc, gimple_omp_for_incr (inner_loop, i)); - - tree t = build_int_cst (itype, (cond_code == LT_EXPR ? 
-1 : 1)); - t = fold_build2 (PLUS_EXPR, itype, step, t); - t = fold_build2 (PLUS_EXPR, itype, t, n2); - t = fold_build2 (MINUS_EXPR, itype, t, n1); - if (TYPE_UNSIGNED (itype) && cond_code == GT_EXPR) - t = fold_build2 (TRUNC_DIV_EXPR, itype, - fold_build1 (NEGATE_EXPR, itype, t), - fold_build1 (NEGATE_EXPR, itype, step)); - else - t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); - if (grid.tiling) - { - if (cond_code == GT_EXPR) - step = fold_build1 (NEGATE_EXPR, itype, step); - t = fold_build2 (MULT_EXPR, itype, t, step); - } - - tree gs = fold_convert (uint32_type_node, t); - gimple_seq tmpseq = NULL; - gimplify_expr (&gs, &tmpseq, NULL, is_gimple_val, fb_rvalue); - if (!gimple_seq_empty_p (tmpseq)) - gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT); - - tree ws; - if (grid.group_sizes[i]) - { - ws = fold_convert (uint32_type_node, grid.group_sizes[i]); - tmpseq = NULL; - gimplify_expr (&ws, &tmpseq, NULL, is_gimple_val, fb_rvalue); - if (!gimple_seq_empty_p (tmpseq)) - gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT); - } - else - ws = build_zero_cst (uint32_type_node); - - tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__GRIDDIM_); - OMP_CLAUSE__GRIDDIM__DIMENSION (c) = i; - OMP_CLAUSE__GRIDDIM__SIZE (c) = gs; - OMP_CLAUSE__GRIDDIM__GROUP (c) = ws; - OMP_CLAUSE_CHAIN (c) = gimple_omp_target_clauses (target); - gimple_omp_target_set_clauses (target, c); - } - pop_gimplify_context (tgt_bind); - delete declmap; - return; -} - -/* Walker function doing all the work for create_target_kernels. 
*/ - -static tree -grid_gridify_all_targets_stmt (gimple_stmt_iterator *gsi, - bool *handled_ops_p, - struct walk_stmt_info *incoming) -{ - *handled_ops_p = false; - - gimple *stmt = gsi_stmt (*gsi); - gomp_target *target = dyn_cast <gomp_target *> (stmt); - if (target) - { - gbind *tgt_bind = (gbind *) incoming->info; - gcc_checking_assert (tgt_bind); - grid_attempt_target_gridification (target, gsi, tgt_bind); - return NULL_TREE; - } - gbind *bind = dyn_cast <gbind *> (stmt); - if (bind) - { - *handled_ops_p = true; - struct walk_stmt_info wi; - memset (&wi, 0, sizeof (wi)); - wi.info = bind; - walk_gimple_seq_mod (gimple_bind_body_ptr (bind), - grid_gridify_all_targets_stmt, NULL, &wi); - } - return NULL_TREE; -} - -/* Attempt to gridify all target constructs in BODY_P. All such targets will - have their bodies duplicated, with the new copy being put into a - gimple_omp_grid_body statement. All kernel-related construct within the - grid_body will be marked with phony flags or kernel kinds. Moreover, some - re-structuring is often needed, such as copying pre-bodies before the target - construct so that kernel grid sizes can be computed. */ - -static void -grid_gridify_all_targets (gimple_seq *body_p) -{ - struct walk_stmt_info wi; - memset (&wi, 0, sizeof (wi)); - walk_gimple_seq_mod (body_p, grid_gridify_all_targets_stmt, NULL, &wi); -} - - /* Main entry point. */ static unsigned int @@ -18968,7 +8854,7 @@ execute_lower_omp (void) body = gimple_body (current_function_decl); if (hsa_gen_requested_p ()) - grid_gridify_all_targets (&body); + omp_grid_gridify_all_targets (&body); scan_omp (&body, NULL); gcc_assert (taskreg_nesting_level == 0); @@ -19285,163 +9171,6 @@ diagnose_sb_2 (gimple_stmt_iterator *gsi_p, bool *handled_ops_p, return NULL_TREE; } -/* Called from tree-cfg.c::make_edges to create cfg edges for all relevant - GIMPLE_* codes. 
*/ -bool -make_gimple_omp_edges (basic_block bb, struct omp_region **region, - int *region_idx) -{ - gimple *last = last_stmt (bb); - enum gimple_code code = gimple_code (last); - struct omp_region *cur_region = *region; - bool fallthru = false; - - switch (code) - { - case GIMPLE_OMP_PARALLEL: - case GIMPLE_OMP_TASK: - case GIMPLE_OMP_FOR: - case GIMPLE_OMP_SINGLE: - case GIMPLE_OMP_TEAMS: - case GIMPLE_OMP_MASTER: - case GIMPLE_OMP_TASKGROUP: - case GIMPLE_OMP_CRITICAL: - case GIMPLE_OMP_SECTION: - case GIMPLE_OMP_GRID_BODY: - cur_region = new_omp_region (bb, code, cur_region); - fallthru = true; - break; - - case GIMPLE_OMP_ORDERED: - cur_region = new_omp_region (bb, code, cur_region); - fallthru = true; - if (find_omp_clause (gimple_omp_ordered_clauses - (as_a <gomp_ordered *> (last)), - OMP_CLAUSE_DEPEND)) - cur_region = cur_region->outer; - break; - - case GIMPLE_OMP_TARGET: - cur_region = new_omp_region (bb, code, cur_region); - fallthru = true; - switch (gimple_omp_target_kind (last)) - { - case GF_OMP_TARGET_KIND_REGION: - case GF_OMP_TARGET_KIND_DATA: - case GF_OMP_TARGET_KIND_OACC_PARALLEL: - case GF_OMP_TARGET_KIND_OACC_KERNELS: - case GF_OMP_TARGET_KIND_OACC_DATA: - case GF_OMP_TARGET_KIND_OACC_HOST_DATA: - break; - case GF_OMP_TARGET_KIND_UPDATE: - case GF_OMP_TARGET_KIND_ENTER_DATA: - case GF_OMP_TARGET_KIND_EXIT_DATA: - case GF_OMP_TARGET_KIND_OACC_UPDATE: - case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: - case GF_OMP_TARGET_KIND_OACC_DECLARE: - cur_region = cur_region->outer; - break; - default: - gcc_unreachable (); - } - break; - - case GIMPLE_OMP_SECTIONS: - cur_region = new_omp_region (bb, code, cur_region); - fallthru = true; - break; - - case GIMPLE_OMP_SECTIONS_SWITCH: - fallthru = false; - break; - - case GIMPLE_OMP_ATOMIC_LOAD: - case GIMPLE_OMP_ATOMIC_STORE: - fallthru = true; - break; - - case GIMPLE_OMP_RETURN: - /* In the case of a GIMPLE_OMP_SECTION, the edge will go - somewhere other than the next block. This will be - created later. 
*/ - cur_region->exit = bb; - if (cur_region->type == GIMPLE_OMP_TASK) - /* Add an edge corresponding to not scheduling the task - immediately. */ - make_edge (cur_region->entry, bb, EDGE_ABNORMAL); - fallthru = cur_region->type != GIMPLE_OMP_SECTION; - cur_region = cur_region->outer; - break; - - case GIMPLE_OMP_CONTINUE: - cur_region->cont = bb; - switch (cur_region->type) - { - case GIMPLE_OMP_FOR: - /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE - succs edges as abnormal to prevent splitting - them. */ - single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL; - /* Make the loopback edge. */ - make_edge (bb, single_succ (cur_region->entry), - EDGE_ABNORMAL); - - /* Create an edge from GIMPLE_OMP_FOR to exit, which - corresponds to the case that the body of the loop - is not executed at all. */ - make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL); - make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL); - fallthru = false; - break; - - case GIMPLE_OMP_SECTIONS: - /* Wire up the edges into and out of the nested sections. */ - { - basic_block switch_bb = single_succ (cur_region->entry); - - struct omp_region *i; - for (i = cur_region->inner; i ; i = i->next) - { - gcc_assert (i->type == GIMPLE_OMP_SECTION); - make_edge (switch_bb, i->entry, 0); - make_edge (i->exit, bb, EDGE_FALLTHRU); - } - - /* Make the loopback edge to the block with - GIMPLE_OMP_SECTIONS_SWITCH. */ - make_edge (bb, switch_bb, 0); - - /* Make the edge from the switch to exit. 
*/ - make_edge (switch_bb, bb->next_bb, 0); - fallthru = false; - } - break; - - case GIMPLE_OMP_TASK: - fallthru = true; - break; - - default: - gcc_unreachable (); - } - break; - - default: - gcc_unreachable (); - } - - if (*region != cur_region) - { - *region = cur_region; - if (cur_region) - *region_idx = cur_region->entry->index; - else - *region_idx = 0; - } - - return fallthru; -} - static unsigned int diagnose_omp_structured_block_errors (void) { @@ -19507,1623 +9236,5 @@ make_pass_diagnose_omp_blocks (gcc::context *ctxt) return new pass_diagnose_omp_blocks (ctxt); } -/* Helper function for omp_finish_file routine. Takes decls from V_DECLS and - adds their addresses and sizes to constructor-vector V_CTOR. */ -static void -add_decls_addresses_to_decl_constructor (vec<tree, va_gc> *v_decls, - vec<constructor_elt, va_gc> *v_ctor) -{ - unsigned len = vec_safe_length (v_decls); - for (unsigned i = 0; i < len; i++) - { - tree it = (*v_decls)[i]; - bool is_var = VAR_P (it); - bool is_link_var - = is_var -#ifdef ACCEL_COMPILER - && DECL_HAS_VALUE_EXPR_P (it) -#endif - && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (it)); - - tree size = NULL_TREE; - if (is_var) - size = fold_convert (const_ptr_type_node, DECL_SIZE_UNIT (it)); - - tree addr; - if (!is_link_var) - addr = build_fold_addr_expr (it); - else - { -#ifdef ACCEL_COMPILER - /* For "omp declare target link" vars add address of the pointer to - the target table, instead of address of the var. */ - tree value_expr = DECL_VALUE_EXPR (it); - tree link_ptr_decl = TREE_OPERAND (value_expr, 0); - varpool_node::finalize_decl (link_ptr_decl); - addr = build_fold_addr_expr (link_ptr_decl); -#else - addr = build_fold_addr_expr (it); -#endif - - /* Most significant bit of the size marks "omp declare target link" - vars in host and target tables. 
*/ - unsigned HOST_WIDE_INT isize = tree_to_uhwi (size); - isize |= 1ULL << (int_size_in_bytes (const_ptr_type_node) - * BITS_PER_UNIT - 1); - size = wide_int_to_tree (const_ptr_type_node, isize); - } - - CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, addr); - if (is_var) - CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, size); - } -} - -/* Create new symbols containing (address, size) pairs for global variables, - marked with "omp declare target" attribute, as well as addresses for the - functions, which are outlined offloading regions. */ -void -omp_finish_file (void) -{ - unsigned num_funcs = vec_safe_length (offload_funcs); - unsigned num_vars = vec_safe_length (offload_vars); - - if (num_funcs == 0 && num_vars == 0) - return; - - if (targetm_common.have_named_sections) - { - vec<constructor_elt, va_gc> *v_f, *v_v; - vec_alloc (v_f, num_funcs); - vec_alloc (v_v, num_vars * 2); - - add_decls_addresses_to_decl_constructor (offload_funcs, v_f); - add_decls_addresses_to_decl_constructor (offload_vars, v_v); - - tree vars_decl_type = build_array_type_nelts (pointer_sized_int_node, - num_vars * 2); - tree funcs_decl_type = build_array_type_nelts (pointer_sized_int_node, - num_funcs); - SET_TYPE_ALIGN (vars_decl_type, TYPE_ALIGN (pointer_sized_int_node)); - SET_TYPE_ALIGN (funcs_decl_type, TYPE_ALIGN (pointer_sized_int_node)); - tree ctor_v = build_constructor (vars_decl_type, v_v); - tree ctor_f = build_constructor (funcs_decl_type, v_f); - TREE_CONSTANT (ctor_v) = TREE_CONSTANT (ctor_f) = 1; - TREE_STATIC (ctor_v) = TREE_STATIC (ctor_f) = 1; - tree funcs_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, - get_identifier (".offload_func_table"), - funcs_decl_type); - tree vars_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, - get_identifier (".offload_var_table"), - vars_decl_type); - TREE_STATIC (funcs_decl) = TREE_STATIC (vars_decl) = 1; - /* Do not align tables more than TYPE_ALIGN (pointer_sized_int_node), - otherwise a joint table in a binary will contain padding between - 
tables from multiple object files. */ - DECL_USER_ALIGN (funcs_decl) = DECL_USER_ALIGN (vars_decl) = 1; - SET_DECL_ALIGN (funcs_decl, TYPE_ALIGN (funcs_decl_type)); - SET_DECL_ALIGN (vars_decl, TYPE_ALIGN (vars_decl_type)); - DECL_INITIAL (funcs_decl) = ctor_f; - DECL_INITIAL (vars_decl) = ctor_v; - set_decl_section_name (funcs_decl, OFFLOAD_FUNC_TABLE_SECTION_NAME); - set_decl_section_name (vars_decl, OFFLOAD_VAR_TABLE_SECTION_NAME); - - varpool_node::finalize_decl (vars_decl); - varpool_node::finalize_decl (funcs_decl); - } - else - { - for (unsigned i = 0; i < num_funcs; i++) - { - tree it = (*offload_funcs)[i]; - targetm.record_offload_symbol (it); - } - for (unsigned i = 0; i < num_vars; i++) - { - tree it = (*offload_vars)[i]; - targetm.record_offload_symbol (it); - } - } -} - -/* Find the number of threads (POS = false), or thread number (POS = - true) for an OpenACC region partitioned as MASK. Setup code - required for the calculation is added to SEQ. */ - -static tree -oacc_thread_numbers (bool pos, int mask, gimple_seq *seq) -{ - tree res = pos ? NULL_TREE : build_int_cst (unsigned_type_node, 1); - unsigned ix; - - /* Start at gang level, and examine relevant dimension indices. */ - for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++) - if (GOMP_DIM_MASK (ix) & mask) - { - tree arg = build_int_cst (unsigned_type_node, ix); - - if (res) - { - /* We had an outer index, so scale that by the size of - this dimension. */ - tree n = create_tmp_var (integer_type_node); - gimple *call - = gimple_build_call_internal (IFN_GOACC_DIM_SIZE, 1, arg); - - gimple_call_set_lhs (call, n); - gimple_seq_add_stmt (seq, call); - res = fold_build2 (MULT_EXPR, integer_type_node, res, n); - } - if (pos) - { - /* Determine index in this dimension. 
*/ - tree id = create_tmp_var (integer_type_node); - gimple *call = gimple_build_call_internal - (IFN_GOACC_DIM_POS, 1, arg); - - gimple_call_set_lhs (call, id); - gimple_seq_add_stmt (seq, call); - if (res) - res = fold_build2 (PLUS_EXPR, integer_type_node, res, id); - else - res = id; - } - } - - if (res == NULL_TREE) - res = integer_zero_node; - - return res; -} - -/* Transform IFN_GOACC_LOOP calls to actual code. See - expand_oacc_for for where these are generated. At the vector - level, we stride loops, such that each member of a warp will - operate on adjacent iterations. At the worker and gang level, - each gang/warp executes a set of contiguous iterations. Chunking - can override this such that each iteration engine executes a - contiguous chunk, and then moves on to stride to the next chunk. */ - -static void -oacc_xform_loop (gcall *call) -{ - gimple_stmt_iterator gsi = gsi_for_stmt (call); - enum ifn_goacc_loop_kind code - = (enum ifn_goacc_loop_kind) TREE_INT_CST_LOW (gimple_call_arg (call, 0)); - tree dir = gimple_call_arg (call, 1); - tree range = gimple_call_arg (call, 2); - tree step = gimple_call_arg (call, 3); - tree chunk_size = NULL_TREE; - unsigned mask = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 5)); - tree lhs = gimple_call_lhs (call); - tree type = TREE_TYPE (lhs); - tree diff_type = TREE_TYPE (range); - tree r = NULL_TREE; - gimple_seq seq = NULL; - bool chunking = false, striding = true; - unsigned outer_mask = mask & (~mask + 1); // Outermost partitioning - unsigned inner_mask = mask & ~outer_mask; // Inner partitioning (if any) - -#ifdef ACCEL_COMPILER - chunk_size = gimple_call_arg (call, 4); - if (integer_minus_onep (chunk_size) /* Force static allocation. */ - || integer_zerop (chunk_size)) /* Default (also static). */ - { - /* If we're at the gang level, we want each to execute a - contiguous run of iterations. Otherwise we want each element - to stride. 
*/ - striding = !(outer_mask & GOMP_DIM_MASK (GOMP_DIM_GANG)); - chunking = false; - } - else - { - /* Chunk of size 1 is striding. */ - striding = integer_onep (chunk_size); - chunking = !striding; - } -#endif - - /* striding=true, chunking=true - -> invalid. - striding=true, chunking=false - -> chunks=1 - striding=false,chunking=true - -> chunks=ceil (range/(chunksize*threads*step)) - striding=false,chunking=false - -> chunk_size=ceil(range/(threads*step)),chunks=1 */ - push_gimplify_context (true); - - switch (code) - { - default: gcc_unreachable (); - - case IFN_GOACC_LOOP_CHUNKS: - if (!chunking) - r = build_int_cst (type, 1); - else - { - /* chunk_max - = (range - dir) / (chunks * step * num_threads) + dir */ - tree per = oacc_thread_numbers (false, mask, &seq); - per = fold_convert (type, per); - chunk_size = fold_convert (type, chunk_size); - per = fold_build2 (MULT_EXPR, type, per, chunk_size); - per = fold_build2 (MULT_EXPR, type, per, step); - r = build2 (MINUS_EXPR, type, range, dir); - r = build2 (PLUS_EXPR, type, r, per); - r = build2 (TRUNC_DIV_EXPR, type, r, per); - } - break; - - case IFN_GOACC_LOOP_STEP: - { - /* If striding, step by the entire compute volume, otherwise - step by the inner volume. */ - unsigned volume = striding ? 
mask : inner_mask; - - r = oacc_thread_numbers (false, volume, &seq); - r = build2 (MULT_EXPR, type, fold_convert (type, r), step); - } - break; - - case IFN_GOACC_LOOP_OFFSET: - if (striding) - { - r = oacc_thread_numbers (true, mask, &seq); - r = fold_convert (diff_type, r); - } - else - { - tree inner_size = oacc_thread_numbers (false, inner_mask, &seq); - tree outer_size = oacc_thread_numbers (false, outer_mask, &seq); - tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size), - inner_size, outer_size); - - volume = fold_convert (diff_type, volume); - if (chunking) - chunk_size = fold_convert (diff_type, chunk_size); - else - { - tree per = fold_build2 (MULT_EXPR, diff_type, volume, step); - - chunk_size = build2 (MINUS_EXPR, diff_type, range, dir); - chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per); - chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per); - } - - tree span = build2 (MULT_EXPR, diff_type, chunk_size, - fold_convert (diff_type, inner_size)); - r = oacc_thread_numbers (true, outer_mask, &seq); - r = fold_convert (diff_type, r); - r = build2 (MULT_EXPR, diff_type, r, span); - - tree inner = oacc_thread_numbers (true, inner_mask, &seq); - inner = fold_convert (diff_type, inner); - r = fold_build2 (PLUS_EXPR, diff_type, r, inner); - - if (chunking) - { - tree chunk = fold_convert (diff_type, gimple_call_arg (call, 6)); - tree per - = fold_build2 (MULT_EXPR, diff_type, volume, chunk_size); - per = build2 (MULT_EXPR, diff_type, per, chunk); - - r = build2 (PLUS_EXPR, diff_type, r, per); - } - } - r = fold_build2 (MULT_EXPR, diff_type, r, step); - if (type != diff_type) - r = fold_convert (type, r); - break; - - case IFN_GOACC_LOOP_BOUND: - if (striding) - r = range; - else - { - tree inner_size = oacc_thread_numbers (false, inner_mask, &seq); - tree outer_size = oacc_thread_numbers (false, outer_mask, &seq); - tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size), - inner_size, outer_size); - - volume = fold_convert 
(diff_type, volume); - if (chunking) - chunk_size = fold_convert (diff_type, chunk_size); - else - { - tree per = fold_build2 (MULT_EXPR, diff_type, volume, step); - - chunk_size = build2 (MINUS_EXPR, diff_type, range, dir); - chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per); - chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per); - } - - tree span = build2 (MULT_EXPR, diff_type, chunk_size, - fold_convert (diff_type, inner_size)); - - r = fold_build2 (MULT_EXPR, diff_type, span, step); - - tree offset = gimple_call_arg (call, 6); - r = build2 (PLUS_EXPR, diff_type, r, - fold_convert (diff_type, offset)); - r = build2 (integer_onep (dir) ? MIN_EXPR : MAX_EXPR, - diff_type, r, range); - } - if (diff_type != type) - r = fold_convert (type, r); - break; - } - - gimplify_assign (lhs, r, &seq); - - pop_gimplify_context (NULL); - - gsi_replace_with_seq (&gsi, seq, true); -} - -/* Default partitioned and minimum partitioned dimensions. */ - -static int oacc_default_dims[GOMP_DIM_MAX]; -static int oacc_min_dims[GOMP_DIM_MAX]; - -/* Parse the default dimension parameter. This is a set of - :-separated optional compute dimensions. Each specified dimension - is a positive integer. When device type support is added, it is - planned to be a comma separated list of such compute dimensions, - with all but the first prefixed by the colon-terminated device - type. */ - -static void -oacc_parse_default_dims (const char *dims) -{ - int ix; - - for (ix = GOMP_DIM_MAX; ix--;) - { - oacc_default_dims[ix] = -1; - oacc_min_dims[ix] = 1; - } - -#ifndef ACCEL_COMPILER - /* Cannot be overridden on the host. 
*/ - dims = NULL; -#endif - if (dims) - { - const char *pos = dims; - - for (ix = 0; *pos && ix != GOMP_DIM_MAX; ix++) - { - if (ix) - { - if (*pos != ':') - goto malformed; - pos++; - } - - if (*pos != ':') - { - long val; - const char *eptr; - - errno = 0; - val = strtol (pos, CONST_CAST (char **, &eptr), 10); - if (errno || val <= 0 || (int) val != val) - goto malformed; - pos = eptr; - oacc_default_dims[ix] = (int) val; - } - } - if (*pos) - { - malformed: - error_at (UNKNOWN_LOCATION, - "-fopenacc-dim operand is malformed at '%s'", pos); - } - } - - /* Allow the backend to validate the dimensions. */ - targetm.goacc.validate_dims (NULL_TREE, oacc_default_dims, -1); - targetm.goacc.validate_dims (NULL_TREE, oacc_min_dims, -2); -} - -/* Validate and update the dimensions for offloaded FN. ATTRS is the - raw attribute. DIMS is an array of dimensions, which is filled in. - LEVEL is the partitioning level of a routine, or -1 for an offload - region itself. USED is the mask of partitioned execution in the - function. */ - -static void -oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used) -{ - tree purpose[GOMP_DIM_MAX]; - unsigned ix; - tree pos = TREE_VALUE (attrs); - bool is_kernel = oacc_fn_attrib_kernels_p (attrs); - - /* Make sure the attribute creator attached the dimension - information. */ - gcc_assert (pos); - - for (ix = 0; ix != GOMP_DIM_MAX; ix++) - { - purpose[ix] = TREE_PURPOSE (pos); - tree val = TREE_VALUE (pos); - dims[ix] = val ? TREE_INT_CST_LOW (val) : -1; - pos = TREE_CHAIN (pos); - } - - bool changed = targetm.goacc.validate_dims (fn, dims, level); - - /* Default anything left to 1 or a partitioned default. */ - for (ix = 0; ix != GOMP_DIM_MAX; ix++) - if (dims[ix] < 0) - { - /* The OpenACC spec says 'If the [num_gangs] clause is not - specified, an implementation-defined default will be used; - the default may depend on the code within the construct.' - (2.5.6). 
Thus an implementation is free to choose - non-unity default for a parallel region that doesn't have - any gang-partitioned loops. However, it appears that there - is a sufficient body of user code that expects non-gang - partitioned regions to not execute in gang-redundant mode. - So we (a) don't warn about the non-portability and (b) pick - the minimum permissible dimension size when there is no - partitioned execution. Otherwise we pick the global - default for the dimension, which the user can control. The - same wording and logic applies to num_workers and - vector_length, however the worker- or vector- single - execution doesn't have the same impact as gang-redundant - execution. (If the minimum gang-level partioning is not 1, - the target is probably too confusing.) */ - dims[ix] = (used & GOMP_DIM_MASK (ix) - ? oacc_default_dims[ix] : oacc_min_dims[ix]); - changed = true; - } - - if (changed) - { - /* Replace the attribute with new values. */ - pos = NULL_TREE; - for (ix = GOMP_DIM_MAX; ix--;) - { - pos = tree_cons (purpose[ix], - build_int_cst (integer_type_node, dims[ix]), - pos); - if (is_kernel) - TREE_PUBLIC (pos) = 1; - } - replace_oacc_fn_attrib (fn, pos); - } -} - -/* Create an empty OpenACC loop structure at LOC. */ - -static oacc_loop * -new_oacc_loop_raw (oacc_loop *parent, location_t loc) -{ - oacc_loop *loop = XCNEW (oacc_loop); - - loop->parent = parent; - loop->child = loop->sibling = NULL; - - if (parent) - { - loop->sibling = parent->child; - parent->child = loop; - } - - loop->loc = loc; - loop->marker = NULL; - memset (loop->heads, 0, sizeof (loop->heads)); - memset (loop->tails, 0, sizeof (loop->tails)); - loop->routine = NULL_TREE; - - loop->mask = loop->flags = loop->inner = 0; - loop->ifns = 0; - loop->chunk_size = 0; - loop->head_end = NULL; - - return loop; -} - -/* Create an outermost, dummy OpenACC loop for offloaded function - DECL. 
*/ - -static oacc_loop * -new_oacc_loop_outer (tree decl) -{ - return new_oacc_loop_raw (NULL, DECL_SOURCE_LOCATION (decl)); -} - -/* Start a new OpenACC loop structure beginning at head marker HEAD. - Link into PARENT loop. Return the new loop. */ - -static oacc_loop * -new_oacc_loop (oacc_loop *parent, gcall *marker) -{ - oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (marker)); - - loop->marker = marker; - - /* TODO: This is where device_type flattening would occur for the loop - flags. */ - - loop->flags = TREE_INT_CST_LOW (gimple_call_arg (marker, 3)); - - tree chunk_size = integer_zero_node; - if (loop->flags & OLF_GANG_STATIC) - chunk_size = gimple_call_arg (marker, 4); - loop->chunk_size = chunk_size; - - return loop; -} - -/* Create a dummy loop encompassing a call to a openACC routine. - Extract the routine's partitioning requirements. */ - -static void -new_oacc_loop_routine (oacc_loop *parent, gcall *call, tree decl, tree attrs) -{ - oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (call)); - int level = oacc_fn_attrib_level (attrs); - - gcc_assert (level >= 0); - - loop->marker = call; - loop->routine = decl; - loop->mask = ((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1) - ^ (GOMP_DIM_MASK (level) - 1)); -} - -/* Finish off the current OpenACC loop ending at tail marker TAIL. - Return the parent loop. */ - -static oacc_loop * -finish_oacc_loop (oacc_loop *loop) -{ - /* If the loop has been collapsed, don't partition it. */ - if (!loop->ifns) - loop->mask = loop->flags = 0; - return loop->parent; -} - -/* Free all OpenACC loop structures within LOOP (inclusive). */ - -static void -free_oacc_loop (oacc_loop *loop) -{ - if (loop->sibling) - free_oacc_loop (loop->sibling); - if (loop->child) - free_oacc_loop (loop->child); - - free (loop); -} - -/* Dump out the OpenACC loop head or tail beginning at FROM. 
*/ - -static void -dump_oacc_loop_part (FILE *file, gcall *from, int depth, - const char *title, int level) -{ - enum ifn_unique_kind kind - = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0)); - - fprintf (file, "%*s%s-%d:\n", depth * 2, "", title, level); - for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;) - { - gimple *stmt = gsi_stmt (gsi); - - if (gimple_call_internal_p (stmt, IFN_UNIQUE)) - { - enum ifn_unique_kind k - = ((enum ifn_unique_kind) TREE_INT_CST_LOW - (gimple_call_arg (stmt, 0))); - - if (k == kind && stmt != from) - break; - } - print_gimple_stmt (file, stmt, depth * 2 + 2, 0); - - gsi_next (&gsi); - while (gsi_end_p (gsi)) - gsi = gsi_start_bb (single_succ (gsi_bb (gsi))); - } -} - -/* Dump OpenACC loops LOOP, its siblings and its children. */ - -static void -dump_oacc_loop (FILE *file, oacc_loop *loop, int depth) -{ - int ix; - - fprintf (file, "%*sLoop %x(%x) %s:%u\n", depth * 2, "", - loop->flags, loop->mask, - LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc)); - - if (loop->marker) - print_gimple_stmt (file, loop->marker, depth * 2, 0); - - if (loop->routine) - fprintf (file, "%*sRoutine %s:%u:%s\n", - depth * 2, "", DECL_SOURCE_FILE (loop->routine), - DECL_SOURCE_LINE (loop->routine), - IDENTIFIER_POINTER (DECL_NAME (loop->routine))); - - for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++) - if (loop->heads[ix]) - dump_oacc_loop_part (file, loop->heads[ix], depth, "Head", ix); - for (ix = GOMP_DIM_MAX; ix--;) - if (loop->tails[ix]) - dump_oacc_loop_part (file, loop->tails[ix], depth, "Tail", ix); - - if (loop->child) - dump_oacc_loop (file, loop->child, depth + 1); - if (loop->sibling) - dump_oacc_loop (file, loop->sibling, depth); -} - -void debug_oacc_loop (oacc_loop *); - -/* Dump loops to stderr. */ - -DEBUG_FUNCTION void -debug_oacc_loop (oacc_loop *loop) -{ - dump_oacc_loop (stderr, loop, 0); -} - -/* DFS walk of basic blocks BB onwards, creating OpenACC loop - structures as we go. 
By construction these loops are properly - nested. */ - -static void -oacc_loop_discover_walk (oacc_loop *loop, basic_block bb) -{ - int marker = 0; - int remaining = 0; - - if (bb->flags & BB_VISITED) - return; - - follow: - bb->flags |= BB_VISITED; - - /* Scan for loop markers. */ - for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); - gsi_next (&gsi)) - { - gimple *stmt = gsi_stmt (gsi); - - if (!is_gimple_call (stmt)) - continue; - - gcall *call = as_a <gcall *> (stmt); - - /* If this is a routine, make a dummy loop for it. */ - if (tree decl = gimple_call_fndecl (call)) - if (tree attrs = get_oacc_fn_attrib (decl)) - { - gcc_assert (!marker); - new_oacc_loop_routine (loop, call, decl, attrs); - } - - if (!gimple_call_internal_p (call)) - continue; - - switch (gimple_call_internal_fn (call)) - { - default: - break; - - case IFN_GOACC_LOOP: - /* Count the goacc loop abstraction fns, to determine if the - loop was collapsed already. */ - loop->ifns++; - break; - - case IFN_UNIQUE: - enum ifn_unique_kind kind - = (enum ifn_unique_kind) (TREE_INT_CST_LOW - (gimple_call_arg (call, 0))); - if (kind == IFN_UNIQUE_OACC_HEAD_MARK - || kind == IFN_UNIQUE_OACC_TAIL_MARK) - { - if (gimple_call_num_args (call) == 2) - { - gcc_assert (marker && !remaining); - marker = 0; - if (kind == IFN_UNIQUE_OACC_TAIL_MARK) - loop = finish_oacc_loop (loop); - else - loop->head_end = call; - } - else - { - int count = TREE_INT_CST_LOW (gimple_call_arg (call, 2)); - - if (!marker) - { - if (kind == IFN_UNIQUE_OACC_HEAD_MARK) - loop = new_oacc_loop (loop, call); - remaining = count; - } - gcc_assert (count == remaining); - if (remaining) - { - remaining--; - if (kind == IFN_UNIQUE_OACC_HEAD_MARK) - loop->heads[marker] = call; - else - loop->tails[remaining] = call; - } - marker++; - } - } - } - } - if (remaining || marker) - { - bb = single_succ (bb); - gcc_assert (single_pred_p (bb) && !(bb->flags & BB_VISITED)); - goto follow; - } - - /* Walk successor blocks. 
*/ - edge e; - edge_iterator ei; - - FOR_EACH_EDGE (e, ei, bb->succs) - oacc_loop_discover_walk (loop, e->dest); -} - -/* LOOP is the first sibling. Reverse the order in place and return - the new first sibling. Recurse to child loops. */ - -static oacc_loop * -oacc_loop_sibling_nreverse (oacc_loop *loop) -{ - oacc_loop *last = NULL; - do - { - if (loop->child) - loop->child = oacc_loop_sibling_nreverse (loop->child); - - oacc_loop *next = loop->sibling; - loop->sibling = last; - last = loop; - loop = next; - } - while (loop); - - return last; -} - -/* Discover the OpenACC loops marked up by HEAD and TAIL markers for - the current function. */ - -static oacc_loop * -oacc_loop_discovery () -{ - /* Clear basic block flags, in particular BB_VISITED which we're going to use - in the following. */ - clear_bb_flags (); - - oacc_loop *top = new_oacc_loop_outer (current_function_decl); - oacc_loop_discover_walk (top, ENTRY_BLOCK_PTR_FOR_FN (cfun)); - - /* The siblings were constructed in reverse order, reverse them so - that diagnostics come out in an unsurprising order. */ - top = oacc_loop_sibling_nreverse (top); - - return top; -} - -/* Transform the abstract internal function markers starting at FROM - to be for partitioning level LEVEL. Stop when we meet another HEAD - or TAIL marker. 
*/ - -static void -oacc_loop_xform_head_tail (gcall *from, int level) -{ - enum ifn_unique_kind kind - = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0)); - tree replacement = build_int_cst (unsigned_type_node, level); - - for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;) - { - gimple *stmt = gsi_stmt (gsi); - - if (gimple_call_internal_p (stmt, IFN_UNIQUE)) - { - enum ifn_unique_kind k - = ((enum ifn_unique_kind) - TREE_INT_CST_LOW (gimple_call_arg (stmt, 0))); - - if (k == IFN_UNIQUE_OACC_FORK || k == IFN_UNIQUE_OACC_JOIN) - *gimple_call_arg_ptr (stmt, 2) = replacement; - else if (k == kind && stmt != from) - break; - } - else if (gimple_call_internal_p (stmt, IFN_GOACC_REDUCTION)) - *gimple_call_arg_ptr (stmt, 3) = replacement; - - gsi_next (&gsi); - while (gsi_end_p (gsi)) - gsi = gsi_start_bb (single_succ (gsi_bb (gsi))); - } -} - -/* Transform the IFN_GOACC_LOOP internal functions by providing the - determined partitioning mask and chunking argument. END_MARKER - points at the end IFN_HEAD_TAIL call intgroducing the loop. IFNS - is the number of IFN_GOACC_LOOP calls for the loop. MASK_ARG is - the replacement partitioning mask and CHUNK_ARG is the replacement - chunking arg. */ - -static void -oacc_loop_xform_loop (gcall *end_marker, unsigned ifns, - tree mask_arg, tree chunk_arg) -{ - gimple_stmt_iterator gsi = gsi_for_stmt (end_marker); - - gcc_checking_assert (ifns); - for (;;) - { - for (; !gsi_end_p (gsi); gsi_next (&gsi)) - { - gimple *stmt = gsi_stmt (gsi); - - if (!is_gimple_call (stmt)) - continue; - - gcall *call = as_a <gcall *> (stmt); - - if (!gimple_call_internal_p (call)) - continue; - - if (gimple_call_internal_fn (call) != IFN_GOACC_LOOP) - continue; - - *gimple_call_arg_ptr (call, 5) = mask_arg; - *gimple_call_arg_ptr (call, 4) = chunk_arg; - ifns--; - if (!ifns) - return; - } - - /* The LOOP_BOUND ifn could be in the single successor - block. 
*/ - basic_block bb = single_succ (gsi_bb (gsi)); - gsi = gsi_start_bb (bb); - } -} - -/* Process the discovered OpenACC loops, setting the correct - partitioning level etc. */ - -static void -oacc_loop_process (oacc_loop *loop) -{ - if (loop->child) - oacc_loop_process (loop->child); - - if (loop->mask && !loop->routine) - { - int ix; - unsigned mask = loop->mask; - unsigned dim = GOMP_DIM_GANG; - tree mask_arg = build_int_cst (unsigned_type_node, mask); - tree chunk_arg = loop->chunk_size; - - oacc_loop_xform_loop (loop->head_end, loop->ifns, mask_arg, chunk_arg); - - for (ix = 0; ix != GOMP_DIM_MAX && mask; ix++) - { - while (!(GOMP_DIM_MASK (dim) & mask)) - dim++; - - oacc_loop_xform_head_tail (loop->heads[ix], dim); - oacc_loop_xform_head_tail (loop->tails[ix], dim); - - mask ^= GOMP_DIM_MASK (dim); - } - } - - if (loop->sibling) - oacc_loop_process (loop->sibling); -} - -/* Walk the OpenACC loop heirarchy checking and assigning the - programmer-specified partitionings. OUTER_MASK is the partitioning - this loop is contained within. Return mask of partitioning - encountered. If any auto loops are discovered, set GOMP_DIM_MAX - bit. */ - -static unsigned -oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask) -{ - unsigned this_mask = loop->mask; - unsigned mask_all = 0; - bool noisy = true; - -#ifdef ACCEL_COMPILER - /* When device_type is supported, we want the device compiler to be - noisy, if the loop parameters are device_type-specific. */ - noisy = false; -#endif - - if (!loop->routine) - { - bool auto_par = (loop->flags & OLF_AUTO) != 0; - bool seq_par = (loop->flags & OLF_SEQ) != 0; - - this_mask = ((loop->flags >> OLF_DIM_BASE) - & (GOMP_DIM_MASK (GOMP_DIM_MAX) - 1)); - - if ((this_mask != 0) + auto_par + seq_par > 1) - { - if (noisy) - error_at (loop->loc, - seq_par - ? 
"%<seq%> overrides other OpenACC loop specifiers" - : "%<auto%> conflicts with other OpenACC loop specifiers"); - auto_par = false; - loop->flags &= ~OLF_AUTO; - if (seq_par) - { - loop->flags &= - ~((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1) << OLF_DIM_BASE); - this_mask = 0; - } - } - if (auto_par && (loop->flags & OLF_INDEPENDENT)) - mask_all |= GOMP_DIM_MASK (GOMP_DIM_MAX); - } - - if (this_mask & outer_mask) - { - const oacc_loop *outer; - for (outer = loop->parent; outer; outer = outer->parent) - if (outer->mask & this_mask) - break; - - if (noisy) - { - if (outer) - { - error_at (loop->loc, - "%s uses same OpenACC parallelism as containing loop", - loop->routine ? "routine call" : "inner loop"); - inform (outer->loc, "containing loop here"); - } - else - error_at (loop->loc, - "%s uses OpenACC parallelism disallowed by containing routine", - loop->routine ? "routine call" : "loop"); - - if (loop->routine) - inform (DECL_SOURCE_LOCATION (loop->routine), - "routine %qD declared here", loop->routine); - } - this_mask &= ~outer_mask; - } - else - { - unsigned outermost = least_bit_hwi (this_mask); - - if (outermost && outermost <= outer_mask) - { - if (noisy) - { - error_at (loop->loc, - "incorrectly nested OpenACC loop parallelism"); - - const oacc_loop *outer; - for (outer = loop->parent; - outer->flags && outer->flags < outermost; - outer = outer->parent) - continue; - inform (outer->loc, "containing loop here"); - } - - this_mask &= ~outermost; - } - } - - loop->mask = this_mask; - mask_all |= this_mask; - - if (loop->child) - { - loop->inner = oacc_loop_fixed_partitions (loop->child, - outer_mask | this_mask); - mask_all |= loop->inner; - } - - if (loop->sibling) - mask_all |= oacc_loop_fixed_partitions (loop->sibling, outer_mask); - - return mask_all; -} - -/* Walk the OpenACC loop heirarchy to assign auto-partitioned loops. - OUTER_MASK is the partitioning this loop is contained within. 
- Return the cumulative partitioning used by this loop, siblings and - children. */ - -static unsigned -oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask) -{ - bool assign = (loop->flags & OLF_AUTO) && (loop->flags & OLF_INDEPENDENT); - bool noisy = true; - -#ifdef ACCEL_COMPILER - /* When device_type is supported, we want the device compiler to be - noisy, if the loop parameters are device_type-specific. */ - noisy = false; -#endif - - if (assign && outer_mask < GOMP_DIM_MASK (GOMP_DIM_MAX - 1)) - { - /* Allocate the outermost loop at the outermost available - level. */ - unsigned this_mask = outer_mask + 1; - - if (!(this_mask & loop->inner)) - loop->mask = this_mask; - } - - if (loop->child) - { - unsigned child_mask = outer_mask | loop->mask; - - if (loop->mask || assign) - child_mask |= GOMP_DIM_MASK (GOMP_DIM_MAX); - - loop->inner = oacc_loop_auto_partitions (loop->child, child_mask); - } - - if (assign && !loop->mask) - { - /* Allocate the loop at the innermost available level. */ - unsigned this_mask = 0; - - /* Determine the outermost partitioning used within this loop. */ - this_mask = loop->inner | GOMP_DIM_MASK (GOMP_DIM_MAX); - this_mask = least_bit_hwi (this_mask); - - /* Pick the partitioning just inside that one. */ - this_mask >>= 1; - - /* And avoid picking one use by an outer loop. */ - this_mask &= ~outer_mask; - - if (!this_mask && noisy) - warning_at (loop->loc, 0, - "insufficient partitioning available to parallelize loop"); - - loop->mask = this_mask; - } - - if (assign && dump_file) - fprintf (dump_file, "Auto loop %s:%d assigned %d\n", - LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc), - loop->mask); - - unsigned inner_mask = 0; - - if (loop->sibling) - inner_mask |= oacc_loop_auto_partitions (loop->sibling, outer_mask); - - inner_mask |= loop->inner | loop->mask; - - return inner_mask; -} - -/* Walk the OpenACC loop heirarchy to check and assign partitioning - axes. Return mask of partitioning. 
*/ - -static unsigned -oacc_loop_partition (oacc_loop *loop, unsigned outer_mask) -{ - unsigned mask_all = oacc_loop_fixed_partitions (loop, outer_mask); - - if (mask_all & GOMP_DIM_MASK (GOMP_DIM_MAX)) - { - mask_all ^= GOMP_DIM_MASK (GOMP_DIM_MAX); - mask_all |= oacc_loop_auto_partitions (loop, outer_mask); - } - return mask_all; -} - -/* Default fork/join early expander. Delete the function calls if - there is no RTL expander. */ - -bool -default_goacc_fork_join (gcall *ARG_UNUSED (call), - const int *ARG_UNUSED (dims), bool is_fork) -{ - if (is_fork) - return targetm.have_oacc_fork (); - else - return targetm.have_oacc_join (); -} - -/* Default goacc.reduction early expander. - - LHS-opt = IFN_REDUCTION (KIND, RES_PTR, VAR, LEVEL, OP, OFFSET) - If RES_PTR is not integer-zerop: - SETUP - emit 'LHS = *RES_PTR', LHS = NULL - TEARDOWN - emit '*RES_PTR = VAR' - If LHS is not NULL - emit 'LHS = VAR' */ - -void -default_goacc_reduction (gcall *call) -{ - unsigned code = (unsigned)TREE_INT_CST_LOW (gimple_call_arg (call, 0)); - gimple_stmt_iterator gsi = gsi_for_stmt (call); - tree lhs = gimple_call_lhs (call); - tree var = gimple_call_arg (call, 2); - gimple_seq seq = NULL; - - if (code == IFN_GOACC_REDUCTION_SETUP - || code == IFN_GOACC_REDUCTION_TEARDOWN) - { - /* Setup and Teardown need to copy from/to the receiver object, - if there is one. */ - tree ref_to_res = gimple_call_arg (call, 1); - - if (!integer_zerop (ref_to_res)) - { - tree dst = build_simple_mem_ref (ref_to_res); - tree src = var; - - if (code == IFN_GOACC_REDUCTION_SETUP) - { - src = dst; - dst = lhs; - lhs = NULL; - } - gimple_seq_add_stmt (&seq, gimple_build_assign (dst, src)); - } - } - - /* Copy VAR to LHS, if there is an LHS. 
*/ - if (lhs) - gimple_seq_add_stmt (&seq, gimple_build_assign (lhs, var)); - - gsi_replace_with_seq (&gsi, seq, true); -} - -/* Main entry point for oacc transformations which run on the device - compiler after LTO, so we know what the target device is at this - point (including the host fallback). */ - -static unsigned int -execute_oacc_device_lower () -{ - tree attrs = get_oacc_fn_attrib (current_function_decl); - - if (!attrs) - /* Not an offloaded function. */ - return 0; - - /* Parse the default dim argument exactly once. */ - if ((const void *)flag_openacc_dims != &flag_openacc_dims) - { - oacc_parse_default_dims (flag_openacc_dims); - flag_openacc_dims = (char *)&flag_openacc_dims; - } - - /* Discover, partition and process the loops. */ - oacc_loop *loops = oacc_loop_discovery (); - int fn_level = oacc_fn_attrib_level (attrs); - - if (dump_file) - fprintf (dump_file, oacc_fn_attrib_kernels_p (attrs) - ? "Function is kernels offload\n" - : fn_level < 0 ? "Function is parallel offload\n" - : "Function is routine level %d\n", fn_level); - - unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0; - unsigned used_mask = oacc_loop_partition (loops, outer_mask); - int dims[GOMP_DIM_MAX]; - - oacc_validate_dims (current_function_decl, attrs, dims, fn_level, used_mask); - - if (dump_file) - { - const char *comma = "Compute dimensions ["; - for (int ix = 0; ix != GOMP_DIM_MAX; ix++, comma = ", ") - fprintf (dump_file, "%s%d", comma, dims[ix]); - fprintf (dump_file, "]\n"); - } - - oacc_loop_process (loops); - if (dump_file) - { - fprintf (dump_file, "OpenACC loops\n"); - dump_oacc_loop (dump_file, loops, 0); - fprintf (dump_file, "\n"); - } - - /* Offloaded targets may introduce new basic blocks, which require - dominance information to update SSA. */ - calculate_dominance_info (CDI_DOMINATORS); - - /* Now lower internal loop functions to target-specific code - sequences. 
*/ - basic_block bb; - FOR_ALL_BB_FN (bb, cfun) - for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);) - { - gimple *stmt = gsi_stmt (gsi); - if (!is_gimple_call (stmt)) - { - gsi_next (&gsi); - continue; - } - - gcall *call = as_a <gcall *> (stmt); - if (!gimple_call_internal_p (call)) - { - gsi_next (&gsi); - continue; - } - - /* Rewind to allow rescan. */ - gsi_prev (&gsi); - bool rescan = false, remove = false; - enum internal_fn ifn_code = gimple_call_internal_fn (call); - - switch (ifn_code) - { - default: break; - - case IFN_GOACC_LOOP: - oacc_xform_loop (call); - rescan = true; - break; - - case IFN_GOACC_REDUCTION: - /* Mark the function for SSA renaming. */ - mark_virtual_operands_for_renaming (cfun); - - /* If the level is -1, this ended up being an unused - axis. Handle as a default. */ - if (integer_minus_onep (gimple_call_arg (call, 3))) - default_goacc_reduction (call); - else - targetm.goacc.reduction (call); - rescan = true; - break; - - case IFN_UNIQUE: - { - enum ifn_unique_kind kind - = ((enum ifn_unique_kind) - TREE_INT_CST_LOW (gimple_call_arg (call, 0))); - - switch (kind) - { - default: - gcc_unreachable (); - - case IFN_UNIQUE_OACC_FORK: - case IFN_UNIQUE_OACC_JOIN: - if (integer_minus_onep (gimple_call_arg (call, 2))) - remove = true; - else if (!targetm.goacc.fork_join - (call, dims, kind == IFN_UNIQUE_OACC_FORK)) - remove = true; - break; - - case IFN_UNIQUE_OACC_HEAD_MARK: - case IFN_UNIQUE_OACC_TAIL_MARK: - remove = true; - break; - } - break; - } - } - - if (gsi_end_p (gsi)) - /* We rewound past the beginning of the BB. */ - gsi = gsi_start_bb (bb); - else - /* Undo the rewind. */ - gsi_next (&gsi); - - if (remove) - { - if (gimple_vdef (call)) - replace_uses_by (gimple_vdef (call), gimple_vuse (call)); - if (gimple_call_lhs (call)) - { - /* Propagate the data dependency var. 
*/ - gimple *ass = gimple_build_assign (gimple_call_lhs (call), - gimple_call_arg (call, 1)); - gsi_replace (&gsi, ass, false); - } - else - gsi_remove (&gsi, true); - } - else if (!rescan) - /* If not rescanning, advance over the call. */ - gsi_next (&gsi); - } - - free_oacc_loop (loops); - - return 0; -} - -/* Default launch dimension validator. Force everything to 1. A - backend that wants to provide larger dimensions must override this - hook. */ - -bool -default_goacc_validate_dims (tree ARG_UNUSED (decl), int *dims, - int ARG_UNUSED (fn_level)) -{ - bool changed = false; - - for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++) - { - if (dims[ix] != 1) - { - dims[ix] = 1; - changed = true; - } - } - - return changed; -} - -/* Default dimension bound is unknown on accelerator and 1 on host. */ - -int -default_goacc_dim_limit (int ARG_UNUSED (axis)) -{ -#ifdef ACCEL_COMPILER - return 0; -#else - return 1; -#endif -} - -namespace { - -const pass_data pass_data_oacc_device_lower = -{ - GIMPLE_PASS, /* type */ - "oaccdevlow", /* name */ - OPTGROUP_OPENMP, /* optinfo_flags */ - TV_NONE, /* tv_id */ - PROP_cfg, /* properties_required */ - 0 /* Possibly PROP_gimple_eomp. 
*/, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */ -}; - -class pass_oacc_device_lower : public gimple_opt_pass -{ -public: - pass_oacc_device_lower (gcc::context *ctxt) - : gimple_opt_pass (pass_data_oacc_device_lower, ctxt) - {} - - /* opt_pass methods: */ - virtual bool gate (function *) { return flag_openacc; }; - - virtual unsigned int execute (function *) - { - return execute_oacc_device_lower (); - } - -}; // class pass_oacc_device_lower - -} // anon namespace - -gimple_opt_pass * -make_pass_oacc_device_lower (gcc::context *ctxt) -{ - return new pass_oacc_device_lower (ctxt); -} - - -/* Cleanup uses of SIMT placeholder internal functions: on non-SIMT targets, - VF is 1 and LANE is 0; on SIMT targets, VF is folded to a constant, and - LANE is kept to be expanded to RTL later on. Also cleanup all other SIMT - internal functions on non-SIMT targets, and likewise some SIMD internal - functions on SIMT targets. */ - -static unsigned int -execute_omp_device_lower () -{ - int vf = targetm.simt.vf ? targetm.simt.vf () : 1; - basic_block bb; - gimple_stmt_iterator gsi; - FOR_EACH_BB_FN (bb, cfun) - for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) - { - gimple *stmt = gsi_stmt (gsi); - if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt)) - continue; - tree lhs = gimple_call_lhs (stmt), rhs = NULL_TREE; - tree type = lhs ? TREE_TYPE (lhs) : integer_type_node; - switch (gimple_call_internal_fn (stmt)) - { - case IFN_GOMP_USE_SIMT: - rhs = vf == 1 ? integer_zero_node : integer_one_node; - break; - case IFN_GOMP_SIMT_LANE: - case IFN_GOMP_SIMT_LAST_LANE: - rhs = vf == 1 ? build_zero_cst (type) : NULL_TREE; - break; - case IFN_GOMP_SIMT_VF: - rhs = build_int_cst (type, vf); - break; - case IFN_GOMP_SIMT_ORDERED_PRED: - rhs = vf == 1 ? 
integer_zero_node : NULL_TREE; - if (rhs || !lhs) - unlink_stmt_vdef (stmt); - break; - case IFN_GOMP_SIMT_VOTE_ANY: - case IFN_GOMP_SIMT_XCHG_BFLY: - case IFN_GOMP_SIMT_XCHG_IDX: - rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE; - break; - case IFN_GOMP_SIMD_LANE: - case IFN_GOMP_SIMD_LAST_LANE: - rhs = vf != 1 ? build_zero_cst (type) : NULL_TREE; - break; - case IFN_GOMP_SIMD_VF: - rhs = vf != 1 ? build_one_cst (type) : NULL_TREE; - break; - default: - continue; - } - if (lhs && !rhs) - continue; - stmt = lhs ? gimple_build_assign (lhs, rhs) : gimple_build_nop (); - gsi_replace (&gsi, stmt, false); - } - if (vf != 1) - cfun->has_force_vectorize_loops = false; - return 0; -} - -namespace { - -const pass_data pass_data_omp_device_lower = -{ - GIMPLE_PASS, /* type */ - "ompdevlow", /* name */ - OPTGROUP_OPENMP, /* optinfo_flags */ - TV_NONE, /* tv_id */ - PROP_cfg, /* properties_required */ - PROP_gimple_lomp_dev, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - TODO_update_ssa, /* todo_flags_finish */ -}; - -class pass_omp_device_lower : public gimple_opt_pass -{ -public: - pass_omp_device_lower (gcc::context *ctxt) - : gimple_opt_pass (pass_data_omp_device_lower, ctxt) - {} - - /* opt_pass methods: */ - virtual bool gate (function *ARG_UNUSED (fun)) - { - /* FIXME: this should use PROP_gimple_lomp_dev. */ -#ifdef ACCEL_COMPILER - return true; -#else - return ENABLE_OFFLOADING && (flag_openmp || in_lto_p); -#endif - } - virtual unsigned int execute (function *) - { - return execute_omp_device_lower (); - } - -}; // class pass_expand_omp_ssa - -} // anon namespace - -gimple_opt_pass * -make_pass_omp_device_lower (gcc::context *ctxt) -{ - return new pass_omp_device_lower (ctxt); -} - -/* "omp declare target link" handling pass. 
*/ - -namespace { - -const pass_data pass_data_omp_target_link = -{ - GIMPLE_PASS, /* type */ - "omptargetlink", /* name */ - OPTGROUP_OPENMP, /* optinfo_flags */ - TV_NONE, /* tv_id */ - PROP_ssa, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - TODO_update_ssa, /* todo_flags_finish */ -}; - -class pass_omp_target_link : public gimple_opt_pass -{ -public: - pass_omp_target_link (gcc::context *ctxt) - : gimple_opt_pass (pass_data_omp_target_link, ctxt) - {} - - /* opt_pass methods: */ - virtual bool gate (function *fun) - { -#ifdef ACCEL_COMPILER - tree attrs = DECL_ATTRIBUTES (fun->decl); - return lookup_attribute ("omp declare target", attrs) - || lookup_attribute ("omp target entrypoint", attrs); -#else - (void) fun; - return false; -#endif - } - - virtual unsigned execute (function *); -}; - -/* Callback for walk_gimple_stmt used to scan for link var operands. */ - -static tree -find_link_var_op (tree *tp, int *walk_subtrees, void *) -{ - tree t = *tp; - - if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t) - && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (t))) - { - *walk_subtrees = 0; - return t; - } - - return NULL_TREE; -} - -unsigned -pass_omp_target_link::execute (function *fun) -{ - basic_block bb; - FOR_EACH_BB_FN (bb, fun) - { - gimple_stmt_iterator gsi; - for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) - if (walk_gimple_stmt (&gsi, NULL, find_link_var_op, NULL)) - gimple_regimplify_operands (gsi_stmt (gsi), &gsi); - } - - return 0; -} - -} // anon namespace - -gimple_opt_pass * -make_pass_omp_target_link (gcc::context *ctxt) -{ - return new pass_omp_target_link (ctxt); -} #include "gt-omp-low.h" diff --git a/gcc/omp-low.h b/gcc/omp-low.h index b1f7885da0e..687f357e454 100644 --- a/gcc/omp-low.h +++ b/gcc/omp-low.h @@ -20,25 +20,12 @@ along with GCC; see the file COPYING3. 
If not see #ifndef GCC_OMP_LOW_H #define GCC_OMP_LOW_H -struct omp_region; - -extern tree find_omp_clause (tree, enum omp_clause_code); -extern void omp_expand_local (basic_block); -extern void free_omp_regions (void); extern tree omp_reduction_init_op (location_t, enum tree_code, tree); extern tree omp_reduction_init (tree, tree); -extern bool make_gimple_omp_edges (basic_block, struct omp_region **, int *); -extern void omp_finish_file (void); extern tree omp_member_access_dummy_var (tree); -extern void replace_oacc_fn_attrib (tree, tree); -extern tree build_oacc_routine_dims (tree); -extern tree get_oacc_fn_attrib (tree); -extern void set_oacc_fn_attrib (tree, tree, bool, vec<tree> *); -extern bool oacc_fn_attrib_kernels_p (tree); -extern int get_oacc_ifn_dim_arg (const gimple *); -extern int get_oacc_fn_dim_size (tree, int); - -extern GTY(()) vec<tree, va_gc> *offload_funcs; -extern GTY(()) vec<tree, va_gc> *offload_vars; +extern tree omp_find_combined_for (gimple_stmt_iterator *gsi_p, + bool *handled_ops_p, + struct walk_stmt_info *wi); + #endif /* GCC_OMP_LOW_H */ diff --git a/gcc/omp-offload.c b/gcc/omp-offload.c new file mode 100644 index 00000000000..fabdf2d21d9 --- /dev/null +++ b/gcc/omp-offload.c @@ -0,0 +1,1718 @@ +/* Bits of OpenMP and OpenACC handling that is specific to device offloading + and a lowering pass for OpenACC device directives. + + Copyright (C) 2005-2016 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. 
+ +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "backend.h" +#include "target.h" +#include "tree.h" +#include "gimple.h" +#include "tree-pass.h" +#include "ssa.h" +#include "cgraph.h" +#include "pretty-print.h" +#include "diagnostic-core.h" +#include "fold-const.h" +#include "internal-fn.h" +#include "gimplify.h" +#include "gimple-iterator.h" +#include "gimplify-me.h" +#include "gimple-walk.h" +#include "tree-cfg.h" +#include "tree-into-ssa.h" +#include "common/common-target.h" +#include "omp-general.h" +#include "omp-offload.h" +#include "lto-section-names.h" +#include "gomp-constants.h" +#include "gimple-pretty-print.h" + +/* Describe the OpenACC looping structure of a function. The entire + function is held in a 'NULL' loop. */ + +struct oacc_loop +{ + oacc_loop *parent; /* Containing loop. */ + + oacc_loop *child; /* First inner loop. */ + + oacc_loop *sibling; /* Next loop within same parent. */ + + location_t loc; /* Location of the loop start. */ + + gcall *marker; /* Initial head marker. */ + + gcall *heads[GOMP_DIM_MAX]; /* Head marker functions. */ + gcall *tails[GOMP_DIM_MAX]; /* Tail marker functions. */ + + tree routine; /* Pseudo-loop enclosing a routine. */ + + unsigned mask; /* Partitioning mask. */ + unsigned inner; /* Partitioning of inner loops. */ + unsigned flags; /* Partitioning flags. */ + unsigned ifns; /* Contained loop abstraction functions. */ + tree chunk_size; /* Chunk size. */ + gcall *head_end; /* Final marker of head sequence. */ +}; + +/* Holds offload tables with decls. */ +vec<tree, va_gc> *offload_funcs, *offload_vars; + +/* Return level at which oacc routine may spawn a partitioned loop, or + -1 if it is not a routine (i.e. is an offload fn). 
*/ + +static int +oacc_fn_attrib_level (tree attr) +{ + tree pos = TREE_VALUE (attr); + + if (!TREE_PURPOSE (pos)) + return -1; + + int ix = 0; + for (ix = 0; ix != GOMP_DIM_MAX; + ix++, pos = TREE_CHAIN (pos)) + if (!integer_zerop (TREE_PURPOSE (pos))) + break; + + return ix; +} + +/* Helper function for omp_finish_file routine. Takes decls from V_DECLS and + adds their addresses and sizes to constructor-vector V_CTOR. */ + +static void +add_decls_addresses_to_decl_constructor (vec<tree, va_gc> *v_decls, + vec<constructor_elt, va_gc> *v_ctor) +{ + unsigned len = vec_safe_length (v_decls); + for (unsigned i = 0; i < len; i++) + { + tree it = (*v_decls)[i]; + bool is_var = VAR_P (it); + bool is_link_var + = is_var +#ifdef ACCEL_COMPILER + && DECL_HAS_VALUE_EXPR_P (it) +#endif + && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (it)); + + tree size = NULL_TREE; + if (is_var) + size = fold_convert (const_ptr_type_node, DECL_SIZE_UNIT (it)); + + tree addr; + if (!is_link_var) + addr = build_fold_addr_expr (it); + else + { +#ifdef ACCEL_COMPILER + /* For "omp declare target link" vars add address of the pointer to + the target table, instead of address of the var. */ + tree value_expr = DECL_VALUE_EXPR (it); + tree link_ptr_decl = TREE_OPERAND (value_expr, 0); + varpool_node::finalize_decl (link_ptr_decl); + addr = build_fold_addr_expr (link_ptr_decl); +#else + addr = build_fold_addr_expr (it); +#endif + + /* Most significant bit of the size marks "omp declare target link" + vars in host and target tables. 
*/ + unsigned HOST_WIDE_INT isize = tree_to_uhwi (size); + isize |= 1ULL << (int_size_in_bytes (const_ptr_type_node) + * BITS_PER_UNIT - 1); + size = wide_int_to_tree (const_ptr_type_node, isize); + } + + CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, addr); + if (is_var) + CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, size); + } +} + +/* Create new symbols containing (address, size) pairs for global variables, + marked with "omp declare target" attribute, as well as addresses for the + functions, which are outlined offloading regions. */ +void +omp_finish_file (void) +{ + unsigned num_funcs = vec_safe_length (offload_funcs); + unsigned num_vars = vec_safe_length (offload_vars); + + if (num_funcs == 0 && num_vars == 0) + return; + + if (targetm_common.have_named_sections) + { + vec<constructor_elt, va_gc> *v_f, *v_v; + vec_alloc (v_f, num_funcs); + vec_alloc (v_v, num_vars * 2); + + add_decls_addresses_to_decl_constructor (offload_funcs, v_f); + add_decls_addresses_to_decl_constructor (offload_vars, v_v); + + tree vars_decl_type = build_array_type_nelts (pointer_sized_int_node, + num_vars * 2); + tree funcs_decl_type = build_array_type_nelts (pointer_sized_int_node, + num_funcs); + SET_TYPE_ALIGN (vars_decl_type, TYPE_ALIGN (pointer_sized_int_node)); + SET_TYPE_ALIGN (funcs_decl_type, TYPE_ALIGN (pointer_sized_int_node)); + tree ctor_v = build_constructor (vars_decl_type, v_v); + tree ctor_f = build_constructor (funcs_decl_type, v_f); + TREE_CONSTANT (ctor_v) = TREE_CONSTANT (ctor_f) = 1; + TREE_STATIC (ctor_v) = TREE_STATIC (ctor_f) = 1; + tree funcs_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, + get_identifier (".offload_func_table"), + funcs_decl_type); + tree vars_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, + get_identifier (".offload_var_table"), + vars_decl_type); + TREE_STATIC (funcs_decl) = TREE_STATIC (vars_decl) = 1; + /* Do not align tables more than TYPE_ALIGN (pointer_sized_int_node), + otherwise a joint table in a binary will contain padding between + 
tables from multiple object files. */ + DECL_USER_ALIGN (funcs_decl) = DECL_USER_ALIGN (vars_decl) = 1; + SET_DECL_ALIGN (funcs_decl, TYPE_ALIGN (funcs_decl_type)); + SET_DECL_ALIGN (vars_decl, TYPE_ALIGN (vars_decl_type)); + DECL_INITIAL (funcs_decl) = ctor_f; + DECL_INITIAL (vars_decl) = ctor_v; + set_decl_section_name (funcs_decl, OFFLOAD_FUNC_TABLE_SECTION_NAME); + set_decl_section_name (vars_decl, OFFLOAD_VAR_TABLE_SECTION_NAME); + + varpool_node::finalize_decl (vars_decl); + varpool_node::finalize_decl (funcs_decl); + } + else + { + for (unsigned i = 0; i < num_funcs; i++) + { + tree it = (*offload_funcs)[i]; + targetm.record_offload_symbol (it); + } + for (unsigned i = 0; i < num_vars; i++) + { + tree it = (*offload_vars)[i]; + targetm.record_offload_symbol (it); + } + } +} + +/* Find the number of threads (POS = false), or thread number (POS = + true) for an OpenACC region partitioned as MASK. Setup code + required for the calculation is added to SEQ. */ + +static tree +oacc_thread_numbers (bool pos, int mask, gimple_seq *seq) +{ + tree res = pos ? NULL_TREE : build_int_cst (unsigned_type_node, 1); + unsigned ix; + + /* Start at gang level, and examine relevant dimension indices. */ + for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++) + if (GOMP_DIM_MASK (ix) & mask) + { + tree arg = build_int_cst (unsigned_type_node, ix); + + if (res) + { + /* We had an outer index, so scale that by the size of + this dimension. */ + tree n = create_tmp_var (integer_type_node); + gimple *call + = gimple_build_call_internal (IFN_GOACC_DIM_SIZE, 1, arg); + + gimple_call_set_lhs (call, n); + gimple_seq_add_stmt (seq, call); + res = fold_build2 (MULT_EXPR, integer_type_node, res, n); + } + if (pos) + { + /* Determine index in this dimension. 
*/ + tree id = create_tmp_var (integer_type_node); + gimple *call = gimple_build_call_internal + (IFN_GOACC_DIM_POS, 1, arg); + + gimple_call_set_lhs (call, id); + gimple_seq_add_stmt (seq, call); + if (res) + res = fold_build2 (PLUS_EXPR, integer_type_node, res, id); + else + res = id; + } + } + + if (res == NULL_TREE) + res = integer_zero_node; + + return res; +} + +/* Transform IFN_GOACC_LOOP calls to actual code. See + expand_oacc_for for where these are generated. At the vector + level, we stride loops, such that each member of a warp will + operate on adjacent iterations. At the worker and gang level, + each gang/warp executes a set of contiguous iterations. Chunking + can override this such that each iteration engine executes a + contiguous chunk, and then moves on to stride to the next chunk. */ + +static void +oacc_xform_loop (gcall *call) +{ + gimple_stmt_iterator gsi = gsi_for_stmt (call); + enum ifn_goacc_loop_kind code + = (enum ifn_goacc_loop_kind) TREE_INT_CST_LOW (gimple_call_arg (call, 0)); + tree dir = gimple_call_arg (call, 1); + tree range = gimple_call_arg (call, 2); + tree step = gimple_call_arg (call, 3); + tree chunk_size = NULL_TREE; + unsigned mask = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 5)); + tree lhs = gimple_call_lhs (call); + tree type = TREE_TYPE (lhs); + tree diff_type = TREE_TYPE (range); + tree r = NULL_TREE; + gimple_seq seq = NULL; + bool chunking = false, striding = true; + unsigned outer_mask = mask & (~mask + 1); // Outermost partitioning + unsigned inner_mask = mask & ~outer_mask; // Inner partitioning (if any) + +#ifdef ACCEL_COMPILER + chunk_size = gimple_call_arg (call, 4); + if (integer_minus_onep (chunk_size) /* Force static allocation. */ + || integer_zerop (chunk_size)) /* Default (also static). */ + { + /* If we're at the gang level, we want each to execute a + contiguous run of iterations. Otherwise we want each element + to stride. 
*/ + striding = !(outer_mask & GOMP_DIM_MASK (GOMP_DIM_GANG)); + chunking = false; + } + else + { + /* Chunk of size 1 is striding. */ + striding = integer_onep (chunk_size); + chunking = !striding; + } +#endif + + /* striding=true, chunking=true + -> invalid. + striding=true, chunking=false + -> chunks=1 + striding=false,chunking=true + -> chunks=ceil (range/(chunksize*threads*step)) + striding=false,chunking=false + -> chunk_size=ceil(range/(threads*step)),chunks=1 */ + push_gimplify_context (true); + + switch (code) + { + default: gcc_unreachable (); + + case IFN_GOACC_LOOP_CHUNKS: + if (!chunking) + r = build_int_cst (type, 1); + else + { + /* chunk_max + = (range - dir) / (chunks * step * num_threads) + dir */ + tree per = oacc_thread_numbers (false, mask, &seq); + per = fold_convert (type, per); + chunk_size = fold_convert (type, chunk_size); + per = fold_build2 (MULT_EXPR, type, per, chunk_size); + per = fold_build2 (MULT_EXPR, type, per, step); + r = build2 (MINUS_EXPR, type, range, dir); + r = build2 (PLUS_EXPR, type, r, per); + r = build2 (TRUNC_DIV_EXPR, type, r, per); + } + break; + + case IFN_GOACC_LOOP_STEP: + { + /* If striding, step by the entire compute volume, otherwise + step by the inner volume. */ + unsigned volume = striding ? 
mask : inner_mask; + + r = oacc_thread_numbers (false, volume, &seq); + r = build2 (MULT_EXPR, type, fold_convert (type, r), step); + } + break; + + case IFN_GOACC_LOOP_OFFSET: + if (striding) + { + r = oacc_thread_numbers (true, mask, &seq); + r = fold_convert (diff_type, r); + } + else + { + tree inner_size = oacc_thread_numbers (false, inner_mask, &seq); + tree outer_size = oacc_thread_numbers (false, outer_mask, &seq); + tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size), + inner_size, outer_size); + + volume = fold_convert (diff_type, volume); + if (chunking) + chunk_size = fold_convert (diff_type, chunk_size); + else + { + tree per = fold_build2 (MULT_EXPR, diff_type, volume, step); + + chunk_size = build2 (MINUS_EXPR, diff_type, range, dir); + chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per); + chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per); + } + + tree span = build2 (MULT_EXPR, diff_type, chunk_size, + fold_convert (diff_type, inner_size)); + r = oacc_thread_numbers (true, outer_mask, &seq); + r = fold_convert (diff_type, r); + r = build2 (MULT_EXPR, diff_type, r, span); + + tree inner = oacc_thread_numbers (true, inner_mask, &seq); + inner = fold_convert (diff_type, inner); + r = fold_build2 (PLUS_EXPR, diff_type, r, inner); + + if (chunking) + { + tree chunk = fold_convert (diff_type, gimple_call_arg (call, 6)); + tree per + = fold_build2 (MULT_EXPR, diff_type, volume, chunk_size); + per = build2 (MULT_EXPR, diff_type, per, chunk); + + r = build2 (PLUS_EXPR, diff_type, r, per); + } + } + r = fold_build2 (MULT_EXPR, diff_type, r, step); + if (type != diff_type) + r = fold_convert (type, r); + break; + + case IFN_GOACC_LOOP_BOUND: + if (striding) + r = range; + else + { + tree inner_size = oacc_thread_numbers (false, inner_mask, &seq); + tree outer_size = oacc_thread_numbers (false, outer_mask, &seq); + tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size), + inner_size, outer_size); + + volume = fold_convert 
(diff_type, volume); + if (chunking) + chunk_size = fold_convert (diff_type, chunk_size); + else + { + tree per = fold_build2 (MULT_EXPR, diff_type, volume, step); + + chunk_size = build2 (MINUS_EXPR, diff_type, range, dir); + chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per); + chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per); + } + + tree span = build2 (MULT_EXPR, diff_type, chunk_size, + fold_convert (diff_type, inner_size)); + + r = fold_build2 (MULT_EXPR, diff_type, span, step); + + tree offset = gimple_call_arg (call, 6); + r = build2 (PLUS_EXPR, diff_type, r, + fold_convert (diff_type, offset)); + r = build2 (integer_onep (dir) ? MIN_EXPR : MAX_EXPR, + diff_type, r, range); + } + if (diff_type != type) + r = fold_convert (type, r); + break; + } + + gimplify_assign (lhs, r, &seq); + + pop_gimplify_context (NULL); + + gsi_replace_with_seq (&gsi, seq, true); +} + +/* Default partitioned and minimum partitioned dimensions. */ + +static int oacc_default_dims[GOMP_DIM_MAX]; +static int oacc_min_dims[GOMP_DIM_MAX]; + +/* Parse the default dimension parameter. This is a set of + :-separated optional compute dimensions. Each specified dimension + is a positive integer. When device type support is added, it is + planned to be a comma separated list of such compute dimensions, + with all but the first prefixed by the colon-terminated device + type. */ + +static void +oacc_parse_default_dims (const char *dims) +{ + int ix; + + for (ix = GOMP_DIM_MAX; ix--;) + { + oacc_default_dims[ix] = -1; + oacc_min_dims[ix] = 1; + } + +#ifndef ACCEL_COMPILER + /* Cannot be overridden on the host. 
*/ + dims = NULL; +#endif + if (dims) + { + const char *pos = dims; + + for (ix = 0; *pos && ix != GOMP_DIM_MAX; ix++) + { + if (ix) + { + if (*pos != ':') + goto malformed; + pos++; + } + + if (*pos != ':') + { + long val; + const char *eptr; + + errno = 0; + val = strtol (pos, CONST_CAST (char **, &eptr), 10); + if (errno || val <= 0 || (int) val != val) + goto malformed; + pos = eptr; + oacc_default_dims[ix] = (int) val; + } + } + if (*pos) + { + malformed: + error_at (UNKNOWN_LOCATION, + "-fopenacc-dim operand is malformed at '%s'", pos); + } + } + + /* Allow the backend to validate the dimensions. */ + targetm.goacc.validate_dims (NULL_TREE, oacc_default_dims, -1); + targetm.goacc.validate_dims (NULL_TREE, oacc_min_dims, -2); +} + +/* Validate and update the dimensions for offloaded FN. ATTRS is the + raw attribute. DIMS is an array of dimensions, which is filled in. + LEVEL is the partitioning level of a routine, or -1 for an offload + region itself. USED is the mask of partitioned execution in the + function. */ + +static void +oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used) +{ + tree purpose[GOMP_DIM_MAX]; + unsigned ix; + tree pos = TREE_VALUE (attrs); + bool is_kernel = oacc_fn_attrib_kernels_p (attrs); + + /* Make sure the attribute creator attached the dimension + information. */ + gcc_assert (pos); + + for (ix = 0; ix != GOMP_DIM_MAX; ix++) + { + purpose[ix] = TREE_PURPOSE (pos); + tree val = TREE_VALUE (pos); + dims[ix] = val ? TREE_INT_CST_LOW (val) : -1; + pos = TREE_CHAIN (pos); + } + + bool changed = targetm.goacc.validate_dims (fn, dims, level); + + /* Default anything left to 1 or a partitioned default. */ + for (ix = 0; ix != GOMP_DIM_MAX; ix++) + if (dims[ix] < 0) + { + /* The OpenACC spec says 'If the [num_gangs] clause is not + specified, an implementation-defined default will be used; + the default may depend on the code within the construct.' + (2.5.6). 
Thus an implementation is free to choose + non-unity default for a parallel region that doesn't have + any gang-partitioned loops. However, it appears that there + is a sufficient body of user code that expects non-gang + partitioned regions to not execute in gang-redundant mode. + So we (a) don't warn about the non-portability and (b) pick + the minimum permissible dimension size when there is no + partitioned execution. Otherwise we pick the global + default for the dimension, which the user can control. The + same wording and logic applies to num_workers and + vector_length, however the worker- or vector- single + execution doesn't have the same impact as gang-redundant + execution. (If the minimum gang-level partioning is not 1, + the target is probably too confusing.) */ + dims[ix] = (used & GOMP_DIM_MASK (ix) + ? oacc_default_dims[ix] : oacc_min_dims[ix]); + changed = true; + } + + if (changed) + { + /* Replace the attribute with new values. */ + pos = NULL_TREE; + for (ix = GOMP_DIM_MAX; ix--;) + { + pos = tree_cons (purpose[ix], + build_int_cst (integer_type_node, dims[ix]), + pos); + if (is_kernel) + TREE_PUBLIC (pos) = 1; + } + oacc_replace_fn_attrib (fn, pos); + } +} + +/* Create an empty OpenACC loop structure at LOC. */ + +static oacc_loop * +new_oacc_loop_raw (oacc_loop *parent, location_t loc) +{ + oacc_loop *loop = XCNEW (oacc_loop); + + loop->parent = parent; + loop->child = loop->sibling = NULL; + + if (parent) + { + loop->sibling = parent->child; + parent->child = loop; + } + + loop->loc = loc; + loop->marker = NULL; + memset (loop->heads, 0, sizeof (loop->heads)); + memset (loop->tails, 0, sizeof (loop->tails)); + loop->routine = NULL_TREE; + + loop->mask = loop->flags = loop->inner = 0; + loop->ifns = 0; + loop->chunk_size = 0; + loop->head_end = NULL; + + return loop; +} + +/* Create an outermost, dummy OpenACC loop for offloaded function + DECL. 
*/ + +static oacc_loop * +new_oacc_loop_outer (tree decl) +{ + return new_oacc_loop_raw (NULL, DECL_SOURCE_LOCATION (decl)); +} + +/* Start a new OpenACC loop structure beginning at head marker HEAD. + Link into PARENT loop. Return the new loop. */ + +static oacc_loop * +new_oacc_loop (oacc_loop *parent, gcall *marker) +{ + oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (marker)); + + loop->marker = marker; + + /* TODO: This is where device_type flattening would occur for the loop + flags. */ + + loop->flags = TREE_INT_CST_LOW (gimple_call_arg (marker, 3)); + + tree chunk_size = integer_zero_node; + if (loop->flags & OLF_GANG_STATIC) + chunk_size = gimple_call_arg (marker, 4); + loop->chunk_size = chunk_size; + + return loop; +} + +/* Create a dummy loop encompassing a call to a openACC routine. + Extract the routine's partitioning requirements. */ + +static void +new_oacc_loop_routine (oacc_loop *parent, gcall *call, tree decl, tree attrs) +{ + oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (call)); + int level = oacc_fn_attrib_level (attrs); + + gcc_assert (level >= 0); + + loop->marker = call; + loop->routine = decl; + loop->mask = ((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1) + ^ (GOMP_DIM_MASK (level) - 1)); +} + +/* Finish off the current OpenACC loop ending at tail marker TAIL. + Return the parent loop. */ + +static oacc_loop * +finish_oacc_loop (oacc_loop *loop) +{ + /* If the loop has been collapsed, don't partition it. */ + if (!loop->ifns) + loop->mask = loop->flags = 0; + return loop->parent; +} + +/* Free all OpenACC loop structures within LOOP (inclusive). */ + +static void +free_oacc_loop (oacc_loop *loop) +{ + if (loop->sibling) + free_oacc_loop (loop->sibling); + if (loop->child) + free_oacc_loop (loop->child); + + free (loop); +} + +/* Dump out the OpenACC loop head or tail beginning at FROM. 
*/ + +static void +dump_oacc_loop_part (FILE *file, gcall *from, int depth, + const char *title, int level) +{ + enum ifn_unique_kind kind + = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0)); + + fprintf (file, "%*s%s-%d:\n", depth * 2, "", title, level); + for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;) + { + gimple *stmt = gsi_stmt (gsi); + + if (gimple_call_internal_p (stmt, IFN_UNIQUE)) + { + enum ifn_unique_kind k + = ((enum ifn_unique_kind) TREE_INT_CST_LOW + (gimple_call_arg (stmt, 0))); + + if (k == kind && stmt != from) + break; + } + print_gimple_stmt (file, stmt, depth * 2 + 2, 0); + + gsi_next (&gsi); + while (gsi_end_p (gsi)) + gsi = gsi_start_bb (single_succ (gsi_bb (gsi))); + } +} + +/* Dump OpenACC loops LOOP, its siblings and its children. */ + +static void +dump_oacc_loop (FILE *file, oacc_loop *loop, int depth) +{ + int ix; + + fprintf (file, "%*sLoop %x(%x) %s:%u\n", depth * 2, "", + loop->flags, loop->mask, + LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc)); + + if (loop->marker) + print_gimple_stmt (file, loop->marker, depth * 2, 0); + + if (loop->routine) + fprintf (file, "%*sRoutine %s:%u:%s\n", + depth * 2, "", DECL_SOURCE_FILE (loop->routine), + DECL_SOURCE_LINE (loop->routine), + IDENTIFIER_POINTER (DECL_NAME (loop->routine))); + + for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++) + if (loop->heads[ix]) + dump_oacc_loop_part (file, loop->heads[ix], depth, "Head", ix); + for (ix = GOMP_DIM_MAX; ix--;) + if (loop->tails[ix]) + dump_oacc_loop_part (file, loop->tails[ix], depth, "Tail", ix); + + if (loop->child) + dump_oacc_loop (file, loop->child, depth + 1); + if (loop->sibling) + dump_oacc_loop (file, loop->sibling, depth); +} + +void debug_oacc_loop (oacc_loop *); + +/* Dump loops to stderr. */ + +DEBUG_FUNCTION void +debug_oacc_loop (oacc_loop *loop) +{ + dump_oacc_loop (stderr, loop, 0); +} + +/* DFS walk of basic blocks BB onwards, creating OpenACC loop + structures as we go. 
By construction these loops are properly + nested. */ + +static void +oacc_loop_discover_walk (oacc_loop *loop, basic_block bb) +{ + int marker = 0; + int remaining = 0; + + if (bb->flags & BB_VISITED) + return; + + follow: + bb->flags |= BB_VISITED; + + /* Scan for loop markers. */ + for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); + gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + + if (!is_gimple_call (stmt)) + continue; + + gcall *call = as_a <gcall *> (stmt); + + /* If this is a routine, make a dummy loop for it. */ + if (tree decl = gimple_call_fndecl (call)) + if (tree attrs = oacc_get_fn_attrib (decl)) + { + gcc_assert (!marker); + new_oacc_loop_routine (loop, call, decl, attrs); + } + + if (!gimple_call_internal_p (call)) + continue; + + switch (gimple_call_internal_fn (call)) + { + default: + break; + + case IFN_GOACC_LOOP: + /* Count the goacc loop abstraction fns, to determine if the + loop was collapsed already. */ + loop->ifns++; + break; + + case IFN_UNIQUE: + enum ifn_unique_kind kind + = (enum ifn_unique_kind) (TREE_INT_CST_LOW + (gimple_call_arg (call, 0))); + if (kind == IFN_UNIQUE_OACC_HEAD_MARK + || kind == IFN_UNIQUE_OACC_TAIL_MARK) + { + if (gimple_call_num_args (call) == 2) + { + gcc_assert (marker && !remaining); + marker = 0; + if (kind == IFN_UNIQUE_OACC_TAIL_MARK) + loop = finish_oacc_loop (loop); + else + loop->head_end = call; + } + else + { + int count = TREE_INT_CST_LOW (gimple_call_arg (call, 2)); + + if (!marker) + { + if (kind == IFN_UNIQUE_OACC_HEAD_MARK) + loop = new_oacc_loop (loop, call); + remaining = count; + } + gcc_assert (count == remaining); + if (remaining) + { + remaining--; + if (kind == IFN_UNIQUE_OACC_HEAD_MARK) + loop->heads[marker] = call; + else + loop->tails[remaining] = call; + } + marker++; + } + } + } + } + if (remaining || marker) + { + bb = single_succ (bb); + gcc_assert (single_pred_p (bb) && !(bb->flags & BB_VISITED)); + goto follow; + } + + /* Walk successor blocks. 
*/ + edge e; + edge_iterator ei; + + FOR_EACH_EDGE (e, ei, bb->succs) + oacc_loop_discover_walk (loop, e->dest); +} + +/* LOOP is the first sibling. Reverse the order in place and return + the new first sibling. Recurse to child loops. */ + +static oacc_loop * +oacc_loop_sibling_nreverse (oacc_loop *loop) +{ + oacc_loop *last = NULL; + do + { + if (loop->child) + loop->child = oacc_loop_sibling_nreverse (loop->child); + + oacc_loop *next = loop->sibling; + loop->sibling = last; + last = loop; + loop = next; + } + while (loop); + + return last; +} + +/* Discover the OpenACC loops marked up by HEAD and TAIL markers for + the current function. */ + +static oacc_loop * +oacc_loop_discovery () +{ + /* Clear basic block flags, in particular BB_VISITED which we're going to use + in the following. */ + clear_bb_flags (); + + oacc_loop *top = new_oacc_loop_outer (current_function_decl); + oacc_loop_discover_walk (top, ENTRY_BLOCK_PTR_FOR_FN (cfun)); + + /* The siblings were constructed in reverse order, reverse them so + that diagnostics come out in an unsurprising order. */ + top = oacc_loop_sibling_nreverse (top); + + return top; +} + +/* Transform the abstract internal function markers starting at FROM + to be for partitioning level LEVEL. Stop when we meet another HEAD + or TAIL marker. 
*/ + +static void +oacc_loop_xform_head_tail (gcall *from, int level) +{ + enum ifn_unique_kind kind + = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0)); + tree replacement = build_int_cst (unsigned_type_node, level); + + for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;) + { + gimple *stmt = gsi_stmt (gsi); + + if (gimple_call_internal_p (stmt, IFN_UNIQUE)) + { + enum ifn_unique_kind k + = ((enum ifn_unique_kind) + TREE_INT_CST_LOW (gimple_call_arg (stmt, 0))); + + if (k == IFN_UNIQUE_OACC_FORK || k == IFN_UNIQUE_OACC_JOIN) + *gimple_call_arg_ptr (stmt, 2) = replacement; + else if (k == kind && stmt != from) + break; + } + else if (gimple_call_internal_p (stmt, IFN_GOACC_REDUCTION)) + *gimple_call_arg_ptr (stmt, 3) = replacement; + + gsi_next (&gsi); + while (gsi_end_p (gsi)) + gsi = gsi_start_bb (single_succ (gsi_bb (gsi))); + } +} + +/* Transform the IFN_GOACC_LOOP internal functions by providing the + determined partitioning mask and chunking argument. END_MARKER + points at the end IFN_HEAD_TAIL call intgroducing the loop. IFNS + is the number of IFN_GOACC_LOOP calls for the loop. MASK_ARG is + the replacement partitioning mask and CHUNK_ARG is the replacement + chunking arg. */ + +static void +oacc_loop_xform_loop (gcall *end_marker, unsigned ifns, + tree mask_arg, tree chunk_arg) +{ + gimple_stmt_iterator gsi = gsi_for_stmt (end_marker); + + gcc_checking_assert (ifns); + for (;;) + { + for (; !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + + if (!is_gimple_call (stmt)) + continue; + + gcall *call = as_a <gcall *> (stmt); + + if (!gimple_call_internal_p (call)) + continue; + + if (gimple_call_internal_fn (call) != IFN_GOACC_LOOP) + continue; + + *gimple_call_arg_ptr (call, 5) = mask_arg; + *gimple_call_arg_ptr (call, 4) = chunk_arg; + ifns--; + if (!ifns) + return; + } + + /* The LOOP_BOUND ifn could be in the single successor + block. 
*/ + basic_block bb = single_succ (gsi_bb (gsi)); + gsi = gsi_start_bb (bb); + } +} + +/* Process the discovered OpenACC loops, setting the correct + partitioning level etc. */ + +static void +oacc_loop_process (oacc_loop *loop) +{ + if (loop->child) + oacc_loop_process (loop->child); + + if (loop->mask && !loop->routine) + { + int ix; + unsigned mask = loop->mask; + unsigned dim = GOMP_DIM_GANG; + tree mask_arg = build_int_cst (unsigned_type_node, mask); + tree chunk_arg = loop->chunk_size; + + oacc_loop_xform_loop (loop->head_end, loop->ifns, mask_arg, chunk_arg); + + for (ix = 0; ix != GOMP_DIM_MAX && mask; ix++) + { + while (!(GOMP_DIM_MASK (dim) & mask)) + dim++; + + oacc_loop_xform_head_tail (loop->heads[ix], dim); + oacc_loop_xform_head_tail (loop->tails[ix], dim); + + mask ^= GOMP_DIM_MASK (dim); + } + } + + if (loop->sibling) + oacc_loop_process (loop->sibling); +} + +/* Walk the OpenACC loop heirarchy checking and assigning the + programmer-specified partitionings. OUTER_MASK is the partitioning + this loop is contained within. Return mask of partitioning + encountered. If any auto loops are discovered, set GOMP_DIM_MAX + bit. */ + +static unsigned +oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask) +{ + unsigned this_mask = loop->mask; + unsigned mask_all = 0; + bool noisy = true; + +#ifdef ACCEL_COMPILER + /* When device_type is supported, we want the device compiler to be + noisy, if the loop parameters are device_type-specific. */ + noisy = false; +#endif + + if (!loop->routine) + { + bool auto_par = (loop->flags & OLF_AUTO) != 0; + bool seq_par = (loop->flags & OLF_SEQ) != 0; + + this_mask = ((loop->flags >> OLF_DIM_BASE) + & (GOMP_DIM_MASK (GOMP_DIM_MAX) - 1)); + + if ((this_mask != 0) + auto_par + seq_par > 1) + { + if (noisy) + error_at (loop->loc, + seq_par + ? 
"%<seq%> overrides other OpenACC loop specifiers" + : "%<auto%> conflicts with other OpenACC loop " + "specifiers"); + auto_par = false; + loop->flags &= ~OLF_AUTO; + if (seq_par) + { + loop->flags &= + ~((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1) << OLF_DIM_BASE); + this_mask = 0; + } + } + if (auto_par && (loop->flags & OLF_INDEPENDENT)) + mask_all |= GOMP_DIM_MASK (GOMP_DIM_MAX); + } + + if (this_mask & outer_mask) + { + const oacc_loop *outer; + for (outer = loop->parent; outer; outer = outer->parent) + if (outer->mask & this_mask) + break; + + if (noisy) + { + if (outer) + { + error_at (loop->loc, + "%s uses same OpenACC parallelism as containing loop", + loop->routine ? "routine call" : "inner loop"); + inform (outer->loc, "containing loop here"); + } + else + error_at (loop->loc, + "%s uses OpenACC parallelism disallowed by containing " + "routine", loop->routine ? "routine call" : "loop"); + + if (loop->routine) + inform (DECL_SOURCE_LOCATION (loop->routine), + "routine %qD declared here", loop->routine); + } + this_mask &= ~outer_mask; + } + else + { + unsigned outermost = least_bit_hwi (this_mask); + + if (outermost && outermost <= outer_mask) + { + if (noisy) + { + error_at (loop->loc, + "incorrectly nested OpenACC loop parallelism"); + + const oacc_loop *outer; + for (outer = loop->parent; + outer->flags && outer->flags < outermost; + outer = outer->parent) + continue; + inform (outer->loc, "containing loop here"); + } + + this_mask &= ~outermost; + } + } + + loop->mask = this_mask; + mask_all |= this_mask; + + if (loop->child) + { + loop->inner = oacc_loop_fixed_partitions (loop->child, + outer_mask | this_mask); + mask_all |= loop->inner; + } + + if (loop->sibling) + mask_all |= oacc_loop_fixed_partitions (loop->sibling, outer_mask); + + return mask_all; +} + +/* Walk the OpenACC loop heirarchy to assign auto-partitioned loops. + OUTER_MASK is the partitioning this loop is contained within. 
+ Return the cumulative partitioning used by this loop, siblings and + children. */ + +static unsigned +oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask) +{ + bool assign = (loop->flags & OLF_AUTO) && (loop->flags & OLF_INDEPENDENT); + bool noisy = true; + +#ifdef ACCEL_COMPILER + /* When device_type is supported, we want the device compiler to be + noisy, if the loop parameters are device_type-specific. */ + noisy = false; +#endif + + if (assign && outer_mask < GOMP_DIM_MASK (GOMP_DIM_MAX - 1)) + { + /* Allocate the outermost loop at the outermost available + level. */ + unsigned this_mask = outer_mask + 1; + + if (!(this_mask & loop->inner)) + loop->mask = this_mask; + } + + if (loop->child) + { + unsigned child_mask = outer_mask | loop->mask; + + if (loop->mask || assign) + child_mask |= GOMP_DIM_MASK (GOMP_DIM_MAX); + + loop->inner = oacc_loop_auto_partitions (loop->child, child_mask); + } + + if (assign && !loop->mask) + { + /* Allocate the loop at the innermost available level. */ + unsigned this_mask = 0; + + /* Determine the outermost partitioning used within this loop. */ + this_mask = loop->inner | GOMP_DIM_MASK (GOMP_DIM_MAX); + this_mask = least_bit_hwi (this_mask); + + /* Pick the partitioning just inside that one. */ + this_mask >>= 1; + + /* And avoid picking one use by an outer loop. */ + this_mask &= ~outer_mask; + + if (!this_mask && noisy) + warning_at (loop->loc, 0, + "insufficient partitioning available to parallelize loop"); + + loop->mask = this_mask; + } + + if (assign && dump_file) + fprintf (dump_file, "Auto loop %s:%d assigned %d\n", + LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc), + loop->mask); + + unsigned inner_mask = 0; + + if (loop->sibling) + inner_mask |= oacc_loop_auto_partitions (loop->sibling, outer_mask); + + inner_mask |= loop->inner | loop->mask; + + return inner_mask; +} + +/* Walk the OpenACC loop heirarchy to check and assign partitioning + axes. Return mask of partitioning. 
*/ + +static unsigned +oacc_loop_partition (oacc_loop *loop, unsigned outer_mask) +{ + unsigned mask_all = oacc_loop_fixed_partitions (loop, outer_mask); + + if (mask_all & GOMP_DIM_MASK (GOMP_DIM_MAX)) + { + mask_all ^= GOMP_DIM_MASK (GOMP_DIM_MAX); + mask_all |= oacc_loop_auto_partitions (loop, outer_mask); + } + return mask_all; +} + +/* Default fork/join early expander. Delete the function calls if + there is no RTL expander. */ + +bool +default_goacc_fork_join (gcall *ARG_UNUSED (call), + const int *ARG_UNUSED (dims), bool is_fork) +{ + if (is_fork) + return targetm.have_oacc_fork (); + else + return targetm.have_oacc_join (); +} + +/* Default goacc.reduction early expander. + + LHS-opt = IFN_REDUCTION (KIND, RES_PTR, VAR, LEVEL, OP, OFFSET) + If RES_PTR is not integer-zerop: + SETUP - emit 'LHS = *RES_PTR', LHS = NULL + TEARDOWN - emit '*RES_PTR = VAR' + If LHS is not NULL + emit 'LHS = VAR' */ + +void +default_goacc_reduction (gcall *call) +{ + unsigned code = (unsigned)TREE_INT_CST_LOW (gimple_call_arg (call, 0)); + gimple_stmt_iterator gsi = gsi_for_stmt (call); + tree lhs = gimple_call_lhs (call); + tree var = gimple_call_arg (call, 2); + gimple_seq seq = NULL; + + if (code == IFN_GOACC_REDUCTION_SETUP + || code == IFN_GOACC_REDUCTION_TEARDOWN) + { + /* Setup and Teardown need to copy from/to the receiver object, + if there is one. */ + tree ref_to_res = gimple_call_arg (call, 1); + + if (!integer_zerop (ref_to_res)) + { + tree dst = build_simple_mem_ref (ref_to_res); + tree src = var; + + if (code == IFN_GOACC_REDUCTION_SETUP) + { + src = dst; + dst = lhs; + lhs = NULL; + } + gimple_seq_add_stmt (&seq, gimple_build_assign (dst, src)); + } + } + + /* Copy VAR to LHS, if there is an LHS. 
*/ + if (lhs) + gimple_seq_add_stmt (&seq, gimple_build_assign (lhs, var)); + + gsi_replace_with_seq (&gsi, seq, true); +} + +/* Main entry point for oacc transformations which run on the device + compiler after LTO, so we know what the target device is at this + point (including the host fallback). */ + +static unsigned int +execute_oacc_device_lower () +{ + tree attrs = oacc_get_fn_attrib (current_function_decl); + + if (!attrs) + /* Not an offloaded function. */ + return 0; + + /* Parse the default dim argument exactly once. */ + if ((const void *)flag_openacc_dims != &flag_openacc_dims) + { + oacc_parse_default_dims (flag_openacc_dims); + flag_openacc_dims = (char *)&flag_openacc_dims; + } + + /* Discover, partition and process the loops. */ + oacc_loop *loops = oacc_loop_discovery (); + int fn_level = oacc_fn_attrib_level (attrs); + + if (dump_file) + fprintf (dump_file, oacc_fn_attrib_kernels_p (attrs) + ? "Function is kernels offload\n" + : fn_level < 0 ? "Function is parallel offload\n" + : "Function is routine level %d\n", fn_level); + + unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0; + unsigned used_mask = oacc_loop_partition (loops, outer_mask); + int dims[GOMP_DIM_MAX]; + + oacc_validate_dims (current_function_decl, attrs, dims, fn_level, used_mask); + + if (dump_file) + { + const char *comma = "Compute dimensions ["; + for (int ix = 0; ix != GOMP_DIM_MAX; ix++, comma = ", ") + fprintf (dump_file, "%s%d", comma, dims[ix]); + fprintf (dump_file, "]\n"); + } + + oacc_loop_process (loops); + if (dump_file) + { + fprintf (dump_file, "OpenACC loops\n"); + dump_oacc_loop (dump_file, loops, 0); + fprintf (dump_file, "\n"); + } + + /* Offloaded targets may introduce new basic blocks, which require + dominance information to update SSA. */ + calculate_dominance_info (CDI_DOMINATORS); + + /* Now lower internal loop functions to target-specific code + sequences. 
*/ + basic_block bb; + FOR_ALL_BB_FN (bb, cfun) + for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);) + { + gimple *stmt = gsi_stmt (gsi); + if (!is_gimple_call (stmt)) + { + gsi_next (&gsi); + continue; + } + + gcall *call = as_a <gcall *> (stmt); + if (!gimple_call_internal_p (call)) + { + gsi_next (&gsi); + continue; + } + + /* Rewind to allow rescan. */ + gsi_prev (&gsi); + bool rescan = false, remove = false; + enum internal_fn ifn_code = gimple_call_internal_fn (call); + + switch (ifn_code) + { + default: break; + + case IFN_GOACC_LOOP: + oacc_xform_loop (call); + rescan = true; + break; + + case IFN_GOACC_REDUCTION: + /* Mark the function for SSA renaming. */ + mark_virtual_operands_for_renaming (cfun); + + /* If the level is -1, this ended up being an unused + axis. Handle as a default. */ + if (integer_minus_onep (gimple_call_arg (call, 3))) + default_goacc_reduction (call); + else + targetm.goacc.reduction (call); + rescan = true; + break; + + case IFN_UNIQUE: + { + enum ifn_unique_kind kind + = ((enum ifn_unique_kind) + TREE_INT_CST_LOW (gimple_call_arg (call, 0))); + + switch (kind) + { + default: + gcc_unreachable (); + + case IFN_UNIQUE_OACC_FORK: + case IFN_UNIQUE_OACC_JOIN: + if (integer_minus_onep (gimple_call_arg (call, 2))) + remove = true; + else if (!targetm.goacc.fork_join + (call, dims, kind == IFN_UNIQUE_OACC_FORK)) + remove = true; + break; + + case IFN_UNIQUE_OACC_HEAD_MARK: + case IFN_UNIQUE_OACC_TAIL_MARK: + remove = true; + break; + } + break; + } + } + + if (gsi_end_p (gsi)) + /* We rewound past the beginning of the BB. */ + gsi = gsi_start_bb (bb); + else + /* Undo the rewind. */ + gsi_next (&gsi); + + if (remove) + { + if (gimple_vdef (call)) + replace_uses_by (gimple_vdef (call), gimple_vuse (call)); + if (gimple_call_lhs (call)) + { + /* Propagate the data dependency var. 
*/ + gimple *ass = gimple_build_assign (gimple_call_lhs (call), + gimple_call_arg (call, 1)); + gsi_replace (&gsi, ass, false); + } + else + gsi_remove (&gsi, true); + } + else if (!rescan) + /* If not rescanning, advance over the call. */ + gsi_next (&gsi); + } + + free_oacc_loop (loops); + + return 0; +} + +/* Default launch dimension validator. Force everything to 1. A + backend that wants to provide larger dimensions must override this + hook. */ + +bool +default_goacc_validate_dims (tree ARG_UNUSED (decl), int *dims, + int ARG_UNUSED (fn_level)) +{ + bool changed = false; + + for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++) + { + if (dims[ix] != 1) + { + dims[ix] = 1; + changed = true; + } + } + + return changed; +} + +/* Default dimension bound is unknown on accelerator and 1 on host. */ + +int +default_goacc_dim_limit (int ARG_UNUSED (axis)) +{ +#ifdef ACCEL_COMPILER + return 0; +#else + return 1; +#endif +} + +namespace { + +const pass_data pass_data_oacc_device_lower = +{ + GIMPLE_PASS, /* type */ + "oaccdevlow", /* name */ + OPTGROUP_OPENMP, /* optinfo_flags */ + TV_NONE, /* tv_id */ + PROP_cfg, /* properties_required */ + 0 /* Possibly PROP_gimple_eomp. 
*/, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */ +}; + +class pass_oacc_device_lower : public gimple_opt_pass +{ +public: + pass_oacc_device_lower (gcc::context *ctxt) + : gimple_opt_pass (pass_data_oacc_device_lower, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *) { return flag_openacc; }; + + virtual unsigned int execute (function *) + { + return execute_oacc_device_lower (); + } + +}; // class pass_oacc_device_lower + +} // anon namespace + +gimple_opt_pass * +make_pass_oacc_device_lower (gcc::context *ctxt) +{ + return new pass_oacc_device_lower (ctxt); +} + +/* Cleanup uses of SIMT placeholder internal functions: on non-SIMT targets, + VF is 1 and LANE is 0; on SIMT targets, VF is folded to a constant, and + LANE is kept to be expanded to RTL later on. Also cleanup all other SIMT + internal functions on non-SIMT targets, and likewise some SIMD internal + functions on SIMT targets. */ + +static unsigned int +execute_omp_device_lower () +{ + int vf = targetm.simt.vf ? targetm.simt.vf () : 1; + basic_block bb; + gimple_stmt_iterator gsi; + FOR_EACH_BB_FN (bb, cfun) + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt)) + continue; + tree lhs = gimple_call_lhs (stmt), rhs = NULL_TREE; + tree type = lhs ? TREE_TYPE (lhs) : integer_type_node; + switch (gimple_call_internal_fn (stmt)) + { + case IFN_GOMP_USE_SIMT: + rhs = vf == 1 ? integer_zero_node : integer_one_node; + break; + case IFN_GOMP_SIMT_LANE: + case IFN_GOMP_SIMT_LAST_LANE: + rhs = vf == 1 ? build_zero_cst (type) : NULL_TREE; + break; + case IFN_GOMP_SIMT_VF: + rhs = build_int_cst (type, vf); + break; + case IFN_GOMP_SIMT_ORDERED_PRED: + rhs = vf == 1 ? 
integer_zero_node : NULL_TREE; + if (rhs || !lhs) + unlink_stmt_vdef (stmt); + break; + case IFN_GOMP_SIMT_VOTE_ANY: + case IFN_GOMP_SIMT_XCHG_BFLY: + case IFN_GOMP_SIMT_XCHG_IDX: + rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE; + break; + case IFN_GOMP_SIMD_LANE: + case IFN_GOMP_SIMD_LAST_LANE: + rhs = vf != 1 ? build_zero_cst (type) : NULL_TREE; + break; + case IFN_GOMP_SIMD_VF: + rhs = vf != 1 ? build_one_cst (type) : NULL_TREE; + break; + default: + continue; + } + if (lhs && !rhs) + continue; + stmt = lhs ? gimple_build_assign (lhs, rhs) : gimple_build_nop (); + gsi_replace (&gsi, stmt, false); + } + if (vf != 1) + cfun->has_force_vectorize_loops = false; + return 0; +} + +namespace { + +const pass_data pass_data_omp_device_lower = +{ + GIMPLE_PASS, /* type */ + "ompdevlow", /* name */ + OPTGROUP_OPENMP, /* optinfo_flags */ + TV_NONE, /* tv_id */ + PROP_cfg, /* properties_required */ + PROP_gimple_lomp_dev, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_update_ssa, /* todo_flags_finish */ +}; + +class pass_omp_device_lower : public gimple_opt_pass +{ +public: + pass_omp_device_lower (gcc::context *ctxt) + : gimple_opt_pass (pass_data_omp_device_lower, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *ARG_UNUSED (fun)) + { + /* FIXME: this should use PROP_gimple_lomp_dev. */ +#ifdef ACCEL_COMPILER + return true; +#else + return ENABLE_OFFLOADING && (flag_openmp || in_lto_p); +#endif + } + virtual unsigned int execute (function *) + { + return execute_omp_device_lower (); + } + +}; // class pass_expand_omp_ssa + +} // anon namespace + +gimple_opt_pass * +make_pass_omp_device_lower (gcc::context *ctxt) +{ + return new pass_omp_device_lower (ctxt); +} + +/* "omp declare target link" handling pass. 
*/ + +namespace { + +const pass_data pass_data_omp_target_link = +{ + GIMPLE_PASS, /* type */ + "omptargetlink", /* name */ + OPTGROUP_OPENMP, /* optinfo_flags */ + TV_NONE, /* tv_id */ + PROP_ssa, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_update_ssa, /* todo_flags_finish */ +}; + +class pass_omp_target_link : public gimple_opt_pass +{ +public: + pass_omp_target_link (gcc::context *ctxt) + : gimple_opt_pass (pass_data_omp_target_link, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *fun) + { +#ifdef ACCEL_COMPILER + tree attrs = DECL_ATTRIBUTES (fun->decl); + return lookup_attribute ("omp declare target", attrs) + || lookup_attribute ("omp target entrypoint", attrs); +#else + (void) fun; + return false; +#endif + } + + virtual unsigned execute (function *); +}; + +/* Callback for walk_gimple_stmt used to scan for link var operands. */ + +static tree +find_link_var_op (tree *tp, int *walk_subtrees, void *) +{ + tree t = *tp; + + if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t) + && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (t))) + { + *walk_subtrees = 0; + return t; + } + + return NULL_TREE; +} + +unsigned +pass_omp_target_link::execute (function *fun) +{ + basic_block bb; + FOR_EACH_BB_FN (bb, fun) + { + gimple_stmt_iterator gsi; + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + if (walk_gimple_stmt (&gsi, NULL, find_link_var_op, NULL)) + gimple_regimplify_operands (gsi_stmt (gsi), &gsi); + } + + return 0; +} + +} // anon namespace + +gimple_opt_pass * +make_pass_omp_target_link (gcc::context *ctxt) +{ + return new pass_omp_target_link (ctxt); +} diff --git a/gcc/omp-offload.h b/gcc/omp-offload.h new file mode 100644 index 00000000000..a14d9fefea2 --- /dev/null +++ b/gcc/omp-offload.h @@ -0,0 +1,30 @@ +/* Bits of OpenMP and OpenACC handling that is specific to device offloading + and a lowering pass for OpenACC device directives. 
+ + Copyright (C) 2005-2016 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#ifndef GCC_OMP_DEVICE_H +#define GCC_OMP_DEVICE_H + +extern GTY(()) vec<tree, va_gc> *offload_funcs; +extern GTY(()) vec<tree, va_gc> *offload_vars; + +extern void omp_finish_file (void); + +#endif /* GCC_OMP_DEVICE_H */ diff --git a/gcc/toplev.c b/gcc/toplev.c index 5af02ea34e8..79d7a6fda1f 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -76,7 +76,7 @@ along with GCC; see the file COPYING3. If not see #include "ipa-prop.h" #include "gcse.h" #include "tree-chkp.h" -#include "omp-low.h" +#include "omp-offload.h" #include "hsa.h" #include "edit-context.h" diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c index 6cb5b6f5b5f..d4a7db8f6ec 100644 --- a/gcc/tree-cfg.c +++ b/gcc/tree-cfg.c @@ -54,7 +54,8 @@ along with GCC; see the file COPYING3. 
If not see #include "value-prof.h" #include "tree-inline.h" #include "tree-ssa-live.h" -#include "omp-low.h" +#include "omp-general.h" +#include "omp-expand.h" #include "tree-cfgcleanup.h" #include "gimplify.h" #include "attribs.h" @@ -863,7 +864,7 @@ make_edges_bb (basic_block bb, struct omp_region **pcur_region, int *pomp_index) break; CASE_GIMPLE_OMP: - fallthru = make_gimple_omp_edges (bb, pcur_region, pomp_index); + fallthru = omp_make_gimple_edges (bb, pcur_region, pomp_index); break; case GIMPLE_TRANSACTION: @@ -1006,7 +1007,7 @@ make_edges (void) XDELETE (bb_to_omp_idx); - free_omp_regions (); + omp_free_regions (); } /* Add SEQ after GSI. Start new bb after GSI, and created further bbs as diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c index 4779441b0cf..238017a0031 100644 --- a/gcc/tree-parloops.c +++ b/gcc/tree-parloops.c @@ -49,6 +49,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-vectorizer.h" #include "tree-hasher.h" #include "tree-parloops.h" +#include "omp-general.h" #include "omp-low.h" #include "tree-ssa.h" #include "params.h" @@ -2045,7 +2046,7 @@ create_parallel_loop (struct loop *loop, tree loop_fn, tree data, tree clause = build_omp_clause (loc, OMP_CLAUSE_NUM_GANGS); OMP_CLAUSE_NUM_GANGS_EXPR (clause) = build_int_cst (integer_type_node, n_threads); - set_oacc_fn_attrib (cfun->decl, clause, true, NULL); + oacc_set_fn_attrib (cfun->decl, clause, true, NULL); } else { @@ -3199,7 +3200,7 @@ parallelize_loops (bool oacc_kernels_p) /* Do not parallelize loops in offloaded functions. */ if (!oacc_kernels_p - && get_oacc_fn_attrib (cfun->decl) != NULL) + && oacc_get_fn_attrib (cfun->decl) != NULL) return false; if (cfun->has_nonlocal_label) diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c index 24c05852b06..84f13ada299 100644 --- a/gcc/tree-ssa-loop.c +++ b/gcc/tree-ssa-loop.c @@ -36,7 +36,7 @@ along with GCC; see the file COPYING3. 
If not see #include "tree-inline.h" #include "tree-scalar-evolution.h" #include "tree-vectorizer.h" -#include "omp-low.h" +#include "omp-general.h" #include "diagnostic-core.h" @@ -152,7 +152,7 @@ gate_oacc_kernels (function *fn) if (!flag_openacc) return false; - tree oacc_function_attr = get_oacc_fn_attrib (fn->decl); + tree oacc_function_attr = oacc_get_fn_attrib (fn->decl); if (oacc_function_attr == NULL_TREE) return false; if (!oacc_fn_attrib_kernels_p (oacc_function_attr)) diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c index 3535d7a4d83..97e9953a139 100644 --- a/gcc/tree-vrp.c +++ b/gcc/tree-vrp.c @@ -55,7 +55,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-ssa-threadupdate.h" #include "tree-ssa-scopedtables.h" #include "tree-ssa-threadedge.h" -#include "omp-low.h" +#include "omp-general.h" #include "target.h" #include "case-cfn-macros.h" #include "params.h" @@ -4003,8 +4003,8 @@ extract_range_basic (value_range *vr, gimple *stmt) and pos is [0,N-1]. */ { bool is_pos = cfn == CFN_GOACC_DIM_POS; - int axis = get_oacc_ifn_dim_arg (stmt); - int size = get_oacc_fn_dim_size (current_function_decl, axis); + int axis = oacc_get_ifn_dim_arg (stmt); + int size = oacc_get_fn_dim_size (current_function_decl, axis); if (!size) /* If it's dynamic, the backend might know a hardware diff --git a/gcc/varpool.c b/gcc/varpool.c index 71fb4b88df1..d5b2b9e25a3 100644 --- a/gcc/varpool.c +++ b/gcc/varpool.c @@ -31,7 +31,7 @@ along with GCC; see the file COPYING3. If not see #include "varasm.h" #include "debug.h" #include "output.h" -#include "omp-low.h" +#include "omp-offload.h" #include "context.h" const char * const tls_model_names[]={"none", "emulated", |