summaryrefslogtreecommitdiff
path: root/gcc/omp-offload.c
diff options
context:
space:
mode:
authorcltang <cltang@138bc75d-0d04-0410-961f-82ee72b054a4>2017-02-09 13:46:20 +0000
committercltang <cltang@138bc75d-0d04-0410-961f-82ee72b054a4>2017-02-09 13:46:20 +0000
commit719a7570163e74e56635612198e66b3b4cd609a7 (patch)
treea62233f5023008dd44b52a218e6c0fd79f1116fc /gcc/omp-offload.c
parent37a23d2eeaf1b8690534a7ee6e06e671a1aa96e0 (diff)
downloadgcc-719a7570163e74e56635612198e66b3b4cd609a7.tar.gz
2017-02-09 Nathan Sidwell <nathan@codesourcery.com>
Cesar Philippidis <cesar@codesourcery.com> Joseph Myers <joseph@codesourcery.com> Chung-Lin Tang <cltang@codesourcery.com> gcc/ * gimplify.c (gimplify_scan_omp_clauses): No special handling for OMP_CLAUSE_TILE. (gimplify_adjust_omp_clauses): Don't delete TILE. (gimplify_omp_for): Deal with TILE. * internal-fn.c (expand_GOACC_TILE): New function. * internal-fn.def (GOACC_DIM_POS): Comment may be overly conservative. (GOACC_TILE): New. * omp-expand.c (struct oacc_collapse): Add tile and outer fields. (expand_oacc_collapse_init): Add LOC paramter. Initialize tile element fields. (expand_oacc_collapse_vars): Add INNER parm, adjust for tiling, avoid DIV for outermost collapse var. (expand_oacc_for): Insert tile element loop as needed. Adjust. Remove out of date comments, fix whitespace. * omp-general.c (omp_extract_for_data): Deal with tiling. * omp-general.h (enum oacc_loop_flags): Add OLF_TILE flag, adjust OLF_DIM_BASE value. (struct omp_for_data): Add tiling field. * omp-low.c (scan_sharing_clauses): Allow OMP_CLAUSE_TILE. (lower_oacc_head_mark): Add OLF_TILE as appropriate. Ensure 2 levels for auto loops. Remove default auto determining, moved to oacc_loop_fixed_partitions. * omp-offload.c (struct oacc_loop): Change 'ifns' to vector of call stmts, add e_mask field. (oacc_dim_call): New function, abstracted out from oacc_thread_numbers. (oacc_thread_numbers): Use oacc_dim_call. (oacc_xform_tile): New. (new_oacc_loop_raw): Initialize e_mask, adjust for ifns vector. (finish_oacc_loop): Adjust for ifns vector. (oacc_loop_discover_walk): Append loop abstraction sites to list, add case for GOACC_TILE fns. (oacc_loop_xform_loop): Delete. (oacc_loop_process): Iterate over call list directly, and add handling for GOACC_TILE fns. (oacc_loop_fixed_partitions): Determine default auto, deal with TILE, dump partitioning. (oacc_loop_auto_partitions): Add outer_assign parm. Assign all but vector partitioning to outer loops. Assign 2 partitions to loops when available. Add TILE handling. (oacc_loop_partition): Adjust oacc_loop_auto_partitions call. (execite_oacc_device_lower): Process GOACC_TILE fns, ignore unknown specs. * tree-nested.c (convert_nonlocal_omp_clauses): Allow OMP_CLAUSE_TILE. * tree.c (omp_clause_num_ops): Adjust TILE ops. * tree.h (OMP_CLAUSE_TILE_ITERVAR, OMP_CLAUSE_TILE_COUNT): New. gcc/c/ * c-parser.c (c_parser_omp_clause_collapse): Disallow tile. (c_parser_oacc_clause_tile): Disallow collapse. Fix parsing and semantic checking. * c-parser.c (c_parser_omp_for_loop): Accept tiling constructs. gcc/cp/ * parser.c (cp_parser_oacc_clause_tile): Disallow collapse. Fix parsing. Parse constant expression. Remove semantic checking. (cp_parser_omp_clause_collapse): Disallow tile. (cp_parser_omp_for_loop): Deal with tile clause. Don't emit a parse error about missing for after already emitting one. Use more conventional for idiom for unbounded loop. * pt.c (tsubst_omp_clauses): Handle OMP_CLAUSE_TILE. * semantics.c (finish_omp_clauses): Correct TILE semantic check. (finish_omp_for): Deal with tile clause. gcc/fortran/ * openmp.c (resolve_omp_clauses): Error on directives containing both tile and collapse clauses. (resolve_oacc_loop_blocks): Represent '*' tile arguments as zero. * trans-openmp.c (gfc_trans_omp_do): Lower tiled loops like collapsed loops. gcc/testsuite/ * c-c++-common/goacc/combined-directives.c: Remove xfail. * c-c++-common/goacc/loop-auto-1.c: Adjust and add additional case. * c-c++-common/goacc/loop-auto-2.c: New. * c-c++-common/goacc/tile.c: Include stdbool, fix expected errors. * c-c++-common/goacc/tile-2.c: New. * g++.dg/goacc/template.C: Test tile subst. Adjust erroneous uses. * g++.dg/goacc/tile-1.C: New, check tile subst. * gcc.dg/goacc/loop-processing-1.c: Adjust dg-final pattern. * gfortran.dg/goacc/combined-directives.f90: Remove xfail. * gfortran.dg/goacc/tile-1.f90: New test. * gfortran.dg/goacc/tile-2.f90: New test. * gfortran.dg/goacc/tile-lowering.f95: New test. libgomp/ * testsuite/libgomp.oacc-c-c++-common/tile-1.c: New. * testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c: Adjust and add additional case. * testsuite/libgomp.oacc-c-c++-common/vprop.c: XFAIL under "openacc_nvidia_accel_selected". * libgomp.oacc-fortran/nested-function-1.f90 (test2): Add num_workers(8) clause. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@245300 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/omp-offload.c')
-rw-r--r--gcc/omp-offload.c346
1 files changed, 243 insertions, 103 deletions
diff --git a/gcc/omp-offload.c b/gcc/omp-offload.c
index 6ff6bc2eeb9..e4ce48cb8e5 100644
--- a/gcc/omp-offload.c
+++ b/gcc/omp-offload.c
@@ -67,9 +67,10 @@ struct oacc_loop
tree routine; /* Pseudo-loop enclosing a routine. */
unsigned mask; /* Partitioning mask. */
+ unsigned e_mask; /* Partitioning of element loops (when tiling). */
unsigned inner; /* Partitioning of inner loops. */
unsigned flags; /* Partitioning flags. */
- unsigned ifns; /* Contained loop abstraction functions. */
+ vec<gcall *> ifns; /* Contained loop abstraction functions. */
tree chunk_size; /* Chunk size. */
gcall *head_end; /* Final marker of head sequence. */
};
@@ -217,6 +218,23 @@ omp_finish_file (void)
}
}
+/* Call dim_pos (POS == true) or dim_size (POS == false) builtins for
+ axis DIM. Return a tmp var holding the result. */
+
+static tree
+oacc_dim_call (bool pos, int dim, gimple_seq *seq)
+{
+ tree arg = build_int_cst (unsigned_type_node, dim);
+ tree size = create_tmp_var (integer_type_node);
+ enum internal_fn fn = pos ? IFN_GOACC_DIM_POS : IFN_GOACC_DIM_SIZE;
+ gimple *call = gimple_build_call_internal (fn, 1, arg);
+
+ gimple_call_set_lhs (call, size);
+ gimple_seq_add_stmt (seq, call);
+
+ return size;
+}
+
/* Find the number of threads (POS = false), or thread number (POS =
true) for an OpenACC region partitioned as MASK. Setup code
required for the calculation is added to SEQ. */
@@ -231,29 +249,17 @@ oacc_thread_numbers (bool pos, int mask, gimple_seq *seq)
for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
if (GOMP_DIM_MASK (ix) & mask)
{
- tree arg = build_int_cst (unsigned_type_node, ix);
-
if (res)
{
/* We had an outer index, so scale that by the size of
this dimension. */
- tree n = create_tmp_var (integer_type_node);
- gimple *call
- = gimple_build_call_internal (IFN_GOACC_DIM_SIZE, 1, arg);
-
- gimple_call_set_lhs (call, n);
- gimple_seq_add_stmt (seq, call);
+ tree n = oacc_dim_call (false, ix, seq);
res = fold_build2 (MULT_EXPR, integer_type_node, res, n);
}
if (pos)
{
/* Determine index in this dimension. */
- tree id = create_tmp_var (integer_type_node);
- gimple *call = gimple_build_call_internal
- (IFN_GOACC_DIM_POS, 1, arg);
-
- gimple_call_set_lhs (call, id);
- gimple_seq_add_stmt (seq, call);
+ tree id = oacc_dim_call (true, ix, seq);
if (res)
res = fold_build2 (PLUS_EXPR, integer_type_node, res, id);
else
@@ -452,6 +458,85 @@ oacc_xform_loop (gcall *call)
gsi_replace_with_seq (&gsi, seq, true);
}
+/* Transform a GOACC_TILE call. Determines the element loop span for
+ the specified loop of the nest. This is 1 if we're not tiling.
+
+ GOACC_TILE (collapse_count, loop_no, tile_arg, gwv_tile, gwv_element); */
+
+static void
+oacc_xform_tile (gcall *call)
+{
+ gimple_stmt_iterator gsi = gsi_for_stmt (call);
+ unsigned collapse = tree_to_uhwi (gimple_call_arg (call, 0));
+ /* Inner loops have higher loop_nos. */
+ unsigned loop_no = tree_to_uhwi (gimple_call_arg (call, 1));
+ tree tile_size = gimple_call_arg (call, 2);
+ unsigned e_mask = tree_to_uhwi (gimple_call_arg (call, 4));
+ tree lhs = gimple_call_lhs (call);
+ tree type = TREE_TYPE (lhs);
+ gimple_seq seq = NULL;
+ tree span = build_int_cst (type, 1);
+
+ gcc_assert (!(e_mask
+ & ~(GOMP_DIM_MASK (GOMP_DIM_VECTOR)
+ | GOMP_DIM_MASK (GOMP_DIM_WORKER))));
+ push_gimplify_context (!seen_error ());
+
+#ifndef ACCEL_COMPILER
+ /* Partitioning disabled on host compilers. */
+ e_mask = 0;
+#endif
+ if (!e_mask)
+ /* Not paritioning. */
+ span = integer_one_node;
+ else if (!integer_zerop (tile_size))
+ /* User explicitly specified size. */
+ span = tile_size;
+ else
+ {
+ /* Pick a size based on the paritioning of the element loop and
+ the number of loop nests. */
+ tree first_size = NULL_TREE;
+ tree second_size = NULL_TREE;
+
+ if (e_mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR))
+ first_size = oacc_dim_call (false, GOMP_DIM_VECTOR, &seq);
+ if (e_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
+ second_size = oacc_dim_call (false, GOMP_DIM_WORKER, &seq);
+
+ if (!first_size)
+ {
+ first_size = second_size;
+ second_size = NULL_TREE;
+ }
+
+ if (loop_no + 1 == collapse)
+ {
+ span = first_size;
+ if (!loop_no && second_size)
+ span = fold_build2 (MULT_EXPR, TREE_TYPE (span),
+ span, second_size);
+ }
+ else if (loop_no + 2 == collapse)
+ span = second_size;
+ else
+ span = NULL_TREE;
+
+ if (!span)
+ /* There's no obvious element size for this loop. Options
+ are 1, first_size or some non-unity constant (32 is my
+ favourite). We should gather some statistics. */
+ span = first_size;
+ }
+
+ span = fold_convert (type, span);
+ gimplify_assign (lhs, span, &seq);
+
+ pop_gimplify_context (NULL);
+
+ gsi_replace_with_seq (&gsi, seq, true);
+}
+
/* Default partitioned and minimum partitioned dimensions. */
static int oacc_default_dims[GOMP_DIM_MAX];
@@ -610,8 +695,7 @@ new_oacc_loop_raw (oacc_loop *parent, location_t loc)
memset (loop->tails, 0, sizeof (loop->tails));
loop->routine = NULL_TREE;
- loop->mask = loop->flags = loop->inner = 0;
- loop->ifns = 0;
+ loop->mask = loop->e_mask = loop->flags = loop->inner = 0;
loop->chunk_size = 0;
loop->head_end = NULL;
@@ -674,7 +758,7 @@ static oacc_loop *
finish_oacc_loop (oacc_loop *loop)
{
/* If the loop has been collapsed, don't partition it. */
- if (!loop->ifns)
+ if (loop->ifns.is_empty ())
loop->mask = loop->flags = 0;
return loop->parent;
}
@@ -810,9 +894,10 @@ oacc_loop_discover_walk (oacc_loop *loop, basic_block bb)
break;
case IFN_GOACC_LOOP:
- /* Count the goacc loop abstraction fns, to determine if the
- loop was collapsed already. */
- loop->ifns++;
+ case IFN_GOACC_TILE:
+ /* Record the abstraction function, so we can manipulate it
+ later. */
+ loop->ifns.safe_push (call);
break;
case IFN_UNIQUE:
@@ -947,51 +1032,6 @@ oacc_loop_xform_head_tail (gcall *from, int level)
}
}
-/* Transform the IFN_GOACC_LOOP internal functions by providing the
- determined partitioning mask and chunking argument. END_MARKER
- points at the end IFN_HEAD_TAIL call intgroducing the loop. IFNS
- is the number of IFN_GOACC_LOOP calls for the loop. MASK_ARG is
- the replacement partitioning mask and CHUNK_ARG is the replacement
- chunking arg. */
-
-static void
-oacc_loop_xform_loop (gcall *end_marker, unsigned ifns,
- tree mask_arg, tree chunk_arg)
-{
- gimple_stmt_iterator gsi = gsi_for_stmt (end_marker);
-
- gcc_checking_assert (ifns);
- for (;;)
- {
- for (; !gsi_end_p (gsi); gsi_next (&gsi))
- {
- gimple *stmt = gsi_stmt (gsi);
-
- if (!is_gimple_call (stmt))
- continue;
-
- gcall *call = as_a <gcall *> (stmt);
-
- if (!gimple_call_internal_p (call))
- continue;
-
- if (gimple_call_internal_fn (call) != IFN_GOACC_LOOP)
- continue;
-
- *gimple_call_arg_ptr (call, 5) = mask_arg;
- *gimple_call_arg_ptr (call, 4) = chunk_arg;
- ifns--;
- if (!ifns)
- return;
- }
-
- /* The LOOP_BOUND ifn could be in the single successor
- block. */
- basic_block bb = single_succ (gsi_bb (gsi));
- gsi = gsi_start_bb (bb);
- }
-}
-
/* Process the discovered OpenACC loops, setting the correct
partitioning level etc. */
@@ -1004,13 +1044,34 @@ oacc_loop_process (oacc_loop *loop)
if (loop->mask && !loop->routine)
{
int ix;
- unsigned mask = loop->mask;
- unsigned dim = GOMP_DIM_GANG;
- tree mask_arg = build_int_cst (unsigned_type_node, mask);
+ tree mask_arg = build_int_cst (unsigned_type_node, loop->mask);
+ tree e_mask_arg = build_int_cst (unsigned_type_node, loop->e_mask);
tree chunk_arg = loop->chunk_size;
+ gcall *call;
+
+ for (ix = 0; loop->ifns.iterate (ix, &call); ix++)
+ switch (gimple_call_internal_fn (call))
+ {
+ case IFN_GOACC_LOOP:
+ {
+ bool is_e = gimple_call_arg (call, 5) == integer_minus_one_node;
+ gimple_call_set_arg (call, 5, is_e ? e_mask_arg : mask_arg);
+ if (!is_e)
+ gimple_call_set_arg (call, 4, chunk_arg);
+ }
+ break;
- oacc_loop_xform_loop (loop->head_end, loop->ifns, mask_arg, chunk_arg);
+ case IFN_GOACC_TILE:
+ gimple_call_set_arg (call, 3, mask_arg);
+ gimple_call_set_arg (call, 4, e_mask_arg);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ unsigned dim = GOMP_DIM_GANG;
+ unsigned mask = loop->mask | loop->e_mask;
for (ix = 0; ix != GOMP_DIM_MAX && mask; ix++)
{
while (!(GOMP_DIM_MASK (dim) & mask))
@@ -1050,10 +1111,16 @@ oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
{
bool auto_par = (loop->flags & OLF_AUTO) != 0;
bool seq_par = (loop->flags & OLF_SEQ) != 0;
-
+ bool tiling = (loop->flags & OLF_TILE) != 0;
+
this_mask = ((loop->flags >> OLF_DIM_BASE)
& (GOMP_DIM_MASK (GOMP_DIM_MAX) - 1));
+ /* Apply auto partitioning if this is a non-partitioned regular
+ loop, or (no more than) single axis tiled loop. */
+ bool maybe_auto
+ = !seq_par && this_mask == (tiling ? this_mask & -this_mask : 0);
+
if ((this_mask != 0) + auto_par + seq_par > 1)
{
if (noisy)
@@ -1062,7 +1129,7 @@ oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
? "%<seq%> overrides other OpenACC loop specifiers"
: "%<auto%> conflicts with other OpenACC loop "
"specifiers");
- auto_par = false;
+ maybe_auto = false;
loop->flags &= ~OLF_AUTO;
if (seq_par)
{
@@ -1071,15 +1138,19 @@ oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
this_mask = 0;
}
}
- if (auto_par && (loop->flags & OLF_INDEPENDENT))
- mask_all |= GOMP_DIM_MASK (GOMP_DIM_MAX);
+
+ if (maybe_auto && (loop->flags & OLF_INDEPENDENT))
+ {
+ loop->flags |= OLF_AUTO;
+ mask_all |= GOMP_DIM_MASK (GOMP_DIM_MAX);
+ }
}
if (this_mask & outer_mask)
{
const oacc_loop *outer;
for (outer = loop->parent; outer; outer = outer->parent)
- if (outer->mask & this_mask)
+ if ((outer->mask | outer->e_mask) & this_mask)
break;
if (noisy)
@@ -1125,13 +1196,33 @@ oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
}
}
- loop->mask = this_mask;
mask_all |= this_mask;
+ if (loop->flags & OLF_TILE)
+ {
+ /* When tiling, vector goes to the element loop, and failing
+ that we put worker there. The std doesn't contemplate
+ specifying all three. We choose to put worker and vector on
+ the element loops in that case. */
+ unsigned this_e_mask = this_mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR);
+ if (!this_e_mask || this_mask & GOMP_DIM_MASK (GOMP_DIM_GANG))
+ this_e_mask |= this_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER);
+
+ loop->e_mask = this_e_mask;
+ this_mask ^= this_e_mask;
+ }
+
+ loop->mask = this_mask;
+
+ if (dump_file)
+ fprintf (dump_file, "Loop %s:%d user specified %d & %d\n",
+ LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
+ loop->mask, loop->e_mask);
+
if (loop->child)
{
- loop->inner = oacc_loop_fixed_partitions (loop->child,
- outer_mask | this_mask);
+ unsigned tmp_mask = outer_mask | this_mask | loop->e_mask;
+ loop->inner = oacc_loop_fixed_partitions (loop->child, tmp_mask);
mask_all |= loop->inner;
}
@@ -1143,14 +1234,17 @@ oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
/* Walk the OpenACC loop heirarchy to assign auto-partitioned loops.
OUTER_MASK is the partitioning this loop is contained within.
+ OUTER_ASSIGN is true if an outer loop is being auto-partitioned.
Return the cumulative partitioning used by this loop, siblings and
children. */
static unsigned
-oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask)
+oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask,
+ bool outer_assign)
{
bool assign = (loop->flags & OLF_AUTO) && (loop->flags & OLF_INDEPENDENT);
bool noisy = true;
+ bool tiling = loop->flags & OLF_TILE;
#ifdef ACCEL_COMPILER
/* When device_type is supported, we want the device compiler to be
@@ -1158,29 +1252,50 @@ oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask)
noisy = false;
#endif
- if (assign && outer_mask < GOMP_DIM_MASK (GOMP_DIM_MAX - 1))
+ if (assign && (!outer_assign | loop->inner))
{
- /* Allocate the outermost loop at the outermost available
- level. */
- unsigned this_mask = outer_mask + 1;
+ /* Allocate outermost and non-innermost loops at the outermost
+ non-innermost available level. */
+ unsigned this_mask = GOMP_DIM_MASK (GOMP_DIM_GANG);
+
+ /* Find the first outermost available partition. */
+ while (this_mask <= outer_mask)
+ this_mask <<= 1;
+
+ /* Grab two axes if tiling, and we've not assigned anything */
+ if (tiling && !(loop->mask | loop->e_mask))
+ this_mask |= this_mask << 1;
+
+ /* Prohibit the innermost partitioning at the moment. */
+ this_mask &= GOMP_DIM_MASK (GOMP_DIM_MAX - 1) - 1;
+
+ /* Don't use any dimension explicitly claimed by an inner loop. */
+ this_mask &= ~loop->inner;
+
+ if (tiling && !loop->e_mask)
+ {
+ /* If we got two axes, allocate the inner one to the element
+ loop. */
+ loop->e_mask = this_mask & (this_mask << 1);
+ this_mask ^= loop->e_mask;
+ }
- if (!(this_mask & loop->inner))
- loop->mask = this_mask;
+ loop->mask |= this_mask;
}
if (loop->child)
{
- unsigned child_mask = outer_mask | loop->mask;
-
- if (loop->mask || assign)
- child_mask |= GOMP_DIM_MASK (GOMP_DIM_MAX);
-
- loop->inner = oacc_loop_auto_partitions (loop->child, child_mask);
+ unsigned tmp_mask = outer_mask | loop->mask | loop->e_mask;
+ loop->inner = oacc_loop_auto_partitions (loop->child, tmp_mask,
+ outer_assign | assign);
}
- if (assign && !loop->mask)
+ if (assign && (!loop->mask || (tiling && !loop->e_mask) || !outer_assign))
{
- /* Allocate the loop at the innermost available level. */
+ /* Allocate the loop at the innermost available level. Note
+ that we do this even if we already assigned this loop the
+ outermost available level above. That way we'll partition
+ this along 2 axes, if they are available. */
unsigned this_mask = 0;
/* Determine the outermost partitioning used within this loop. */
@@ -1193,24 +1308,44 @@ oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask)
/* And avoid picking one use by an outer loop. */
this_mask &= ~outer_mask;
- if (!this_mask && noisy)
- warning_at (loop->loc, 0,
- "insufficient partitioning available to parallelize loop");
+ /* If tiling and we failed completely above, grab the next one
+ too. Making sure it doesn't hit an outer loop. */
+ if (tiling)
+ {
+ this_mask &= ~(loop->e_mask | loop->mask);
+ unsigned tile_mask = ((this_mask >> 1)
+ & ~(outer_mask | loop->e_mask | loop->mask));
+
+ if (tile_mask || loop->mask)
+ {
+ loop->e_mask |= this_mask;
+ this_mask = tile_mask;
+ }
+ if (!loop->e_mask && noisy)
+ warning_at (loop->loc, 0,
+ "insufficient partitioning available"
+ " to parallelize element loop");
+ }
- loop->mask = this_mask;
+ loop->mask |= this_mask;
+ if (!loop->mask && noisy)
+ warning_at (loop->loc, 0,
+ "insufficient partitioning available"
+ " to parallelize%s loop", tiling ? " tile" : "");
}
if (assign && dump_file)
- fprintf (dump_file, "Auto loop %s:%d assigned %d\n",
+ fprintf (dump_file, "Auto loop %s:%d assigned %d & %d\n",
LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
- loop->mask);
+ loop->mask, loop->e_mask);
unsigned inner_mask = 0;
if (loop->sibling)
- inner_mask |= oacc_loop_auto_partitions (loop->sibling, outer_mask);
+ inner_mask |= oacc_loop_auto_partitions (loop->sibling,
+ outer_mask, outer_assign);
- inner_mask |= loop->inner | loop->mask;
+ inner_mask |= loop->inner | loop->mask | loop->e_mask;
return inner_mask;
}
@@ -1226,7 +1361,7 @@ oacc_loop_partition (oacc_loop *loop, unsigned outer_mask)
if (mask_all & GOMP_DIM_MASK (GOMP_DIM_MAX))
{
mask_all ^= GOMP_DIM_MASK (GOMP_DIM_MAX);
- mask_all |= oacc_loop_auto_partitions (loop, outer_mask);
+ mask_all |= oacc_loop_auto_partitions (loop, outer_mask, false);
}
return mask_all;
}
@@ -1376,6 +1511,11 @@ execute_oacc_device_lower ()
{
default: break;
+ case IFN_GOACC_TILE:
+ oacc_xform_tile (call);
+ rescan = true;
+ break;
+
case IFN_GOACC_LOOP:
oacc_xform_loop (call);
rescan = true;
@@ -1403,7 +1543,7 @@ execute_oacc_device_lower ()
switch (kind)
{
default:
- gcc_unreachable ();
+ break;
case IFN_UNIQUE_OACC_FORK:
case IFN_UNIQUE_OACC_JOIN: