diff options
Diffstat (limited to 'gcc/tree-loop-distribution.c')
-rw-r--r-- | gcc/tree-loop-distribution.c | 756 |
1 files changed, 389 insertions, 367 deletions
diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c index 7482d8c68bc..45efad3f110 100644 --- a/gcc/tree-loop-distribution.c +++ b/gcc/tree-loop-distribution.c @@ -44,6 +44,16 @@ along with GCC; see the file COPYING3. If not see #include "config.h" #include "system.h" #include "coretypes.h" +#include "tree.h" +#include "gimple.h" +#include "gimple-ssa.h" +#include "tree-cfg.h" +#include "tree-phinodes.h" +#include "ssa-iterators.h" +#include "tree-ssanames.h" +#include "tree-ssa-loop-manip.h" +#include "tree-ssa-loop.h" +#include "tree-into-ssa.h" #include "tree-ssa.h" #include "cfgloop.h" #include "tree-chrec.h" @@ -51,6 +61,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-scalar-evolution.h" #include "tree-pass.h" #include "gimple-pretty-print.h" +#include "tree-vectorizer.h" /* A Reduced Dependence Graph (RDG) vertex representing a statement. */ @@ -85,15 +96,6 @@ enum rdg_dep_type /* Read After Write (RAW). */ flow_dd = 'f', - /* Write After Read (WAR). */ - anti_dd = 'a', - - /* Write After Write (WAW). */ - output_dd = 'o', - - /* Read After Read (RAR). */ - input_dd = 'i', - /* Control dependence (execute conditional on). */ control_dd = 'c' }; @@ -104,19 +106,9 @@ typedef struct rdg_edge { /* Type of the dependence. */ enum rdg_dep_type type; - - /* Levels of the dependence: the depth of the loops that carry the - dependence. */ - unsigned level; - - /* Dependence relation between data dependences, NULL when one of - the vertices is a scalar. */ - ddr_p relation; } *rdg_edge_p; #define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type -#define RDGE_LEVEL(E) ((struct rdg_edge *) ((E)->data))->level -#define RDGE_RELATION(E) ((struct rdg_edge *) ((E)->data))->relation /* Dump vertex I in RDG to FILE. */ @@ -204,23 +196,11 @@ dot_rdg_1 (FILE *file, struct graph *rdg) for (e = v->succ; e; e = e->succ_next) switch (RDGE_TYPE (e)) { - case input_dd: - fprintf (file, "%d -> %d [label=input] \n", i, e->dest); - break; - - case output_dd: - fprintf (file, "%d -> %d [label=output] \n", i, e->dest); - break; - case flow_dd: /* These are the most common dependences: don't print these. */ fprintf (file, "%d -> %d \n", i, e->dest); break; - case anti_dd: - fprintf (file, "%d -> %d [label=anti] \n", i, e->dest); - break; - case control_dd: fprintf (file, "%d -> %d [label=control] \n", i, e->dest); break; @@ -240,7 +220,7 @@ dot_rdg (struct graph *rdg) { /* When debugging, you may want to enable the following code. */ #if 1 - FILE *file = popen("dot -Tx11", "w"); + FILE *file = popen ("dot -Tx11", "w"); if (!file) return; dot_rdg_1 (file, rdg); @@ -262,52 +242,6 @@ rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt) return index; } -/* Creates an edge in RDG for each distance vector from DDR. The - order that we keep track of in the RDG is the order in which - statements have to be executed. */ - -static void -create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr) -{ - struct graph_edge *e; - int va, vb; - data_reference_p dra = DDR_A (ddr); - data_reference_p drb = DDR_B (ddr); - unsigned level = ddr_dependence_level (ddr); - - /* For non scalar dependences, when the dependence is REVERSED, - statement B has to be executed before statement A. */ - if (level > 0 - && !DDR_REVERSED_P (ddr)) - { - data_reference_p tmp = dra; - dra = drb; - drb = tmp; - } - - va = rdg_vertex_for_stmt (rdg, DR_STMT (dra)); - vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb)); - - if (va < 0 || vb < 0) - return; - - e = add_edge (rdg, va, vb); - e->data = XNEW (struct rdg_edge); - - RDGE_LEVEL (e) = level; - RDGE_RELATION (e) = ddr; - - /* Determines the type of the data dependence. */ - if (DR_IS_READ (dra) && DR_IS_READ (drb)) - RDGE_TYPE (e) = input_dd; - else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)) - RDGE_TYPE (e) = output_dd; - else if (DR_IS_WRITE (dra) && DR_IS_READ (drb)) - RDGE_TYPE (e) = flow_dd; - else if (DR_IS_READ (dra) && DR_IS_WRITE (drb)) - RDGE_TYPE (e) = anti_dd; -} - /* Creates dependence edges in RDG for all the uses of DEF. IDEF is the index of DEF in RDG. */ @@ -328,7 +262,6 @@ create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef) e = add_edge (rdg, idef, use); e->data = XNEW (struct rdg_edge); RDGE_TYPE (e) = flow_dd; - RDGE_RELATION (e) = NULL; } } @@ -355,7 +288,6 @@ create_edge_for_control_dependence (struct graph *rdg, basic_block bb, e = add_edge (rdg, c, v); e->data = XNEW (struct rdg_edge); RDGE_TYPE (e) = control_dd; - RDGE_RELATION (e) = NULL; } } } @@ -363,38 +295,38 @@ create_edge_for_control_dependence (struct graph *rdg, basic_block bb, /* Creates the edges of the reduced dependence graph RDG. */ static void -create_rdg_edges (struct graph *rdg, vec<ddr_p> ddrs, control_dependences *cd) +create_rdg_flow_edges (struct graph *rdg) { int i; - struct data_dependence_relation *ddr; def_operand_p def_p; ssa_op_iter iter; - FOR_EACH_VEC_ELT (ddrs, i, ddr) - if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) - create_rdg_edge_for_ddr (rdg, ddr); - else - free_dependence_relation (ddr); - for (i = 0; i < rdg->n_vertices; i++) FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i), iter, SSA_OP_DEF) create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i); +} - if (cd) - for (i = 0; i < rdg->n_vertices; i++) - { - gimple stmt = RDG_STMT (rdg, i); - if (gimple_code (stmt) == GIMPLE_PHI) - { - edge_iterator ei; - edge e; - FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds) +/* Creates the edges of the reduced dependence graph RDG. */ + +static void +create_rdg_cd_edges (struct graph *rdg, control_dependences *cd) +{ + int i; + + for (i = 0; i < rdg->n_vertices; i++) + { + gimple stmt = RDG_STMT (rdg, i); + if (gimple_code (stmt) == GIMPLE_PHI) + { + edge_iterator ei; + edge e; + FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds) create_edge_for_control_dependence (rdg, e->src, i, cd); - } - else - create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd); - } + } + else + create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd); + } } /* Build the vertices of the reduced dependence graph RDG. Return false @@ -470,17 +402,6 @@ stmts_from_loop (struct loop *loop, vec<gimple> *stmts) free (bbs); } -/* Build the Reduced Dependence Graph (RDG) with one vertex per - statement of the loop nest, and one edge per data dependence or - scalar dependence. */ - -struct graph * -build_empty_rdg (int n_stmts) -{ - struct graph *rdg = new_graph (n_stmts); - return rdg; -} - /* Free the reduced dependence graph RDG. */ static void @@ -494,10 +415,7 @@ free_rdg (struct graph *rdg) struct graph_edge *e; for (e = v->succ; e; e = e->succ_next) - { - free_dependence_relation (RDGE_RELATION (e)); - free (e->data); - } + free (e->data); if (v->data) { @@ -518,37 +436,25 @@ static struct graph * build_rdg (vec<loop_p> loop_nest, control_dependences *cd) { struct graph *rdg; - vec<gimple> stmts; vec<data_reference_p> datarefs; - vec<ddr_p> dependence_relations; /* Create the RDG vertices from the stmts of the loop nest. */ - stmts.create (10); + stack_vec<gimple, 10> stmts; stmts_from_loop (loop_nest[0], &stmts); - rdg = build_empty_rdg (stmts.length ()); + rdg = new_graph (stmts.length ()); datarefs.create (10); if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs)) { - stmts.release (); datarefs.release (); free_rdg (rdg); return NULL; } stmts.release (); - /* Create the RDG edges from the data dependences in the loop nest. */ - dependence_relations.create (100); - if (!compute_all_dependences (datarefs, &dependence_relations, loop_nest, - false) - || !known_dependences_p (dependence_relations)) - { - free_dependence_relations (dependence_relations); - datarefs.release (); - free_rdg (rdg); - return NULL; - } - create_rdg_edges (rdg, dependence_relations, cd); - dependence_relations.release (); + create_rdg_flow_edges (rdg); + if (cd) + create_rdg_cd_edges (rdg, cd); + datarefs.release (); return rdg; @@ -557,18 +463,19 @@ build_rdg (vec<loop_p> loop_nest, control_dependences *cd) enum partition_kind { - PKIND_NORMAL, PKIND_REDUCTION, PKIND_MEMSET, PKIND_MEMCPY + PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY }; typedef struct partition_s { bitmap stmts; bitmap loops; - bool has_writes; + bool reduction_p; enum partition_kind kind; /* data-references a kind != PKIND_NORMAL partition is about. */ data_reference_p main_dr; data_reference_p secondary_dr; + tree niter; } *partition_t; @@ -580,7 +487,7 @@ partition_alloc (bitmap stmts, bitmap loops) partition_t partition = XCNEW (struct partition_s); partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL); partition->loops = loops ? loops : BITMAP_ALLOC (NULL); - partition->has_writes = false; + partition->reduction_p = false; partition->kind = PKIND_NORMAL; return partition; } @@ -600,17 +507,29 @@ partition_free (partition_t partition) static bool partition_builtin_p (partition_t partition) { - return partition->kind > PKIND_REDUCTION; + return partition->kind != PKIND_NORMAL; } -/* Returns true if the partition has an writes. */ +/* Returns true if the partition contains a reduction. */ static bool -partition_has_writes (partition_t partition) +partition_reduction_p (partition_t partition) +{ + return partition->reduction_p; +} + +/* Merge PARTITION into the partition DEST. */ + +static void +partition_merge_into (partition_t dest, partition_t partition) { - return partition->has_writes; + dest->kind = PKIND_NORMAL; + bitmap_ior_into (dest->stmts, partition->stmts); + if (partition_reduction_p (partition)) + dest->reduction_p = true; } + /* Returns true when DEF is an SSA_NAME defined in LOOP and used after the LOOP. */ @@ -848,21 +767,17 @@ generate_memset_builtin (struct loop *loop, partition_t partition) { gimple_stmt_iterator gsi; gimple stmt, fn_call; - tree nb_iter, mem, fn, nb_bytes; + tree mem, fn, nb_bytes; location_t loc; tree val; stmt = DR_STMT (partition->main_dr); loc = gimple_location (stmt); - if (gimple_bb (stmt) == loop->latch) - nb_iter = number_of_latch_executions (loop); - else - nb_iter = number_of_exit_cond_executions (loop); /* The new statements will be placed before LOOP. */ gsi = gsi_last_bb (loop_preheader_edge (loop)->src); - nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter); + nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter); nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE, false, GSI_CONTINUE_LINKING); mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes); @@ -908,21 +823,17 @@ generate_memcpy_builtin (struct loop *loop, partition_t partition) { gimple_stmt_iterator gsi; gimple stmt, fn_call; - tree nb_iter, dest, src, fn, nb_bytes; + tree dest, src, fn, nb_bytes; location_t loc; enum built_in_function kind; stmt = DR_STMT (partition->main_dr); loc = gimple_location (stmt); - if (gimple_bb (stmt) == loop->latch) - nb_iter = number_of_latch_executions (loop); - else - nb_iter = number_of_exit_cond_executions (loop); /* The new statements will be placed before LOOP. */ gsi = gsi_last_bb (loop_preheader_edge (loop)->src); - nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter); + nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter); nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE, false, GSI_CONTINUE_LINKING); dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes); @@ -1005,108 +916,52 @@ generate_code_for_partition (struct loop *loop, { switch (partition->kind) { + case PKIND_NORMAL: + /* Reductions all have to be in the last partition. */ + gcc_assert (!partition_reduction_p (partition) + || !copy_p); + generate_loops_for_partition (loop, partition, copy_p); + return; + case PKIND_MEMSET: generate_memset_builtin (loop, partition); - /* If this is the last partition for which we generate code, we have - to destroy the loop. */ - if (!copy_p) - destroy_loop (loop); break; case PKIND_MEMCPY: generate_memcpy_builtin (loop, partition); - /* If this is the last partition for which we generate code, we have - to destroy the loop. */ - if (!copy_p) - destroy_loop (loop); - break; - - case PKIND_REDUCTION: - /* Reductions all have to be in the last partition. */ - gcc_assert (!copy_p); - case PKIND_NORMAL: - generate_loops_for_partition (loop, partition, copy_p); break; default: gcc_unreachable (); } -} - - -/* Returns true if the node V of RDG cannot be recomputed. */ - -static bool -rdg_cannot_recompute_vertex_p (struct graph *rdg, int v) -{ - if (RDG_MEM_WRITE_STMT (rdg, v)) - return true; - - return false; -} -/* Returns true when the vertex V has already been generated in the - current partition (V is in PROCESSED), or when V belongs to another - partition and cannot be recomputed (V is not in REMAINING_STMTS). */ - -static inline bool -already_processed_vertex_p (bitmap processed, int v) -{ - return bitmap_bit_p (processed, v); + /* Common tail for partitions we turn into a call. If this was the last + partition for which we generate code, we have to destroy the loop. */ + if (!copy_p) + destroy_loop (loop); } -static void rdg_flag_vertex_and_dependent (struct graph *, int, partition_t, - bitmap); -/* Flag V from RDG as part of PARTITION, and also flag its loop number - in LOOPS. */ - -static void -rdg_flag_vertex (struct graph *rdg, int v, partition_t partition) -{ - struct loop *loop; - - if (!bitmap_set_bit (partition->stmts, v)) - return; - - loop = loop_containing_stmt (RDG_STMT (rdg, v)); - bitmap_set_bit (partition->loops, loop->num); - - if (rdg_cannot_recompute_vertex_p (rdg, v)) - partition->has_writes = true; -} - -/* Flag in the bitmap PARTITION the vertex V and all its predecessors. - Also flag their loop number in LOOPS. */ +/* Returns a partition with all the statements needed for computing + the vertex V of the RDG, also including the loop exit conditions. */ -static void -rdg_flag_vertex_and_dependent (struct graph *rdg, int v, partition_t partition, - bitmap processed) +static partition_t +build_rdg_partition_for_vertex (struct graph *rdg, int v) { + partition_t partition = partition_alloc (NULL, NULL); + stack_vec<int, 3> nodes; unsigned i; - vec<int> nodes; - nodes.create (3); int x; graphds_dfs (rdg, &v, 1, &nodes, false, NULL); FOR_EACH_VEC_ELT (nodes, i, x) - if (bitmap_set_bit (processed, x)) - rdg_flag_vertex (rdg, x, partition); - - nodes.release (); -} - -/* Returns a partition with all the statements needed for computing - the vertex V of the RDG, also including the loop exit conditions. */ + { + bitmap_set_bit (partition->stmts, x); + bitmap_set_bit (partition->loops, + loop_containing_stmt (RDG_STMT (rdg, x))->num); + } -static partition_t -build_rdg_partition_for_vertex (struct graph *rdg, int v) -{ - partition_t partition = partition_alloc (NULL, NULL); - bitmap processed = BITMAP_ALLOC (NULL); - rdg_flag_vertex_and_dependent (rdg, v, partition, processed); - BITMAP_FREE (processed); return partition; } @@ -1125,6 +980,7 @@ classify_partition (loop_p loop, struct graph *rdg, partition_t partition) partition->kind = PKIND_NORMAL; partition->main_dr = NULL; partition->secondary_dr = NULL; + partition->niter = NULL_TREE; EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi) { @@ -1133,15 +989,10 @@ classify_partition (loop_p loop, struct graph *rdg, partition_t partition) if (gimple_has_volatile_ops (stmt)) volatiles_p = true; - /* If the stmt has uses outside of the loop fail. - ??? If the stmt is generated in another partition that - is not created as builtin we can ignore this. */ + /* If the stmt has uses outside of the loop mark it as reduction. */ if (stmt_has_scalar_dependences_outside_loop (loop, stmt)) { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "not generating builtin, partition has " - "scalar uses outside of the loop\n"); - partition->kind = PKIND_REDUCTION; + partition->reduction_p = true; return; } } @@ -1151,10 +1002,6 @@ classify_partition (loop_p loop, struct graph *rdg, partition_t partition) || !flag_tree_loop_distribute_patterns) return; - nb_iter = number_of_exit_cond_executions (loop); - if (!nb_iter || nb_iter == chrec_dont_know) - return; - /* Detect memset and memcpy. */ single_load = NULL; single_store = NULL; @@ -1193,6 +1040,17 @@ classify_partition (loop_p loop, struct graph *rdg, partition_t partition) } } + if (!single_store) + return; + + if (!dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src, + gimple_bb (DR_STMT (single_store)))) + nb_iter = number_of_latch_executions (loop); + else + nb_iter = number_of_exit_cond_executions (loop); + if (!nb_iter || nb_iter == chrec_dont_know) + return; + if (single_store && !single_load) { gimple stmt = DR_STMT (single_store); @@ -1206,10 +1064,13 @@ classify_partition (loop_p loop, struct graph *rdg, partition_t partition) && !SSA_NAME_IS_DEFAULT_DEF (rhs) && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs)))) return; - if (!adjacent_dr_p (single_store)) + if (!adjacent_dr_p (single_store) + || !dominated_by_p (CDI_DOMINATORS, + loop->latch, gimple_bb (stmt))) return; partition->kind = PKIND_MEMSET; partition->main_dr = single_store; + partition->niter = nb_iter; } else if (single_store && single_load) { @@ -1222,7 +1083,9 @@ classify_partition (loop_p loop, struct graph *rdg, partition_t partition) if (!adjacent_dr_p (single_store) || !adjacent_dr_p (single_load) || !operand_equal_p (DR_STEP (single_store), - DR_STEP (single_load), 0)) + DR_STEP (single_load), 0) + || !dominated_by_p (CDI_DOMINATORS, + loop->latch, gimple_bb (store))) return; /* Now check that if there is a dependence this dependence is of a suitable form for memmove. */ @@ -1264,6 +1127,7 @@ classify_partition (loop_p loop, struct graph *rdg, partition_t partition) partition->kind = PKIND_MEMCPY; partition->main_dr = single_store; partition->secondary_dr = single_load; + partition->niter = nb_iter; } } @@ -1333,54 +1197,34 @@ rdg_build_partitions (struct graph *rdg, bitmap processed = BITMAP_ALLOC (NULL); int i; gimple stmt; - partition_t partition = partition_alloc (NULL, NULL); FOR_EACH_VEC_ELT (starting_stmts, i, stmt) { - partition_t np; int v = rdg_vertex_for_stmt (rdg, stmt); if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "ldist asked to generate code for vertex %d\n", v); + /* If the vertex is already contained in another partition so + is the partition rooted at it. */ if (bitmap_bit_p (processed, v)) continue; - np = build_rdg_partition_for_vertex (rdg, v); - bitmap_ior_into (partition->stmts, np->stmts); - partition->has_writes = partition_has_writes (np); - bitmap_ior_into (processed, np->stmts); - partition_free (np); - - if (partition_has_writes (partition)) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "ldist useful partition:\n"); - dump_bitmap (dump_file, partition->stmts); - } - - partitions->safe_push (partition); - partition = partition_alloc (NULL, NULL); - } - } + partition_t partition = build_rdg_partition_for_vertex (rdg, v); + bitmap_ior_into (processed, partition->stmts); - /* All vertices should have been assigned to at least one partition now, - other than vertices belonging to dead code. */ - - if (!bitmap_empty_p (partition->stmts)) - { if (dump_file && (dump_flags & TDF_DETAILS)) { - fprintf (dump_file, "remaining partition:\n"); + fprintf (dump_file, "ldist useful partition:\n"); dump_bitmap (dump_file, partition->stmts); } partitions->safe_push (partition); } - else - partition_free (partition); + + /* All vertices should have been assigned to at least one partition now, + other than vertices belonging to dead code. */ BITMAP_FREE (processed); } @@ -1465,6 +1309,79 @@ partition_contains_all_rw (struct graph *rdg, return false; } +/* Compute partition dependence created by the data references in DRS1 + and DRS2 and modify and return DIR according to that. */ + +static int +pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir, + vec<data_reference_p> drs1, + vec<data_reference_p> drs2) +{ + data_reference_p dr1, dr2; + + /* dependence direction - 0 is no dependence, -1 is back, + 1 is forth, 2 is both (we can stop then, merging will occur). */ + for (int ii = 0; drs1.iterate (ii, &dr1); ++ii) + for (int jj = 0; drs2.iterate (jj, &dr2); ++jj) + { + int this_dir = 1; + ddr_p ddr; + /* Re-shuffle data-refs to be in dominator order. */ + if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1)) + > rdg_vertex_for_stmt (rdg, DR_STMT (dr2))) + { + data_reference_p tem = dr1; + dr1 = dr2; + dr2 = tem; + this_dir = -this_dir; + } + ddr = initialize_data_dependence_relation (dr1, dr2, loops); + compute_affine_dependence (ddr, loops[0]); + if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) + this_dir = 2; + else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) + { + if (DDR_REVERSED_P (ddr)) + { + data_reference_p tem = dr1; + dr1 = dr2; + dr2 = tem; + this_dir = -this_dir; + } + /* Known dependences can still be unordered througout the + iteration space, see gcc.dg/tree-ssa/ldist-16.c. */ + if (DDR_NUM_DIST_VECTS (ddr) != 1) + this_dir = 2; + /* If the overlap is exact preserve stmt order. */ + else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1)) + ; + else + { + /* Else as the distance vector is lexicographic positive + swap the dependence direction. */ + this_dir = -this_dir; + } + } + else + this_dir = 0; + free_dependence_relation (ddr); + if (dir == 0) + dir = this_dir; + else if (dir != this_dir) + return 2; + } + return dir; +} + +/* Compare postorder number of the partition graph vertices V1 and V2. */ + +static int +pgcmp (const void *v1_, const void *v2_) +{ + const vertex *v1 = (const vertex *)v1_; + const vertex *v2 = (const vertex *)v2_; + return v2->post - v1->post; +} /* Distributes the code from LOOP in such a way that producer statements are placed before consumer statements. Tries to separate @@ -1473,21 +1390,20 @@ partition_contains_all_rw (struct graph *rdg, static int distribute_loop (struct loop *loop, vec<gimple> stmts, - control_dependences *cd) + control_dependences *cd, int *nb_calls) { struct graph *rdg; - vec<loop_p> loop_nest; vec<partition_t> partitions; partition_t partition; bool any_builtin; int i, nbp; + graph *pg = NULL; + int num_sccs = 1; - loop_nest.create (3); + *nb_calls = 0; + stack_vec<loop_p, 3> loop_nest; if (!find_loop_nest (loop, &loop_nest)) - { - loop_nest.release (); - return 0; - } + return 0; rdg = build_rdg (loop_nest, cd); if (!rdg) @@ -1497,7 +1413,6 @@ distribute_loop (struct loop *loop, vec<gimple> stmts, "Loop %d not distributed: failed to build the RDG.\n", loop->num); - loop_nest.release (); return 0; } @@ -1514,96 +1429,197 @@ distribute_loop (struct loop *loop, vec<gimple> stmts, any_builtin |= partition_builtin_p (partition); } + /* If we are only distributing patterns but did not detect any, + simply bail out. */ + if (!flag_tree_loop_distribution + && !any_builtin) + { + nbp = 0; + goto ldist_done; + } + /* If we are only distributing patterns fuse all partitions that - were not properly classified as builtins. Else fuse partitions - with similar memory accesses. */ + were not classified as builtins. This also avoids chopping + a loop into pieces, separated by builtin calls. That is, we + only want no or a single loop body remaining. */ + partition_t into; if (!flag_tree_loop_distribution) { - partition_t into; - /* If we did not detect any builtin simply bail out. */ - if (!any_builtin) - { - nbp = 0; - goto ldist_done; - } - /* Only fuse adjacent non-builtin partitions, see PR53616. - ??? Use dependence information to improve partition ordering. */ - i = 0; - do - { - for (; partitions.iterate (i, &into); ++i) - if (!partition_builtin_p (into)) - break; - for (++i; partitions.iterate (i, &partition); ++i) - if (!partition_builtin_p (partition)) + for (i = 0; partitions.iterate (i, &into); ++i) + if (!partition_builtin_p (into)) + break; + for (++i; partitions.iterate (i, &partition); ++i) + if (!partition_builtin_p (partition)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) { - bitmap_ior_into (into->stmts, partition->stmts); - if (partition->kind == PKIND_REDUCTION) - into->kind = PKIND_REDUCTION; - partitions.ordered_remove (i); - partition_free (partition); - i--; + fprintf (dump_file, "fusing non-builtin partitions\n"); + dump_bitmap (dump_file, into->stmts); + dump_bitmap (dump_file, partition->stmts); } - else - break; - } - while ((unsigned) i < partitions.length ()); + partition_merge_into (into, partition); + partitions.unordered_remove (i); + partition_free (partition); + i--; + } } - else + + /* Due to limitations in the transform phase we have to fuse all + reduction partitions into the last partition so the existing + loop will contain all loop-closed PHI nodes. */ + for (i = 0; partitions.iterate (i, &into); ++i) + if (partition_reduction_p (into)) + break; + for (i = i + 1; partitions.iterate (i, &partition); ++i) + if (partition_reduction_p (partition)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "fusing partitions\n"); + dump_bitmap (dump_file, into->stmts); + dump_bitmap (dump_file, partition->stmts); + fprintf (dump_file, "because they have reductions\n"); + } + partition_merge_into (into, partition); + partitions.unordered_remove (i); + partition_free (partition); + i--; + } + + /* Apply our simple cost model - fuse partitions with similar + memory accesses. */ + for (i = 0; partitions.iterate (i, &into); ++i) { - partition_t into; - int j; - for (i = 0; partitions.iterate (i, &into); ++i) + if (partition_builtin_p (into)) + continue; + for (int j = i + 1; + partitions.iterate (j, &partition); ++j) { - if (partition_builtin_p (into)) - continue; - for (j = i + 1; - partitions.iterate (j, &partition); ++j) + if (!partition_builtin_p (partition) + && similar_memory_accesses (rdg, into, partition)) { - if (!partition_builtin_p (partition) - && similar_memory_accesses (rdg, into, partition)) + if (dump_file && (dump_flags & TDF_DETAILS)) { - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "fusing partitions\n"); - dump_bitmap (dump_file, into->stmts); - dump_bitmap (dump_file, partition->stmts); - fprintf (dump_file, "because they have similar " - "memory accesses\n"); - } - bitmap_ior_into (into->stmts, partition->stmts); - if (partition->kind == PKIND_REDUCTION) - into->kind = PKIND_REDUCTION; - partitions.ordered_remove (j); - partition_free (partition); - j--; + fprintf (dump_file, "fusing partitions\n"); + dump_bitmap (dump_file, into->stmts); + dump_bitmap (dump_file, partition->stmts); + fprintf (dump_file, "because they have similar " + "memory accesses\n"); } + partition_merge_into (into, partition); + partitions.unordered_remove (j); + partition_free (partition); + j--; } } } - /* Fuse all reduction partitions into the last. */ + /* Build the partition dependency graph. */ if (partitions.length () > 1) { - partition_t into = partitions.last (); - for (i = partitions.length () - 2; i >= 0; --i) + pg = new_graph (partitions.length ()); + struct pgdata { + partition_t partition; + vec<data_reference_p> writes; + vec<data_reference_p> reads; + }; +#define PGDATA(i) ((pgdata *)(pg->vertices[i].data)) + for (i = 0; partitions.iterate (i, &partition); ++i) { - partition_t what = partitions[i]; - if (what->kind == PKIND_REDUCTION) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "fusing partitions\n"); - dump_bitmap (dump_file, into->stmts); - dump_bitmap (dump_file, what->stmts); - fprintf (dump_file, "because the latter has reductions\n"); - } - bitmap_ior_into (into->stmts, what->stmts); - into->kind = PKIND_REDUCTION; - partitions.ordered_remove (i); - partition_free (what); - } + vertex *v = &pg->vertices[i]; + pgdata *data = new pgdata; + data_reference_p dr; + /* FIXME - leaks. */ + v->data = data; + bitmap_iterator bi; + unsigned j; + data->partition = partition; + data->reads = vNULL; + data->writes = vNULL; + EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi) + for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k) + if (DR_IS_READ (dr)) + data->reads.safe_push (dr); + else + data->writes.safe_push (dr); } + partition_t partition1, partition2; + for (i = 0; partitions.iterate (i, &partition1); ++i) + for (int j = i + 1; partitions.iterate (j, &partition2); ++j) + { + /* dependence direction - 0 is no dependence, -1 is back, + 1 is forth, 2 is both (we can stop then, merging will occur). */ + int dir = 0; + dir = pg_add_dependence_edges (rdg, loop_nest, dir, + PGDATA(i)->writes, + PGDATA(j)->reads); + if (dir != 2) + dir = pg_add_dependence_edges (rdg, loop_nest, dir, + PGDATA(i)->reads, + PGDATA(j)->writes); + if (dir != 2) + dir = pg_add_dependence_edges (rdg, loop_nest, dir, + PGDATA(i)->writes, + PGDATA(j)->writes); + if (dir == 1 || dir == 2) + add_edge (pg, i, j); + if (dir == -1 || dir == 2) + add_edge (pg, j, i); + } + + /* Add edges to the reduction partition (if any) to force it last. */ + unsigned j; + for (j = 0; partitions.iterate (j, &partition); ++j) + if (partition_reduction_p (partition)) + break; + if (j < partitions.length ()) + { + for (unsigned i = 0; partitions.iterate (i, &partition); ++i) + if (i != j) + add_edge (pg, i, j); + } + + /* Compute partitions we cannot separate and fuse them. */ + num_sccs = graphds_scc (pg, NULL); + for (i = 0; i < num_sccs; ++i) + { + partition_t first; + int j; + for (j = 0; partitions.iterate (j, &first); ++j) + if (pg->vertices[j].component == i) + break; + for (j = j + 1; partitions.iterate (j, &partition); ++j) + if (pg->vertices[j].component == i) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "fusing partitions\n"); + dump_bitmap (dump_file, first->stmts); + dump_bitmap (dump_file, partition->stmts); + fprintf (dump_file, "because they are in the same " + "dependence SCC\n"); + } + partition_merge_into (first, partition); + partitions[j] = NULL; + partition_free (partition); + PGDATA (j)->partition = NULL; + } + } + + /* Now order the remaining nodes in postorder. */ + qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp); + partitions.truncate (0); + for (i = 0; i < pg->n_vertices; ++i) + { + pgdata *data = PGDATA (i); + if (data->partition) + partitions.safe_push (data->partition); + data->reads.release (); + data->writes.release (); + delete data; + } + gcc_assert (partitions.length () == (unsigned)num_sccs); + free_graph (pg); } nbp = partitions.length (); @@ -1619,7 +1635,11 @@ distribute_loop (struct loop *loop, vec<gimple> stmts, dump_rdg_partitions (dump_file, partitions); FOR_EACH_VEC_ELT (partitions, i, partition) - generate_code_for_partition (loop, partition, i < nbp - 1); + { + if (partition_builtin_p (partition)) + (*nb_calls)++; + generate_code_for_partition (loop, partition, i < nbp - 1); + } ldist_done: @@ -1628,8 +1648,7 @@ distribute_loop (struct loop *loop, vec<gimple> stmts, partitions.release (); free_rdg (rdg); - loop_nest.release (); - return nbp; + return nbp - *nb_calls; } /* Distribute all loops in the current function. */ @@ -1659,7 +1678,6 @@ tree_loop_distribution (void) vec<gimple> work_list = vNULL; basic_block *bbs; int num = loop->num; - int nb_generated_loops = 0; unsigned int i; /* If the loop doesn't have a single exit we will fail anyway, @@ -1714,28 +1732,32 @@ tree_loop_distribution (void) out: free (bbs); + int nb_generated_loops = 0; + int nb_generated_calls = 0; + location_t loc = find_loop_location (loop); if (work_list.length () > 0) { if (!cd) { + calculate_dominance_info (CDI_DOMINATORS); calculate_dominance_info (CDI_POST_DOMINATORS); cd = new control_dependences (create_edge_list ()); free_dominance_info (CDI_POST_DOMINATORS); } - nb_generated_loops = distribute_loop (loop, work_list, cd); + nb_generated_loops = distribute_loop (loop, work_list, cd, + &nb_generated_calls); } - if (nb_generated_loops > 0) - changed = true; - - if (dump_file && (dump_flags & TDF_DETAILS)) + if (nb_generated_loops + nb_generated_calls > 0) { - if (nb_generated_loops > 1) - fprintf (dump_file, "Loop %d distributed: split to %d loops.\n", - num, nb_generated_loops); - else - fprintf (dump_file, "Loop %d is the same.\n", num); + changed = true; + dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, + loc, "Loop %d distributed: split to %d loops " + "and %d library calls.\n", + num, nb_generated_loops, nb_generated_calls); } + else if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Loop %d is the same.\n", num); work_list.release (); } @@ -1783,8 +1805,8 @@ const pass_data pass_data_loop_distribution = class pass_loop_distribution : public gimple_opt_pass { public: - pass_loop_distribution(gcc::context *ctxt) - : gimple_opt_pass(pass_data_loop_distribution, ctxt) + pass_loop_distribution (gcc::context *ctxt) + : gimple_opt_pass (pass_data_loop_distribution, ctxt) {} /* opt_pass methods: */ |