Diffstat (limited to 'gcc/tree-ssa-loop-ivopts.c')
-rw-r--r-- | gcc/tree-ssa-loop-ivopts.c | 459
1 file changed, 244 insertions, 215 deletions
diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
index 81bec044c5f..716d113e75e 100644
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@@ -1494,6 +1494,11 @@ may_be_unaligned_p (tree ref)
   int unsignedp, volatilep;
   unsigned base_align;
 
+  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
+     thus they are not missaligned.  */
+  if (TREE_CODE (ref) == TARGET_MEM_REF)
+    return false;
+
   /* The test below is basically copy of what expr.c:normal_inner_ref
      does to check whether the object must be loaded by parts when
      STRICT_ALIGNMENT is true.  */
@@ -1516,7 +1521,7 @@ may_be_unaligned_p (tree ref)
 static void
 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
 {
-  tree base = unshare_expr (*op_p), step = NULL;
+  tree base = *op_p, step = NULL;
   struct iv *civ;
   struct ifs_ivopts_data ifs_ivopts_data;
 
@@ -1535,17 +1540,63 @@ find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
       && may_be_unaligned_p (base))
     goto fail;
 
-  ifs_ivopts_data.ivopts_data = data;
-  ifs_ivopts_data.stmt = stmt;
-  ifs_ivopts_data.step_p = &step;
-  if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
-      || zero_p (step))
-    goto fail;
+  base = unshare_expr (base);
+
+  if (TREE_CODE (base) == TARGET_MEM_REF)
+    {
+      tree type = build_pointer_type (TREE_TYPE (base));
+      tree astep;
+
+      if (TMR_BASE (base)
+          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
+        {
+          civ = get_iv (data, TMR_BASE (base));
+          if (!civ)
+            goto fail;
+
+          TMR_BASE (base) = civ->base;
+          step = civ->step;
+        }
+      if (TMR_INDEX (base)
+          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
+        {
+          civ = get_iv (data, TMR_INDEX (base));
+          if (!civ)
+            goto fail;
 
-  gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
-  gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
+          TMR_INDEX (base) = civ->base;
+          astep = civ->step;
 
-  base = build_fold_addr_expr (base);
+          if (astep)
+            {
+              if (TMR_STEP (base))
+                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
+
+              if (step)
+                step = fold_build2 (PLUS_EXPR, type, step, astep);
+              else
+                step = astep;
+            }
+        }
+
+      if (zero_p (step))
+        goto fail;
+      base = tree_mem_ref_addr (type, base);
+    }
+  else
+    {
+      ifs_ivopts_data.ivopts_data = data;
+      ifs_ivopts_data.stmt = stmt;
+      ifs_ivopts_data.step_p = &step;
+      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
+          || zero_p (step))
+        goto fail;
+
+      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
+      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
+
+      base = build_fold_addr_expr (base);
+    }
 
   civ = alloc_iv (base, step);
   record_use (data, op_p, civ, stmt, USE_ADDRESS);
@@ -2614,33 +2665,6 @@ constant_multiple_of (tree type, tree top, tree bot)
     }
 }
 
-/* Affine combination of trees.  We keep track of at most MAX_AFF_ELTS elements
-   to make things simpler; this is sufficient in most cases.  */
-
-#define MAX_AFF_ELTS 8
-
-struct affine_tree_combination
-{
-  /* Type of the result of the combination.  */
-  tree type;
-
-  /* Mask modulo that the operations are performed.  */
-  unsigned HOST_WIDE_INT mask;
-
-  /* Constant offset.  */
-  unsigned HOST_WIDE_INT offset;
-
-  /* Number of elements of the combination.  */
-  unsigned n;
-
-  /* Elements and their coefficients.  */
-  tree elts[MAX_AFF_ELTS];
-  unsigned HOST_WIDE_INT coefs[MAX_AFF_ELTS];
-
-  /* Remainder of the expression.  */
-  tree rest;
-};
-
 /* Sets COMB to CST.  */
 
 static void
@@ -2893,6 +2917,19 @@ add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
   return fold_build2 (code, type, expr, elt);
 }
 
+/* Copies the tree elements of COMB to ensure that they are not shared.  */
+
+static void
+unshare_aff_combination (struct affine_tree_combination *comb)
+{
+  unsigned i;
+
+  for (i = 0; i < comb->n; i++)
+    comb->elts[i] = unshare_expr (comb->elts[i]);
+  if (comb->rest)
+    comb->rest = unshare_expr (comb->rest);
+}
+
 /* Makes tree from the affine combination COMB.  */
 
 static tree
@@ -2903,6 +2940,11 @@ aff_combination_to_tree (struct affine_tree_combination *comb)
   unsigned i;
   unsigned HOST_WIDE_INT off, sgn;
 
+  /* Handle the special case produced by get_computation_aff when
+     the type does not fit in HOST_WIDE_INT.  */
+  if (comb->n == 0 && comb->offset == 0)
+    return fold_convert (type, expr);
+
   gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
 
   for (i = 0; i < comb->n; i++)
@@ -2924,49 +2966,14 @@ aff_combination_to_tree (struct affine_tree_combination *comb)
                           comb->mask);
 }
 
-/* Folds X + RATIO * Y in TYPE.  */
-
-static tree
-fold_affine_sum (tree type, tree x, tree y, HOST_WIDE_INT ratio)
-{
-  enum tree_code code;
-  tree cst;
-  struct affine_tree_combination cx, cy;
-
-  if (TYPE_PRECISION (type) > HOST_BITS_PER_WIDE_INT)
-    {
-      if (ratio == 1)
-        return fold_build2 (PLUS_EXPR, type, x, y);
-      if (ratio == -1)
-        return fold_build2 (MINUS_EXPR, type, x, y);
-
-      if (ratio < 0)
-        {
-          code = MINUS_EXPR;
-          ratio = -ratio;
-        }
-      else
-        code = PLUS_EXPR;
-
-      cst = build_int_cst_type (type, ratio);
-      y = fold_build2 (MULT_EXPR, type, y, cst);
-      return fold_build2 (code, type, x, y);
-    }
-
-  tree_to_aff_combination (x, type, &cx);
-  tree_to_aff_combination (y, type, &cy);
-  aff_combination_scale (&cy, ratio);
-  aff_combination_add (&cx, &cy);
-
-  return aff_combination_to_tree (&cx);
-}
-
 /* Determines the expression by that USE is expressed from induction variable
-   CAND at statement AT in LOOP.  */
+   CAND at statement AT in LOOP.  The expression is stored in a decomposed
+   form into AFF.  Returns false if USE cannot be expressed using CAND.  */
 
-static tree
-get_computation_at (struct loop *loop,
-                    struct iv_use *use, struct iv_cand *cand, tree at)
+static bool
+get_computation_aff (struct loop *loop,
                     struct iv_use *use, struct iv_cand *cand, tree at,
                     struct affine_tree_combination *aff)
 {
   tree ubase = use->iv->base;
   tree ustep = use->iv->step;
@@ -2978,11 +2985,12 @@ get_computation_at (struct loop *loop,
   tree ratio;
   unsigned HOST_WIDE_INT ustepi, cstepi;
   HOST_WIDE_INT ratioi;
+  struct affine_tree_combination cbase_aff, expr_aff;
 
   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
     {
       /* We do not have a precision to express the values of use.  */
-      return NULL_TREE;
+      return false;
     }
 
   expr = var_at_stmt (loop, cand, at);
@@ -3020,7 +3028,7 @@ get_computation_at (struct loop *loop,
          /* TODO maybe consider case when ustep divides cstep and the ratio is
             a power of 2 (so that the division is fast to execute)?  We would
            need to be much more careful with overflows etc. then.  */
-         return NULL_TREE;
+         return false;
        }
 
       ratio = build_int_cst_type (uutype, ratioi);
@@ -3029,7 +3037,7 @@ get_computation_at (struct loop *loop,
     {
       ratio = constant_multiple_of (uutype, ustep, cstep);
       if (!ratio)
-       return NULL_TREE;
+       return false;
 
       /* Ratioi is only used to detect special cases when the multiplicative
         factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
@@ -3058,34 +3066,71 @@ get_computation_at (struct loop *loop,
      happen, fold is able to apply the distributive law to obtain this form
      anyway.  */
 
-  if (ratioi == 1)
+  if (TYPE_PRECISION (uutype) > HOST_BITS_PER_WIDE_INT)
     {
-      delta = fold_affine_sum (uutype, ubase, cbase, -1);
-      expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
-    }
-  else if (ratioi == -1)
-    {
-      delta = fold_affine_sum (uutype, ubase, cbase, 1);
-      expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
-    }
-  else
-    {
-      if (ratioi)
-        delta = fold_affine_sum (uutype, ubase, cbase, -ratioi);
+      /* Let's compute in trees and just return the result in AFF.  This case
+         should not be very common, and fold itself is not that bad either,
+         so making the aff. functions more complicated to handle this case
+         is not that urgent.  */
+      if (ratioi == 1)
+        {
+          delta = fold_build2 (MINUS_EXPR, uutype, ubase, cbase);
+          expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
+        }
+      else if (ratioi == -1)
+        {
+          delta = fold_build2 (PLUS_EXPR, uutype, ubase, cbase);
+          expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
+        }
       else
         {
-          delta = fold_build2 (MULT_EXPR, uutype, ratio, cbase);
-          delta = fold_affine_sum (uutype, ubase, delta, -1);
+          delta = fold_build2 (MULT_EXPR, uutype, cbase, ratio);
+          delta = fold_build2 (MINUS_EXPR, uutype, ubase, delta);
+          expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
+          expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
         }
-      expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
-      expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
+
+      aff->type = uutype;
+      aff->n = 0;
+      aff->offset = 0;
+      aff->mask = 0;
+      aff->rest = expr;
+      return true;
     }
 
-  return fold_convert (utype, expr);
+  /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
+     possible to compute ratioi.  */
+  gcc_assert (ratioi);
+
+  tree_to_aff_combination (ubase, uutype, aff);
+  tree_to_aff_combination (cbase, uutype, &cbase_aff);
+  tree_to_aff_combination (expr, uutype, &expr_aff);
+  aff_combination_scale (&cbase_aff, -ratioi);
+  aff_combination_scale (&expr_aff, ratioi);
+  aff_combination_add (aff, &cbase_aff);
+  aff_combination_add (aff, &expr_aff);
+
+  return true;
+}
+
+/* Determines the expression by that USE is expressed from induction variable
+   CAND at statement AT in LOOP.  The computation is unshared.  */
+
+static tree
+get_computation_at (struct loop *loop,
+                    struct iv_use *use, struct iv_cand *cand, tree at)
+{
+  struct affine_tree_combination aff;
+  tree type = TREE_TYPE (use->iv->base);
+
+  if (!get_computation_aff (loop, use, cand, at, &aff))
+    return NULL_TREE;
+  unshare_aff_combination (&aff);
+  return fold_convert (type, aff_combination_to_tree (&aff));
 }
 
 /* Determines the expression by that USE is expressed from induction variable
-   CAND in LOOP.  */
+   CAND in LOOP.  The computation is unshared.  */
 
 static tree
 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
@@ -3157,7 +3202,7 @@ mbc_entry_eq (const void *entry1, const void *entry2)
 
 /* Returns cost of multiplication by constant CST in MODE.  */
 
-static unsigned
+unsigned
 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
 {
   static htab_t costs;
@@ -3195,6 +3240,47 @@ multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
   return cost;
 }
 
+/* Returns true if multiplying by RATIO is allowed in address.  */
+
+bool
+multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
+{
+#define MAX_RATIO 128
+  static sbitmap valid_mult;
+
+  if (!valid_mult)
+    {
+      rtx reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
+      rtx addr;
+      HOST_WIDE_INT i;
+
+      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
+      sbitmap_zero (valid_mult);
+      addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
+      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
+        {
+          XEXP (addr, 1) = gen_int_mode (i, Pmode);
+          if (memory_address_p (Pmode, addr))
+            SET_BIT (valid_mult, i + MAX_RATIO);
+        }
+
+      if (dump_file && (dump_flags & TDF_DETAILS))
+        {
+          fprintf (dump_file, " allowed multipliers:");
+          for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
+            if (TEST_BIT (valid_mult, i + MAX_RATIO))
+              fprintf (dump_file, " %d", (int) i);
+          fprintf (dump_file, "\n");
+          fprintf (dump_file, "\n");
+        }
+    }
+
+  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
+    return false;
+
+  return TEST_BIT (valid_mult, ratio + MAX_RATIO);
+}
+
 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
    variable is omitted.  The created memory accesses MODE.
@@ -3205,8 +3291,7 @@ static unsigned
 get_address_cost (bool symbol_present, bool var_present,
                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
 {
-#define MAX_RATIO 128
-  static sbitmap valid_mult;
+  static bool initialized = false;
   static HOST_WIDE_INT rat, off;
   static HOST_WIDE_INT min_offset, max_offset;
   static unsigned costs[2][2][2][2];
@@ -3218,9 +3303,10 @@ get_address_cost (bool symbol_present, bool var_present,
   unsigned HOST_WIDE_INT mask;
   unsigned bits;
 
-  if (!valid_mult)
+  if (!initialized)
     {
       HOST_WIDE_INT i;
+      initialized = true;
 
       reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
 
@@ -3249,29 +3335,13 @@ get_address_cost (bool symbol_present, bool var_present,
          fprintf (dump_file, " max offset %d\n", (int) max_offset);
        }
 
-      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
-      sbitmap_zero (valid_mult);
       rat = 1;
-      addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
-      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
-        {
-          XEXP (addr, 1) = GEN_INT (i);
-          if (memory_address_p (Pmode, addr))
-            {
-              SET_BIT (valid_mult, i + MAX_RATIO);
-              rat = i;
-            }
-        }
-
-      if (dump_file && (dump_flags & TDF_DETAILS))
-        {
-          fprintf (dump_file, " allowed multipliers:");
-          for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
-            if (TEST_BIT (valid_mult, i + MAX_RATIO))
-              fprintf (dump_file, " %d", (int) i);
-          fprintf (dump_file, "\n");
-          fprintf (dump_file, "\n");
-        }
+      for (i = 2; i <= MAX_RATIO; i++)
+        if (multiplier_allowed_in_address_p (i))
+          {
+            rat = i;
+            break;
+          }
     }
 
   bits = GET_MODE_BITSIZE (Pmode);
@@ -3285,8 +3355,7 @@ get_address_cost (bool symbol_present, bool var_present,
   offset_p = (s_offset != 0
              && min_offset <= s_offset && s_offset <= max_offset);
   ratio_p = (ratio != 1
-            && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
-            && TEST_BIT (valid_mult, ratio + MAX_RATIO));
+            && multiplier_allowed_in_address_p (ratio));
 
   if (ratio != 1 && !ratio_p)
     cost += multiply_by_cost (ratio, Pmode);
@@ -5262,8 +5331,7 @@ rewrite_use_nonlinear_expr (struct ivopts_data *data,
       return;
     }
 
-  comp = unshare_expr (get_computation (data->current_loop,
-                                        use, cand));
+  comp = get_computation (data->current_loop, use, cand);
   switch (TREE_CODE (use->stmt))
     {
     case PHI_NODE:
@@ -5348,78 +5416,60 @@ unshare_and_remove_ssa_names (tree ref)
   return ref;
 }
 
-/* Rewrites base of memory access OP with expression WITH in statement
-   pointed to by BSI.  */
+/* Extract the alias analysis info for the memory reference REF.  There are
+   several ways how this information may be stored and what precisely is
+   its semantics depending on the type of the reference, but there always is
+   somewhere hidden one _DECL node that is used to determine the set of
+   virtual operands for the reference.  The code below deciphers this jungle
+   and extracts this single useful piece of information.  */
 
-static void
-rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
+static tree
+get_ref_tag (tree ref)
 {
-  tree bvar, var, new_name, copy, name;
-  tree orig;
-
-  var = bvar = get_base_address (*op);
+  tree var = get_base_address (ref);
+  tree tag;
 
-  if (!var || TREE_CODE (with) != SSA_NAME)
-    goto do_rewrite;
+  if (!var)
+    return NULL_TREE;
 
-  gcc_assert (TREE_CODE (var) != ALIGN_INDIRECT_REF);
-  gcc_assert (TREE_CODE (var) != MISALIGNED_INDIRECT_REF);
   if (TREE_CODE (var) == INDIRECT_REF)
     var = TREE_OPERAND (var, 0);
   if (TREE_CODE (var) == SSA_NAME)
     {
-      name = var;
+      if (SSA_NAME_PTR_INFO (var))
+        {
+          tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
+          if (tag)
+            return tag;
+        }
+
       var = SSA_NAME_VAR (var);
     }
-  else if (DECL_P (var))
-    name = NULL_TREE;
-  else
-    goto do_rewrite;
-
-  /* We need to add a memory tag for the variable.  But we do not want
-     to add it to the temporary used for the computations, since this leads
-     to problems in redundancy elimination when there are common parts
-     in two computations referring to the different arrays.  So we copy
-     the variable to a new temporary.  */
-  copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
-
-  if (name)
-    new_name = duplicate_ssa_name (name, copy);
-  else
+
+  if (DECL_P (var))
     {
-      tree tag = var_ann (var)->type_mem_tag;
-      tree new_ptr = create_tmp_var (TREE_TYPE (with), "ruatmp");
-      add_referenced_tmp_var (new_ptr);
+      tag = var_ann (var)->type_mem_tag;
       if (tag)
-        var_ann (new_ptr)->type_mem_tag = tag;
-      else
-        add_type_alias (new_ptr, var);
-      new_name = make_ssa_name (new_ptr, copy);
-    }
-
-  TREE_OPERAND (copy, 0) = new_name;
-  bsi_insert_before (bsi, copy, BSI_SAME_STMT);
-  with = new_name;
+        return tag;
 
-do_rewrite:
-
-  orig = NULL_TREE;
-  gcc_assert (TREE_CODE (*op) != ALIGN_INDIRECT_REF);
-  gcc_assert (TREE_CODE (*op) != MISALIGNED_INDIRECT_REF);
-
-  if (TREE_CODE (*op) == INDIRECT_REF)
-    orig = REF_ORIGINAL (*op);
-  if (!orig)
-    orig = unshare_and_remove_ssa_names (*op);
+      return var;
+    }
 
-  *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
+  return NULL_TREE;
+}
 
-  /* Record the original reference, for purposes of alias analysis.  */
-  REF_ORIGINAL (*op) = orig;
+/* Copies the reference information from OLD_REF to NEW_REF.  */
 
-  /* Virtual operands in the original statement may have to be renamed
-     because of the replacement.  */
-  mark_new_vars_to_rename (bsi_stmt (*bsi));
+static void
+copy_ref_info (tree new_ref, tree old_ref)
+{
+  if (TREE_CODE (old_ref) == TARGET_MEM_REF)
+    copy_mem_ref_info (new_ref, old_ref);
+  else
+    {
+      TMR_TAG (new_ref) = get_ref_tag (old_ref);
+      TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
+    }
 }
 
 /* Rewrites USE (address that is an iv) using candidate CAND.  */
@@ -5428,16 +5478,16 @@ static void
 rewrite_use_address (struct ivopts_data *data,
                      struct iv_use *use, struct iv_cand *cand)
 {
-  tree comp = unshare_expr (get_computation (data->current_loop,
-                                             use, cand));
+  struct affine_tree_combination aff;
   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
-  tree stmts;
-  tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
+  tree ref;
 
-  if (stmts)
-    bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
+  get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
+  unshare_aff_combination (&aff);
 
-  rewrite_address_base (&bsi, use->op_p, op);
+  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
+  copy_ref_info (ref, *use->op_p);
+  *use->op_p = ref;
 }
 
 /* Rewrites USE (the condition such that one of the arguments is an iv) using
@@ -5474,7 +5524,7 @@ rewrite_use_compare (struct ivopts_data *data,
 
   /* The induction variable elimination failed; just express the original
      giv.  */
-  comp = unshare_expr (get_computation (data->current_loop, use, cand));
+  comp = get_computation (data->current_loop, use, cand);
 
   cond = *use->op_p;
   op_p = &TREE_OPERAND (cond, 0);
@@ -5630,7 +5680,6 @@ rewrite_use_outer (struct ivopts_data *data,
       value = get_computation_at (data->current_loop,
                                   use, cand, last_stmt (exit->src));
-      value = unshare_expr (value);
       op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
 
       /* If we will preserve the iv anyway and we would need to perform
@@ -5935,25 +5984,5 @@ tree_ssa_iv_optimize (struct loops *loops)
       loop = loop->outer;
     }
 
-  /* FIXME.  IV opts introduces new aliases and call-clobbered
-     variables, which need to be renamed.  However, when we call the
-     renamer, not all statements will be scanned for operands.  In
-     particular, the newly introduced aliases may appear in statements
-     that are considered "unmodified", so the renamer will not get a
-     chance to rename those operands.
-
-     Work around this problem by forcing an operand re-scan on every
-     statement.  This will not be necessary once the new operand
-     scanner is implemented.  */
-  if (need_ssa_update_p ())
-    {
-      basic_block bb;
-      block_stmt_iterator si;
-      FOR_EACH_BB (bb)
-        for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
-          update_stmt (bsi_stmt (si));
-    }
-
-  rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
   tree_ssa_iv_optimize_finalize (loops, &data);
 }
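
Editor's note: the sketch below is not part of the patch.  It is a minimal, self-contained model in plain C of the affine-combination bookkeeping the patch switches to; the names aff_comb, aff_scale and aff_add, the integer element ids IV and CAND, and the simplified representation (no type, no precision mask, no "rest" tree) are invented for illustration only.  It shows the composition that get_computation_aff performs: the use is expressed as ubase + ratio * (cand - cbase) by scaling cbase by -ratio, scaling the candidate's value by ratio, and adding the decompositions, so the original induction variable cancels out.

#include <stdio.h>

#define MAX_ELTS 8

/* Simplified stand-in for the patch's affine_tree_combination: a value is
   kept as  offset + sum coefs[i] * elts[i].  GCC stores trees and carries a
   precision mask and a "rest" expression; plain integers stand in here.  */
struct aff_comb
{
  long long offset;            /* constant part */
  unsigned n;                  /* number of elements */
  int elts[MAX_ELTS];          /* identifiers of "variables" */
  long long coefs[MAX_ELTS];   /* their coefficients */
};

/* comb := comb * scale (cf. aff_combination_scale in the patch).  */
static void
aff_scale (struct aff_comb *comb, long long scale)
{
  unsigned i;

  comb->offset *= scale;
  for (i = 0; i < comb->n; i++)
    comb->coefs[i] *= scale;
}

/* comb1 := comb1 + comb2, merging equal elements and dropping ones whose
   coefficient cancels to zero (cf. aff_combination_add).  */
static void
aff_add (struct aff_comb *comb1, const struct aff_comb *comb2)
{
  unsigned i, j;

  comb1->offset += comb2->offset;
  for (i = 0; i < comb2->n; i++)
    {
      for (j = 0; j < comb1->n; j++)
        if (comb1->elts[j] == comb2->elts[i])
          break;

      if (j < comb1->n)
        {
          comb1->coefs[j] += comb2->coefs[i];
          if (comb1->coefs[j] == 0)
            {
              /* Element cancelled; replace it with the last one.  */
              comb1->n--;
              comb1->elts[j] = comb1->elts[comb1->n];
              comb1->coefs[j] = comb1->coefs[comb1->n];
            }
        }
      else
        {
          comb1->elts[comb1->n] = comb2->elts[i];
          comb1->coefs[comb1->n] = comb2->coefs[i];
          comb1->n++;
        }
    }
}

int
main (void)
{
  enum { IV = 1, CAND = 2 };   /* element ids: original iv, candidate var */

  /* Use base = iv + 4, candidate base = iv, ratio = 1; the candidate's
     current value is the variable CAND.  Compose ubase + ratio * (CAND -
     cbase) the way get_computation_aff does.  */
  struct aff_comb use   = { 4, 1, { IV },   { 1 } };
  struct aff_comb cbase = { 0, 1, { IV },   { 1 } };
  struct aff_comb cand  = { 0, 1, { CAND }, { 1 } };
  long long ratio = 1;
  unsigned i;

  aff_scale (&cbase, -ratio);
  aff_scale (&cand, ratio);
  aff_add (&use, &cbase);
  aff_add (&use, &cand);

  /* The original iv cancels; what remains is "CAND + 4".  */
  printf ("offset = %lld\n", use.offset);
  for (i = 0; i < use.n; i++)
    printf ("  %+lld * elt%d\n", use.coefs[i], use.elts[i]);
  return 0;
}

Running the sketch prints offset = 4 and a single remaining element for the candidate, i.e. the decomposed form; in the patch, rewrite_use_address hands a combination in exactly this shape to create_mem_ref, which builds the TARGET_MEM_REF for the rewritten address.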