diff options
Diffstat (limited to 'gcc/expmed.c')
-rw-r--r-- | gcc/expmed.c | 344 |
1 files changed, 210 insertions, 134 deletions
diff --git a/gcc/expmed.c b/gcc/expmed.c index 989f86cbe7a..6c94a90fb84 100644 --- a/gcc/expmed.c +++ b/gcc/expmed.c @@ -101,7 +101,6 @@ struct init_expmed_rtl struct rtx_def mult; rtunion mult_fld1; struct rtx_def sdiv; rtunion sdiv_fld1; struct rtx_def udiv; rtunion udiv_fld1; - struct rtx_def zext; struct rtx_def sdiv_32; rtunion sdiv_32_fld1; struct rtx_def smod_32; rtunion smod_32_fld1; struct rtx_def wide_mult; rtunion wide_mult_fld1; @@ -112,16 +111,41 @@ struct init_expmed_rtl struct rtx_def shift_add; rtunion shift_add_fld1; struct rtx_def shift_sub0; rtunion shift_sub0_fld1; struct rtx_def shift_sub1; rtunion shift_sub1_fld1; + struct rtx_def zext; + struct rtx_def trunc; rtx pow2[MAX_BITS_PER_WORD]; rtx cint[MAX_BITS_PER_WORD]; }; static void +init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode, + enum machine_mode from_mode, bool speed) +{ + int to_size, from_size; + rtx which; + + /* We're given no information about the true size of a partial integer, + only the size of the "full" integer it requires for storage. For + comparison purposes here, reduce the bit size by one in that case. */ + to_size = (GET_MODE_BITSIZE (to_mode) + - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT)); + from_size = (GET_MODE_BITSIZE (from_mode) + - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT)); + + /* Assume cost of zero-extend and sign-extend is the same. */ + which = (to_size < from_size ? &all->trunc : &all->zext); + + PUT_MODE (&all->reg, from_mode); + set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed)); +} + +static void init_expmed_one_mode (struct init_expmed_rtl *all, enum machine_mode mode, int speed) { int m, n, mode_bitsize; + enum machine_mode mode_from; mode_bitsize = GET_MODE_UNIT_BITSIZE (mode); @@ -139,21 +163,27 @@ init_expmed_one_mode (struct init_expmed_rtl *all, PUT_MODE (&all->shift_add, mode); PUT_MODE (&all->shift_sub0, mode); PUT_MODE (&all->shift_sub1, mode); + PUT_MODE (&all->zext, mode); + PUT_MODE (&all->trunc, mode); - add_cost[speed][mode] = set_src_cost (&all->plus, speed); - neg_cost[speed][mode] = set_src_cost (&all->neg, speed); - mul_cost[speed][mode] = set_src_cost (&all->mult, speed); - sdiv_cost[speed][mode] = set_src_cost (&all->sdiv, speed); - udiv_cost[speed][mode] = set_src_cost (&all->udiv, speed); + set_add_cost (speed, mode, set_src_cost (&all->plus, speed)); + set_neg_cost (speed, mode, set_src_cost (&all->neg, speed)); + set_mul_cost (speed, mode, set_src_cost (&all->mult, speed)); + set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed)); + set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed)); - sdiv_pow2_cheap[speed][mode] = (set_src_cost (&all->sdiv_32, speed) - <= 2 * add_cost[speed][mode]); - smod_pow2_cheap[speed][mode] = (set_src_cost (&all->smod_32, speed) - <= 4 * add_cost[speed][mode]); + set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed) + <= 2 * add_cost (speed, mode))); + set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed) + <= 4 * add_cost (speed, mode))); - shift_cost[speed][mode][0] = 0; - shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0] - = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode]; + set_shift_cost (speed, mode, 0, 0); + { + int cost = add_cost (speed, mode); + set_shiftadd_cost (speed, mode, 0, cost); + set_shiftsub0_cost (speed, mode, 0, cost); + set_shiftsub1_cost (speed, mode, 0, cost); + } n = MIN (MAX_BITS_PER_WORD, mode_bitsize); for (m = 1; m < n; m++) @@ -161,16 +191,21 @@ init_expmed_one_mode (struct init_expmed_rtl *all, XEXP (&all->shift, 1) = all->cint[m]; XEXP (&all->shift_mult, 1) = all->pow2[m]; - shift_cost[speed][mode][m] = set_src_cost (&all->shift, speed); - shiftadd_cost[speed][mode][m] = set_src_cost (&all->shift_add, speed); - shiftsub0_cost[speed][mode][m] = set_src_cost (&all->shift_sub0, speed); - shiftsub1_cost[speed][mode][m] = set_src_cost (&all->shift_sub1, speed); + set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed)); + set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed)); + set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed)); + set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed)); } if (SCALAR_INT_MODE_P (mode)) { - enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode); - + for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT; + mode_from = (enum machine_mode)(mode_from + 1)) + init_expmed_one_conv (all, mode, mode_from, speed); + } + if (GET_MODE_CLASS (mode) == MODE_INT) + { + enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode); if (wider_mode != VOIDmode) { PUT_MODE (&all->zext, wider_mode); @@ -178,10 +213,10 @@ init_expmed_one_mode (struct init_expmed_rtl *all, PUT_MODE (&all->wide_lshr, wider_mode); XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize); - mul_widen_cost[speed][wider_mode] - = set_src_cost (&all->wide_mult, speed); - mul_highpart_cost[speed][mode] - = set_src_cost (&all->wide_trunc, speed); + set_mul_widen_cost (speed, wider_mode, + set_src_cost (&all->wide_mult, speed)); + set_mul_highpart_cost (speed, mode, + set_src_cost (&all->wide_trunc, speed)); } } } @@ -262,26 +297,36 @@ init_expmed (void) XEXP (&all.shift_sub1, 0) = &all.reg; XEXP (&all.shift_sub1, 1) = &all.shift_mult; + PUT_CODE (&all.trunc, TRUNCATE); + XEXP (&all.trunc, 0) = &all.reg; + for (speed = 0; speed < 2; speed++) { crtl->maybe_hot_insn_p = speed; - zero_cost[speed] = set_src_cost (const0_rtx, speed); + set_zero_cost (speed, set_src_cost (const0_rtx, speed)); - for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); - mode != VOIDmode; - mode = GET_MODE_WIDER_MODE (mode)) + for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT; + mode = (enum machine_mode)(mode + 1)) init_expmed_one_mode (&all, mode, speed); - for (mode = GET_CLASS_NARROWEST_MODE (MODE_VECTOR_INT); - mode != VOIDmode; - mode = GET_MODE_WIDER_MODE (mode)) - init_expmed_one_mode (&all, mode, speed); + if (MIN_MODE_PARTIAL_INT != VOIDmode) + for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT; + mode = (enum machine_mode)(mode + 1)) + init_expmed_one_mode (&all, mode, speed); + + if (MIN_MODE_VECTOR_INT != VOIDmode) + for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT; + mode = (enum machine_mode)(mode + 1)) + init_expmed_one_mode (&all, mode, speed); } - if (alg_hash_used_p) - memset (alg_hash, 0, sizeof (alg_hash)); + if (alg_hash_used_p ()) + { + struct alg_hash_entry *p = alg_hash_entry_ptr (0); + memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES); + } else - alg_hash_used_p = true; + set_alg_hash_used_p (true); default_rtl_profile (); } @@ -2229,8 +2274,9 @@ expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted, && INTVAL (op1) > 0 && INTVAL (op1) < GET_MODE_PRECISION (mode) && INTVAL (op1) < MAX_BITS_PER_WORD - && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode] - && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST) + && (shift_cost (speed, mode, INTVAL (op1)) + > INTVAL (op1) * add_cost (speed, mode)) + && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST) { int i; for (i = 0; i < INTVAL (op1); i++) @@ -2381,8 +2427,8 @@ static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx, const struct algorithm *, enum mult_variant); static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int); static rtx extract_high_half (enum machine_mode, rtx); -static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int); -static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx, +static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int); +static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx, int, int); /* Compute and return the best algorithm for multiplying by T. The algorithm must cost less than cost_limit @@ -2406,6 +2452,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, enum alg_code cache_alg = alg_zero; bool speed = optimize_insn_for_speed_p (); enum machine_mode imode; + struct alg_hash_entry *entry_ptr; /* Indicate that no algorithm is yet found. If no algorithm is found, this value will be returned and indicate failure. */ @@ -2440,13 +2487,13 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, fail now. */ if (t == 0) { - if (MULT_COST_LESS (cost_limit, zero_cost[speed])) + if (MULT_COST_LESS (cost_limit, zero_cost (speed))) return; else { alg_out->ops = 1; - alg_out->cost.cost = zero_cost[speed]; - alg_out->cost.latency = zero_cost[speed]; + alg_out->cost.cost = zero_cost (speed); + alg_out->cost.latency = zero_cost (speed); alg_out->op[0] = alg_zero; return; } @@ -2462,19 +2509,20 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES; /* See if we already know what to do for T. */ - if (alg_hash[hash_index].t == t - && alg_hash[hash_index].mode == mode - && alg_hash[hash_index].mode == mode - && alg_hash[hash_index].speed == speed - && alg_hash[hash_index].alg != alg_unknown) + entry_ptr = alg_hash_entry_ptr (hash_index); + if (entry_ptr->t == t + && entry_ptr->mode == mode + && entry_ptr->mode == mode + && entry_ptr->speed == speed + && entry_ptr->alg != alg_unknown) { - cache_alg = alg_hash[hash_index].alg; + cache_alg = entry_ptr->alg; if (cache_alg == alg_impossible) { /* The cache tells us that it's impossible to synthesize - multiplication by T within alg_hash[hash_index].cost. */ - if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit)) + multiplication by T within entry_ptr->cost. */ + if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit)) /* COST_LIMIT is at least as restrictive as the one recorded in the hash table, in which case we have no hope of synthesizing a multiplication. Just @@ -2488,7 +2536,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, } else { - if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost)) + if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost)) /* The cached algorithm shows that this multiplication requires more cost than COST_LIMIT. Just return. This way, we don't clobber this cache entry with @@ -2534,10 +2582,10 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, q = t >> m; /* The function expand_shift will choose between a shift and a sequence of additions, so the observed cost is given as - MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]). */ - op_cost = m * add_cost[speed][mode]; - if (shift_cost[speed][mode][m] < op_cost) - op_cost = shift_cost[speed][mode][m]; + MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */ + op_cost = m * add_cost (speed, mode); + if (shift_cost (speed, mode, m) < op_cost) + op_cost = shift_cost (speed, mode, m); new_limit.cost = best_cost.cost - op_cost; new_limit.latency = best_cost.latency - op_cost; synth_mult (alg_in, q, &new_limit, mode); @@ -2564,11 +2612,11 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, q = ~(~orig_t >> m); /* The function expand_shift will choose between a shift and a sequence of additions, so the observed cost is - given as MIN (m * add_cost[speed][mode], - shift_cost[speed][mode][m]). */ - op_cost = m * add_cost[speed][mode]; - if (shift_cost[speed][mode][m] < op_cost) - op_cost = shift_cost[speed][mode][m]; + given as MIN (m * add_cost(speed, mode), + shift_cost(speed, mode, m)). */ + op_cost = m * add_cost (speed, mode); + if (shift_cost (speed, mode, m) < op_cost) + op_cost = shift_cost (speed, mode, m); new_limit.cost = best_cost.cost - op_cost; new_limit.latency = best_cost.latency - op_cost; synth_mult (alg_in, q, &new_limit, mode); @@ -2610,7 +2658,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, { /* T ends with ...111. Multiply by (T + 1) and subtract 1. */ - op_cost = add_cost[speed][mode]; + op_cost = add_cost (speed, mode); new_limit.cost = best_cost.cost - op_cost; new_limit.latency = best_cost.latency - op_cost; synth_mult (alg_in, t + 1, &new_limit, mode); @@ -2630,7 +2678,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, { /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */ - op_cost = add_cost[speed][mode]; + op_cost = add_cost (speed, mode); new_limit.cost = best_cost.cost - op_cost; new_limit.latency = best_cost.latency - op_cost; synth_mult (alg_in, t - 1, &new_limit, mode); @@ -2652,7 +2700,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, m = exact_log2 (-orig_t + 1); if (m >= 0 && m < maxm) { - op_cost = shiftsub1_cost[speed][mode][m]; + op_cost = shiftsub1_cost (speed, mode, m); new_limit.cost = best_cost.cost - op_cost; new_limit.latency = best_cost.latency - op_cost; synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, @@ -2699,14 +2747,14 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, equal to its cost, otherwise assume that on superscalar hardware the shift may be executed concurrently with the earlier steps in the algorithm. */ - op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m]; - if (shiftadd_cost[speed][mode][m] < op_cost) + op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); + if (shiftadd_cost (speed, mode, m) < op_cost) { - op_cost = shiftadd_cost[speed][mode][m]; + op_cost = shiftadd_cost (speed, mode, m); op_latency = op_cost; } else - op_latency = add_cost[speed][mode]; + op_latency = add_cost (speed, mode); new_limit.cost = best_cost.cost - op_cost; new_limit.latency = best_cost.latency - op_latency; @@ -2738,14 +2786,14 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, equal to it's cost, otherwise assume that on superscalar hardware the shift may be executed concurrently with the earlier steps in the algorithm. */ - op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m]; - if (shiftsub0_cost[speed][mode][m] < op_cost) + op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); + if (shiftsub0_cost (speed, mode, m) < op_cost) { - op_cost = shiftsub0_cost[speed][mode][m]; + op_cost = shiftsub0_cost (speed, mode, m); op_latency = op_cost; } else - op_latency = add_cost[speed][mode]; + op_latency = add_cost (speed, mode); new_limit.cost = best_cost.cost - op_cost; new_limit.latency = best_cost.latency - op_latency; @@ -2779,7 +2827,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, m = exact_log2 (q); if (m >= 0 && m < maxm) { - op_cost = shiftadd_cost[speed][mode][m]; + op_cost = shiftadd_cost (speed, mode, m); new_limit.cost = best_cost.cost - op_cost; new_limit.latency = best_cost.latency - op_cost; synth_mult (alg_in, (t - 1) >> m, &new_limit, mode); @@ -2804,7 +2852,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, m = exact_log2 (q); if (m >= 0 && m < maxm) { - op_cost = shiftsub0_cost[speed][mode][m]; + op_cost = shiftsub0_cost (speed, mode, m); new_limit.cost = best_cost.cost - op_cost; new_limit.latency = best_cost.latency - op_cost; synth_mult (alg_in, (t + 1) >> m, &new_limit, mode); @@ -2833,23 +2881,23 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, we are asked to find an algorithm for T within the same or lower COST_LIMIT, we can immediately return to the caller. */ - alg_hash[hash_index].t = t; - alg_hash[hash_index].mode = mode; - alg_hash[hash_index].speed = speed; - alg_hash[hash_index].alg = alg_impossible; - alg_hash[hash_index].cost = *cost_limit; + entry_ptr->t = t; + entry_ptr->mode = mode; + entry_ptr->speed = speed; + entry_ptr->alg = alg_impossible; + entry_ptr->cost = *cost_limit; return; } /* Cache the result. */ if (!cache_hit) { - alg_hash[hash_index].t = t; - alg_hash[hash_index].mode = mode; - alg_hash[hash_index].speed = speed; - alg_hash[hash_index].alg = best_alg->op[best_alg->ops]; - alg_hash[hash_index].cost.cost = best_cost.cost; - alg_hash[hash_index].cost.latency = best_cost.latency; + entry_ptr->t = t; + entry_ptr->mode = mode; + entry_ptr->speed = speed; + entry_ptr->alg = best_alg->op[best_alg->ops]; + entry_ptr->cost.cost = best_cost.cost; + entry_ptr->cost.latency = best_cost.latency; } /* If we are getting a too long sequence for `struct algorithm' @@ -2895,7 +2943,7 @@ choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val, /* Ensure that mult_cost provides a reasonable upper bound. Any constant multiplication can be performed with less than 2 * bits additions. */ - op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost[speed][mode]; + op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode); if (mult_cost > op_cost) mult_cost = op_cost; @@ -2908,7 +2956,7 @@ choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val, `unsigned int' */ if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode)) { - op_cost = neg_cost[speed][mode]; + op_cost = neg_cost(speed, mode); if (MULT_COST_LESS (&alg->cost, mult_cost)) { limit.cost = alg->cost.cost - op_cost; @@ -2928,7 +2976,7 @@ choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val, } /* This proves very useful for division-by-constant. */ - op_cost = add_cost[speed][mode]; + op_cost = add_cost (speed, mode); if (MULT_COST_LESS (&alg->cost, mult_cost)) { limit.cost = alg->cost.cost - op_cost; @@ -3169,7 +3217,7 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target, coeff = INTVAL (scalar_op1); is_neg = coeff < 0; } - else if (CONST_DOUBLE_P (scalar_op1)) + else if (CONST_DOUBLE_AS_INT_P (scalar_op1)) { /* If we are multiplying in DImode, it may still be a win to try to work with shifts and adds. */ @@ -3192,6 +3240,8 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target, } goto skip_synth; } + else + goto skip_synth; } else goto skip_synth; @@ -3211,7 +3261,6 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target, /* Attempt to handle multiplication of DImode values by negative coefficients, by performing the multiplication by a positive multiplier and then inverting the result. */ - /* ??? How is this not slightly redundant with the neg variant? */ if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT) { /* Its safe to use -coeff even for INT_MIN, as the @@ -3219,7 +3268,7 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target, Exclude cost of op0 from max_cost to match the cost calculation of the synth_mult. */ max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed) - - neg_cost[speed][mode]); + - neg_cost(speed, mode)); if (max_cost > 0 && choose_mult_variant (mode, -coeff, &algorithm, &variant, max_cost)) @@ -3228,6 +3277,7 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target, &algorithm, variant); return expand_unop (mode, neg_optab, temp, target, 0); } + goto skip_synth; } /* Exclude cost of op0 from max_cost to match the cost @@ -3240,7 +3290,7 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target, skip_synth: /* Expand x*2.0 as x+x. */ - if (GET_CODE (scalar_op1) == CONST_DOUBLE && FLOAT_MODE_P (mode)) + if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)) { REAL_VALUE_TYPE d; REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1); @@ -3262,6 +3312,24 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target, return op0; } +/* Return a cost estimate for multiplying a register by the given + COEFFicient in the given MODE and SPEED. */ + +int +mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed) +{ + int max_cost; + struct algorithm algorithm; + enum mult_variant variant; + + rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1); + max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed); + if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost)) + return algorithm.cost.cost; + else + return max_cost; +} + /* Perform a widening multiplication and return an rtx for the result. MODE is mode of value; OP0 and OP1 are what to multiply (rtx's); TARGET is a suggestion for where to store the result (an rtx). @@ -3302,7 +3370,7 @@ expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target, /* Exclude cost of op0 from max_cost to match the cost calculation of the synth_mult. */ - max_cost = mul_widen_cost[speed][mode]; + max_cost = mul_widen_cost (speed, mode); if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost)) { @@ -3477,7 +3545,7 @@ expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0, return target; } -/* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */ +/* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */ static rtx extract_high_half (enum machine_mode mode, rtx op) @@ -3495,11 +3563,11 @@ extract_high_half (enum machine_mode mode, rtx op) return convert_modes (mode, wider_mode, op, 0); } -/* Like expand_mult_highpart, but only consider using a multiplication +/* Like expmed_mult_highpart, but only consider using a multiplication optab. OP1 is an rtx for the constant operand. */ static rtx -expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, +expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, rtx target, int unsignedp, int max_cost) { rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode); @@ -3516,7 +3584,7 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, /* Firstly, try using a multiplication insn that only generates the needed high part of the product, and in the sign flavor of unsignedp. */ - if (mul_highpart_cost[speed][mode] < max_cost) + if (mul_highpart_cost (speed, mode) < max_cost) { moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab; tem = expand_binop (mode, moptab, op0, narrow_op1, target, @@ -3528,8 +3596,9 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, /* Secondly, same as above, but use sign flavor opposite of unsignedp. Need to adjust the result after the multiplication. */ if (size - 1 < BITS_PER_WORD - && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1] - + 4 * add_cost[speed][mode] < max_cost)) + && (mul_highpart_cost (speed, mode) + + 2 * shift_cost (speed, mode, size-1) + + 4 * add_cost (speed, mode) < max_cost)) { moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab; tem = expand_binop (mode, moptab, op0, narrow_op1, target, @@ -3543,7 +3612,7 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, /* Try widening multiplication. */ moptab = unsignedp ? umul_widen_optab : smul_widen_optab; if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing - && mul_widen_cost[speed][wider_mode] < max_cost) + && mul_widen_cost (speed, wider_mode) < max_cost) { tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0, unsignedp, OPTAB_WIDEN); @@ -3554,7 +3623,8 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, /* Try widening the mode and perform a non-widening multiplication. */ if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing && size - 1 < BITS_PER_WORD - && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost) + && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1) + < max_cost)) { rtx insns, wop0, wop1; @@ -3581,8 +3651,9 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, moptab = unsignedp ? smul_widen_optab : umul_widen_optab; if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing && size - 1 < BITS_PER_WORD - && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1] - + 4 * add_cost[speed][mode] < max_cost)) + && (mul_widen_cost (speed, wider_mode) + + 2 * shift_cost (speed, mode, size-1) + + 4 * add_cost (speed, mode) < max_cost)) { tem = expand_binop (wider_mode, moptab, op0, narrow_op1, NULL_RTX, ! unsignedp, OPTAB_WIDEN); @@ -3610,7 +3681,7 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, MAX_COST is the total allowed cost for the expanded RTL. */ static rtx -expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1, +expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1, rtx target, int unsignedp, int max_cost) { enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode); @@ -3633,16 +3704,16 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1, mode == word_mode, however all the cost calculations in synth_mult etc. assume single-word operations. */ if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD) - return expand_mult_highpart_optab (mode, op0, op1, target, + return expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp, max_cost); - extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1]; + extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1); /* Check whether we try to multiply by a negative constant. */ if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1)) { sign_adjust = true; - extra_cost += add_cost[speed][mode]; + extra_cost += add_cost (speed, mode); } /* See whether shift/add multiplication is cheap enough. */ @@ -3651,7 +3722,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1, { /* See whether the specialized multiplication optabs are cheaper than the shift/add version. */ - tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp, + tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp, alg.cost.cost + extra_cost); if (tem) return tem; @@ -3666,7 +3737,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1, return tem; } - return expand_mult_highpart_optab (mode, op0, op1, target, + return expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp, max_cost); } @@ -3832,7 +3903,8 @@ expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d) temp = gen_reg_rtx (mode); temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1); - if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1)) + if (shift_cost (optimize_insn_for_speed_p (), mode, ushift) + > COSTS_N_INSNS (1)) temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1), NULL_RTX, 0, OPTAB_LIB_WIDEN); else @@ -3940,7 +4012,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, In all cases but EXACT_DIV_EXPR, this multiplication requires the upper half of the product. Different strategies for generating the product are - implemented in expand_mult_highpart. + implemented in expmed_mult_highpart. If what we actually want is the remainder, we generate that by another by-constant multiplication and a subtraction. */ @@ -3990,7 +4062,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, mode for which we can do the operation with a library call. */ /* We might want to refine this now that we have division-by-constant - optimization. Since expand_mult_highpart tries so many variants, it is + optimization. Since expmed_mult_highpart tries so many variants, it is not straightforward to generalize this. Maybe we should make an array of possible modes in init_expmed? Save this for GCC 2.7. */ @@ -4035,10 +4107,13 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, /* Only deduct something for a REM if the last divide done was for a different constant. Then set the constant of the last divide. */ - max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode]; + max_cost = (unsignedp + ? udiv_cost (speed, compute_mode) + : sdiv_cost (speed, compute_mode)); if (rem_flag && ! (last_div_const != 0 && op1_is_constant && INTVAL (op1) == last_div_const)) - max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode]; + max_cost -= (mul_cost (speed, compute_mode) + + add_cost (speed, compute_mode)); last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0; @@ -4152,10 +4227,10 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, goto fail1; extra_cost - = (shift_cost[speed][compute_mode][post_shift - 1] - + shift_cost[speed][compute_mode][1] - + 2 * add_cost[speed][compute_mode]); - t1 = expand_mult_highpart (compute_mode, op0, + = (shift_cost (speed, compute_mode, post_shift - 1) + + shift_cost (speed, compute_mode, 1) + + 2 * add_cost (speed, compute_mode)); + t1 = expmed_mult_highpart (compute_mode, op0, GEN_INT (ml), NULL_RTX, 1, max_cost - extra_cost); @@ -4185,9 +4260,9 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, (RSHIFT_EXPR, compute_mode, op0, pre_shift, NULL_RTX, 1); extra_cost - = (shift_cost[speed][compute_mode][pre_shift] - + shift_cost[speed][compute_mode][post_shift]); - t2 = expand_mult_highpart (compute_mode, t1, + = (shift_cost (speed, compute_mode, pre_shift) + + shift_cost (speed, compute_mode, post_shift)); + t2 = expmed_mult_highpart (compute_mode, t1, GEN_INT (ml), NULL_RTX, 1, max_cost - extra_cost); @@ -4245,8 +4320,9 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, goto fail1; } else if (EXACT_POWER_OF_2_OR_ZERO_P (d) - && (rem_flag ? smod_pow2_cheap[speed][compute_mode] - : sdiv_pow2_cheap[speed][compute_mode]) + && (rem_flag + ? smod_pow2_cheap (speed, compute_mode) + : sdiv_pow2_cheap (speed, compute_mode)) /* We assume that cheap metric is true if the optab has an expander for this mode. */ && ((optab_handler ((rem_flag ? smod_optab @@ -4266,7 +4342,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, return gen_lowpart (mode, remainder); } - if (sdiv_pow2_cheap[speed][compute_mode] + if (sdiv_pow2_cheap (speed, compute_mode) && ((optab_handler (sdiv_optab, compute_mode) != CODE_FOR_nothing) || (optab_handler (sdivmod_optab, compute_mode) @@ -4310,10 +4386,10 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, || size - 1 >= BITS_PER_WORD) goto fail1; - extra_cost = (shift_cost[speed][compute_mode][post_shift] - + shift_cost[speed][compute_mode][size - 1] - + add_cost[speed][compute_mode]); - t1 = expand_mult_highpart (compute_mode, op0, + extra_cost = (shift_cost (speed, compute_mode, post_shift) + + shift_cost (speed, compute_mode, size - 1) + + add_cost (speed, compute_mode)); + t1 = expmed_mult_highpart (compute_mode, op0, GEN_INT (ml), NULL_RTX, 0, max_cost - extra_cost); if (t1 == 0) @@ -4345,10 +4421,10 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1); mlr = gen_int_mode (ml, compute_mode); - extra_cost = (shift_cost[speed][compute_mode][post_shift] - + shift_cost[speed][compute_mode][size - 1] - + 2 * add_cost[speed][compute_mode]); - t1 = expand_mult_highpart (compute_mode, op0, mlr, + extra_cost = (shift_cost (speed, compute_mode, post_shift) + + shift_cost (speed, compute_mode, size - 1) + + 2 * add_cost (speed, compute_mode)); + t1 = expmed_mult_highpart (compute_mode, op0, mlr, NULL_RTX, 0, max_cost - extra_cost); if (t1 == 0) @@ -4433,10 +4509,10 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, size - 1, NULL_RTX, 0); t2 = expand_binop (compute_mode, xor_optab, op0, t1, NULL_RTX, 0, OPTAB_WIDEN); - extra_cost = (shift_cost[speed][compute_mode][post_shift] - + shift_cost[speed][compute_mode][size - 1] - + 2 * add_cost[speed][compute_mode]); - t3 = expand_mult_highpart (compute_mode, t2, + extra_cost = (shift_cost (speed, compute_mode, post_shift) + + shift_cost (speed, compute_mode, size - 1) + + 2 * add_cost (speed, compute_mode)); + t3 = expmed_mult_highpart (compute_mode, t2, GEN_INT (ml), NULL_RTX, 1, max_cost - extra_cost); if (t3 != 0) |