summaryrefslogtreecommitdiff
path: root/gcc/expmed.c
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/expmed.c')
-rw-r--r--gcc/expmed.c344
1 files changed, 210 insertions, 134 deletions
diff --git a/gcc/expmed.c b/gcc/expmed.c
index 989f86cbe7a..6c94a90fb84 100644
--- a/gcc/expmed.c
+++ b/gcc/expmed.c
@@ -101,7 +101,6 @@ struct init_expmed_rtl
struct rtx_def mult; rtunion mult_fld1;
struct rtx_def sdiv; rtunion sdiv_fld1;
struct rtx_def udiv; rtunion udiv_fld1;
- struct rtx_def zext;
struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
struct rtx_def smod_32; rtunion smod_32_fld1;
struct rtx_def wide_mult; rtunion wide_mult_fld1;
@@ -112,16 +111,41 @@ struct init_expmed_rtl
struct rtx_def shift_add; rtunion shift_add_fld1;
struct rtx_def shift_sub0; rtunion shift_sub0_fld1;
struct rtx_def shift_sub1; rtunion shift_sub1_fld1;
+ struct rtx_def zext;
+ struct rtx_def trunc;
rtx pow2[MAX_BITS_PER_WORD];
rtx cint[MAX_BITS_PER_WORD];
};
static void
+init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode,
+ enum machine_mode from_mode, bool speed)
+{ /* Record, via set_convert_cost, the set_src_cost of converting a value
+ from FROM_MODE to TO_MODE. ALL holds the scratch rtxes that get costed;
+ SPEED is handed unchanged to set_src_cost and set_convert_cost.
+ NOTE(review): the mode of all->zext / all->trunc is not set here;
+ presumably the caller has already set it to TO_MODE -- confirm. */
+ int to_size, from_size;
+ rtx which;
+
+ /* We're given no information about the true size of a partial integer,
+ only the size of the "full" integer it requires for storage. For
+ comparison purposes here, reduce the bit size by one in that case. */
+ to_size = (GET_MODE_BITSIZE (to_mode)
+ - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT));
+ from_size = (GET_MODE_BITSIZE (from_mode)
+ - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT));
+
+ /* Assume cost of zero-extend and sign-extend is the same.
+ A destination strictly narrower than the source is costed with the
+ trunc rtx; an equal-sized or wider one is costed with the zext rtx. */
+ which = (to_size < from_size ? &all->trunc : &all->zext);
+
+ PUT_MODE (&all->reg, from_mode); /* The reg rtx stands for the value being converted, so give it the source mode. */
+ set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
+}
+
+static void
init_expmed_one_mode (struct init_expmed_rtl *all,
enum machine_mode mode, int speed)
{
int m, n, mode_bitsize;
+ enum machine_mode mode_from;
mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
@@ -139,21 +163,27 @@ init_expmed_one_mode (struct init_expmed_rtl *all,
PUT_MODE (&all->shift_add, mode);
PUT_MODE (&all->shift_sub0, mode);
PUT_MODE (&all->shift_sub1, mode);
+ PUT_MODE (&all->zext, mode);
+ PUT_MODE (&all->trunc, mode);
- add_cost[speed][mode] = set_src_cost (&all->plus, speed);
- neg_cost[speed][mode] = set_src_cost (&all->neg, speed);
- mul_cost[speed][mode] = set_src_cost (&all->mult, speed);
- sdiv_cost[speed][mode] = set_src_cost (&all->sdiv, speed);
- udiv_cost[speed][mode] = set_src_cost (&all->udiv, speed);
+ set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
+ set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
+ set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
+ set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
+ set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));
- sdiv_pow2_cheap[speed][mode] = (set_src_cost (&all->sdiv_32, speed)
- <= 2 * add_cost[speed][mode]);
- smod_pow2_cheap[speed][mode] = (set_src_cost (&all->smod_32, speed)
- <= 4 * add_cost[speed][mode]);
+ set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
+ <= 2 * add_cost (speed, mode)));
+ set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
+ <= 4 * add_cost (speed, mode)));
- shift_cost[speed][mode][0] = 0;
- shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0]
- = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode];
+ set_shift_cost (speed, mode, 0, 0);
+ {
+ int cost = add_cost (speed, mode);
+ set_shiftadd_cost (speed, mode, 0, cost);
+ set_shiftsub0_cost (speed, mode, 0, cost);
+ set_shiftsub1_cost (speed, mode, 0, cost);
+ }
n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
for (m = 1; m < n; m++)
@@ -161,16 +191,21 @@ init_expmed_one_mode (struct init_expmed_rtl *all,
XEXP (&all->shift, 1) = all->cint[m];
XEXP (&all->shift_mult, 1) = all->pow2[m];
- shift_cost[speed][mode][m] = set_src_cost (&all->shift, speed);
- shiftadd_cost[speed][mode][m] = set_src_cost (&all->shift_add, speed);
- shiftsub0_cost[speed][mode][m] = set_src_cost (&all->shift_sub0, speed);
- shiftsub1_cost[speed][mode][m] = set_src_cost (&all->shift_sub1, speed);
+ set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
+ set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
+ set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
+ set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
}
if (SCALAR_INT_MODE_P (mode))
{
- enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
-
+ for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
+ mode_from = (enum machine_mode)(mode_from + 1))
+ init_expmed_one_conv (all, mode, mode_from, speed);
+ }
+ if (GET_MODE_CLASS (mode) == MODE_INT)
+ {
+ enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
if (wider_mode != VOIDmode)
{
PUT_MODE (&all->zext, wider_mode);
@@ -178,10 +213,10 @@ init_expmed_one_mode (struct init_expmed_rtl *all,
PUT_MODE (&all->wide_lshr, wider_mode);
XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
- mul_widen_cost[speed][wider_mode]
- = set_src_cost (&all->wide_mult, speed);
- mul_highpart_cost[speed][mode]
- = set_src_cost (&all->wide_trunc, speed);
+ set_mul_widen_cost (speed, wider_mode,
+ set_src_cost (&all->wide_mult, speed));
+ set_mul_highpart_cost (speed, mode,
+ set_src_cost (&all->wide_trunc, speed));
}
}
}
@@ -262,26 +297,36 @@ init_expmed (void)
XEXP (&all.shift_sub1, 0) = &all.reg;
XEXP (&all.shift_sub1, 1) = &all.shift_mult;
+ PUT_CODE (&all.trunc, TRUNCATE);
+ XEXP (&all.trunc, 0) = &all.reg;
+
for (speed = 0; speed < 2; speed++)
{
crtl->maybe_hot_insn_p = speed;
- zero_cost[speed] = set_src_cost (const0_rtx, speed);
+ set_zero_cost (speed, set_src_cost (const0_rtx, speed));
- for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
- mode != VOIDmode;
- mode = GET_MODE_WIDER_MODE (mode))
+ for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
+ mode = (enum machine_mode)(mode + 1))
init_expmed_one_mode (&all, mode, speed);
- for (mode = GET_CLASS_NARROWEST_MODE (MODE_VECTOR_INT);
- mode != VOIDmode;
- mode = GET_MODE_WIDER_MODE (mode))
- init_expmed_one_mode (&all, mode, speed);
+ if (MIN_MODE_PARTIAL_INT != VOIDmode)
+ for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
+ mode = (enum machine_mode)(mode + 1))
+ init_expmed_one_mode (&all, mode, speed);
+
+ if (MIN_MODE_VECTOR_INT != VOIDmode)
+ for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
+ mode = (enum machine_mode)(mode + 1))
+ init_expmed_one_mode (&all, mode, speed);
}
- if (alg_hash_used_p)
- memset (alg_hash, 0, sizeof (alg_hash));
+ if (alg_hash_used_p ())
+ {
+ struct alg_hash_entry *p = alg_hash_entry_ptr (0);
+ memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
+ }
else
- alg_hash_used_p = true;
+ set_alg_hash_used_p (true);
default_rtl_profile ();
}
@@ -2229,8 +2274,9 @@ expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
&& INTVAL (op1) > 0
&& INTVAL (op1) < GET_MODE_PRECISION (mode)
&& INTVAL (op1) < MAX_BITS_PER_WORD
- && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
- && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
+ && (shift_cost (speed, mode, INTVAL (op1))
+ > INTVAL (op1) * add_cost (speed, mode))
+ && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
{
int i;
for (i = 0; i < INTVAL (op1); i++)
@@ -2381,8 +2427,8 @@ static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
const struct algorithm *, enum mult_variant);
static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
static rtx extract_high_half (enum machine_mode, rtx);
-static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
-static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
+static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
+static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
int, int);
/* Compute and return the best algorithm for multiplying by T.
The algorithm must cost less than cost_limit
@@ -2406,6 +2452,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
enum alg_code cache_alg = alg_zero;
bool speed = optimize_insn_for_speed_p ();
enum machine_mode imode;
+ struct alg_hash_entry *entry_ptr;
/* Indicate that no algorithm is yet found. If no algorithm
is found, this value will be returned and indicate failure. */
@@ -2440,13 +2487,13 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
fail now. */
if (t == 0)
{
- if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
+ if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
return;
else
{
alg_out->ops = 1;
- alg_out->cost.cost = zero_cost[speed];
- alg_out->cost.latency = zero_cost[speed];
+ alg_out->cost.cost = zero_cost (speed);
+ alg_out->cost.latency = zero_cost (speed);
alg_out->op[0] = alg_zero;
return;
}
@@ -2462,19 +2509,20 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
/* See if we already know what to do for T. */
- if (alg_hash[hash_index].t == t
- && alg_hash[hash_index].mode == mode
- && alg_hash[hash_index].mode == mode
- && alg_hash[hash_index].speed == speed
- && alg_hash[hash_index].alg != alg_unknown)
+ entry_ptr = alg_hash_entry_ptr (hash_index);
+ if (entry_ptr->t == t
+ && entry_ptr->mode == mode
+ && entry_ptr->mode == mode
+ && entry_ptr->speed == speed
+ && entry_ptr->alg != alg_unknown)
{
- cache_alg = alg_hash[hash_index].alg;
+ cache_alg = entry_ptr->alg;
if (cache_alg == alg_impossible)
{
/* The cache tells us that it's impossible to synthesize
- multiplication by T within alg_hash[hash_index].cost. */
- if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
+ multiplication by T within entry_ptr->cost. */
+ if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
/* COST_LIMIT is at least as restrictive as the one
recorded in the hash table, in which case we have no
hope of synthesizing a multiplication. Just
@@ -2488,7 +2536,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
}
else
{
- if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
+ if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
/* The cached algorithm shows that this multiplication
requires more cost than COST_LIMIT. Just return. This
way, we don't clobber this cache entry with
@@ -2534,10 +2582,10 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
q = t >> m;
/* The function expand_shift will choose between a shift and
a sequence of additions, so the observed cost is given as
- MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]). */
- op_cost = m * add_cost[speed][mode];
- if (shift_cost[speed][mode][m] < op_cost)
- op_cost = shift_cost[speed][mode][m];
+ MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
+ op_cost = m * add_cost (speed, mode);
+ if (shift_cost (speed, mode, m) < op_cost)
+ op_cost = shift_cost (speed, mode, m);
new_limit.cost = best_cost.cost - op_cost;
new_limit.latency = best_cost.latency - op_cost;
synth_mult (alg_in, q, &new_limit, mode);
@@ -2564,11 +2612,11 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
q = ~(~orig_t >> m);
/* The function expand_shift will choose between a shift
and a sequence of additions, so the observed cost is
- given as MIN (m * add_cost[speed][mode],
- shift_cost[speed][mode][m]). */
- op_cost = m * add_cost[speed][mode];
- if (shift_cost[speed][mode][m] < op_cost)
- op_cost = shift_cost[speed][mode][m];
+ given as MIN (m * add_cost(speed, mode),
+ shift_cost(speed, mode, m)). */
+ op_cost = m * add_cost (speed, mode);
+ if (shift_cost (speed, mode, m) < op_cost)
+ op_cost = shift_cost (speed, mode, m);
new_limit.cost = best_cost.cost - op_cost;
new_limit.latency = best_cost.latency - op_cost;
synth_mult (alg_in, q, &new_limit, mode);
@@ -2610,7 +2658,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
{
/* T ends with ...111. Multiply by (T + 1) and subtract 1. */
- op_cost = add_cost[speed][mode];
+ op_cost = add_cost (speed, mode);
new_limit.cost = best_cost.cost - op_cost;
new_limit.latency = best_cost.latency - op_cost;
synth_mult (alg_in, t + 1, &new_limit, mode);
@@ -2630,7 +2678,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
{
/* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
- op_cost = add_cost[speed][mode];
+ op_cost = add_cost (speed, mode);
new_limit.cost = best_cost.cost - op_cost;
new_limit.latency = best_cost.latency - op_cost;
synth_mult (alg_in, t - 1, &new_limit, mode);
@@ -2652,7 +2700,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
m = exact_log2 (-orig_t + 1);
if (m >= 0 && m < maxm)
{
- op_cost = shiftsub1_cost[speed][mode][m];
+ op_cost = shiftsub1_cost (speed, mode, m);
new_limit.cost = best_cost.cost - op_cost;
new_limit.latency = best_cost.latency - op_cost;
synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
@@ -2699,14 +2747,14 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
equal to its cost, otherwise assume that on superscalar
hardware the shift may be executed concurrently with the
earlier steps in the algorithm. */
- op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
- if (shiftadd_cost[speed][mode][m] < op_cost)
+ op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
+ if (shiftadd_cost (speed, mode, m) < op_cost)
{
- op_cost = shiftadd_cost[speed][mode][m];
+ op_cost = shiftadd_cost (speed, mode, m);
op_latency = op_cost;
}
else
- op_latency = add_cost[speed][mode];
+ op_latency = add_cost (speed, mode);
new_limit.cost = best_cost.cost - op_cost;
new_limit.latency = best_cost.latency - op_latency;
@@ -2738,14 +2786,14 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
equal to its cost, otherwise assume that on superscalar
hardware the shift may be executed concurrently with the
earlier steps in the algorithm. */
- op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
- if (shiftsub0_cost[speed][mode][m] < op_cost)
+ op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
+ if (shiftsub0_cost (speed, mode, m) < op_cost)
{
- op_cost = shiftsub0_cost[speed][mode][m];
+ op_cost = shiftsub0_cost (speed, mode, m);
op_latency = op_cost;
}
else
- op_latency = add_cost[speed][mode];
+ op_latency = add_cost (speed, mode);
new_limit.cost = best_cost.cost - op_cost;
new_limit.latency = best_cost.latency - op_latency;
@@ -2779,7 +2827,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
m = exact_log2 (q);
if (m >= 0 && m < maxm)
{
- op_cost = shiftadd_cost[speed][mode][m];
+ op_cost = shiftadd_cost (speed, mode, m);
new_limit.cost = best_cost.cost - op_cost;
new_limit.latency = best_cost.latency - op_cost;
synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
@@ -2804,7 +2852,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
m = exact_log2 (q);
if (m >= 0 && m < maxm)
{
- op_cost = shiftsub0_cost[speed][mode][m];
+ op_cost = shiftsub0_cost (speed, mode, m);
new_limit.cost = best_cost.cost - op_cost;
new_limit.latency = best_cost.latency - op_cost;
synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
@@ -2833,23 +2881,23 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
we are asked to find an algorithm for T within the same or
lower COST_LIMIT, we can immediately return to the
caller. */
- alg_hash[hash_index].t = t;
- alg_hash[hash_index].mode = mode;
- alg_hash[hash_index].speed = speed;
- alg_hash[hash_index].alg = alg_impossible;
- alg_hash[hash_index].cost = *cost_limit;
+ entry_ptr->t = t;
+ entry_ptr->mode = mode;
+ entry_ptr->speed = speed;
+ entry_ptr->alg = alg_impossible;
+ entry_ptr->cost = *cost_limit;
return;
}
/* Cache the result. */
if (!cache_hit)
{
- alg_hash[hash_index].t = t;
- alg_hash[hash_index].mode = mode;
- alg_hash[hash_index].speed = speed;
- alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
- alg_hash[hash_index].cost.cost = best_cost.cost;
- alg_hash[hash_index].cost.latency = best_cost.latency;
+ entry_ptr->t = t;
+ entry_ptr->mode = mode;
+ entry_ptr->speed = speed;
+ entry_ptr->alg = best_alg->op[best_alg->ops];
+ entry_ptr->cost.cost = best_cost.cost;
+ entry_ptr->cost.latency = best_cost.latency;
}
/* If we are getting a too long sequence for `struct algorithm'
@@ -2895,7 +2943,7 @@ choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
/* Ensure that mult_cost provides a reasonable upper bound.
Any constant multiplication can be performed with less
than 2 * bits additions. */
- op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost[speed][mode];
+ op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
if (mult_cost > op_cost)
mult_cost = op_cost;
@@ -2908,7 +2956,7 @@ choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
`unsigned int' */
if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
{
- op_cost = neg_cost[speed][mode];
+ op_cost = neg_cost(speed, mode);
if (MULT_COST_LESS (&alg->cost, mult_cost))
{
limit.cost = alg->cost.cost - op_cost;
@@ -2928,7 +2976,7 @@ choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
}
/* This proves very useful for division-by-constant. */
- op_cost = add_cost[speed][mode];
+ op_cost = add_cost (speed, mode);
if (MULT_COST_LESS (&alg->cost, mult_cost))
{
limit.cost = alg->cost.cost - op_cost;
@@ -3169,7 +3217,7 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
coeff = INTVAL (scalar_op1);
is_neg = coeff < 0;
}
- else if (CONST_DOUBLE_P (scalar_op1))
+ else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
{
/* If we are multiplying in DImode, it may still be a win
to try to work with shifts and adds. */
@@ -3192,6 +3240,8 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
}
goto skip_synth;
}
+ else
+ goto skip_synth;
}
else
goto skip_synth;
@@ -3211,7 +3261,6 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
/* Attempt to handle multiplication of DImode values by negative
coefficients, by performing the multiplication by a positive
multiplier and then inverting the result. */
- /* ??? How is this not slightly redundant with the neg variant? */
if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
{
/* It's safe to use -coeff even for INT_MIN, as the
@@ -3219,7 +3268,7 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
Exclude cost of op0 from max_cost to match the cost
calculation of the synth_mult. */
max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
- - neg_cost[speed][mode]);
+ - neg_cost(speed, mode));
if (max_cost > 0
&& choose_mult_variant (mode, -coeff, &algorithm,
&variant, max_cost))
@@ -3228,6 +3277,7 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
&algorithm, variant);
return expand_unop (mode, neg_optab, temp, target, 0);
}
+ goto skip_synth;
}
/* Exclude cost of op0 from max_cost to match the cost
@@ -3240,7 +3290,7 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
skip_synth:
/* Expand x*2.0 as x+x. */
- if (GET_CODE (scalar_op1) == CONST_DOUBLE && FLOAT_MODE_P (mode))
+ if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
{
REAL_VALUE_TYPE d;
REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
@@ -3262,6 +3312,24 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
return op0;
}
+/* Return a cost estimate for multiplying a register by the given
+ COEFFicient in the given MODE and SPEED.
+
+ The estimate never exceeds the set_src_cost of a MULT of a scratch
+ register by itself in MODE: that cost is passed to choose_mult_variant
+ as the limit. If choose_mult_variant returns nonzero, the cost.cost
+ field of the algorithm it filled in is returned; otherwise the MULT
+ cost itself is returned. */
+
+int
+mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed)
+{
+ int max_cost;
+ struct algorithm algorithm;
+ enum mult_variant variant;
+
+ rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1); /* Scratch register used only to build the MULT that is costed below. */
+ max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
+ if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
+ return algorithm.cost.cost;
+ else
+ return max_cost;
+}
+
/* Perform a widening multiplication and return an rtx for the result.
MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
TARGET is a suggestion for where to store the result (an rtx).
@@ -3302,7 +3370,7 @@ expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
/* Exclude cost of op0 from max_cost to match the cost
calculation of the synth_mult. */
- max_cost = mul_widen_cost[speed][mode];
+ max_cost = mul_widen_cost (speed, mode);
if (choose_mult_variant (mode, coeff, &algorithm, &variant,
max_cost))
{
@@ -3477,7 +3545,7 @@ expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
return target;
}
-/* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */
+/* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
static rtx
extract_high_half (enum machine_mode mode, rtx op)
@@ -3495,11 +3563,11 @@ extract_high_half (enum machine_mode mode, rtx op)
return convert_modes (mode, wider_mode, op, 0);
}
-/* Like expand_mult_highpart, but only consider using a multiplication
+/* Like expmed_mult_highpart, but only consider using a multiplication
optab. OP1 is an rtx for the constant operand. */
static rtx
-expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
+expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
rtx target, int unsignedp, int max_cost)
{
rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
@@ -3516,7 +3584,7 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
/* Firstly, try using a multiplication insn that only generates the needed
high part of the product, and in the sign flavor of unsignedp. */
- if (mul_highpart_cost[speed][mode] < max_cost)
+ if (mul_highpart_cost (speed, mode) < max_cost)
{
moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
tem = expand_binop (mode, moptab, op0, narrow_op1, target,
@@ -3528,8 +3596,9 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
/* Secondly, same as above, but use sign flavor opposite of unsignedp.
Need to adjust the result after the multiplication. */
if (size - 1 < BITS_PER_WORD
- && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
- + 4 * add_cost[speed][mode] < max_cost))
+ && (mul_highpart_cost (speed, mode)
+ + 2 * shift_cost (speed, mode, size-1)
+ + 4 * add_cost (speed, mode) < max_cost))
{
moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
tem = expand_binop (mode, moptab, op0, narrow_op1, target,
@@ -3543,7 +3612,7 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
/* Try widening multiplication. */
moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
- && mul_widen_cost[speed][wider_mode] < max_cost)
+ && mul_widen_cost (speed, wider_mode) < max_cost)
{
tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
unsignedp, OPTAB_WIDEN);
@@ -3554,7 +3623,8 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
/* Try widening the mode and perform a non-widening multiplication. */
if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
&& size - 1 < BITS_PER_WORD
- && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
+ && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
+ < max_cost))
{
rtx insns, wop0, wop1;
@@ -3581,8 +3651,9 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
&& size - 1 < BITS_PER_WORD
- && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
- + 4 * add_cost[speed][mode] < max_cost))
+ && (mul_widen_cost (speed, wider_mode)
+ + 2 * shift_cost (speed, mode, size-1)
+ + 4 * add_cost (speed, mode) < max_cost))
{
tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
NULL_RTX, ! unsignedp, OPTAB_WIDEN);
@@ -3610,7 +3681,7 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
MAX_COST is the total allowed cost for the expanded RTL. */
static rtx
-expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
+expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
rtx target, int unsignedp, int max_cost)
{
enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
@@ -3633,16 +3704,16 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
mode == word_mode, however all the cost calculations in
synth_mult etc. assume single-word operations. */
if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
- return expand_mult_highpart_optab (mode, op0, op1, target,
+ return expmed_mult_highpart_optab (mode, op0, op1, target,
unsignedp, max_cost);
- extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
+ extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
/* Check whether we try to multiply by a negative constant. */
if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
{
sign_adjust = true;
- extra_cost += add_cost[speed][mode];
+ extra_cost += add_cost (speed, mode);
}
/* See whether shift/add multiplication is cheap enough. */
@@ -3651,7 +3722,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
{
/* See whether the specialized multiplication optabs are
cheaper than the shift/add version. */
- tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
+ tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
alg.cost.cost + extra_cost);
if (tem)
return tem;
@@ -3666,7 +3737,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
return tem;
}
- return expand_mult_highpart_optab (mode, op0, op1, target,
+ return expmed_mult_highpart_optab (mode, op0, op1, target,
unsignedp, max_cost);
}
@@ -3832,7 +3903,8 @@ expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
temp = gen_reg_rtx (mode);
temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
- if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
+ if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
+ > COSTS_N_INSNS (1))
temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
NULL_RTX, 0, OPTAB_LIB_WIDEN);
else
@@ -3940,7 +4012,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
half of the product. Different strategies for generating the product are
- implemented in expand_mult_highpart.
+ implemented in expmed_mult_highpart.
If what we actually want is the remainder, we generate that by another
by-constant multiplication and a subtraction. */
@@ -3990,7 +4062,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
mode for which we can do the operation with a library call. */
/* We might want to refine this now that we have division-by-constant
- optimization. Since expand_mult_highpart tries so many variants, it is
+ optimization. Since expmed_mult_highpart tries so many variants, it is
not straightforward to generalize this. Maybe we should make an array
of possible modes in init_expmed? Save this for GCC 2.7. */
@@ -4035,10 +4107,13 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
/* Only deduct something for a REM if the last divide done was
for a different constant. Then set the constant of the last
divide. */
- max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
+ max_cost = (unsignedp
+ ? udiv_cost (speed, compute_mode)
+ : sdiv_cost (speed, compute_mode));
if (rem_flag && ! (last_div_const != 0 && op1_is_constant
&& INTVAL (op1) == last_div_const))
- max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
+ max_cost -= (mul_cost (speed, compute_mode)
+ + add_cost (speed, compute_mode));
last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
@@ -4152,10 +4227,10 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
goto fail1;
extra_cost
- = (shift_cost[speed][compute_mode][post_shift - 1]
- + shift_cost[speed][compute_mode][1]
- + 2 * add_cost[speed][compute_mode]);
- t1 = expand_mult_highpart (compute_mode, op0,
+ = (shift_cost (speed, compute_mode, post_shift - 1)
+ + shift_cost (speed, compute_mode, 1)
+ + 2 * add_cost (speed, compute_mode));
+ t1 = expmed_mult_highpart (compute_mode, op0,
GEN_INT (ml),
NULL_RTX, 1,
max_cost - extra_cost);
@@ -4185,9 +4260,9 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
(RSHIFT_EXPR, compute_mode, op0,
pre_shift, NULL_RTX, 1);
extra_cost
- = (shift_cost[speed][compute_mode][pre_shift]
- + shift_cost[speed][compute_mode][post_shift]);
- t2 = expand_mult_highpart (compute_mode, t1,
+ = (shift_cost (speed, compute_mode, pre_shift)
+ + shift_cost (speed, compute_mode, post_shift));
+ t2 = expmed_mult_highpart (compute_mode, t1,
GEN_INT (ml),
NULL_RTX, 1,
max_cost - extra_cost);
@@ -4245,8 +4320,9 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
goto fail1;
}
else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
- && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
- : sdiv_pow2_cheap[speed][compute_mode])
+ && (rem_flag
+ ? smod_pow2_cheap (speed, compute_mode)
+ : sdiv_pow2_cheap (speed, compute_mode))
/* We assume that cheap metric is true if the
optab has an expander for this mode. */
&& ((optab_handler ((rem_flag ? smod_optab
@@ -4266,7 +4342,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
return gen_lowpart (mode, remainder);
}
- if (sdiv_pow2_cheap[speed][compute_mode]
+ if (sdiv_pow2_cheap (speed, compute_mode)
&& ((optab_handler (sdiv_optab, compute_mode)
!= CODE_FOR_nothing)
|| (optab_handler (sdivmod_optab, compute_mode)
@@ -4310,10 +4386,10 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
|| size - 1 >= BITS_PER_WORD)
goto fail1;
- extra_cost = (shift_cost[speed][compute_mode][post_shift]
- + shift_cost[speed][compute_mode][size - 1]
- + add_cost[speed][compute_mode]);
- t1 = expand_mult_highpart (compute_mode, op0,
+ extra_cost = (shift_cost (speed, compute_mode, post_shift)
+ + shift_cost (speed, compute_mode, size - 1)
+ + add_cost (speed, compute_mode));
+ t1 = expmed_mult_highpart (compute_mode, op0,
GEN_INT (ml), NULL_RTX, 0,
max_cost - extra_cost);
if (t1 == 0)
@@ -4345,10 +4421,10 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
mlr = gen_int_mode (ml, compute_mode);
- extra_cost = (shift_cost[speed][compute_mode][post_shift]
- + shift_cost[speed][compute_mode][size - 1]
- + 2 * add_cost[speed][compute_mode]);
- t1 = expand_mult_highpart (compute_mode, op0, mlr,
+ extra_cost = (shift_cost (speed, compute_mode, post_shift)
+ + shift_cost (speed, compute_mode, size - 1)
+ + 2 * add_cost (speed, compute_mode));
+ t1 = expmed_mult_highpart (compute_mode, op0, mlr,
NULL_RTX, 0,
max_cost - extra_cost);
if (t1 == 0)
@@ -4433,10 +4509,10 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
size - 1, NULL_RTX, 0);
t2 = expand_binop (compute_mode, xor_optab, op0, t1,
NULL_RTX, 0, OPTAB_WIDEN);
- extra_cost = (shift_cost[speed][compute_mode][post_shift]
- + shift_cost[speed][compute_mode][size - 1]
- + 2 * add_cost[speed][compute_mode]);
- t3 = expand_mult_highpart (compute_mode, t2,
+ extra_cost = (shift_cost (speed, compute_mode, post_shift)
+ + shift_cost (speed, compute_mode, size - 1)
+ + 2 * add_cost (speed, compute_mode));
+ t3 = expmed_mult_highpart (compute_mode, t2,
GEN_INT (ml), NULL_RTX, 1,
max_cost - extra_cost);
if (t3 != 0)