summaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config')
-rw-r--r-- gcc/config/rs6000/rs6000.c | 105
-rw-r--r-- gcc/config/rs6000/rs6000.md | 1
2 files changed, 49 insertions, 57 deletions
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 4cd6a1b58cd..c589118a797 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -32769,29 +32769,6 @@ rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
emit_move_insn (target, dst);
}
-/* Generate a FMSUB instruction: dst = fma(m1, m2, -a). */
-
-static void
-rs6000_emit_msub (rtx target, rtx m1, rtx m2, rtx a)
-{
- machine_mode mode = GET_MODE (target);
- rtx dst;
-
- /* Altivec does not support fms directly;
- generate in terms of fma in that case. */
- if (optab_handler (fms_optab, mode) != CODE_FOR_nothing)
- dst = expand_ternary_op (mode, fms_optab, m1, m2, a, target, 0);
- else
- {
- a = expand_unop (mode, neg_optab, a, NULL_RTX, 0);
- dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
- }
- gcc_assert (dst != NULL);
-
- if (dst != target)
- emit_move_insn (target, dst);
-}
-
/* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
static void
@@ -32890,15 +32867,16 @@ rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
}
-/* Newton-Raphson approximation of single/double-precision floating point
- rsqrt. Assumes no trapping math and finite arguments. */
+/* Goldschmidt's Algorithm for single/double-precision floating point
+ sqrt and rsqrt. Assumes no trapping math and finite arguments. */
void
rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
{
machine_mode mode = GET_MODE (src);
- rtx x0 = gen_reg_rtx (mode);
- rtx y = gen_reg_rtx (mode);
+ rtx e = gen_reg_rtx (mode);
+ rtx g = gen_reg_rtx (mode);
+ rtx h = gen_reg_rtx (mode);
/* Low precision estimates guarantee 5 bits of accuracy. High
precision estimates guarantee 14 bits of accuracy. SFmode
@@ -32909,55 +32887,68 @@ rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
if (mode == DFmode || mode == V2DFmode)
passes++;
- REAL_VALUE_TYPE dconst3_2;
int i;
- rtx halfthree;
+ rtx mhalf;
enum insn_code code = optab_handler (smul_optab, mode);
insn_gen_fn gen_mul = GEN_FCN (code);
gcc_assert (code != CODE_FOR_nothing);
- /* Load up the constant 1.5 either as a scalar, or as a vector. */
- real_from_integer (&dconst3_2, VOIDmode, 3, SIGNED);
- SET_REAL_EXP (&dconst3_2, REAL_EXP (&dconst3_2) - 1);
+ mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
- halfthree = rs6000_load_constant_and_splat (mode, dconst3_2);
-
- /* x0 = rsqrt estimate */
- emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
- UNSPEC_RSQRT)));
+ /* e = rsqrt estimate */
+ emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
+ UNSPEC_RSQRT)));
/* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
if (!recip)
{
rtx zero = force_reg (mode, CONST0_RTX (mode));
- rtx target = emit_conditional_move (x0, GT, src, zero, mode,
- x0, zero, mode, 0);
- if (target != x0)
- emit_move_insn (x0, target);
+ rtx target = emit_conditional_move (e, GT, src, zero, mode,
+ e, zero, mode, 0);
+ if (target != e)
+ emit_move_insn (e, target);
}
- /* y = 0.5 * src = 1.5 * src - src -> fewer constants */
- rs6000_emit_msub (y, src, halfthree, src);
+ /* g = sqrt estimate. */
+ emit_insn (gen_mul (g, e, src));
+ /* h = 1/(2*sqrt) estimate. */
+ emit_insn (gen_mul (h, e, mhalf));
- for (i = 0; i < passes; i++)
+ if (recip)
{
- rtx x1 = gen_reg_rtx (mode);
- rtx u = gen_reg_rtx (mode);
- rtx v = gen_reg_rtx (mode);
+ if (passes == 1)
+ {
+ rtx t = gen_reg_rtx (mode);
+ rs6000_emit_nmsub (t, g, h, mhalf);
+ /* Apply correction directly to the rsqrt (1/sqrt) estimate. */
+ rs6000_emit_madd (dst, e, t, e);
+ }
+ else
+ {
+ for (i = 0; i < passes; i++)
+ {
+ rtx t1 = gen_reg_rtx (mode);
+ rtx g1 = gen_reg_rtx (mode);
+ rtx h1 = gen_reg_rtx (mode);
- /* x1 = x0 * (1.5 - y * (x0 * x0)) */
- emit_insn (gen_mul (u, x0, x0));
- rs6000_emit_nmsub (v, y, u, halfthree);
- emit_insn (gen_mul (x1, x0, v));
- x0 = x1;
- }
+ rs6000_emit_nmsub (t1, g, h, mhalf);
+ rs6000_emit_madd (g1, g, t1, g);
+ rs6000_emit_madd (h1, h, t1, h);
- /* If not reciprocal, multiply by src to produce sqrt. */
- if (!recip)
- emit_insn (gen_mul (dst, src, x0));
+ g = g1;
+ h = h1;
+ }
+ /* h approximates 1/(2*sqrt), so h + h yields rsqrt (1/sqrt). */
+ emit_insn (gen_add3_insn (dst, h, h));
+ }
+ }
else
- emit_move_insn (dst, x0);
+ {
+ rtx t = gen_reg_rtx (mode);
+ rs6000_emit_nmsub (t, g, h, mhalf);
+ rs6000_emit_madd (dst, g, t, g);
+ }
return;
}
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index dbcf583475d..d8834a27314 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -4444,6 +4444,7 @@
&& (TARGET_PPC_GPOPT || (<MODE>mode == SFmode && TARGET_XILINX_FPU))"
{
if (<MODE>mode == SFmode
+ && TARGET_RECIP_PRECISION
&& RS6000_RECIP_HAVE_RSQRTE_P (<MODE>mode)
&& !optimize_function_for_size_p (cfun)
&& flag_finite_math_only && !flag_trapping_math