summary | refs | log | tree | commit | diff
path: root/gcc/config
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config')
-rw-r--r-- gcc/config/i386/i386-protos.h | 1
-rw-r--r-- gcc/config/i386/i386.c | 100
-rw-r--r-- gcc/config/i386/i386.md | 32
3 files changed, 117 insertions, 16 deletions
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 996dcdbd097..4cb110c9833 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -159,6 +159,7 @@ extern enum rtx_code ix86_reverse_condition (enum rtx_code, enum machine_mode);
extern void ix86_expand_lround (rtx, rtx);
extern void ix86_expand_lfloorceil (rtx, rtx, bool);
+extern void ix86_expand_rint (rtx, rtx);
#ifdef TREE_CODE
extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 0291a97a8ae..a12453604fe 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -19225,13 +19225,44 @@ asm_preferred_eh_data_format (int code, int global)
}
/* Expand copysign from SIGN to the positive value ABS_VALUE
- storing in RESULT. */
+ storing in RESULT. If MASK is non-null, it shall be a mask to mask out
+ the sign-bit. */
static void
-ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign)
+ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
{
enum machine_mode mode = GET_MODE (sign);
rtx sgn = gen_reg_rtx (mode);
- rtx mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
+ if (mask == NULL_RTX)
+ {
+ mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
+ if (!VECTOR_MODE_P (mode))
+ {
+ /* We need to generate a scalar mode mask in this case: select
+ element 0 of the mask built above into a fresh MODE register. */
+ rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
+ tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
+ mask = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
+ }
+ }
+ else /* A supplied MASK masks out the sign-bit, so complement it to
+ keep only the sign-bit of SIGN in the AND below. */
+ mask = gen_rtx_NOT (mode, mask);
+ emit_insn (gen_rtx_SET (VOIDmode, sgn,
+ gen_rtx_AND (mode, mask, sign))); /* sgn = sign-bit of SIGN */
+ emit_insn (gen_rtx_SET (VOIDmode, result,
+ gen_rtx_IOR (mode, abs_value, sgn))); /* result = abs_value | sgn */
+}
+
+/* Expand fabs (OP0) and return a new rtx that holds the result. The
+ mask for masking out the sign-bit is stored in *SMASK, if that is
+ non-null. */
+static rtx
+ix86_expand_sse_fabs (rtx op0, rtx *smask)
+{
+ enum machine_mode mode = GET_MODE (op0);
+ rtx xa, mask;
+
+ xa = gen_reg_rtx (mode);
+ mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
if (!VECTOR_MODE_P (mode))
{
/* We need to generate a scalar mode mask in this case. */
@@ -19240,10 +19271,13 @@ ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign)
mask = gen_reg_rtx (mode);
emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
}
- emit_insn (gen_rtx_SET (VOIDmode, sgn,
- gen_rtx_AND (mode, mask, sign)));
- emit_insn (gen_rtx_SET (VOIDmode, result,
- gen_rtx_IOR (mode, abs_value, sgn)));
+ emit_insn (gen_rtx_SET (VOIDmode, xa,
+ gen_rtx_AND (mode, op0, mask)));
+
+ if (smask)
+ *smask = mask;
+
+ return xa;
}
/* Expands a comparison of OP0 with OP1 using comparison code CODE,
@@ -19276,6 +19310,21 @@ ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
return label;
}
+/* Return a register of mode MODE holding the constant 2**n, where n is
+   the mantissa width chosen for MODE: 52 for DFmode and 23 otherwise.
+   MODE must be one of DFmode or SFmode.  */
+static rtx
+ix86_gen_TWO52 (enum machine_mode mode)
+{
+  REAL_VALUE_TYPE pow2;
+  int mant_bits = (mode == DFmode) ? 52 : 23;
+
+  /* pow2 = 1.0 * 2**mant_bits */
+  real_ldexp (&pow2, &dconst1, mant_bits);
+
+  return force_reg (mode, const_double_from_real_value (pow2, mode));
+}
+
/* Expand SSE sequence for computing lround from OP1 storing
into OP0. */
void
@@ -19297,7 +19346,7 @@ ix86_expand_lround (rtx op0, rtx op1)
/* adj = copysign (0.5, op1) */
adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
- ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1));
+ ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
/* adj = op1 + adj */
expand_simple_binop (mode, PLUS, adj, op1, adj, 0, OPTAB_DIRECT);
@@ -19339,4 +19388,39 @@ ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
emit_move_insn (op0, ireg);
}
+/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
+   result in OPERAND0.  The working mode is taken from OPERAND0.  The
+   sign of OPERAND1 is re-applied to the rounded value so that inputs
+   which round to zero keep their sign, e.g. rint (-0.3) yields -0.0.  */
+void
+ix86_expand_rint (rtx operand0, rtx operand1)
+{
+  /* C code for the stuff we're doing below:
+       xa = fabs (operand1);
+       if (!isless (xa, 2**52))
+         return operand1;
+       tmp = copysign (2**52, operand1);
+       xa = fabs (operand1 + tmp - tmp);
+       return copysign (xa, operand1);
+   */
+  enum machine_mode mode = GET_MODE (operand0);
+  rtx res, xa, label, TWO52, mask;
+
+  /* Temporary holding the result, initialized to the input so that the
+     early exit below simply falls through to the final move.  */
+  res = gen_reg_rtx (mode);
+  emit_move_insn (res, operand1);
+
+  /* xa = abs (operand1) */
+  xa = ix86_expand_sse_fabs (res, &mask);
+
+  /* if (!isless (xa, TWO52)) goto label; */
+  TWO52 = ix86_gen_TWO52 (mode);
+  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
+
+  /* TWO52 = copysign (TWO52, operand1) */
+  ix86_sse_copysign_to_positive (TWO52, TWO52, res, mask);
+
+  /* xa = operand1 + TWO52 - TWO52.  Use the rtx handed back by
+     expand_simple_binop: the target argument is only a suggestion and
+     the value is not guaranteed to be placed there.  */
+  xa = expand_simple_binop (mode, PLUS, res, TWO52, NULL_RTX,
+                            0, OPTAB_DIRECT);
+  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
+
+  /* When the input rounds to zero the subtraction is x - x, whose sign
+     does not depend on the sign of the input.  Strip whatever sign it
+     produced and take the sign from operand1 again.  */
+  xa = ix86_expand_sse_fabs (xa, NULL);
+  ix86_sse_copysign_to_positive (res, xa, res, mask);
+
+  emit_label (label);
+  LABEL_NUSES (label) = 1;
+
+  emit_move_insn (operand0, res);
+}
+
#include "gt-i386.h"
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index abceb151c67..5aaf43ba322 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -17160,10 +17160,17 @@
(define_expand "rintdf2"
[(use (match_operand:DF 0 "register_operand" ""))
(use (match_operand:DF 1 "register_operand" ""))]
- "TARGET_USE_FANCY_MATH_387
- && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
- && flag_unsafe_math_optimizations"
-{
+ "(TARGET_USE_FANCY_MATH_387
+ && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations)
+ || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
+ && !flag_trapping_math)"
+{
+ if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
+ && !flag_trapping_math)
+ ix86_expand_rint (operand0, operand1);
+ else
+ {
rtx op0 = gen_reg_rtx (XFmode);
rtx op1 = gen_reg_rtx (XFmode);
@@ -17171,16 +17178,24 @@
emit_insn (gen_frndintxf2 (op0, op1));
emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
+ }
DONE;
})
(define_expand "rintsf2"
[(use (match_operand:SF 0 "register_operand" ""))
(use (match_operand:SF 1 "register_operand" ""))]
- "TARGET_USE_FANCY_MATH_387
- && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
- && flag_unsafe_math_optimizations"
-{
+ "(TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations)
+ || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
+ && !flag_trapping_math)"
+{
+ if (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
+ && !flag_trapping_math)
+ ix86_expand_rint (operand0, operand1);
+ else
+ {
rtx op0 = gen_reg_rtx (XFmode);
rtx op1 = gen_reg_rtx (XFmode);
@@ -17188,6 +17203,7 @@
emit_insn (gen_frndintxf2 (op0, op1));
emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
+ }
DONE;
})