summaryrefslogtreecommitdiff
path: root/gcc/config/rs6000
diff options
context:
space:
mode:
authordje <dje@138bc75d-0d04-0410-961f-82ee72b054a4>2007-09-24 15:39:18 +0000
committerdje <dje@138bc75d-0d04-0410-961f-82ee72b054a4>2007-09-24 15:39:18 +0000
commit7679d16d42d91bbc96aa7730cc8c644ebf8f0d07 (patch)
treecba47602080f01479272aef009c94899cd5da45f /gcc/config/rs6000
parent743f8dd158dfe7981d3596c8e7c3e742c9fcc990 (diff)
downloadgcc-7679d16d42d91bbc96aa7730cc8c644ebf8f0d07.tar.gz
* config/rs6000/rs6000-protos.h (rs6000_emit_swrsqrtsf): Declare.
* config/rs6000/rs6000.opt (swdiv): Change option to ... (recip): this. * config/rs6000/rs6000.c (rs6000_builtin_reciprocal): New function. (TARGET_BUILTIN_RECIPROCAL): Use it. (rs6000_expand_builtin): Expand recip, recipf, and rsqrtf. (rs6000_init_builtins): Initialize recip, recipf, and rsqrtf. (rs6000_emit_swrsqrtsf): New. * config/rs6000/rs6000.h (rs6000_builtins): Add recip, recipf, and rsqrtf. * config/rs6000/rs6000.md (UNSPEC_RSQRT): Define. (divsf3): Remove swdiv support. (recipsf3): New. (rsqrtsf2): New. (rsqrt_internal1): New. (divdf3): Remove swdiv support. (recipdf3): New. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@128719 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config/rs6000')
-rw-r--r--gcc/config/rs6000/rs6000-protos.h1
-rw-r--r--gcc/config/rs6000/rs6000.c159
-rw-r--r--gcc/config/rs6000/rs6000.h4
-rw-r--r--gcc/config/rs6000/rs6000.md90
-rw-r--r--gcc/config/rs6000/rs6000.opt6
5 files changed, 221 insertions, 39 deletions
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index c73da36eb9a..799a15ac0a4 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -89,6 +89,7 @@ extern void rs6000_split_compare_and_swapqhi (rtx, rtx, rtx, rtx, rtx, rtx);
extern void rs6000_split_lock_test_and_set (rtx, rtx, rtx, rtx);
extern void rs6000_emit_swdivsf (rtx, rtx, rtx);
extern void rs6000_emit_swdivdf (rtx, rtx, rtx);
+extern void rs6000_emit_swrsqrtsf (rtx, rtx);
extern void output_toc (FILE *, rtx, int, enum machine_mode);
extern void rs6000_initialize_trampoline (rtx, rtx, rtx);
extern rtx rs6000_longcall_ref (rtx);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 5ada791b7dc..0e9db742af3 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -799,6 +799,7 @@ static int rs6000_sched_reorder (FILE *, int, rtx *, int *, int);
static int rs6000_sched_reorder2 (FILE *, int, rtx *, int *, int);
static int rs6000_use_sched_lookahead (void);
static int rs6000_use_sched_lookahead_guard (rtx);
+static tree rs6000_builtin_reciprocal (unsigned int, bool, bool);
static tree rs6000_builtin_mask_for_load (void);
static tree rs6000_builtin_mul_widen_even (tree);
static tree rs6000_builtin_mul_widen_odd (tree);
@@ -1213,6 +1214,9 @@ static const char alt_reg_names[][8] =
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
+#undef TARGET_BUILTIN_RECIPROCAL
+#define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
+
struct gcc_target targetm = TARGET_INITIALIZER;
@@ -8652,6 +8656,15 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
rtx ret;
bool success;
+ if (fcode == RS6000_BUILTIN_RECIP)
+ return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
+
+ if (fcode == RS6000_BUILTIN_RECIPF)
+ return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
+
+ if (fcode == RS6000_BUILTIN_RSQRTF)
+ return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
+
if (fcode == ALTIVEC_BUILTIN_MASK_FOR_LOAD
|| fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
{
@@ -8858,6 +8871,31 @@ rs6000_init_builtins (void)
altivec_init_builtins ();
if (TARGET_ALTIVEC || TARGET_SPE || TARGET_PAIRED_FLOAT)
rs6000_common_init_builtins ();
+ if (TARGET_PPC_GFXOPT)
+ {
+ tree ftype = build_function_type_list (float_type_node,
+ float_type_node,
+ float_type_node,
+ NULL_TREE);
+ def_builtin (MASK_PPC_GFXOPT, "__builtin_recipdivf", ftype,
+ RS6000_BUILTIN_RECIPF);
+
+ ftype = build_function_type_list (float_type_node,
+ float_type_node,
+ NULL_TREE);
+ def_builtin (MASK_PPC_GFXOPT, "__builtin_rsqrtf", ftype,
+ RS6000_BUILTIN_RSQRTF);
+ }
+ if (TARGET_POPCNTB)
+ {
+ tree ftype = build_function_type_list (double_type_node,
+ double_type_node,
+ double_type_node,
+ NULL_TREE);
+ def_builtin (MASK_POPCNTB, "__builtin_recipdiv", ftype,
+ RS6000_BUILTIN_RECIP);
+
+ }
#if TARGET_XCOFF
/* AIX libm provides clog as __clog. */
@@ -20874,11 +20912,36 @@ rs6000_memory_move_cost (enum machine_mode mode, enum reg_class class,
return 4 + rs6000_register_move_cost (mode, class, GENERAL_REGS);
}
+/* Return the entry of rs6000_builtin_decls for the target-specific
+ builtin that implements the reciprocal of function FN, or NULL_TREE if
+ none is available. NULL_TREE is returned unless TARGET_RECIP and
+ TARGET_PPC_GFXOPT are set, the code is not being optimized for size,
+ and the finite-math-only, non-trapping-math and unsafe-math flags are
+ all in effect. Only BUILT_IN_SQRTF is mapped (to the
+ RS6000_BUILTIN_RSQRTF entry); nothing is mapped when MD_FN is set.
+ SQRT is not used. */
+
+static tree
+rs6000_builtin_reciprocal (unsigned int fn, bool md_fn,
+ bool sqrt ATTRIBUTE_UNUSED)
+{
+ if (! (TARGET_RECIP && TARGET_PPC_GFXOPT && !optimize_size
+ && flag_finite_math_only && !flag_trapping_math
+ && flag_unsafe_math_optimizations))
+ return NULL_TREE; /* Required option/flag combination not in effect. */
+
+ if (md_fn)
+ return NULL_TREE; /* No FN is handled when MD_FN is set. */
+ else
+ switch (fn)
+ {
+ case BUILT_IN_SQRTF:
+ return rs6000_builtin_decls[RS6000_BUILTIN_RSQRTF];
+
+ default:
+ return NULL_TREE; /* Every other FN has no reciprocal builtin here. */
+ }
+}
+
/* Newton-Raphson approximation of single-precision floating point divide n/d.
Assumes no trapping math and finite arguments. */
void
-rs6000_emit_swdivsf (rtx res, rtx n, rtx d)
+rs6000_emit_swdivsf (rtx dst, rtx n, rtx d)
{
rtx x0, e0, e1, y1, u0, v0, one;
@@ -20913,8 +20976,8 @@ rs6000_emit_swdivsf (rtx res, rtx n, rtx d)
emit_insn (gen_rtx_SET (VOIDmode, v0,
gen_rtx_MINUS (SFmode, n,
gen_rtx_MULT (SFmode, d, u0))));
- /* res = u0 + v0 * y1 */
- emit_insn (gen_rtx_SET (VOIDmode, res,
+ /* dst = u0 + v0 * y1 */
+ emit_insn (gen_rtx_SET (VOIDmode, dst,
gen_rtx_PLUS (SFmode,
gen_rtx_MULT (SFmode, v0, y1), u0)));
}
@@ -20923,7 +20986,7 @@ rs6000_emit_swdivsf (rtx res, rtx n, rtx d)
Assumes no trapping math and finite arguments. */
void
-rs6000_emit_swdivdf (rtx res, rtx n, rtx d)
+rs6000_emit_swdivdf (rtx dst, rtx n, rtx d)
{
rtx x0, e0, e1, e2, y1, y2, y3, u0, v0, one;
@@ -20971,13 +21034,97 @@ rs6000_emit_swdivdf (rtx res, rtx n, rtx d)
emit_insn (gen_rtx_SET (VOIDmode, v0,
gen_rtx_MINUS (DFmode, n,
gen_rtx_MULT (DFmode, d, u0))));
- /* res = u0 + v0 * y3 */
- emit_insn (gen_rtx_SET (VOIDmode, res,
+ /* dst = u0 + v0 * y3 */
+ emit_insn (gen_rtx_SET (VOIDmode, dst,
gen_rtx_PLUS (DFmode,
gen_rtx_MULT (DFmode, v0, y3), u0)));
}
+/* Newton-Raphson approximation of single-precision floating point rsqrt.
+ Assumes no trapping math and finite arguments.
+ Emits insns that start from the UNSPEC_RSQRT estimate of SRC and refine
+ it with three steps of x' = x * (1.5 - (0.5 * SRC) * (x * x)); the
+ result of the third step is stored in DST. A compare-and-branch
+ emitted first skips the whole sequence when SRC * SRC equals SRC. */
+
+void
+rs6000_emit_swrsqrtsf (rtx dst, rtx src)
+{
+ rtx x0, x1, x2, y1, u0, u1, u2, v0, v1, v2, t0,
+ half, one, halfthree, c1, cond, label;
+
+ x0 = gen_reg_rtx (SFmode);
+ x1 = gen_reg_rtx (SFmode);
+ x2 = gen_reg_rtx (SFmode);
+ y1 = gen_reg_rtx (SFmode);
+ u0 = gen_reg_rtx (SFmode);
+ u1 = gen_reg_rtx (SFmode);
+ u2 = gen_reg_rtx (SFmode);
+ v0 = gen_reg_rtx (SFmode);
+ v1 = gen_reg_rtx (SFmode);
+ v2 = gen_reg_rtx (SFmode);
+ t0 = gen_reg_rtx (SFmode);
+ halfthree = gen_reg_rtx (SFmode);
+ cond = gen_rtx_REG (CCFPmode, CR1_REGNO); /* Compare result lives in the fixed register CR1_REGNO. */
+ label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ()); /* Reference to the exit label emitted at the end. */
+
+ /* check 0.0, 1.0, NaN, Inf by testing src * src = src
+ When the products compare equal, control jumps to the label emitted
+ at the end of this function and every insn below is skipped,
+ including the only set of DST emitted here.
+ NOTE(review): confirm DST holds the intended value on that path.
+ NOTE(review): a NaN operand presumably compares unordered rather
+ than equal, so confirm that NaN really takes the early exit. */
+ emit_insn (gen_rtx_SET (VOIDmode, t0,
+ gen_rtx_MULT (SFmode, src, src)));
+
+ emit_insn (gen_rtx_SET (VOIDmode, cond,
+ gen_rtx_COMPARE (CCFPmode, t0, src)));
+ c1 = gen_rtx_EQ (VOIDmode, cond, const0_rtx);
+ emit_unlikely_jump (c1, label);
+
+ half = force_reg (SFmode, CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, SFmode)); /* 0.5 in a register. */
+ one = force_reg (SFmode, CONST_DOUBLE_FROM_REAL_VALUE (dconst1, SFmode)); /* 1.0 in a register. */
+
+ /* halfthree = 1.5 = 1.0 + 0.5 */
+ emit_insn (gen_rtx_SET (VOIDmode, halfthree,
+ gen_rtx_PLUS (SFmode, one, half)));
+
+ /* x0 = rsqrt estimate */
+ emit_insn (gen_rtx_SET (VOIDmode, x0,
+ gen_rtx_UNSPEC (SFmode, gen_rtvec (1, src),
+ UNSPEC_RSQRT)));
+
+ /* y1 = 0.5 * src = 1.5 * src - src -> fewer constants */
+ emit_insn (gen_rtx_SET (VOIDmode, y1,
+ gen_rtx_MINUS (SFmode,
+ gen_rtx_MULT (SFmode, src, halfthree),
+ src)));
+
+ /* x1 = x0 * (1.5 - y1 * (x0 * x0))
+ First refinement step: u0 = x0 * x0, v0 = 1.5 - y1 * u0. */
+ emit_insn (gen_rtx_SET (VOIDmode, u0,
+ gen_rtx_MULT (SFmode, x0, x0)));
+ emit_insn (gen_rtx_SET (VOIDmode, v0,
+ gen_rtx_MINUS (SFmode,
+ halfthree,
+ gen_rtx_MULT (SFmode, y1, u0))));
+ emit_insn (gen_rtx_SET (VOIDmode, x1,
+ gen_rtx_MULT (SFmode, x0, v0)));
+
+ /* x2 = x1 * (1.5 - y1 * (x1 * x1))
+ Second refinement step: u1 = x1 * x1, v1 = 1.5 - y1 * u1. */
+ emit_insn (gen_rtx_SET (VOIDmode, u1,
+ gen_rtx_MULT (SFmode, x1, x1)));
+ emit_insn (gen_rtx_SET (VOIDmode, v1,
+ gen_rtx_MINUS (SFmode,
+ halfthree,
+ gen_rtx_MULT (SFmode, y1, u1))));
+ emit_insn (gen_rtx_SET (VOIDmode, x2,
+ gen_rtx_MULT (SFmode, x1, v1)));
+
+ /* dst = x2 * (1.5 - y1 * (x2 * x2))
+ Third refinement step: u2 = x2 * x2, v2 = 1.5 - y1 * u2. */
+ emit_insn (gen_rtx_SET (VOIDmode, u2,
+ gen_rtx_MULT (SFmode, x2, x2)));
+ emit_insn (gen_rtx_SET (VOIDmode, v2,
+ gen_rtx_MINUS (SFmode,
+ halfthree,
+ gen_rtx_MULT (SFmode, y1, u2))));
+ emit_insn (gen_rtx_SET (VOIDmode, dst,
+ gen_rtx_MULT (SFmode, x2, v2)));
+
+ emit_label (XEXP (label, 0)); /* Target of the early-exit jump emitted above. */
+}
+
/* Emit popcount intrinsic on TARGET_POPCNTB targets. DST is the
target, and SRC is the argument operand. */
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index b7dced33aad..5ace1555650 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -2993,6 +2993,10 @@ enum rs6000_builtins
PAIRED_BUILTIN_CMPU0,
PAIRED_BUILTIN_CMPU1,
+ RS6000_BUILTIN_RECIP,
+ RS6000_BUILTIN_RECIPF,
+ RS6000_BUILTIN_RSQRTF,
+
RS6000_BUILTIN_COUNT
};
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index debacdc74b4..954dfec6f0c 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -98,6 +98,7 @@
(UNSPEC_DLMZB 45)
(UNSPEC_DLMZB_CR 46)
(UNSPEC_DLMZB_STRLEN 47)
+ (UNSPEC_RSQRT 48)
])
;;
@@ -5197,26 +5198,12 @@
"{fm|fmul} %0,%1,%2"
[(set_attr "type" "dmul")])
-(define_insn "fres"
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
- (unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "f")] UNSPEC_FRES))]
- "TARGET_PPC_GFXOPT && flag_finite_math_only"
- "fres %0,%1"
- [(set_attr "type" "fp")])
-
(define_expand "divsf3"
[(set (match_operand:SF 0 "gpc_reg_operand" "")
(div:SF (match_operand:SF 1 "gpc_reg_operand" "")
(match_operand:SF 2 "gpc_reg_operand" "")))]
"TARGET_HARD_FLOAT"
-{
- if (swdiv && !optimize_size && TARGET_PPC_GFXOPT
- && flag_finite_math_only && !flag_trapping_math)
- {
- rs6000_emit_swdivsf (operands[0], operands[1], operands[2]);
- DONE;
- }
-})
+ "")
(define_insn ""
[(set (match_operand:SF 0 "gpc_reg_operand" "=f")
@@ -5234,6 +5221,25 @@
"{fd|fdiv} %0,%1,%2"
[(set_attr "type" "ddiv")])
+(define_expand "recipsf3"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "f")
+ (match_operand:SF 2 "gpc_reg_operand" "f")]
+ UNSPEC_FRES))]
+ "TARGET_RECIP && TARGET_HARD_FLOAT && TARGET_PPC_GFXOPT && !optimize_size
+ && flag_finite_math_only && !flag_trapping_math"
+{
+ rs6000_emit_swdivsf (operands[0], operands[1], operands[2]);
+ DONE;
+})
+
+(define_insn "fres"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "f")] UNSPEC_FRES))]
+ "TARGET_PPC_GFXOPT && flag_finite_math_only"
+ "fres %0,%1"
+ [(set_attr "type" "fp")])
+
(define_insn ""
[(set (match_operand:SF 0 "gpc_reg_operand" "=f")
(plus:SF (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
@@ -5368,6 +5374,25 @@
"fsqrt %0,%1"
[(set_attr "type" "dsqrt")])
+(define_expand "rsqrtsf2"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "f")]
+ UNSPEC_RSQRT))]
+ "TARGET_RECIP && TARGET_HARD_FLOAT && TARGET_PPC_GFXOPT && !optimize_size
+ && flag_finite_math_only && !flag_trapping_math"
+{
+ rs6000_emit_swrsqrtsf (operands[0], operands[1]);
+ DONE;
+})
+
+(define_insn "*rsqrt_internal1"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "f")]
+ UNSPEC_RSQRT))]
+ "TARGET_HARD_FLOAT && TARGET_PPC_GFXOPT"
+ "frsqrte %0,%1"
+ [(set_attr "type" "fp")])
+
(define_expand "copysignsf3"
[(set (match_dup 3)
(abs:SF (match_operand:SF 1 "gpc_reg_operand" "")))
@@ -5599,26 +5624,12 @@
"{fm|fmul} %0,%1,%2"
[(set_attr "type" "dmul")])
-(define_insn "fred"
- [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
- (unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "f")] UNSPEC_FRES))]
- "TARGET_POPCNTB && flag_finite_math_only"
- "fre %0,%1"
- [(set_attr "type" "fp")])
-
(define_expand "divdf3"
[(set (match_operand:DF 0 "gpc_reg_operand" "")
(div:DF (match_operand:DF 1 "gpc_reg_operand" "")
(match_operand:DF 2 "gpc_reg_operand" "")))]
"TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)"
-{
- if (swdiv && !optimize_size && TARGET_POPCNTB
- && flag_finite_math_only && !flag_trapping_math)
- {
- rs6000_emit_swdivdf (operands[0], operands[1], operands[2]);
- DONE;
- }
-})
+ "")
(define_insn "*divdf3_fpr"
[(set (match_operand:DF 0 "gpc_reg_operand" "=f")
@@ -5628,6 +5639,25 @@
"{fd|fdiv} %0,%1,%2"
[(set_attr "type" "ddiv")])
+(define_expand "recipdf3"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
+ (unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "f")
+ (match_operand:DF 2 "gpc_reg_operand" "f")]
+ UNSPEC_FRES))]
+ "TARGET_RECIP && TARGET_HARD_FLOAT && TARGET_POPCNTB && !optimize_size
+ && flag_finite_math_only && !flag_trapping_math"
+{
+ rs6000_emit_swdivdf (operands[0], operands[1], operands[2]);
+ DONE;
+})
+
+(define_insn "fred"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
+ (unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "f")] UNSPEC_FRES))]
+ "TARGET_POPCNTB && flag_finite_math_only"
+ "fre %0,%1"
+ [(set_attr "type" "fp")])
+
(define_insn ""
[(set (match_operand:DF 0 "gpc_reg_operand" "=f")
(plus:DF (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%f")
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 49800633935..c552a03c8fa 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -146,9 +146,9 @@ mxl-compat
Target Report Var(TARGET_XL_COMPAT)
Conform more closely to IBM XLC semantics
-mswdiv
-Target Report Var(swdiv)
-Generate software floating point divide for better throughput
+mrecip
+Target Report Var(TARGET_RECIP)
+Generate software reciprocal sqrt for better throughput
mno-fp-in-toc
Target Report RejectNegative Var(TARGET_NO_FP_IN_TOC)