diff options
author | dje <dje@138bc75d-0d04-0410-961f-82ee72b054a4> | 2007-09-24 15:39:18 +0000 |
---|---|---|
committer | dje <dje@138bc75d-0d04-0410-961f-82ee72b054a4> | 2007-09-24 15:39:18 +0000 |
commit | 7679d16d42d91bbc96aa7730cc8c644ebf8f0d07 (patch) | |
tree | cba47602080f01479272aef009c94899cd5da45f /gcc/config/rs6000 | |
parent | 743f8dd158dfe7981d3596c8e7c3e742c9fcc990 (diff) | |
download | gcc-7679d16d42d91bbc96aa7730cc8c644ebf8f0d07.tar.gz |
* config/rs6000/rs6000-protos.h (rs6000_emit_swrsqrtsf): Declare.
* config/rs6000/rs6000.opt (swdiv): Change option to ...
(recip): this.
* config/rs6000/rs6000.c (rs6000_builtin_reciprocal): New
function.
(TARGET_BUILTIN_RECIPROCAL): Use it.
(rs6000_expand_builtin): Expand recip, recipf, and rsqrtf.
(rs6000_init_builtins): Initialize recip, recipf, and rsqrtf.
(rs6000_emit_swrsqrtsf): New.
* config/rs6000/rs6000.h (rs6000_builtins): Add recip, recipf, and
rsqrtf.
* config/rs6000/rs6000.md (UNSPEC_RSQRT): Define.
(divsf3): Remove swdiv support.
(recipsf3): New.
(rsqrtsf2): New.
(rsqrt_internal1): New.
(divdf3): Remove swdiv support.
(recipdf3): New.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@128719 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config/rs6000')
-rw-r--r-- | gcc/config/rs6000/rs6000-protos.h | 1 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.c | 159 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.h | 4 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.md | 90 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.opt | 6 |
5 files changed, 221 insertions, 39 deletions
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index c73da36eb9a..799a15ac0a4 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -89,6 +89,7 @@ extern void rs6000_split_compare_and_swapqhi (rtx, rtx, rtx, rtx, rtx, rtx); extern void rs6000_split_lock_test_and_set (rtx, rtx, rtx, rtx); extern void rs6000_emit_swdivsf (rtx, rtx, rtx); extern void rs6000_emit_swdivdf (rtx, rtx, rtx); +extern void rs6000_emit_swrsqrtsf (rtx, rtx); extern void output_toc (FILE *, rtx, int, enum machine_mode); extern void rs6000_initialize_trampoline (rtx, rtx, rtx); extern rtx rs6000_longcall_ref (rtx); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 5ada791b7dc..0e9db742af3 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -799,6 +799,7 @@ static int rs6000_sched_reorder (FILE *, int, rtx *, int *, int); static int rs6000_sched_reorder2 (FILE *, int, rtx *, int *, int); static int rs6000_use_sched_lookahead (void); static int rs6000_use_sched_lookahead_guard (rtx); +static tree rs6000_builtin_reciprocal (unsigned int, bool, bool); static tree rs6000_builtin_mask_for_load (void); static tree rs6000_builtin_mul_widen_even (tree); static tree rs6000_builtin_mul_widen_odd (tree); @@ -1213,6 +1214,9 @@ static const char alt_reg_names[][8] = #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p +#undef TARGET_BUILTIN_RECIPROCAL +#define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal + struct gcc_target targetm = TARGET_INITIALIZER; @@ -8652,6 +8656,15 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, rtx ret; bool success; + if (fcode == RS6000_BUILTIN_RECIP) + return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target); + + if (fcode == RS6000_BUILTIN_RECIPF) + return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target); + + if (fcode == 
RS6000_BUILTIN_RSQRTF) + return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target); + if (fcode == ALTIVEC_BUILTIN_MASK_FOR_LOAD || fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE) { @@ -8858,6 +8871,31 @@ rs6000_init_builtins (void) altivec_init_builtins (); if (TARGET_ALTIVEC || TARGET_SPE || TARGET_PAIRED_FLOAT) rs6000_common_init_builtins (); + if (TARGET_PPC_GFXOPT) + { + tree ftype = build_function_type_list (float_type_node, + float_type_node, + float_type_node, + NULL_TREE); + def_builtin (MASK_PPC_GFXOPT, "__builtin_recipdivf", ftype, + RS6000_BUILTIN_RECIPF); + + ftype = build_function_type_list (float_type_node, + float_type_node, + NULL_TREE); + def_builtin (MASK_PPC_GFXOPT, "__builtin_rsqrtf", ftype, + RS6000_BUILTIN_RSQRTF); + } + if (TARGET_POPCNTB) + { + tree ftype = build_function_type_list (double_type_node, + double_type_node, + double_type_node, + NULL_TREE); + def_builtin (MASK_POPCNTB, "__builtin_recipdiv", ftype, + RS6000_BUILTIN_RECIP); + + } #if TARGET_XCOFF /* AIX libm provides clog as __clog. */ @@ -20874,11 +20912,36 @@ rs6000_memory_move_cost (enum machine_mode mode, enum reg_class class, return 4 + rs6000_register_move_cost (mode, class, GENERAL_REGS); } +/* Returns a code for a target-specific builtin that implements + reciprocal of the function, or NULL_TREE if not available. */ + +static tree +rs6000_builtin_reciprocal (unsigned int fn, bool md_fn, + bool sqrt ATTRIBUTE_UNUSED) +{ + if (! (TARGET_RECIP && TARGET_PPC_GFXOPT && !optimize_size + && flag_finite_math_only && !flag_trapping_math + && flag_unsafe_math_optimizations)) + return NULL_TREE; + + if (md_fn) + return NULL_TREE; + else + switch (fn) + { + case BUILT_IN_SQRTF: + return rs6000_builtin_decls[RS6000_BUILTIN_RSQRTF]; + + default: + return NULL_TREE; + } +} + /* Newton-Raphson approximation of single-precision floating point divide n/d. Assumes no trapping math and finite arguments. 
*/ void -rs6000_emit_swdivsf (rtx res, rtx n, rtx d) +rs6000_emit_swdivsf (rtx dst, rtx n, rtx d) { rtx x0, e0, e1, y1, u0, v0, one; @@ -20913,8 +20976,8 @@ rs6000_emit_swdivsf (rtx res, rtx n, rtx d) emit_insn (gen_rtx_SET (VOIDmode, v0, gen_rtx_MINUS (SFmode, n, gen_rtx_MULT (SFmode, d, u0)))); - /* res = u0 + v0 * y1 */ - emit_insn (gen_rtx_SET (VOIDmode, res, + /* dst = u0 + v0 * y1 */ + emit_insn (gen_rtx_SET (VOIDmode, dst, gen_rtx_PLUS (SFmode, gen_rtx_MULT (SFmode, v0, y1), u0))); } @@ -20923,7 +20986,7 @@ rs6000_emit_swdivsf (rtx res, rtx n, rtx d) Assumes no trapping math and finite arguments. */ void -rs6000_emit_swdivdf (rtx res, rtx n, rtx d) +rs6000_emit_swdivdf (rtx dst, rtx n, rtx d) { rtx x0, e0, e1, e2, y1, y2, y3, u0, v0, one; @@ -20971,13 +21034,97 @@ rs6000_emit_swdivdf (rtx res, rtx n, rtx d) emit_insn (gen_rtx_SET (VOIDmode, v0, gen_rtx_MINUS (DFmode, n, gen_rtx_MULT (DFmode, d, u0)))); - /* res = u0 + v0 * y3 */ - emit_insn (gen_rtx_SET (VOIDmode, res, + /* dst = u0 + v0 * y3 */ + emit_insn (gen_rtx_SET (VOIDmode, dst, gen_rtx_PLUS (DFmode, gen_rtx_MULT (DFmode, v0, y3), u0))); } +/* Newton-Raphson approximation of single-precision floating point rsqrt. + Assumes no trapping math and finite arguments. 
*/ + +void +rs6000_emit_swrsqrtsf (rtx dst, rtx src) +{ + rtx x0, x1, x2, y1, u0, u1, u2, v0, v1, v2, t0, + half, one, halfthree, c1, cond, label; + + x0 = gen_reg_rtx (SFmode); + x1 = gen_reg_rtx (SFmode); + x2 = gen_reg_rtx (SFmode); + y1 = gen_reg_rtx (SFmode); + u0 = gen_reg_rtx (SFmode); + u1 = gen_reg_rtx (SFmode); + u2 = gen_reg_rtx (SFmode); + v0 = gen_reg_rtx (SFmode); + v1 = gen_reg_rtx (SFmode); + v2 = gen_reg_rtx (SFmode); + t0 = gen_reg_rtx (SFmode); + halfthree = gen_reg_rtx (SFmode); + cond = gen_rtx_REG (CCFPmode, CR1_REGNO); + label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ()); + + /* check 0.0, 1.0, NaN, Inf by testing src * src = src */ + emit_insn (gen_rtx_SET (VOIDmode, t0, + gen_rtx_MULT (SFmode, src, src))); + + emit_insn (gen_rtx_SET (VOIDmode, cond, + gen_rtx_COMPARE (CCFPmode, t0, src))); + c1 = gen_rtx_EQ (VOIDmode, cond, const0_rtx); + emit_unlikely_jump (c1, label); + + half = force_reg (SFmode, CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, SFmode)); + one = force_reg (SFmode, CONST_DOUBLE_FROM_REAL_VALUE (dconst1, SFmode)); + + /* halfthree = 1.5 = 1.0 + 0.5 */ + emit_insn (gen_rtx_SET (VOIDmode, halfthree, + gen_rtx_PLUS (SFmode, one, half))); + + /* x0 = rsqrt estimate */ + emit_insn (gen_rtx_SET (VOIDmode, x0, + gen_rtx_UNSPEC (SFmode, gen_rtvec (1, src), + UNSPEC_RSQRT))); + + /* y1 = 0.5 * src = 1.5 * src - src -> fewer constants */ + emit_insn (gen_rtx_SET (VOIDmode, y1, + gen_rtx_MINUS (SFmode, + gen_rtx_MULT (SFmode, src, halfthree), + src))); + + /* x1 = x0 * (1.5 - y1 * (x0 * x0)) */ + emit_insn (gen_rtx_SET (VOIDmode, u0, + gen_rtx_MULT (SFmode, x0, x0))); + emit_insn (gen_rtx_SET (VOIDmode, v0, + gen_rtx_MINUS (SFmode, + halfthree, + gen_rtx_MULT (SFmode, y1, u0)))); + emit_insn (gen_rtx_SET (VOIDmode, x1, + gen_rtx_MULT (SFmode, x0, v0))); + + /* x2 = x1 * (1.5 - y1 * (x1 * x1)) */ + emit_insn (gen_rtx_SET (VOIDmode, u1, + gen_rtx_MULT (SFmode, x1, x1))); + emit_insn (gen_rtx_SET (VOIDmode, v1, + gen_rtx_MINUS (SFmode, + 
halfthree, + gen_rtx_MULT (SFmode, y1, u1)))); + emit_insn (gen_rtx_SET (VOIDmode, x2, + gen_rtx_MULT (SFmode, x1, v1))); + + /* dst = x2 * (1.5 - y1 * (x2 * x2)) */ + emit_insn (gen_rtx_SET (VOIDmode, u2, + gen_rtx_MULT (SFmode, x2, x2))); + emit_insn (gen_rtx_SET (VOIDmode, v2, + gen_rtx_MINUS (SFmode, + halfthree, + gen_rtx_MULT (SFmode, y1, u2)))); + emit_insn (gen_rtx_SET (VOIDmode, dst, + gen_rtx_MULT (SFmode, x2, v2))); + + emit_label (XEXP (label, 0)); +} + /* Emit popcount intrinsic on TARGET_POPCNTB targets. DST is the target, and SRC is the argument operand. */ diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index b7dced33aad..5ace1555650 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -2993,6 +2993,10 @@ enum rs6000_builtins PAIRED_BUILTIN_CMPU0, PAIRED_BUILTIN_CMPU1, + RS6000_BUILTIN_RECIP, + RS6000_BUILTIN_RECIPF, + RS6000_BUILTIN_RSQRTF, + RS6000_BUILTIN_COUNT }; diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index debacdc74b4..954dfec6f0c 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -98,6 +98,7 @@ (UNSPEC_DLMZB 45) (UNSPEC_DLMZB_CR 46) (UNSPEC_DLMZB_STRLEN 47) + (UNSPEC_RSQRT 48) ]) ;; @@ -5197,26 +5198,12 @@ "{fm|fmul} %0,%1,%2" [(set_attr "type" "dmul")]) -(define_insn "fres" - [(set (match_operand:SF 0 "gpc_reg_operand" "=f") - (unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "f")] UNSPEC_FRES))] - "TARGET_PPC_GFXOPT && flag_finite_math_only" - "fres %0,%1" - [(set_attr "type" "fp")]) - (define_expand "divsf3" [(set (match_operand:SF 0 "gpc_reg_operand" "") (div:SF (match_operand:SF 1 "gpc_reg_operand" "") (match_operand:SF 2 "gpc_reg_operand" "")))] "TARGET_HARD_FLOAT" -{ - if (swdiv && !optimize_size && TARGET_PPC_GFXOPT - && flag_finite_math_only && !flag_trapping_math) - { - rs6000_emit_swdivsf (operands[0], operands[1], operands[2]); - DONE; - } -}) + "") (define_insn "" [(set (match_operand:SF 0 "gpc_reg_operand" "=f") @@ -5234,6 
+5221,25 @@ "{fd|fdiv} %0,%1,%2" [(set_attr "type" "ddiv")]) +(define_expand "recipsf3" + [(set (match_operand:SF 0 "gpc_reg_operand" "=f") + (unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "f") + (match_operand:SF 2 "gpc_reg_operand" "f")] + UNSPEC_FRES))] + "TARGET_RECIP && TARGET_HARD_FLOAT && TARGET_PPC_GFXOPT && !optimize_size + && flag_finite_math_only && !flag_trapping_math" +{ + rs6000_emit_swdivsf (operands[0], operands[1], operands[2]); + DONE; +}) + +(define_insn "fres" + [(set (match_operand:SF 0 "gpc_reg_operand" "=f") + (unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "f")] UNSPEC_FRES))] + "TARGET_PPC_GFXOPT && flag_finite_math_only" + "fres %0,%1" + [(set_attr "type" "fp")]) + (define_insn "" [(set (match_operand:SF 0 "gpc_reg_operand" "=f") (plus:SF (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f") @@ -5368,6 +5374,25 @@ "fsqrt %0,%1" [(set_attr "type" "dsqrt")]) +(define_expand "rsqrtsf2" + [(set (match_operand:SF 0 "gpc_reg_operand" "=f") + (unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "f")] + UNSPEC_RSQRT))] + "TARGET_RECIP && TARGET_HARD_FLOAT && TARGET_PPC_GFXOPT && !optimize_size + && flag_finite_math_only && !flag_trapping_math" +{ + rs6000_emit_swrsqrtsf (operands[0], operands[1]); + DONE; +}) + +(define_insn "*rsqrt_internal1" + [(set (match_operand:SF 0 "gpc_reg_operand" "=f") + (unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "f")] + UNSPEC_RSQRT))] + "TARGET_HARD_FLOAT && TARGET_PPC_GFXOPT" + "frsqrte %0,%1" + [(set_attr "type" "fp")]) + (define_expand "copysignsf3" [(set (match_dup 3) (abs:SF (match_operand:SF 1 "gpc_reg_operand" ""))) @@ -5599,26 +5624,12 @@ "{fm|fmul} %0,%1,%2" [(set_attr "type" "dmul")]) -(define_insn "fred" - [(set (match_operand:DF 0 "gpc_reg_operand" "=f") - (unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "f")] UNSPEC_FRES))] - "TARGET_POPCNTB && flag_finite_math_only" - "fre %0,%1" - [(set_attr "type" "fp")]) - (define_expand "divdf3" [(set (match_operand:DF 0 "gpc_reg_operand" "") (div:DF 
(match_operand:DF 1 "gpc_reg_operand" "") (match_operand:DF 2 "gpc_reg_operand" "")))] "TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)" -{ - if (swdiv && !optimize_size && TARGET_POPCNTB - && flag_finite_math_only && !flag_trapping_math) - { - rs6000_emit_swdivdf (operands[0], operands[1], operands[2]); - DONE; - } -}) + "") (define_insn "*divdf3_fpr" [(set (match_operand:DF 0 "gpc_reg_operand" "=f") @@ -5628,6 +5639,25 @@ "{fd|fdiv} %0,%1,%2" [(set_attr "type" "ddiv")]) +(define_expand "recipdf3" + [(set (match_operand:DF 0 "gpc_reg_operand" "=f") + (unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "f") + (match_operand:DF 2 "gpc_reg_operand" "f")] + UNSPEC_FRES))] + "TARGET_RECIP && TARGET_HARD_FLOAT && TARGET_POPCNTB && !optimize_size + && flag_finite_math_only && !flag_trapping_math" +{ + rs6000_emit_swdivdf (operands[0], operands[1], operands[2]); + DONE; +}) + +(define_insn "fred" + [(set (match_operand:DF 0 "gpc_reg_operand" "=f") + (unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "f")] UNSPEC_FRES))] + "TARGET_POPCNTB && flag_finite_math_only" + "fre %0,%1" + [(set_attr "type" "fp")]) + (define_insn "" [(set (match_operand:DF 0 "gpc_reg_operand" "=f") (plus:DF (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%f") diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index 49800633935..c552a03c8fa 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -146,9 +146,9 @@ mxl-compat Target Report Var(TARGET_XL_COMPAT) Conform more closely to IBM XLC semantics -mswdiv -Target Report Var(swdiv) -Generate software floating point divide for better throughput +mrecip +Target Report Var(TARGET_RECIP) +Generate software reciprocal sqrt for better throughput mno-fp-in-toc Target Report RejectNegative Var(TARGET_NO_FP_IN_TOC) |