Diffstat (limited to 'gcc/config')
 gcc/config/arm/arm.c          |  17
 gcc/config/arm/arm.md         |  15
 gcc/config/arm/predicates.md  |  14
 gcc/config/i386/i386-protos.h |   2
 gcc/config/i386/i386.c        | 229
 gcc/config/i386/i386.h        |  13
 gcc/config/i386/i386.md       |   8
 gcc/config/i386/i386.opt      |  13
 gcc/config/i386/predicates.md |   9
 gcc/config/i386/sse.md        | 124
 gcc/config/rs6000/aix.h       |   3
 gcc/config/rx/rx.md           |  10
 gcc/config/s390/s390.c        |  20
 gcc/config/s390/s390.md       |  27
 gcc/config/sparc/sparc.md     |  99
 15 files changed, 465 insertions(+), 138 deletions(-)
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 51614396da4..2feac6f45e1 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -6550,9 +6550,26 @@ arm_legitimize_reload_address (rtx *p,
int opnum, int type,
int ind_levels ATTRIBUTE_UNUSED)
{
+ /* We must recognize output that we have already generated ourselves. */
+ if (GET_CODE (*p) == PLUS
+ && GET_CODE (XEXP (*p, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (*p, 0), 0)) == REG
+ && GET_CODE (XEXP (XEXP (*p, 0), 1)) == CONST_INT
+ && GET_CODE (XEXP (*p, 1)) == CONST_INT)
+ {
+ push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
+ MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
+ VOIDmode, 0, 0, opnum, (enum reload_type) type);
+ return true;
+ }
+
if (GET_CODE (*p) == PLUS
&& GET_CODE (XEXP (*p, 0)) == REG
&& ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
+ /* If the base register is equivalent to a constant, let the generic
+ code handle it. Otherwise we will run into problems if a future
+ reload pass decides to rematerialize the constant. */
+ && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
&& GET_CODE (XEXP (*p, 1)) == CONST_INT)
{
HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
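For illustration: the shape matched by the new block at the top is the one
this function itself generates when it splits an out-of-range offset.  A
hypothetical sketch, with made-up constants:

    /* Legitimizing (mem (plus (reg) (const_int 0x4321))) splits the
       offset, giving (mem (plus (plus (reg) (const_int 0x4000))
                                 (const_int 0x321))).
       When that address is presented for reload again, the new test
       reloads the inner PLUS as a whole instead of splitting anew.  */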
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index baa7eb41b4e..a78ba88f8ce 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -1213,27 +1213,24 @@
; ??? Check Thumb-2 split length
(define_insn_and_split "*arm_subsi3_insn"
- [(set (match_operand:SI 0 "s_register_operand" "=r,r,rk,r,r")
- (minus:SI (match_operand:SI 1 "reg_or_int_operand" "rI,r,k,?n,r")
- (match_operand:SI 2 "reg_or_int_operand" "r,rI,r, r,?n")))]
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r,rk,r")
+ (minus:SI (match_operand:SI 1 "reg_or_int_operand" "rI,r,k,?n")
+ (match_operand:SI 2 "reg_or_int_operand" "r,rI,r, r")))]
"TARGET_32BIT"
"@
rsb%?\\t%0, %2, %1
sub%?\\t%0, %1, %2
sub%?\\t%0, %1, %2
- #
#"
- "&& ((GET_CODE (operands[1]) == CONST_INT
- && !const_ok_for_arm (INTVAL (operands[1])))
- || (GET_CODE (operands[2]) == CONST_INT
- && !const_ok_for_arm (INTVAL (operands[2]))))"
+ "&& (GET_CODE (operands[1]) == CONST_INT
+ && !const_ok_for_arm (INTVAL (operands[1])))"
[(clobber (const_int 0))]
"
arm_split_constant (MINUS, SImode, curr_insn,
INTVAL (operands[1]), operands[0], operands[2], 0);
DONE;
"
- [(set_attr "length" "4,4,4,16,16")
+ [(set_attr "length" "4,4,4,16")
(set_attr "predicable" "yes")]
)
diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
index 27ba6033f78..2c1a138b0df 100644
--- a/gcc/config/arm/predicates.md
+++ b/gcc/config/arm/predicates.md
@@ -129,11 +129,12 @@
(ior (match_operand 0 "arm_rhs_operand")
(match_operand 0 "memory_operand")))
+;; This doesn't have to do much because the constant is already checked
+;; in the shift_operator predicate.
(define_predicate "shift_amount_operand"
(ior (and (match_test "TARGET_ARM")
(match_operand 0 "s_register_operand"))
- (and (match_code "const_int")
- (match_test "((unsigned HOST_WIDE_INT) INTVAL (op)) < 32"))))
+ (match_operand 0 "const_int_operand")))
(define_predicate "arm_add_operand"
(ior (match_operand 0 "arm_rhs_operand")
@@ -219,13 +220,20 @@
(match_test "mode == GET_MODE (op)")))
;; True for shift operators.
+;; Notes:
+;; * mult is only permitted with a constant shift amount
+;; * patterns that permit register shift amounts only in ARM mode use
+;; shift_amount_operand; patterns that always allow registers do not,
+;; so we don't have to worry about that sort of thing here.
(define_special_predicate "shift_operator"
(and (ior (ior (and (match_code "mult")
(match_test "power_of_two_operand (XEXP (op, 1), mode)"))
(and (match_code "rotate")
(match_test "GET_CODE (XEXP (op, 1)) == CONST_INT
&& ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32")))
- (match_code "ashift,ashiftrt,lshiftrt,rotatert"))
+ (and (match_code "ashift,ashiftrt,lshiftrt,rotatert")
+ (match_test "GET_CODE (XEXP (op, 1)) != CONST_INT
+ || ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32")))
(match_test "mode == GET_MODE (op)")))
;; True for shift operators which can be used with saturation instructions.
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 0bbfa9b735a..eea038e3ab4 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -123,7 +123,7 @@ extern bool ix86_expand_int_movcc (rtx[]);
extern bool ix86_expand_fp_movcc (rtx[]);
extern bool ix86_expand_fp_vcond (rtx[]);
extern bool ix86_expand_int_vcond (rtx[]);
-extern void ix86_expand_vshuffle (rtx[]);
+extern void ix86_expand_vec_perm (rtx[]);
extern void ix86_expand_sse_unpack (rtx[], bool, bool);
extern bool ix86_expand_int_addcc (rtx[]);
extern rtx ix86_expand_call (rtx, rtx, rtx, rtx, rtx, bool);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index faad3a53b88..21ce9b26ce9 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -3057,6 +3057,22 @@ ix86_option_override_internal (bool main_args_p)
PTA_64BIT /* flags are only used for -march switch. */ },
};
+ /* -mrecip options. */
+ static struct
+ {
+ const char *string; /* option name */
+ unsigned int mask; /* mask bits to set */
+ }
+ const recip_options[] =
+ {
+ { "all", RECIP_MASK_ALL },
+ { "none", RECIP_MASK_NONE },
+ { "div", RECIP_MASK_DIV },
+ { "sqrt", RECIP_MASK_SQRT },
+ { "vec-div", RECIP_MASK_VEC_DIV },
+ { "vec-sqrt", RECIP_MASK_VEC_SQRT },
+ };
+
int const pta_size = ARRAY_SIZE (processor_alias_table);
/* Set up prefix/suffix so the error messages refer to either the command
@@ -3814,6 +3830,56 @@ ix86_option_override_internal (bool main_args_p)
target_flags &= ~MASK_VZEROUPPER;
}
+ if (ix86_recip_name)
+ {
+ char *p = ASTRDUP (ix86_recip_name);
+ char *q;
+ unsigned int mask, i;
+ bool invert;
+
+ while ((q = strtok (p, ",")) != NULL)
+ {
+ p = NULL;
+ if (*q == '!')
+ {
+ invert = true;
+ q++;
+ }
+ else
+ invert = false;
+
+ if (!strcmp (q, "default"))
+ mask = RECIP_MASK_ALL;
+ else
+ {
+ for (i = 0; i < ARRAY_SIZE (recip_options); i++)
+ if (!strcmp (q, recip_options[i].string))
+ {
+ mask = recip_options[i].mask;
+ break;
+ }
+
+ if (i == ARRAY_SIZE (recip_options))
+ {
+ error ("unknown option for -mrecip=%s", q);
+ invert = false;
+ mask = RECIP_MASK_NONE;
+ }
+ }
+
+ recip_mask_explicit |= mask;
+ if (invert)
+ recip_mask &= ~mask;
+ else
+ recip_mask |= mask;
+ }
+ }
+
+ if (TARGET_RECIP)
+ recip_mask |= RECIP_MASK_ALL & ~recip_mask_explicit;
+ else if (target_flags_explicit & MASK_RECIP)
+ recip_mask &= ~(RECIP_MASK_ALL & ~recip_mask_explicit);
+
/* Save the initial options in case the user does function specific
options. */
if (main_args_p)
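A hedged usage note, inferred from the parsing loop above rather than
quoted from documentation: the new option takes a comma-separated list of
the recip_options names with optional "!" inversion, so for example

    gcc -O2 -ffast-math -mrecip=all,!vec-sqrt foo.c

would enable the div, sqrt and vec-div estimates while keeping vec-sqrt
disabled; "default" is accepted as a synonym for "all".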
@@ -3946,6 +4012,7 @@ ix86_function_specific_save (struct cl_target_option *ptr)
ptr->arch_specified = ix86_arch_specified;
ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
ptr->ix86_target_flags_explicit = target_flags_explicit;
+ ptr->x_recip_mask_explicit = recip_mask_explicit;
/* The fields are char but the variables are not; make sure the
values fit in the fields. */
@@ -3973,6 +4040,7 @@ ix86_function_specific_restore (struct cl_target_option *ptr)
ix86_arch_specified = ptr->arch_specified;
ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
target_flags_explicit = ptr->ix86_target_flags_explicit;
+ recip_mask_explicit = ptr->x_recip_mask_explicit;
/* Recreate the arch feature tests if the arch changed */
if (old_arch != ix86_arch)
@@ -15730,6 +15798,12 @@ ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
if (MEM_P (src1) && !rtx_equal_p (dst, src1))
src1 = force_reg (mode, src1);
+ /* Improve address combine: keep a memory operand out of an integer
+ addition, so that the add can later be combined into an address.  */
+ if (code == PLUS
+ && GET_MODE_CLASS (mode) == MODE_INT
+ && MEM_P (src2))
+ src2 = force_reg (mode, src2);
+
operands[1] = src1;
operands[2] = src2;
return dst;
@@ -18873,7 +18947,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
enum machine_mode mode = GET_MODE (dest);
rtx t2, t3, x;
- if (vector_all_ones_operand (op_true, GET_MODE (op_true))
+ if (vector_all_ones_operand (op_true, mode)
&& rtx_equal_p (op_false, CONST0_RTX (mode)))
{
emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
@@ -19102,7 +19176,8 @@ ix86_expand_fp_vcond (rtx operands[])
bool
ix86_expand_int_vcond (rtx operands[])
{
- enum machine_mode mode = GET_MODE (operands[0]);
+ enum machine_mode data_mode = GET_MODE (operands[0]);
+ enum machine_mode mode = GET_MODE (operands[4]);
enum rtx_code code = GET_CODE (operands[3]);
bool negate = false;
rtx x, cop0, cop1;
@@ -19229,32 +19304,150 @@ ix86_expand_int_vcond (rtx operands[])
}
}
- x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
- operands[1+negate], operands[2-negate]);
+ /* Allow the comparison to be done in one mode, but the movcc to
+ happen in another mode. */
+ if (data_mode == mode)
+ {
+ x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
+ operands[1+negate], operands[2-negate]);
+ }
+ else
+ {
+ gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
+ x = ix86_expand_sse_cmp (gen_lowpart (mode, operands[0]),
+ code, cop0, cop1,
+ operands[1+negate], operands[2-negate]);
+ x = gen_lowpart (data_mode, x);
+ }
ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
operands[2-negate]);
return true;
}
+/* Expand a variable vector permutation. */
+
void
-ix86_expand_vshuffle (rtx operands[])
+ix86_expand_vec_perm (rtx operands[])
{
rtx target = operands[0];
rtx op0 = operands[1];
rtx op1 = operands[2];
rtx mask = operands[3];
- rtx vt, vec[16];
+ rtx t1, t2, vt, vec[16];
enum machine_mode mode = GET_MODE (op0);
enum machine_mode maskmode = GET_MODE (mask);
int w, e, i;
bool one_operand_shuffle = rtx_equal_p (op0, op1);
- gcc_checking_assert (GET_MODE_BITSIZE (mode) == 128);
-
/* Number of elements in the vector. */
w = GET_MODE_NUNITS (mode);
e = GET_MODE_UNIT_SIZE (mode);
+ gcc_assert (w <= 16);
+
+ if (TARGET_AVX2)
+ {
+ if (mode == V4DImode || mode == V4DFmode)
+ {
+ /* Unfortunately, the VPERMQ and VPERMPD instructions only support
+ a constant shuffle operand. With a tiny bit of effort we can
+ use VPERMD instead. A re-interpretation stall for V4DFmode is
+ unfortunate but there's no avoiding it. */
+ t1 = gen_reg_rtx (V8SImode);
+
+ /* Replicate the low bits of the V4DImode mask into V8SImode:
+ mask = { A B C D }
+ t1 = { A A B B C C D D }. */
+ for (i = 0; i < 4; ++i)
+ vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
+ vt = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, vec));
+ vt = force_reg (V8SImode, vt);
+ mask = gen_lowpart (V8SImode, mask);
+ emit_insn (gen_avx2_permvarv8si (t1, vt, mask));
+
+ /* Multiply the shuffle indices by two. */
+ emit_insn (gen_avx2_lshlv8si3 (t1, t1, const1_rtx));
+
+ /* Add one to the odd shuffle indices:
+ t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
+ for (i = 0; i < 4; ++i)
+ {
+ vec[i * 2] = const0_rtx;
+ vec[i * 2 + 1] = const1_rtx;
+ }
+ vt = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, vec));
+ vt = force_const_mem (V8SImode, vt);
+ emit_insn (gen_addv8si3 (t1, t1, vt));
+
+ /* Continue as if V8SImode was used initially. */
+ operands[3] = mask = t1;
+ target = gen_lowpart (V8SImode, target);
+ op0 = gen_lowpart (V8SImode, op0);
+ op1 = gen_lowpart (V8SImode, op1);
+ maskmode = mode = V8SImode;
+ w = 8;
+ e = 4;
+ }
+
+ switch (mode)
+ {
+ case V8SImode:
+ /* The VPERMD and VPERMPS instructions already properly ignore
+ the high bits of the shuffle elements. No need for us to
+ perform an AND ourselves. */
+ if (one_operand_shuffle)
+ emit_insn (gen_avx2_permvarv8si (target, mask, op0));
+ else
+ {
+ t1 = gen_reg_rtx (V8SImode);
+ t2 = gen_reg_rtx (V8SImode);
+ emit_insn (gen_avx2_permvarv8si (t1, mask, op0));
+ emit_insn (gen_avx2_permvarv8si (t2, mask, op1));
+ goto merge_two;
+ }
+ return;
+
+ case V8SFmode:
+ mask = gen_lowpart (V8SFmode, mask);
+ if (one_operand_shuffle)
+ emit_insn (gen_avx2_permvarv8sf (target, mask, op0));
+ else
+ {
+ t1 = gen_reg_rtx (V8SFmode);
+ t2 = gen_reg_rtx (V8SFmode);
+ emit_insn (gen_avx2_permvarv8sf (t1, mask, op0));
+ emit_insn (gen_avx2_permvarv8sf (t2, mask, op1));
+ goto merge_two;
+ }
+ return;
+
+ case V4SImode:
+ /* By combining the two 128-bit input vectors into one 256-bit
+ input vector, we can use VPERMD and VPERMPS for the full
+ two-operand shuffle. */
+ t1 = gen_reg_rtx (V8SImode);
+ t2 = gen_reg_rtx (V8SImode);
+ emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
+ emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
+ emit_insn (gen_avx2_permvarv8si (t1, t2, t1));
+ emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
+ return;
+
+ case V4SFmode:
+ t1 = gen_reg_rtx (V8SFmode);
+ t2 = gen_reg_rtx (V8SFmode);
+ mask = gen_lowpart (V4SFmode, mask);
+ emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
+ emit_insn (gen_avx_vec_concatv8sf (t2, mask, mask));
+ emit_insn (gen_avx2_permvarv8sf (t1, t2, t1));
+ emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
+ return;
+
+ default:
+ gcc_assert (GET_MODE_SIZE (mode) <= 16);
+ break;
+ }
+ }
if (TARGET_XOP)
{
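To make the V4DImode/V4DFmode path above concrete, here is a rough
standalone sketch of the same VPERMD trick in AVX2 intrinsics (compile
with -mavx2); it illustrates the mask transformation and is not code
from the patch:

    #include <immintrin.h>

    /* Variable 4x64-bit permute: idx carries one element index in the
       low dword of each qword, matching the expander's assumptions.  */
    static __m256i
    perm_var_epi64 (__m256i val, __m256i idx)
    {
      /* Replicate the low dword of each qword:
         { A B C D } -> { A A B B C C D D }.  */
      const __m256i dup = _mm256_setr_epi32 (0, 0, 2, 2, 4, 4, 6, 6);
      __m256i t = _mm256_permutevar8x32_epi32 (idx, dup);
      /* Scale to dword indices and bump the odd lanes:
         t = { A*2, A*2+1, B*2, B*2+1, ... }.  */
      t = _mm256_slli_epi32 (t, 1);
      t = _mm256_add_epi32 (t, _mm256_setr_epi32 (0, 1, 0, 1, 0, 1, 0, 1));
      /* A single VPERMD now moves both halves of every 64-bit element.  */
      return _mm256_permutevar8x32_epi32 (val, t);
    }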
@@ -19326,7 +19519,7 @@ ix86_expand_vshuffle (rtx operands[])
}
else
{
- rtx xops[6], t1, t2;
+ rtx xops[6];
bool ok;
/* Shuffle the two input vectors independently. */
@@ -19335,6 +19528,7 @@ ix86_expand_vshuffle (rtx operands[])
emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
+ merge_two:
/* Then merge them together. The key is whether any given control
element contained a bit set that indicates the second word. */
mask = operands[3];
@@ -19361,9 +19555,9 @@ ix86_expand_vshuffle (rtx operands[])
mask = expand_simple_binop (maskmode, AND, mask, vt,
NULL_RTX, 0, OPTAB_DIRECT);
- xops[0] = gen_lowpart (maskmode, operands[0]);
- xops[1] = gen_lowpart (maskmode, t2);
- xops[2] = gen_lowpart (maskmode, t1);
+ xops[0] = operands[0];
+ xops[1] = gen_lowpart (mode, t2);
+ xops[2] = gen_lowpart (mode, t1);
xops[3] = gen_rtx_EQ (maskmode, mask, vt);
xops[4] = mask;
xops[5] = vt;
@@ -25915,7 +26109,7 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
- { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv4di, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
@@ -25979,7 +26173,7 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
- { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlqv4di3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
@@ -25990,7 +26184,7 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
- { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrqv4di3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
@@ -27620,6 +27814,11 @@ ix86_expand_args_builtin (const struct builtin_description *d,
rmode = V1TImode;
nargs_constant = 1;
break;
+ case V4DI_FTYPE_V4DI_INT_CONVERT:
+ nargs = 2;
+ rmode = V2TImode;
+ nargs_constant = 1;
+ break;
case V8HI_FTYPE_V8HI_INT:
case V8HI_FTYPE_V8SF_INT:
case V8HI_FTYPE_V4SF_INT:
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 7d6e05827b0..bd69ec2b5e3 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2315,6 +2315,19 @@ extern void debug_dispatch_window (int);
((FLAGS) & (IX86_CALLCVT_CDECL | IX86_CALLCVT_STDCALL \
| IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL))
+#define RECIP_MASK_NONE 0x00
+#define RECIP_MASK_DIV 0x01
+#define RECIP_MASK_SQRT 0x02
+#define RECIP_MASK_VEC_DIV 0x04
+#define RECIP_MASK_VEC_SQRT 0x08
+#define RECIP_MASK_ALL (RECIP_MASK_DIV | RECIP_MASK_SQRT \
+ | RECIP_MASK_VEC_DIV | RECIP_MASK_VEC_SQRT)
+
+#define TARGET_RECIP_DIV ((recip_mask & RECIP_MASK_DIV) != 0)
+#define TARGET_RECIP_SQRT ((recip_mask & RECIP_MASK_SQRT) != 0)
+#define TARGET_RECIP_VEC_DIV ((recip_mask & RECIP_MASK_VEC_DIV) != 0)
+#define TARGET_RECIP_VEC_SQRT ((recip_mask & RECIP_MASK_VEC_SQRT) != 0)
+
/*
Local variables:
version-control: t
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index b8a649c2ee4..a11a71b6c2e 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -7062,7 +7062,9 @@
"(TARGET_80387 && X87_ENABLE_ARITH (SFmode))
|| TARGET_SSE_MATH"
{
- if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
+ if (TARGET_SSE_MATH
+ && TARGET_RECIP_DIV
+ && optimize_insn_for_speed_p ()
&& flag_finite_math_only && !flag_trapping_math
&& flag_unsafe_math_optimizations)
{
@@ -13438,7 +13440,9 @@
|| (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
{
if (<MODE>mode == SFmode
- && TARGET_SSE_MATH && TARGET_RECIP && !optimize_function_for_size_p (cfun)
+ && TARGET_SSE_MATH
+ && TARGET_RECIP_SQRT
+ && !optimize_function_for_size_p (cfun)
&& flag_finite_math_only && !flag_trapping_math
&& flag_unsafe_math_optimizations)
{
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 8e4d51b3f9f..43009a3c2a6 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -31,6 +31,15 @@ HOST_WIDE_INT ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAU
Variable
HOST_WIDE_INT ix86_isa_flags_explicit
+TargetVariable
+int recip_mask
+
+Variable
+int recip_mask_explicit
+
+TargetSave
+int x_recip_mask_explicit
+
;; Definitions to add to the cl_target_option structure
;; -march= processor
TargetSave
@@ -373,6 +382,10 @@ mrecip
Target Report Mask(RECIP) Save
Generate reciprocals instead of divss and sqrtss.
+mrecip=
+Target Report RejectNegative Joined Var(ix86_recip_name)
+Control generation of reciprocal estimates.
+
mcld
Target Report Mask(CLD) Save
Generate cld instruction in the function prologue.
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 89cc8a75b55..349f5b0c427 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1210,3 +1210,12 @@
return false;
return true;
})
+
+;; Return true if OP is a proper third operand to vpblendw256.
+(define_predicate "avx2_pblendw_operand"
+ (match_code "const_int")
+{
+ HOST_WIDE_INT val = INTVAL (op);
+ HOST_WIDE_INT low = val & 0xff;
+ return val == ((low << 8) | low);
+})
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ee9cf0b4218..f135716c583 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -18,13 +18,13 @@
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
-;; All vector modes including V1TImode, used in move patterns.
+;; All vector modes including V?TImode, used in move patterns.
(define_mode_iterator V16
[(V32QI "TARGET_AVX") V16QI
(V16HI "TARGET_AVX") V8HI
(V8SI "TARGET_AVX") V4SI
(V4DI "TARGET_AVX") V2DI
- V1TI
+ (V2TI "TARGET_AVX") V1TI
(V8SF "TARGET_AVX") V4SF
(V4DF "TARGET_AVX") V2DF])
@@ -99,11 +99,13 @@
(define_mode_iterator VI8_AVX2
[(V4DI "TARGET_AVX2") V2DI])
+;; ??? We should probably use TImode instead.
(define_mode_iterator VIMAX_AVX2
[(V2TI "TARGET_AVX2") V1TI])
+;; ??? This should probably be dropped in favor of VIMAX_AVX2.
(define_mode_iterator SSESCALARMODE
- [(V4DI "TARGET_AVX2") TI])
+ [(V2TI "TARGET_AVX2") TI])
(define_mode_iterator VI12_AVX2
[(V32QI "TARGET_AVX2") V16QI
@@ -147,7 +149,7 @@
(V8HI "ssse3") (V16HI "avx2")
(V4SI "ssse3") (V8SI "avx2")
(V2DI "ssse3") (V4DI "avx2")
- (TI "ssse3")])
+ (TI "ssse3") (V2TI "avx2")])
(define_mode_attr sse4_1_avx2
[(V16QI "sse4_1") (V32QI "avx2")
@@ -230,19 +232,16 @@
(V4SF "V4SF") (V2DF "V2DF")
(TI "TI")])
-;; All 128bit vector modes
-(define_mode_attr sseshuffint
- [(V16QI "V16QI") (V8HI "V8HI")
- (V4SI "V4SI") (V2DI "V2DI")
- (V4SF "V4SI") (V2DF "V2DI")])
-
;; Mapping of vector float modes to an integer mode of the same size
(define_mode_attr sseintvecmode
[(V8SF "V8SI") (V4DF "V4DI")
(V4SF "V4SI") (V2DF "V2DI")
(V4DF "V4DI") (V8SF "V8SI")
(V8SI "V8SI") (V4DI "V4DI")
- (V4SI "V4SI") (V2DI "V2DI")])
+ (V4SI "V4SI") (V2DI "V2DI")
+ (V16HI "V16HI") (V8HI "V8HI")
+ (V32QI "V32QI") (V16QI "V16QI")
+ ])
;; Mapping of vector modes to a vector mode of double size
(define_mode_attr ssedoublevecmode
@@ -779,7 +778,9 @@
{
ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
- if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
+ if (TARGET_SSE_MATH
+ && TARGET_RECIP_VEC_DIV
+ && !optimize_insn_for_size_p ()
&& flag_finite_math_only && !flag_trapping_math
&& flag_unsafe_math_optimizations)
{
@@ -857,7 +858,9 @@
(sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))]
"TARGET_SSE"
{
- if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
+ if (TARGET_SSE_MATH
+ && TARGET_RECIP_VEC_SQRT
+ && !optimize_insn_for_size_p ()
&& flag_finite_math_only && !flag_trapping_math
&& flag_unsafe_math_optimizations)
{
@@ -5648,21 +5651,6 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx2_lshrqv4di3"
- [(set (match_operand:V4DI 0 "register_operand" "=x")
- (lshiftrt:V4DI
- (match_operand:V4DI 1 "register_operand" "x")
- (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
- "TARGET_AVX2"
-{
- operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
- return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
-}
- [(set_attr "type" "sseishft")
- (set_attr "prefix" "vex")
- (set_attr "length_immediate" "1")
- (set_attr "mode" "OI")])
-
(define_insn "lshr<mode>3"
[(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
(lshiftrt:VI248_AVX2
@@ -5682,20 +5670,6 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx2_lshlqv4di3"
- [(set (match_operand:V4DI 0 "register_operand" "=x")
- (ashift:V4DI (match_operand:V4DI 1 "register_operand" "x")
- (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
- "TARGET_AVX2"
-{
- operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
- return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
-}
- [(set_attr "type" "sseishft")
- (set_attr "prefix" "vex")
- (set_attr "length_immediate" "1")
- (set_attr "mode" "OI")])
-
(define_insn "avx2_lshl<mode>3"
[(set (match_operand:VI248_256 0 "register_operand" "=x")
(ashift:VI248_256
@@ -6222,14 +6196,22 @@
DONE;
})
-(define_expand "vshuffle<mode>"
- [(match_operand:V_128 0 "register_operand" "")
- (match_operand:V_128 1 "register_operand" "")
- (match_operand:V_128 2 "register_operand" "")
- (match_operand:<sseshuffint> 3 "register_operand" "")]
- "TARGET_SSSE3 || TARGET_AVX"
+;; ??? Irritatingly, the 256-bit VPSHUFB only shuffles within the 128-bit
+;; lanes. For now, we don't try to support V32QImode or V16HImode. So we
+;; don't want to use VI_AVX2.
+(define_mode_iterator VEC_PERM_AVX2
+ [V16QI V8HI V4SI V2DI V4SF V2DF
+ (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
+ (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")])
+
+(define_expand "vec_perm<mode>"
+ [(match_operand:VEC_PERM_AVX2 0 "register_operand" "")
+ (match_operand:VEC_PERM_AVX2 1 "register_operand" "")
+ (match_operand:VEC_PERM_AVX2 2 "register_operand" "")
+ (match_operand:<sseintvecmode> 3 "register_operand" "")]
+ "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
{
- ix86_expand_vshuffle (operands);
+ ix86_expand_vec_perm (operands);
DONE;
})
@@ -9435,11 +9417,11 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "<sse4_1_avx2>_pblendw"
- [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
- (vec_merge:VI2_AVX2
- (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")
- (match_operand:VI2_AVX2 1 "register_operand" "0,x")
+(define_insn "sse4_1_pblendw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x,x")
+ (vec_merge:V8HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
+ (match_operand:V8HI 1 "register_operand" "0,x")
(match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
"TARGET_SSE4_1"
"@
@@ -9450,7 +9432,37 @@
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
(set_attr "prefix" "orig,vex")
- (set_attr "mode" "<sseinsnmode>")])
+ (set_attr "mode" "TI")])
+
+;; The builtin uses an 8-bit immediate.  Expand that to the 16-bit mask
+;; with the byte duplicated, which is what the insn below expects.
+(define_expand "avx2_pblendw"
+ [(set (match_operand:V16HI 0 "register_operand" "")
+ (vec_merge:V16HI
+ (match_operand:V16HI 2 "nonimmediate_operand" "")
+ (match_operand:V16HI 1 "register_operand" "")
+ (match_operand:SI 3 "const_0_to_255_operand" "")))]
+ "TARGET_AVX2"
+{
+ HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
+ operands[3] = GEN_INT (val << 8 | val);
+})
+
+(define_insn "*avx2_pblendw"
+ [(set (match_operand:V16HI 0 "register_operand" "=x")
+ (vec_merge:V16HI
+ (match_operand:V16HI 2 "nonimmediate_operand" "xm")
+ (match_operand:V16HI 1 "register_operand" "x")
+ (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
+ "TARGET_AVX2"
+{
+ operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
+ return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
(define_insn "avx2_pblendd<mode>"
[(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
@@ -12393,7 +12405,7 @@
(set_attr "prefix" "vex")
(set_attr "mode" "TI")])
-(define_insn "*vec_concat<mode>_avx"
+(define_insn "avx_vec_concat<mode>"
[(set (match_operand:V_256 0 "register_operand" "=x,x")
(vec_concat:V_256
(match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
diff --git a/gcc/config/rs6000/aix.h b/gcc/config/rs6000/aix.h
index 2c678a3a247..29eabbb6fd8 100644
--- a/gcc/config/rs6000/aix.h
+++ b/gcc/config/rs6000/aix.h
@@ -97,6 +97,9 @@
{ \
builtin_define ("_IBMR2"); \
builtin_define ("_POWER"); \
+ builtin_define ("__powerpc__"); \
+ builtin_define ("__PPC__"); \
+ builtin_define ("__unix__"); \
builtin_define ("_AIX"); \
builtin_define ("_AIX32"); \
builtin_define ("_AIX41"); \
diff --git a/gcc/config/rx/rx.md b/gcc/config/rx/rx.md
index 708f9444ca9..a55b62f8f57 100644
--- a/gcc/config/rx/rx.md
+++ b/gcc/config/rx/rx.md
@@ -1199,11 +1199,11 @@
(set_attr "timings" "11,11,11,11,11,33")]
)
-(define_insn "smin<int_modes:mode>3"
- [(set (match_operand:int_modes 0 "register_operand" "=r,r,r,r,r,r")
- (smin:int_modes (match_operand:int_modes 1 "register_operand" "%0,0,0,0,0,0")
- (match_operand:int_modes 2 "rx_source_operand"
- "r,Sint08,Sint16,Sint24,i,Q")))]
+(define_insn "sminsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r")
+ (smin:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0")
+ (match_operand:SI 2 "rx_source_operand"
+ "r,Sint08,Sint16,Sint24,i,Q")))]
""
"min\t%Q2, %0"
[(set_attr "length" "3,4,5,6,7,6")
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 22f05f9fd21..ffca91a107a 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -3604,7 +3604,8 @@ s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
{
rtx insn;
- gcc_assert (flag_pic);
+ if (!flag_pic)
+ emit_insn (s390_load_got ());
if (!s390_tls_symbol)
s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
@@ -7859,6 +7860,12 @@ s390_load_got (void)
{
rtx insns;
+ /* We cannot use pic_offset_table_rtx here, since this function is
+ also used for non-PIC code when __tls_get_offset is called; in
+ that case neither PIC_OFFSET_TABLE_REGNUM nor pic_offset_table_rtx
+ is usable. */
+ rtx got_rtx = gen_rtx_REG (Pmode, 12);
+
if (!got_symbol)
{
got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
@@ -7869,7 +7876,7 @@ s390_load_got (void)
if (TARGET_CPU_ZARCH)
{
- emit_move_insn (pic_offset_table_rtx, got_symbol);
+ emit_move_insn (got_rtx, got_symbol);
}
else
{
@@ -7880,13 +7887,13 @@ s390_load_got (void)
offset = gen_rtx_CONST (Pmode, offset);
offset = force_const_mem (Pmode, offset);
- emit_move_insn (pic_offset_table_rtx, offset);
+ emit_move_insn (got_rtx, offset);
offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (offset, 0)),
UNSPEC_LTREL_BASE);
- offset = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, offset);
+ offset = gen_rtx_PLUS (Pmode, got_rtx, offset);
- emit_move_insn (pic_offset_table_rtx, offset);
+ emit_move_insn (got_rtx, offset);
}
insns = get_insns ();
@@ -9827,8 +9834,7 @@ s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
/* s390_function_ok_for_sibcall should
have denied sibcalls in this case. */
gcc_assert (retaddr_reg != NULL_RTX);
-
- use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
+ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
}
return insn;
}
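A hypothetical source-level trigger for the new non-PIC path, for
illustration (the attribute forces the TLS model; without it, non-PIC
code would normally use an exec model):

    /* Even without -fPIC, this access calls __tls_get_offset, which
       needs _GLOBAL_OFFSET_TABLE_ loaded into %r12.  */
    static __thread int counter
      __attribute__ ((tls_model ("global-dynamic")));

    int bump (void) { return ++counter; }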
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 018f6b49214..4c9a40f6e8e 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -367,6 +367,10 @@
(define_mode_iterator DW [(TI "TARGET_ZARCH") (DI "!TARGET_ZARCH")])
(define_mode_iterator W [(DI "TARGET_ZARCH") (SI "!TARGET_ZARCH")])
+;; Used by the umul pattern to express modes having half the size.
+(define_mode_attr DWH [(TI "DI") (DI "SI")])
+(define_mode_attr dwh [(TI "di") (DI "si")])
+
;; This mode iterator allows the QI and HI patterns to be defined from
;; the same template.
(define_mode_iterator HQI [HI QI])
@@ -5456,21 +5460,22 @@
(set_attr "cpu_facility" "*,*,z10")])
;
-; umulsidi3 instruction pattern(s).
+; umul instruction pattern(s).
;
-(define_insn "umulsidi3"
- [(set (match_operand:DI 0 "register_operand" "=d,d")
- (mult:DI (zero_extend:DI
- (match_operand:SI 1 "register_operand" "%0,0"))
- (zero_extend:DI
- (match_operand:SI 2 "nonimmediate_operand" "d,RT"))))]
- "!TARGET_ZARCH && TARGET_CPU_ZARCH"
+; mlr, ml, mlgr, mlg
+(define_insn "umul<dwh><mode>3"
+ [(set (match_operand:DW 0 "register_operand" "=d, d")
+ (mult:DW (zero_extend:DW
+ (match_operand:<DWH> 1 "register_operand" "%0, 0"))
+ (zero_extend:DW
+ (match_operand:<DWH> 2 "nonimmediate_operand" " d,RT"))))]
+ "TARGET_CPU_ZARCH"
"@
- mlr\t%0,%2
- ml\t%0,%2"
+ ml<tg>r\t%0,%2
+ ml<tg>\t%0,%2"
[(set_attr "op_type" "RRE,RXY")
- (set_attr "type" "imulsi")])
+ (set_attr "type" "imul<dwh>")])
;
; mul(tf|df|sf|td|dd)3 instruction pattern(s).
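Read through the DW/DWH iterators, the template instantiates a widening
multiply at each width.  A hedged C rendering of what the two variants
compute, with the mnemonics taken from the pattern itself:

    /* umulsidi3: 32x32 -> 64, mlr/ml.  */
    unsigned long long
    umulsidi (unsigned int a, unsigned int b)
    {
      return (unsigned long long) a * b;
    }

    /* umulditi3: 64x64 -> 128, mlgr/mlg (TARGET_ZARCH only).  */
    unsigned __int128
    umulditi (unsigned long long a, unsigned long long b)
    {
      return (unsigned __int128) a * b;
    }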
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index 15552b2b1c9..a6eba6ca3a6 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -206,8 +206,6 @@
(define_mode_iterator V64N8 [V2SI V4HI])
-(define_mode_iterator SIDI [SI DI])
-
;; The upper 32 fp regs on the v9 can't hold SFmode values. To deal with this
;; a second register class, EXTRA_FP_REGS, exists for the v9 chip. The name
;; is a bit of a misnomer as it covers all 64 fp regs. The corresponding
@@ -6806,36 +6804,24 @@
[(set_attr "type" "multi")
(set_attr "length" "8")])
-(define_expand "popcount<mode>2"
- [(set (match_operand:SIDI 0 "register_operand" "")
- (popcount:SIDI (match_operand:SIDI 1 "register_operand" "")))]
+(define_expand "popcountdi2"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (popcount:DI (match_operand:DI 1 "register_operand" "")))]
"TARGET_POPC"
{
if (! TARGET_ARCH64)
{
- emit_insn (gen_popcount<mode>_v8plus (operands[0], operands[1]));
+ emit_insn (gen_popcountdi_v8plus (operands[0], operands[1]));
DONE;
}
})
-(define_insn "*popcount<mode>_sp64"
- [(set (match_operand:SIDI 0 "register_operand" "=r")
- (popcount:SIDI (match_operand:SIDI 1 "register_operand" "r")))]
+(define_insn "*popcountdi_sp64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (popcount:DI (match_operand:DI 1 "register_operand" "r")))]
"TARGET_POPC && TARGET_ARCH64"
"popc\t%1, %0")
-(define_insn "popcountsi_v8plus"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (popcount:SI (match_operand:SI 1 "register_operand" "r")))]
- "TARGET_POPC && ! TARGET_ARCH64"
-{
- if (sparc_check_64 (operands[1], insn) <= 0)
- output_asm_insn ("srl\t%1, 0, %1", operands);
- return "popc\t%1, %0";
-}
- [(set_attr "type" "multi")
- (set_attr "length" "2")])
-
(define_insn "popcountdi_v8plus"
[(set (match_operand:DI 0 "register_operand" "=r")
(popcount:DI (match_operand:DI 1 "register_operand" "r")))
@@ -6849,14 +6835,49 @@
[(set_attr "type" "multi")
(set_attr "length" "5")])
-(define_expand "clz<mode>2"
- [(set (match_operand:SIDI 0 "register_operand" "")
- (clz:SIDI (match_operand:SIDI 1 "register_operand" "")))]
+(define_expand "popcountsi2"
+ [(set (match_dup 2)
+ (zero_extend:DI (match_operand:SI 1 "register_operand" "")))
+ (set (match_operand:SI 0 "register_operand" "")
+ (truncate:SI (popcount:DI (match_dup 2))))]
+ "TARGET_POPC"
+{
+ if (! TARGET_ARCH64)
+ {
+ emit_insn (gen_popcountsi_v8plus (operands[0], operands[1]));
+ DONE;
+ }
+ else
+ operands[2] = gen_reg_rtx (DImode);
+})
+
+(define_insn "*popcountsi_sp64"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (truncate:SI
+ (popcount:DI (match_operand:DI 1 "register_operand" "r"))))]
+ "TARGET_POPC && TARGET_ARCH64"
+ "popc\t%1, %0")
+
+(define_insn "popcountsi_v8plus"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (popcount:SI (match_operand:SI 1 "register_operand" "r")))]
+ "TARGET_POPC && ! TARGET_ARCH64"
+{
+ if (sparc_check_64 (operands[1], insn) <= 0)
+ output_asm_insn ("srl\t%1, 0, %1", operands);
+ return "popc\t%1, %0";
+}
+ [(set_attr "type" "multi")
+ (set_attr "length" "2")])
+
+(define_expand "clzdi2"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (clz:DI (match_operand:DI 1 "register_operand" "")))]
"TARGET_VIS3"
{
if (! TARGET_ARCH64)
{
- emit_insn (gen_clz<mode>_v8plus (operands[0], operands[1]));
+ emit_insn (gen_clzdi_v8plus (operands[0], operands[1]));
DONE;
}
})
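The popcountsi2 expansion above leans on the 64-bit popc instruction.  A
hedged C equivalent of its TARGET_ARCH64 path:

    /* zero_extend:DI the SImode input, count with popc, truncate:SI.  */
    unsigned int
    popcount32 (unsigned int x)
    {
      return (unsigned int) __builtin_popcountll ((unsigned long long) x);
    }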
@@ -6880,13 +6901,33 @@
[(set_attr "type" "multi")
(set_attr "length" "5")])
+(define_expand "clzsi2"
+ [(set (match_dup 2)
+ (zero_extend:DI (match_operand:SI 1 "register_operand" "")))
+ (set (match_dup 3)
+ (truncate:SI (clz:DI (match_dup 2))))
+ (set (match_operand:SI 0 "register_operand" "")
+ (minus:SI (match_dup 3) (const_int 32)))]
+ "TARGET_VIS3"
+{
+ if (! TARGET_ARCH64)
+ {
+ emit_insn (gen_clzsi_v8plus (operands[0], operands[1]));
+ DONE;
+ }
+ else
+ {
+ operands[2] = gen_reg_rtx (DImode);
+ operands[3] = gen_reg_rtx (SImode);
+ }
+})
+
(define_insn "*clzsi_sp64"
[(set (match_operand:SI 0 "register_operand" "=r")
- (clz:SI (match_operand:SI 1 "register_operand" "r")))]
+ (truncate:SI
+ (clz:DI (match_operand:DI 1 "register_operand" "r"))))]
"TARGET_VIS3 && TARGET_ARCH64"
- "lzd\t%1, %0\n\tsub\t%0, 32, %0"
- [(set_attr "type" "multi")
- (set_attr "length" "2")])
+ "lzd\t%1, %0")
(define_insn "clzsi_v8plus"
[(set (match_operand:SI 0 "register_operand" "=r")