Diffstat (limited to 'gcc/config')
 gcc/config/arm/arm.c          |  17
 gcc/config/arm/arm.md         |  15
 gcc/config/arm/predicates.md  |  14
 gcc/config/i386/i386-protos.h |   2
 gcc/config/i386/i386.c        | 229
 gcc/config/i386/i386.h        |  13
 gcc/config/i386/i386.md       |   8
 gcc/config/i386/i386.opt      |  13
 gcc/config/i386/predicates.md |   9
 gcc/config/i386/sse.md        | 124
 gcc/config/rs6000/aix.h       |   3
 gcc/config/rx/rx.md           |  10
 gcc/config/s390/s390.c        |  20
 gcc/config/s390/s390.md       |  27
 gcc/config/sparc/sparc.md     |  99
 15 files changed, 465 insertions(+), 138 deletions(-)
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 51614396da4..2feac6f45e1 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -6550,9 +6550,26 @@ arm_legitimize_reload_address (rtx *p, int opnum, int type,
 			       int ind_levels ATTRIBUTE_UNUSED)
 {
+  /* We must recognize output that we have already generated ourselves.  */
+  if (GET_CODE (*p) == PLUS
+      && GET_CODE (XEXP (*p, 0)) == PLUS
+      && GET_CODE (XEXP (XEXP (*p, 0), 0)) == REG
+      && GET_CODE (XEXP (XEXP (*p, 0), 1)) == CONST_INT
+      && GET_CODE (XEXP (*p, 1)) == CONST_INT)
+    {
+      push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
+		   MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
+		   VOIDmode, 0, 0, opnum, (enum reload_type) type);
+      return true;
+    }
+
   if (GET_CODE (*p) == PLUS
       && GET_CODE (XEXP (*p, 0)) == REG
       && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
+      /* If the base register is equivalent to a constant, let the generic
+	 code handle it.  Otherwise we will run into problems if a future
+	 reload pass decides to rematerialize the constant.  */
+      && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
       && GET_CODE (XEXP (*p, 1)) == CONST_INT)
     {
       HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index baa7eb41b4e..a78ba88f8ce 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -1213,27 +1213,24 @@
 ; ??? Check Thumb-2 split length
 (define_insn_and_split "*arm_subsi3_insn"
-  [(set (match_operand:SI 0 "s_register_operand" "=r,r,rk,r,r")
-	(minus:SI (match_operand:SI 1 "reg_or_int_operand" "rI,r,k,?n,r")
-		  (match_operand:SI 2 "reg_or_int_operand" "r,rI,r, r,?n")))]
+  [(set (match_operand:SI 0 "s_register_operand" "=r,r,rk,r")
+	(minus:SI (match_operand:SI 1 "reg_or_int_operand" "rI,r,k,?n")
+		  (match_operand:SI 2 "reg_or_int_operand" "r,rI,r, r")))]
   "TARGET_32BIT"
   "@
    rsb%?\\t%0, %2, %1
    sub%?\\t%0, %1, %2
    sub%?\\t%0, %1, %2
-   #
    #"
-  "&& ((GET_CODE (operands[1]) == CONST_INT
-       && !const_ok_for_arm (INTVAL (operands[1])))
-       || (GET_CODE (operands[2]) == CONST_INT
-	   && !const_ok_for_arm (INTVAL (operands[2]))))"
+  "&& (GET_CODE (operands[1]) == CONST_INT
+       && !const_ok_for_arm (INTVAL (operands[1])))"
   [(clobber (const_int 0))]
   "
   arm_split_constant (MINUS, SImode, curr_insn,
                       INTVAL (operands[1]), operands[0], operands[2], 0);
   DONE;
   "
-  [(set_attr "length" "4,4,4,16,16")
+  [(set_attr "length" "4,4,4,16")
   (set_attr "predicable" "yes")]
 )
diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
index 27ba6033f78..2c1a138b0df 100644
--- a/gcc/config/arm/predicates.md
+++ b/gcc/config/arm/predicates.md
@@ -129,11 +129,12 @@
   (ior (match_operand 0 "arm_rhs_operand")
        (match_operand 0 "memory_operand")))
 
+;; This doesn't have to do much because the constant is already checked
+;; in the shift_operator predicate.
 (define_predicate "shift_amount_operand"
   (ior (and (match_test "TARGET_ARM")
 	    (match_operand 0 "s_register_operand"))
-       (and (match_code "const_int")
-	    (match_test "((unsigned HOST_WIDE_INT) INTVAL (op)) < 32"))))
+       (match_operand 0 "const_int_operand")))
 
 (define_predicate "arm_add_operand"
   (ior (match_operand 0 "arm_rhs_operand")
@@ -219,13 +220,20 @@
        (match_test "mode == GET_MODE (op)")))
 
 ;; True for shift operators.
+;; Notes:
+;; * mult is only permitted with a constant shift amount
+;; * patterns that permit register shift amounts only in ARM mode use
+;;   shift_amount_operand, patterns that always allow registers do not,
+;;   so we don't have to worry about that sort of thing here.
(define_special_predicate "shift_operator"
   (and (ior (ior (and (match_code "mult")
		       (match_test "power_of_two_operand (XEXP (op, 1), mode)"))
		  (and (match_code "rotate")
		       (match_test "GET_CODE (XEXP (op, 1)) == CONST_INT
				    && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32")))
-	    (match_code "ashift,ashiftrt,lshiftrt,rotatert"))
+	    (and (match_code "ashift,ashiftrt,lshiftrt,rotatert")
+		 (match_test "GET_CODE (XEXP (op, 1)) != CONST_INT
+			      || ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32")))
       (match_test "mode == GET_MODE (op)")))
 
 ;; True for shift operators which can be used with saturation instructions.
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 0bbfa9b735a..eea038e3ab4 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -123,7 +123,7 @@ extern bool ix86_expand_int_movcc (rtx[]);
 extern bool ix86_expand_fp_movcc (rtx[]);
 extern bool ix86_expand_fp_vcond (rtx[]);
 extern bool ix86_expand_int_vcond (rtx[]);
-extern void ix86_expand_vshuffle (rtx[]);
+extern void ix86_expand_vec_perm (rtx[]);
 extern void ix86_expand_sse_unpack (rtx[], bool, bool);
 extern bool ix86_expand_int_addcc (rtx[]);
 extern rtx ix86_expand_call (rtx, rtx, rtx, rtx, rtx, bool);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index faad3a53b88..21ce9b26ce9 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -3057,6 +3057,22 @@ ix86_option_override_internal (bool main_args_p)
       PTA_64BIT /* flags are only used for -march switch.  */ },
     };
 
+  /* -mrecip options.  */
+  static struct
+    {
+      const char *string;           /* option name */
+      unsigned int mask;            /* mask bits to set */
+    }
+  const recip_options[] =
+    {
+      { "all",       RECIP_MASK_ALL },
+      { "none",      RECIP_MASK_NONE },
+      { "div",       RECIP_MASK_DIV },
+      { "sqrt",      RECIP_MASK_SQRT },
+      { "vec-div",   RECIP_MASK_VEC_DIV },
+      { "vec-sqrt",  RECIP_MASK_VEC_SQRT },
+    };
+
   int const pta_size = ARRAY_SIZE (processor_alias_table);
 
   /* Set up prefix/suffix so the error messages refer to either the command
@@ -3814,6 +3830,56 @@ ix86_option_override_internal (bool main_args_p)
       target_flags &= ~MASK_VZEROUPPER;
     }
 
+  if (ix86_recip_name)
+    {
+      char *p = ASTRDUP (ix86_recip_name);
+      char *q;
+      unsigned int mask, i;
+      bool invert;
+
+      while ((q = strtok (p, ",")) != NULL)
+	{
+	  p = NULL;
+	  if (*q == '!')
+	    {
+	      invert = true;
+	      q++;
+	    }
+	  else
+	    invert = false;
+
+	  if (!strcmp (q, "default"))
+	    mask = RECIP_MASK_ALL;
+	  else
+	    {
+	      for (i = 0; i < ARRAY_SIZE (recip_options); i++)
+		if (!strcmp (q, recip_options[i].string))
+		  {
+		    mask = recip_options[i].mask;
+		    break;
+		  }
+
+	      if (i == ARRAY_SIZE (recip_options))
+		{
+		  error ("unknown option for -mrecip=%s", q);
+		  invert = false;
+		  mask = RECIP_MASK_NONE;
+		}
+	    }
+
+	  recip_mask_explicit |= mask;
+	  if (invert)
+	    recip_mask &= ~mask;
+	  else
+	    recip_mask |= mask;
+	}
+    }
+
+  if (TARGET_RECIP)
+    recip_mask |= RECIP_MASK_ALL & ~recip_mask_explicit;
+  else if (target_flags_explicit & MASK_RECIP)
+    recip_mask &= ~(RECIP_MASK_ALL & ~recip_mask_explicit);
+
   /* Save the initial options in case the user does function specific
      options.  */
   if (main_args_p)
@@ -3946,6 +4012,7 @@ ix86_function_specific_save (struct cl_target_option *ptr)
   ptr->arch_specified = ix86_arch_specified;
   ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
   ptr->ix86_target_flags_explicit = target_flags_explicit;
+  ptr->x_recip_mask_explicit = recip_mask_explicit;
 
   /* The fields are char but the variables are not; make sure the
      values fit in the fields.  */
@@ -3973,6 +4040,7 @@ ix86_function_specific_restore (struct cl_target_option *ptr)
   ix86_arch_specified = ptr->arch_specified;
   ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
   target_flags_explicit = ptr->ix86_target_flags_explicit;
+  recip_mask_explicit = ptr->x_recip_mask_explicit;
 
   /* Recreate the arch feature tests if the arch changed */
   if (old_arch != ix86_arch)
@@ -15730,6 +15798,12 @@ ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
   if (MEM_P (src1) && !rtx_equal_p (dst, src1))
     src1 = force_reg (mode, src1);
 
+  /* Improve address combine.  */
+  if (code == PLUS
+      && GET_MODE_CLASS (mode) == MODE_INT
+      && MEM_P (src2))
+    src2 = force_reg (mode, src2);
+
   operands[1] = src1;
   operands[2] = src2;
   return dst;
@@ -18873,7 +18947,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
 {
   enum machine_mode mode = GET_MODE (dest);
   rtx t2, t3, x;
 
-  if (vector_all_ones_operand (op_true, GET_MODE (op_true))
+  if (vector_all_ones_operand (op_true, mode)
       && rtx_equal_p (op_false, CONST0_RTX (mode)))
     {
       emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
@@ -19102,7 +19176,8 @@ ix86_expand_fp_vcond (rtx operands[])
 bool
 ix86_expand_int_vcond (rtx operands[])
 {
-  enum machine_mode mode = GET_MODE (operands[0]);
+  enum machine_mode data_mode = GET_MODE (operands[0]);
+  enum machine_mode mode = GET_MODE (operands[4]);
   enum rtx_code code = GET_CODE (operands[3]);
   bool negate = false;
   rtx x, cop0, cop1;
@@ -19229,32 +19304,150 @@ ix86_expand_int_vcond (rtx operands[])
 	}
     }
 
-  x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
-			   operands[1+negate], operands[2-negate]);
+  /* Allow the comparison to be done in one mode, but the movcc to
+     happen in another mode.  */
+  if (data_mode == mode)
+    {
+      x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
+			       operands[1+negate], operands[2-negate]);
+    }
+  else
+    {
+      gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
+      x = ix86_expand_sse_cmp (gen_lowpart (mode, operands[0]),
+			       code, cop0, cop1,
+			       operands[1+negate], operands[2-negate]);
+      x = gen_lowpart (data_mode, x);
+    }
 
   ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
 			 operands[2-negate]);
   return true;
 }
 
+/* Expand a variable vector permutation.  */
+
 void
-ix86_expand_vshuffle (rtx operands[])
+ix86_expand_vec_perm (rtx operands[])
 {
   rtx target = operands[0];
   rtx op0 = operands[1];
   rtx op1 = operands[2];
   rtx mask = operands[3];
-  rtx vt, vec[16];
+  rtx t1, t2, vt, vec[16];
   enum machine_mode mode = GET_MODE (op0);
   enum machine_mode maskmode = GET_MODE (mask);
   int w, e, i;
   bool one_operand_shuffle = rtx_equal_p (op0, op1);
 
-  gcc_checking_assert (GET_MODE_BITSIZE (mode) == 128);
-
   /* Number of elements in the vector.  */
   w = GET_MODE_NUNITS (mode);
   e = GET_MODE_UNIT_SIZE (mode);
+  gcc_assert (w <= 16);
+
+  if (TARGET_AVX2)
+    {
+      if (mode == V4DImode || mode == V4DFmode)
+	{
+	  /* Unfortunately, the VPERMQ and VPERMPD instructions only support
+	     a constant shuffle operand.  With a tiny bit of effort we can
+	     use VPERMD instead.  A re-interpretation stall for V4DFmode is
+	     unfortunate but there's no avoiding it.  */
+	  t1 = gen_reg_rtx (V8SImode);
+
+	  /* Replicate the low bits of the V4DImode mask into V8SImode:
+	       mask = { A B C D }
+	       t1   = { A A B B C C D D }.  */
+	  for (i = 0; i < 4; ++i)
+	    vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
+	  vt = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, vec));
+	  vt = force_reg (V8SImode, vt);
+	  mask = gen_lowpart (V8SImode, mask);
+	  emit_insn (gen_avx2_permvarv8si (t1, vt, mask));
+
+	  /* Multiply the shuffle indices by two.  */
+	  emit_insn (gen_avx2_lshlv8si3 (t1, t1, const1_rtx));
+
+	  /* Add one to the odd shuffle indices:
+	       t1 = { A*2, A*2+1, B*2, B*2+1, ... }.  */
+	  for (i = 0; i < 4; ++i)
+	    {
+	      vec[i * 2] = const0_rtx;
+	      vec[i * 2 + 1] = const1_rtx;
+	    }
+	  vt = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, vec));
+	  vt = force_const_mem (V8SImode, vt);
+	  emit_insn (gen_addv8si3 (t1, t1, vt));
+
+	  /* Continue as if V8SImode was used initially.  */
+	  operands[3] = mask = t1;
+	  target = gen_lowpart (V8SImode, target);
+	  op0 = gen_lowpart (V8SImode, op0);
+	  op1 = gen_lowpart (V8SImode, op1);
+	  maskmode = mode = V8SImode;
+	  w = 8;
+	  e = 4;
+	}
+
+      switch (mode)
+	{
+	case V8SImode:
+	  /* The VPERMD and VPERMPS instructions already properly ignore
+	     the high bits of the shuffle elements.  No need for us to
+	     perform an AND ourselves.  */
+	  if (one_operand_shuffle)
+	    emit_insn (gen_avx2_permvarv8si (target, mask, op0));
+	  else
+	    {
+	      t1 = gen_reg_rtx (V8SImode);
+	      t2 = gen_reg_rtx (V8SImode);
+	      emit_insn (gen_avx2_permvarv8si (t1, mask, op0));
+	      emit_insn (gen_avx2_permvarv8si (t2, mask, op1));
+	      goto merge_two;
+	    }
+	  return;
+
+	case V8SFmode:
+	  mask = gen_lowpart (V8SFmode, mask);
+	  if (one_operand_shuffle)
+	    emit_insn (gen_avx2_permvarv8sf (target, mask, op0));
+	  else
+	    {
+	      t1 = gen_reg_rtx (V8SFmode);
+	      t2 = gen_reg_rtx (V8SFmode);
+	      emit_insn (gen_avx2_permvarv8sf (t1, mask, op0));
+	      emit_insn (gen_avx2_permvarv8sf (t2, mask, op1));
+	      goto merge_two;
+	    }
+	  return;
+
+	case V4SImode:
+	  /* By combining the two 128-bit input vectors into one 256-bit
+	     input vector, we can use VPERMD and VPERMPS for the full
+	     two-operand shuffle.  */
+	  t1 = gen_reg_rtx (V8SImode);
+	  t2 = gen_reg_rtx (V8SImode);
+	  emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
+	  emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
+	  emit_insn (gen_avx2_permvarv8si (t1, t2, t1));
+	  emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
+	  return;
+
+	case V4SFmode:
+	  t1 = gen_reg_rtx (V8SFmode);
+	  t2 = gen_reg_rtx (V8SFmode);
+	  mask = gen_lowpart (V4SFmode, mask);
+	  emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
+	  emit_insn (gen_avx_vec_concatv8sf (t2, mask, mask));
+	  emit_insn (gen_avx2_permvarv8sf (t1, t2, t1));
+	  emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
+	  return;
+
+	default:
+	  gcc_assert (GET_MODE_SIZE (mode) <= 16);
+	  break;
+	}
+    }
 
   if (TARGET_XOP)
     {
@@ -19326,7 +19519,7 @@
     }
   else
     {
-      rtx xops[6], t1, t2;
+      rtx xops[6];
       bool ok;
 
       /* Shuffle the two input vectors independently.  */
@@ -19335,6 +19528,7 @@
       emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
       emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
 
+ merge_two:
       /* Then merge them together.  The key is whether any given control
          element contained a bit set that indicates the second word.  */
       mask = operands[3];
@@ -19361,9 +19555,9 @@
   mask = expand_simple_binop (maskmode, AND, mask, vt,
			      NULL_RTX, 0, OPTAB_DIRECT);
 
-  xops[0] = gen_lowpart (maskmode, operands[0]);
-  xops[1] = gen_lowpart (maskmode, t2);
-  xops[2] = gen_lowpart (maskmode, t1);
+  xops[0] = operands[0];
+  xops[1] = gen_lowpart (mode, t2);
+  xops[2] = gen_lowpart (mode, t1);
   xops[3] = gen_rtx_EQ (maskmode, mask, vt);
   xops[4] = mask;
   xops[5] = vt;
@@ -25915,7 +26109,7 @@ static const struct builtin_description bdesc_args[] =
   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
-  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv4di, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
+  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
   { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
@@ -25979,7 +26173,7 @@ static const struct builtin_description bdesc_args[] =
   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
-  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlqv4di3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
+  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
@@ -25990,7 +26184,7 @@ static const struct builtin_description bdesc_args[] =
   { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
   { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
   { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
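(Editorial aside, not part of the patch: the renamed ix86_expand_vec_perm is the expander behind GCC's generic __builtin_shuffle, so the new AVX2 paths above can be exercised from C roughly as follows. The function name "perm" is just for illustration.)

typedef int v8si __attribute__ ((vector_size (32)));

/* Variable two-operand permute: mask elements 0-7 select from a,
   8-15 select from b.  Compiled with -mavx2 this can now take the
   VPERMD-based path added above; previously the expander was limited
   to 128-bit vectors (note the removed gcc_checking_assert).  */
v8si
perm (v8si a, v8si b, v8si mask)
{
  return __builtin_shuffle (a, b, mask);
}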
-  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrqv4di3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
+  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
   { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
   { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
   { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
@@ -27620,6 +27814,11 @@
       rmode = V1TImode;
       nargs_constant = 1;
       break;
+    case V4DI_FTYPE_V4DI_INT_CONVERT:
+      nargs = 2;
+      rmode = V2TImode;
+      nargs_constant = 1;
+      break;
     case V8HI_FTYPE_V8HI_INT:
     case V8HI_FTYPE_V8SF_INT:
     case V8HI_FTYPE_V4SF_INT:
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 7d6e05827b0..bd69ec2b5e3 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2315,6 +2315,19 @@ extern void debug_dispatch_window (int);
   ((FLAGS) & (IX86_CALLCVT_CDECL | IX86_CALLCVT_STDCALL \
	       | IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL))
 
+#define RECIP_MASK_NONE		0x00
+#define RECIP_MASK_DIV		0x01
+#define RECIP_MASK_SQRT		0x02
+#define RECIP_MASK_VEC_DIV	0x04
+#define RECIP_MASK_VEC_SQRT	0x08
+#define RECIP_MASK_ALL	(RECIP_MASK_DIV | RECIP_MASK_SQRT \
+			 | RECIP_MASK_VEC_DIV | RECIP_MASK_VEC_SQRT)
+
+#define TARGET_RECIP_DIV	((recip_mask & RECIP_MASK_DIV) != 0)
+#define TARGET_RECIP_SQRT	((recip_mask & RECIP_MASK_SQRT) != 0)
+#define TARGET_RECIP_VEC_DIV	((recip_mask & RECIP_MASK_VEC_DIV) != 0)
+#define TARGET_RECIP_VEC_SQRT	((recip_mask & RECIP_MASK_VEC_SQRT) != 0)
+
 /*
 Local variables:
 version-control: t
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index b8a649c2ee4..a11a71b6c2e 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -7062,7 +7062,9 @@
   "(TARGET_80387 && X87_ENABLE_ARITH (SFmode))
    || TARGET_SSE_MATH"
 {
-  if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
+  if (TARGET_SSE_MATH
+      && TARGET_RECIP_DIV
+      && optimize_insn_for_speed_p ()
       && flag_finite_math_only && !flag_trapping_math
       && flag_unsafe_math_optimizations)
     {
@@ -13438,7 +13440,9 @@
    || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
 {
   if (<MODE>mode == SFmode
-      && TARGET_SSE_MATH && TARGET_RECIP && !optimize_function_for_size_p (cfun)
+      && TARGET_SSE_MATH
+      && TARGET_RECIP_SQRT
+      && !optimize_function_for_size_p (cfun)
       && flag_finite_math_only && !flag_trapping_math
       && flag_unsafe_math_optimizations)
     {
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 8e4d51b3f9f..43009a3c2a6 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -31,6 +31,15 @@ HOST_WIDE_INT ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAU
 Variable
 HOST_WIDE_INT ix86_isa_flags_explicit
 
+TargetVariable
+int recip_mask
+
+Variable
+int recip_mask_explicit
+
+TargetSave
+int x_recip_mask_explicit
+
 ;; Definitions to add to the cl_target_option structure
 ;; -march= processor
 TargetSave
@@ -373,6 +382,10 @@ mrecip
 Target Report Mask(RECIP) Save
 Generate reciprocals instead of divss and sqrtss.
 
+mrecip=
+Target Report RejectNegative Joined Var(ix86_recip_name)
+Control generation of reciprocal estimates.
+
 mcld
 Target Report Mask(CLD) Save
 Generate cld instruction in the function prologue.
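(Editorial sketch, not part of the patch: how the new -mrecip= option is meant to be driven. The flag spellings come from the recip_options[] table and the '!' handling added to ix86_option_override_internal above; the file name recip.c is hypothetical.)

/* Compile with something along the lines of

       gcc -O2 -ffast-math -mrecip=div,vec-div -c recip.c

   -ffast-math supplies the flag_finite_math_only, !flag_trapping_math
   and flag_unsafe_math_optimizations conditions tested in i386.md and
   sse.md, and the mask enables the reciprocal paths for scalar and
   vector division while leaving sqrt alone.  "-mrecip=all,!sqrt"
   would enable everything except the scalar sqrt estimate, since an
   explicitly negated entry is recorded in recip_mask_explicit.  */

float
scale (float x, float y)
{
  return x / y;   /* candidate for the TARGET_RECIP_DIV path */
}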
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 89cc8a75b55..349f5b0c427 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1210,3 +1210,12 @@
     return false;
   return true;
 })
+
+;; Return true if OP is a proper third operand to vpblendw256.
+(define_predicate "avx2_pblendw_operand"
+  (match_code "const_int")
+{
+  HOST_WIDE_INT val = INTVAL (op);
+  HOST_WIDE_INT low = val & 0xff;
+  return val == ((low << 8) | low);
+})
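(Editorial sketch, not part of the patch: the predicate above pairs with the avx2_pblendw expander added to sse.md further down. The builtin's 8-bit immediate is duplicated so the same byte controls both 128-bit lanes, and the predicate accepts exactly the duplicated form.)

#include <assert.h>

/* Mirrors the avx2_pblendw expander: widen the 8-bit immediate.  */
static int
widen_pblendw_imm (int val)
{
  int low = val & 0xff;
  return (low << 8) | low;
}

/* Mirrors avx2_pblendw_operand: accept only duplicated bytes.  */
static int
pblendw_operand_ok (int val)
{
  int low = val & 0xff;
  return val == ((low << 8) | low);
}

int
main (void)
{
  assert (pblendw_operand_ok (widen_pblendw_imm (0xa5)));
  assert (!pblendw_operand_ok (0x0180));   /* bytes differ */
  return 0;
}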
- (set_attr "mode" "OI")]) - (define_insn "lshr<mode>3" [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x") (lshiftrt:VI248_AVX2 @@ -5682,20 +5670,6 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "avx2_lshlqv4di3" - [(set (match_operand:V4DI 0 "register_operand" "=x") - (ashift:V4DI (match_operand:V4DI 1 "register_operand" "x") - (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))] - "TARGET_AVX2" -{ - operands[2] = GEN_INT (INTVAL (operands[2]) / 8); - return "vpslldq\t{%2, %1, %0|%0, %1, %2}"; -} - [(set_attr "type" "sseishft") - (set_attr "prefix" "vex") - (set_attr "length_immediate" "1") - (set_attr "mode" "OI")]) - (define_insn "avx2_lshl<mode>3" [(set (match_operand:VI248_256 0 "register_operand" "=x") (ashift:VI248_256 @@ -6222,14 +6196,22 @@ DONE; }) -(define_expand "vshuffle<mode>" - [(match_operand:V_128 0 "register_operand" "") - (match_operand:V_128 1 "register_operand" "") - (match_operand:V_128 2 "register_operand" "") - (match_operand:<sseshuffint> 3 "register_operand" "")] - "TARGET_SSSE3 || TARGET_AVX" +;; ??? Irritatingly, the 256-bit VPSHUFB only shuffles within the 128-bit +;; lanes. For now, we don't try to support V32QI or V16HImode. So we +;; don't want to use VI_AVX2. +(define_mode_iterator VEC_PERM_AVX2 + [V16QI V8HI V4SI V2DI V4SF V2DF + (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2") + (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")]) + +(define_expand "vec_perm<mode>" + [(match_operand:VEC_PERM_AVX2 0 "register_operand" "") + (match_operand:VEC_PERM_AVX2 1 "register_operand" "") + (match_operand:VEC_PERM_AVX2 2 "register_operand" "") + (match_operand:<sseintvecmode> 3 "register_operand" "")] + "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP" { - ix86_expand_vshuffle (operands); + ix86_expand_vec_perm (operands); DONE; }) @@ -9435,11 +9417,11 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "<sse4_1_avx2>_pblendw" - [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x") - (vec_merge:VI2_AVX2 - (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm") - (match_operand:VI2_AVX2 1 "register_operand" "0,x") +(define_insn "sse4_1_pblendw" + [(set (match_operand:V8HI 0 "register_operand" "=x,x") + (vec_merge:V8HI + (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm") + (match_operand:V8HI 1 "register_operand" "0,x") (match_operand:SI 3 "const_0_to_255_operand" "n,n")))] "TARGET_SSE4_1" "@ @@ -9450,7 +9432,37 @@ (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "prefix" "orig,vex") - (set_attr "mode" "<sseinsnmode>")]) + (set_attr "mode" "TI")]) + +;; The builtin uses an 8-bit immediate. Expand that. 
+(define_expand "avx2_pblendw" + [(set (match_operand:V16HI 0 "register_operand" "") + (vec_merge:V16HI + (match_operand:V16HI 2 "nonimmediate_operand" "") + (match_operand:V16HI 1 "register_operand" "") + (match_operand:SI 3 "const_0_to_255_operand" "")))] + "TARGET_AVX2" +{ + HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff; + operands[3] = GEN_INT (val << 8 | val); +}) + +(define_insn "*avx2_pblendw" + [(set (match_operand:V16HI 0 "register_operand" "=x") + (vec_merge:V16HI + (match_operand:V16HI 2 "nonimmediate_operand" "xm") + (match_operand:V16HI 1 "register_operand" "x") + (match_operand:SI 3 "avx2_pblendw_operand" "n")))] + "TARGET_AVX2" +{ + operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff); + return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"; +} + [(set_attr "type" "ssemov") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) (define_insn "avx2_pblendd<mode>" [(set (match_operand:VI4_AVX2 0 "register_operand" "=x") @@ -12393,7 +12405,7 @@ (set_attr "prefix" "vex") (set_attr "mode" "TI")]) -(define_insn "*vec_concat<mode>_avx" +(define_insn "avx_vec_concat<mode>" [(set (match_operand:V_256 0 "register_operand" "=x,x") (vec_concat:V_256 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x") diff --git a/gcc/config/rs6000/aix.h b/gcc/config/rs6000/aix.h index 2c678a3a247..29eabbb6fd8 100644 --- a/gcc/config/rs6000/aix.h +++ b/gcc/config/rs6000/aix.h @@ -97,6 +97,9 @@ { \ builtin_define ("_IBMR2"); \ builtin_define ("_POWER"); \ + builtin_define ("__powerpc__"); \ + builtin_define ("__PPC__"); \ + builtin_define ("__unix__"); \ builtin_define ("_AIX"); \ builtin_define ("_AIX32"); \ builtin_define ("_AIX41"); \ diff --git a/gcc/config/rx/rx.md b/gcc/config/rx/rx.md index 708f9444ca9..a55b62f8f57 100644 --- a/gcc/config/rx/rx.md +++ b/gcc/config/rx/rx.md @@ -1199,11 +1199,11 @@ (set_attr "timings" "11,11,11,11,11,33")] ) -(define_insn "smin<int_modes:mode>3" - [(set (match_operand:int_modes 0 "register_operand" "=r,r,r,r,r,r") - (smin:int_modes (match_operand:int_modes 1 "register_operand" "%0,0,0,0,0,0") - (match_operand:int_modes 2 "rx_source_operand" - "r,Sint08,Sint16,Sint24,i,Q")))] +(define_insn "sminsi3" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r") + (smin:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0") + (match_operand:SI 2 "rx_source_operand" + "r,Sint08,Sint16,Sint24,i,Q")))] "" "min\t%Q2, %0" [(set_attr "length" "3,4,5,6,7,6") diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c index 22f05f9fd21..ffca91a107a 100644 --- a/gcc/config/s390/s390.c +++ b/gcc/config/s390/s390.c @@ -3604,7 +3604,8 @@ s390_emit_tls_call_insn (rtx result_reg, rtx tls_call) { rtx insn; - gcc_assert (flag_pic); + if (!flag_pic) + emit_insn (s390_load_got ()); if (!s390_tls_symbol) s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset"); @@ -7859,6 +7860,12 @@ s390_load_got (void) { rtx insns; + /* We cannot use pic_offset_table_rtx here since we use this + function also for non-pic if __tls_get_offset is called and in + that case PIC_OFFSET_TABLE_REGNUM as well as pic_offset_table_rtx + aren't usable. 
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 22f05f9fd21..ffca91a107a 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -3604,7 +3604,8 @@ s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
 {
   rtx insn;
 
-  gcc_assert (flag_pic);
+  if (!flag_pic)
+    emit_insn (s390_load_got ());
 
   if (!s390_tls_symbol)
     s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
@@ -7859,6 +7860,12 @@ s390_load_got (void)
 {
   rtx insns;
 
+  /* We cannot use pic_offset_table_rtx here since we use this
+     function also for non-pic if __tls_get_offset is called and in
+     that case PIC_OFFSET_TABLE_REGNUM as well as pic_offset_table_rtx
+     aren't usable.  */
+  rtx got_rtx = gen_rtx_REG (Pmode, 12);
+
   if (!got_symbol)
     {
       got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
@@ -7869,7 +7876,7 @@ s390_load_got (void)
 
   if (TARGET_CPU_ZARCH)
     {
-      emit_move_insn (pic_offset_table_rtx, got_symbol);
+      emit_move_insn (got_rtx, got_symbol);
     }
   else
     {
@@ -7880,13 +7887,13 @@ s390_load_got (void)
       offset = gen_rtx_CONST (Pmode, offset);
       offset = force_const_mem (Pmode, offset);
 
-      emit_move_insn (pic_offset_table_rtx, offset);
+      emit_move_insn (got_rtx, offset);
 
       offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (offset, 0)),
			        UNSPEC_LTREL_BASE);
-      offset = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, offset);
+      offset = gen_rtx_PLUS (Pmode, got_rtx, offset);
 
-      emit_move_insn (pic_offset_table_rtx, offset);
+      emit_move_insn (got_rtx, offset);
     }
 
   insns = get_insns ();
@@ -9827,8 +9834,7 @@ s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
	  s390_function_ok_for_sibcall should
	  have denied sibcalls in this case.  */
       gcc_assert (retaddr_reg != NULL_RTX);
-
-      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
+      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
     }
   return insn;
 }
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 018f6b49214..4c9a40f6e8e 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -367,6 +367,10 @@
 (define_mode_iterator DW [(TI "TARGET_ZARCH") (DI "!TARGET_ZARCH")])
 (define_mode_iterator W  [(DI "TARGET_ZARCH") (SI "!TARGET_ZARCH")])
 
+;; Used by the umul pattern to express modes having half the size.
+(define_mode_attr DWH [(TI "DI") (DI "SI")])
+(define_mode_attr dwh [(TI "di") (DI "si")])
+
 ;; This mode iterator allows the QI and HI patterns to be defined from
 ;; the same template.
 (define_mode_iterator HQI [HI QI])
@@ -5456,21 +5460,22 @@
    (set_attr "cpu_facility" "*,*,z10")])
 
 ;
-; umulsidi3 instruction pattern(s).
+; umul instruction pattern(s).
 ;
 
-(define_insn "umulsidi3"
-  [(set (match_operand:DI 0 "register_operand" "=d,d")
-	(mult:DI (zero_extend:DI
-		   (match_operand:SI 1 "register_operand" "%0,0"))
-		 (zero_extend:DI
-		   (match_operand:SI 2 "nonimmediate_operand" "d,RT"))))]
-  "!TARGET_ZARCH && TARGET_CPU_ZARCH"
+; mlr, ml, mlgr, mlg
+(define_insn "umul<dwh><mode>3"
+  [(set (match_operand:DW 0 "register_operand" "=d, d")
+	(mult:DW (zero_extend:DW
+		   (match_operand:<DWH> 1 "register_operand" "%0, 0"))
+		 (zero_extend:DW
+		   (match_operand:<DWH> 2 "nonimmediate_operand" " d,RT"))))]
+  "TARGET_CPU_ZARCH"
   "@
-   mlr\t%0,%2
-   ml\t%0,%2"
+   ml<tg>r\t%0,%2
+   ml<tg>\t%0,%2"
   [(set_attr "op_type"  "RRE,RXY")
-   (set_attr "type"     "imulsi")])
+   (set_attr "type"     "imul<dwh>")])
 
 ;
 ; mul(tf|df|sf|td|dd)3 instruction pattern(s).
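(Editorial sketch, not part of the patch: with the DW/DWH mode attributes the one template now yields both umulsidi3 and, on TARGET_ZARCH, the 64x64->128 umulditi3 instance, matching the mlr/ml/mlgr/mlg comment above. In C, the wide form is the usual widening-multiply idiom:)

typedef unsigned __int128 u128;

/* On s390x this zero_extend + mult shape is what umul<dwh><mode>3
   matches with DW == TI, allowing a single MLGR for register
   operands.  */
u128
umulditi (unsigned long a, unsigned long b)
{
  return (u128) a * b;
}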
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index 15552b2b1c9..a6eba6ca3a6 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -206,8 +206,6 @@
 
 (define_mode_iterator V64N8 [V2SI V4HI])
 
-(define_mode_iterator SIDI [SI DI])
-
 ;; The upper 32 fp regs on the v9 can't hold SFmode values.  To deal with this
 ;; a second register class, EXTRA_FP_REGS, exists for the v9 chip.  The name
 ;; is a bit of a misnomer as it covers all 64 fp regs.  The corresponding
@@ -6806,36 +6804,24 @@
   [(set_attr "type" "multi")
    (set_attr "length" "8")])
 
-(define_expand "popcount<mode>2"
-  [(set (match_operand:SIDI 0 "register_operand" "")
-        (popcount:SIDI (match_operand:SIDI 1 "register_operand" "")))]
+(define_expand "popcountdi2"
+  [(set (match_operand:DI 0 "register_operand" "")
+        (popcount:DI (match_operand:DI 1 "register_operand" "")))]
   "TARGET_POPC"
 {
   if (! TARGET_ARCH64)
     {
-      emit_insn (gen_popcount<mode>_v8plus (operands[0], operands[1]));
+      emit_insn (gen_popcountdi_v8plus (operands[0], operands[1]));
       DONE;
     }
 })
 
-(define_insn "*popcount<mode>_sp64"
-  [(set (match_operand:SIDI 0 "register_operand" "=r")
-        (popcount:SIDI (match_operand:SIDI 1 "register_operand" "r")))]
+(define_insn "*popcountdi_sp64"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (popcount:DI (match_operand:DI 1 "register_operand" "r")))]
   "TARGET_POPC && TARGET_ARCH64"
   "popc\t%1, %0")
 
-(define_insn "popcountsi_v8plus"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-        (popcount:SI (match_operand:SI 1 "register_operand" "r")))]
-  "TARGET_POPC && ! TARGET_ARCH64"
-{
-  if (sparc_check_64 (operands[1], insn) <= 0)
-    output_asm_insn ("srl\t%1, 0, %1", operands);
-  return "popc\t%1, %0";
-}
-  [(set_attr "type" "multi")
-   (set_attr "length" "2")])
-
 (define_insn "popcountdi_v8plus"
   [(set (match_operand:DI 0 "register_operand" "=r")
         (popcount:DI (match_operand:DI 1 "register_operand" "r")))
@@ -6849,14 +6835,49 @@
   [(set_attr "type" "multi")
    (set_attr "length" "5")])
 
-(define_expand "clz<mode>2"
-  [(set (match_operand:SIDI 0 "register_operand" "")
-        (clz:SIDI (match_operand:SIDI 1 "register_operand" "")))]
+(define_expand "popcountsi2"
+  [(set (match_dup 2)
+        (zero_extend:DI (match_operand:SI 1 "register_operand" "")))
+   (set (match_operand:SI 0 "register_operand" "")
+        (truncate:SI (popcount:DI (match_dup 2))))]
+  "TARGET_POPC"
+{
+  if (! TARGET_ARCH64)
+    {
+      emit_insn (gen_popcountsi_v8plus (operands[0], operands[1]));
+      DONE;
+    }
+  else
+    operands[2] = gen_reg_rtx (DImode);
+})
+
+(define_insn "*popcountsi_sp64"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (truncate:SI
+          (popcount:DI (match_operand:DI 1 "register_operand" "r"))))]
+  "TARGET_POPC && TARGET_ARCH64"
+  "popc\t%1, %0")
+
+(define_insn "popcountsi_v8plus"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (popcount:SI (match_operand:SI 1 "register_operand" "r")))]
+  "TARGET_POPC && ! TARGET_ARCH64"
+{
+  if (sparc_check_64 (operands[1], insn) <= 0)
+    output_asm_insn ("srl\t%1, 0, %1", operands);
+  return "popc\t%1, %0";
+}
+  [(set_attr "type" "multi")
+   (set_attr "length" "2")])
+
+(define_expand "clzdi2"
+  [(set (match_operand:DI 0 "register_operand" "")
+        (clz:DI (match_operand:DI 1 "register_operand" "")))]
   "TARGET_VIS3"
 {
   if (! TARGET_ARCH64)
     {
-      emit_insn (gen_clz<mode>_v8plus (operands[0], operands[1]));
+      emit_insn (gen_clzdi_v8plus (operands[0], operands[1]));
       DONE;
     }
 })
@@ -6880,13 +6901,33 @@
   [(set_attr "type" "multi")
    (set_attr "length" "5")])
 
+(define_expand "clzsi2"
+  [(set (match_dup 2)
+        (zero_extend:DI (match_operand:SI 1 "register_operand" "")))
+   (set (match_dup 3)
+        (truncate:SI (clz:DI (match_dup 2))))
+   (set (match_operand:SI 0 "register_operand" "")
+        (minus:SI (match_dup 3) (const_int 32)))]
+  "TARGET_VIS3"
+{
+  if (! TARGET_ARCH64)
+    {
+      emit_insn (gen_clzsi_v8plus (operands[0], operands[1]));
+      DONE;
+    }
+  else
+    {
+      operands[2] = gen_reg_rtx (DImode);
+      operands[3] = gen_reg_rtx (SImode);
+    }
+})
+
 (define_insn "*clzsi_sp64"
   [(set (match_operand:SI 0 "register_operand" "=r")
-        (clz:SI (match_operand:SI 1 "register_operand" "r")))]
+        (truncate:SI
+          (clz:DI (match_operand:DI 1 "register_operand" "r"))))]
   "TARGET_VIS3 && TARGET_ARCH64"
-  "lzd\t%1, %0\n\tsub\t%0, 32, %0"
-  [(set_attr "type" "multi")
-   (set_attr "length" "2")])
+  "lzd\t%1, %0")
 
 (define_insn "clzsi_v8plus"
   [(set (match_operand:SI 0 "register_operand" "=r")