diff options
Diffstat (limited to 'gcc/config/i386/i386.c')
-rw-r--r-- | gcc/config/i386/i386.c | 367 |
1 files changed, 341 insertions, 26 deletions
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 47ee8e154df..799e12b2b14 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -10913,15 +10913,28 @@ ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, it looks like we might want one, insert a NOP. */ { rtx insn = get_last_insn (); + rtx deleted_debug_label = NULL_RTX; while (insn && NOTE_P (insn) && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL) - insn = PREV_INSN (insn); + { + /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL + notes only, instead set their CODE_LABEL_NUMBER to -1, + otherwise there would be code generation differences + in between -g and -g0. */ + if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL) + deleted_debug_label = insn; + insn = PREV_INSN (insn); + } if (insn && (LABEL_P (insn) || (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))) fputs ("\tnop\n", file); + else if (deleted_debug_label) + for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn)) + if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL) + CODE_LABEL_NUMBER (insn) = -1; } #endif @@ -16530,6 +16543,29 @@ ix86_avoid_lea_for_add (rtx insn, rtx operands[]) return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1); } +/* Return true if we should emit lea instruction instead of mov + instruction. */ + +bool +ix86_use_lea_for_mov (rtx insn, rtx operands[]) +{ + unsigned int regno0; + unsigned int regno1; + + /* Check if we need to optimize. */ + if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) + return false; + + /* Use lea for reg to reg moves only. */ + if (!REG_P (operands[0]) || !REG_P (operands[1])) + return false; + + regno0 = true_regnum (operands[0]); + regno1 = true_regnum (operands[1]); + + return ix86_lea_outperforms (insn, regno0, regno1, -1, 0); +} + /* Return true if we need to split lea into a sequence of instructions to avoid AGU stalls. */ @@ -17050,18 +17086,56 @@ ix86_expand_convert_uns_sisf_sse (rtx target, rtx input) emit_move_insn (target, fp_hi); } +/* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert + a vector of unsigned ints VAL to vector of floats TARGET. */ + +void +ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val) +{ + rtx tmp[8]; + REAL_VALUE_TYPE TWO16r; + enum machine_mode intmode = GET_MODE (val); + enum machine_mode fltmode = GET_MODE (target); + rtx (*cvt) (rtx, rtx); + + if (intmode == V4SImode) + cvt = gen_floatv4siv4sf2; + else + cvt = gen_floatv8siv8sf2; + tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff)); + tmp[0] = force_reg (intmode, tmp[0]); + tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1, + OPTAB_DIRECT); + tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16), + NULL_RTX, 1, OPTAB_DIRECT); + tmp[3] = gen_reg_rtx (fltmode); + emit_insn (cvt (tmp[3], tmp[1])); + tmp[4] = gen_reg_rtx (fltmode); + emit_insn (cvt (tmp[4], tmp[2])); + real_ldexp (&TWO16r, &dconst1, 16); + tmp[5] = const_double_from_real_value (TWO16r, SFmode); + tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5])); + tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1, + OPTAB_DIRECT); + tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1, + OPTAB_DIRECT); + if (tmp[7] != target) + emit_move_insn (target, tmp[7]); +} + /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc* pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*. - This is done by subtracting 0x1p32 from VAL if VAL is greater or equal - (non-signalling) than 0x1p31. */ + This is done by doing just signed conversion if < 0x1p31, and otherwise by + subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */ rtx -ix86_expand_adjust_ufix_to_sfix_si (rtx val) +ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp) { - REAL_VALUE_TYPE MTWO32r, TWO31r; - rtx two31r, mtwo32r, tmp[3]; + REAL_VALUE_TYPE TWO31r; + rtx two31r, tmp[4]; enum machine_mode mode = GET_MODE (val); enum machine_mode scalarmode = GET_MODE_INNER (mode); + enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode; rtx (*cmp) (rtx, rtx, rtx, rtx); int i; @@ -17071,22 +17145,33 @@ ix86_expand_adjust_ufix_to_sfix_si (rtx val) two31r = const_double_from_real_value (TWO31r, scalarmode); two31r = ix86_build_const_vector (mode, 1, two31r); two31r = force_reg (mode, two31r); - real_ldexp (&MTWO32r, &dconstm1, 32); - mtwo32r = const_double_from_real_value (MTWO32r, scalarmode); - mtwo32r = ix86_build_const_vector (mode, 1, mtwo32r); - mtwo32r = force_reg (mode, mtwo32r); switch (mode) { - case V8SFmode: cmp = gen_avx_cmpv8sf3; break; - case V4SFmode: cmp = gen_avx_cmpv4sf3; break; - case V4DFmode: cmp = gen_avx_cmpv4df3; break; - case V2DFmode: cmp = gen_avx_cmpv2df3; break; + case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break; + case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break; + case V4DFmode: cmp = gen_avx_maskcmpv4df3; break; + case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break; default: gcc_unreachable (); } - emit_insn (cmp (tmp[0], val, two31r, GEN_INT (29))); - tmp[1] = expand_simple_binop (mode, AND, tmp[0], mtwo32r, tmp[1], + tmp[3] = gen_rtx_LE (mode, two31r, val); + emit_insn (cmp (tmp[0], two31r, val, tmp[3])); + tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1], 0, OPTAB_DIRECT); - return expand_simple_binop (mode, PLUS, val, tmp[1], tmp[2], + if (intmode == V4SImode || TARGET_AVX2) + *xorp = expand_simple_binop (intmode, ASHIFT, + gen_lowpart (intmode, tmp[0]), + GEN_INT (31), NULL_RTX, 0, + OPTAB_DIRECT); + else + { + rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31); + two31 = ix86_build_const_vector (intmode, 1, two31); + *xorp = expand_simple_binop (intmode, AND, + gen_lowpart (intmode, tmp[0]), + two31, NULL_RTX, 0, + OPTAB_DIRECT); + } + return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2], 0, OPTAB_DIRECT); } @@ -24779,6 +24864,7 @@ enum ix86_builtins IX86_BUILTIN_VEC_SET_V16QI, IX86_BUILTIN_VEC_PACK_SFIX, + IX86_BUILTIN_VEC_PACK_SFIX256, /* SSE4.2. */ IX86_BUILTIN_CRC32QI, @@ -25139,6 +25225,13 @@ enum ix86_builtins IX86_BUILTIN_GATHERDIV4SI, IX86_BUILTIN_GATHERDIV8SI, + /* Alternate 4 element gather for the vectorizer where + all operands are 32-byte wide. */ + IX86_BUILTIN_GATHERALTSIV4DF, + IX86_BUILTIN_GATHERALTDIV8SF, + IX86_BUILTIN_GATHERALTSIV4DI, + IX86_BUILTIN_GATHERALTDIV8SI, + /* TFmode support builtins. */ IX86_BUILTIN_INFQ, IX86_BUILTIN_HUGE_VALQ, @@ -26223,7 +26316,7 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF }, { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT }, @@ -26300,6 +26393,8 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF }, + /* AVX2 */ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT }, { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI }, @@ -27065,6 +27160,22 @@ ix86_init_mmx_sse_builtins (void) V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT, IX86_BUILTIN_GATHERDIV8SI); + def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ", + V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT, + IX86_BUILTIN_GATHERALTSIV4DF); + + def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ", + V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT, + IX86_BUILTIN_GATHERALTDIV8SF); + + def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ", + V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT, + IX86_BUILTIN_GATHERALTSIV4DI); + + def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ", + V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT, + IX86_BUILTIN_GATHERALTDIV8SI); + /* MMX access to the vec_init patterns. */ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI); @@ -28123,6 +28234,7 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V32QI_FTYPE_V32QI_V32QI: case V16HI_FTYPE_V32QI_V32QI: case V16HI_FTYPE_V16HI_V16HI: + case V8SI_FTYPE_V4DF_V4DF: case V8SI_FTYPE_V8SI_V8SI: case V8SI_FTYPE_V16HI_V16HI: case V4DI_FTYPE_V4DI_V4DI: @@ -29052,7 +29164,7 @@ rdrand_step: icode = CODE_FOR_avx2_gatherdiv4sf; goto gather_gen; case IX86_BUILTIN_GATHERDIV8SF: - icode = CODE_FOR_avx2_gatherdiv4sf256; + icode = CODE_FOR_avx2_gatherdiv8sf; goto gather_gen; case IX86_BUILTIN_GATHERSIV2DI: icode = CODE_FOR_avx2_gathersiv2di; @@ -29076,7 +29188,20 @@ rdrand_step: icode = CODE_FOR_avx2_gatherdiv4si; goto gather_gen; case IX86_BUILTIN_GATHERDIV8SI: - icode = CODE_FOR_avx2_gatherdiv4si256; + icode = CODE_FOR_avx2_gatherdiv8si; + goto gather_gen; + case IX86_BUILTIN_GATHERALTSIV4DF: + icode = CODE_FOR_avx2_gathersiv4df; + goto gather_gen; + case IX86_BUILTIN_GATHERALTDIV8SF: + icode = CODE_FOR_avx2_gatherdiv8sf; + goto gather_gen; + case IX86_BUILTIN_GATHERALTSIV4DI: + icode = CODE_FOR_avx2_gathersiv4df; + goto gather_gen; + case IX86_BUILTIN_GATHERALTDIV8SI: + icode = CODE_FOR_avx2_gatherdiv8si; + goto gather_gen; gather_gen: arg0 = CALL_EXPR_ARG (exp, 0); @@ -29095,8 +29220,39 @@ rdrand_step: mode3 = insn_data[icode].operand[4].mode; mode4 = insn_data[icode].operand[5].mode; - if (target == NULL_RTX) - target = gen_reg_rtx (insn_data[icode].operand[0].mode); + if (target == NULL_RTX + || GET_MODE (target) != insn_data[icode].operand[0].mode) + subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode); + else + subtarget = target; + + if (fcode == IX86_BUILTIN_GATHERALTSIV4DF + || fcode == IX86_BUILTIN_GATHERALTSIV4DI) + { + rtx half = gen_reg_rtx (V4SImode); + if (!nonimmediate_operand (op2, V8SImode)) + op2 = copy_to_mode_reg (V8SImode, op2); + emit_insn (gen_vec_extract_lo_v8si (half, op2)); + op2 = half; + } + else if (fcode == IX86_BUILTIN_GATHERALTDIV8SF + || fcode == IX86_BUILTIN_GATHERALTDIV8SI) + { + rtx (*gen) (rtx, rtx); + rtx half = gen_reg_rtx (mode0); + if (mode0 == V4SFmode) + gen = gen_vec_extract_lo_v8sf; + else + gen = gen_vec_extract_lo_v8si; + if (!nonimmediate_operand (op0, GET_MODE (op0))) + op0 = copy_to_mode_reg (GET_MODE (op0), op0); + emit_insn (gen (half, op0)); + op0 = half; + if (!nonimmediate_operand (op3, GET_MODE (op3))) + op3 = copy_to_mode_reg (GET_MODE (op3), op3); + emit_insn (gen (half, op3)); + op3 = half; + } /* Force memory operand only with base register here. But we don't want to do it on memory operand for other builtin @@ -29118,10 +29274,91 @@ rdrand_step: error ("last argument must be scale 1, 2, 4, 8"); return const0_rtx; } - pat = GEN_FCN (icode) (target, op0, op1, op2, op3, op4); + + /* Optimize. If mask is known to have all high bits set, + replace op0 with pc_rtx to signal that the instruction + overwrites the whole destination and doesn't use its + previous contents. */ + if (optimize) + { + if (TREE_CODE (arg3) == VECTOR_CST) + { + tree elt; + unsigned int negative = 0; + for (elt = TREE_VECTOR_CST_ELTS (arg3); + elt; elt = TREE_CHAIN (elt)) + { + tree cst = TREE_VALUE (elt); + if (TREE_CODE (cst) == INTEGER_CST + && tree_int_cst_sign_bit (cst)) + negative++; + else if (TREE_CODE (cst) == REAL_CST + && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst))) + negative++; + } + if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3))) + op0 = pc_rtx; + } + else if (TREE_CODE (arg3) == SSA_NAME) + { + /* Recognize also when mask is like: + __v2df src = _mm_setzero_pd (); + __v2df mask = _mm_cmpeq_pd (src, src); + or + __v8sf src = _mm256_setzero_ps (); + __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ); + as that is a cheaper way to load all ones into + a register than having to load a constant from + memory. */ + gimple def_stmt = SSA_NAME_DEF_STMT (arg3); + if (is_gimple_call (def_stmt)) + { + tree fndecl = gimple_call_fndecl (def_stmt); + if (fndecl + && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD) + switch ((unsigned int) DECL_FUNCTION_CODE (fndecl)) + { + case IX86_BUILTIN_CMPPD: + case IX86_BUILTIN_CMPPS: + case IX86_BUILTIN_CMPPD256: + case IX86_BUILTIN_CMPPS256: + if (!integer_zerop (gimple_call_arg (def_stmt, 2))) + break; + /* FALLTHRU */ + case IX86_BUILTIN_CMPEQPD: + case IX86_BUILTIN_CMPEQPS: + if (initializer_zerop (gimple_call_arg (def_stmt, 0)) + && initializer_zerop (gimple_call_arg (def_stmt, + 1))) + op0 = pc_rtx; + break; + default: + break; + } + } + } + } + + pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4); if (! pat) return const0_rtx; emit_insn (pat); + + if (fcode == IX86_BUILTIN_GATHERDIV8SF + || fcode == IX86_BUILTIN_GATHERDIV8SI) + { + enum machine_mode tmode = GET_MODE (subtarget) == V8SFmode + ? V4SFmode : V4SImode; + if (target == NULL_RTX) + target = gen_reg_rtx (tmode); + if (tmode == V4SFmode) + emit_insn (gen_vec_extract_lo_v8sf (target, subtarget)); + else + emit_insn (gen_vec_extract_lo_v8si (target, subtarget)); + } + else + target = subtarget; + return target; default: @@ -29218,13 +29455,21 @@ ix86_builtin_vectorized_function (tree fndecl, tree type_out, } break; + case BUILT_IN_IRINT: case BUILT_IN_LRINT: - if (out_mode == SImode && out_n == 4 - && in_mode == DFmode && in_n == 2) - return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX]; + case BUILT_IN_LLRINT: + if (out_mode == SImode && in_mode == DFmode) + { + if (out_n == 4 && in_n == 2) + return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX]; + else if (out_n == 8 && in_n == 4) + return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX256]; + } break; + case BUILT_IN_IRINTF: case BUILT_IN_LRINTF: + case BUILT_IN_LLRINTF: if (out_mode == SImode && in_mode == SFmode) { if (out_n == 4 && in_n == 4) @@ -29626,6 +29871,73 @@ ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in) return new_fndecl; } +/* Returns a decl of a function that implements gather load with + memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE. + Return NULL_TREE if it is not available. */ + +static tree +ix86_vectorize_builtin_gather (const_tree mem_vectype, + const_tree index_type, int scale) +{ + bool si; + enum ix86_builtins code; + + if (! TARGET_AVX2) + return NULL_TREE; + + if ((TREE_CODE (index_type) != INTEGER_TYPE + && !POINTER_TYPE_P (index_type)) + || (TYPE_MODE (index_type) != SImode + && TYPE_MODE (index_type) != DImode)) + return NULL_TREE; + + if (TYPE_PRECISION (index_type) > POINTER_SIZE) + return NULL_TREE; + + /* v*gather* insn sign extends index to pointer mode. */ + if (TYPE_PRECISION (index_type) < POINTER_SIZE + && TYPE_UNSIGNED (index_type)) + return NULL_TREE; + + if (scale <= 0 + || scale > 8 + || (scale & (scale - 1)) != 0) + return NULL_TREE; + + si = TYPE_MODE (index_type) == SImode; + switch (TYPE_MODE (mem_vectype)) + { + case V2DFmode: + code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF; + break; + case V4DFmode: + code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF; + break; + case V2DImode: + code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI; + break; + case V4DImode: + code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI; + break; + case V4SFmode: + code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF; + break; + case V8SFmode: + code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF; + break; + case V4SImode: + code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI; + break; + case V8SImode: + code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI; + break; + default: + return NULL_TREE; + } + + return ix86_builtins[code]; +} + /* Returns a code for a target-specific builtin that implements reciprocal of the function, or NULL_TREE if not available. */ @@ -37835,6 +38147,9 @@ ix86_autovectorize_vector_sizes (void) #undef TARGET_VECTORIZE_BUILTIN_TM_STORE #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store +#undef TARGET_VECTORIZE_BUILTIN_GATHER +#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather + #undef TARGET_BUILTIN_RECIPROCAL #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal |