Diffstat (limited to 'gcc/config/i386/i386.c')
-rw-r--r--  gcc/config/i386/i386.c  367
1 file changed, 341 insertions(+), 26 deletions(-)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 47ee8e154df..799e12b2b14 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -10913,15 +10913,28 @@ ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
it looks like we might want one, insert a NOP. */
{
rtx insn = get_last_insn ();
+ rtx deleted_debug_label = NULL_RTX;
while (insn
&& NOTE_P (insn)
&& NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
- insn = PREV_INSN (insn);
+ {
+	  /* If the function ends with only NOTE_INSN_DELETED_DEBUG_LABEL
+	     notes, don't insert a nop for them; instead set their
+	     CODE_LABEL_NUMBER to -1 so that code generation doesn't
+	     differ between -g and -g0.  */
+ if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
+ deleted_debug_label = insn;
+ insn = PREV_INSN (insn);
+ }
if (insn
&& (LABEL_P (insn)
|| (NOTE_P (insn)
&& NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
fputs ("\tnop\n", file);
+ else if (deleted_debug_label)
+ for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
+ if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
+ CODE_LABEL_NUMBER (insn) = -1;
}
#endif
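A user-level shape that can exercise this hunk: a label as the very last
statement of a function may be deleted during compilation yet leave a
NOTE_INSN_DELETED_LABEL behind, and the trailing nop keeps the label's
address from coinciding with the start of the next function.  Deleted
debug labels alone no longer force the nop, so -g and -g0 now generate
identical code.  A minimal sketch, not part of the patch:

  void
  trailing_label_demo (int x)
  {
    if (x)
      goto done;
   done:;
  }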
@@ -16530,6 +16543,29 @@ ix86_avoid_lea_for_add (rtx insn, rtx operands[])
return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1);
}
+/* Return true if we should emit an lea instruction instead of a mov
+ instruction. */
+
+bool
+ix86_use_lea_for_mov (rtx insn, rtx operands[])
+{
+ unsigned int regno0;
+ unsigned int regno1;
+
+ /* Check if we need to optimize. */
+ if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
+ return false;
+
+ /* Use lea for reg to reg moves only. */
+ if (!REG_P (operands[0]) || !REG_P (operands[1]))
+ return false;
+
+ regno0 = true_regnum (operands[0]);
+ regno1 = true_regnum (operands[1]);
+
+ return ix86_lea_outperforms (insn, regno0, regno1, -1, 0);
+}
+
/* Return true if we need to split lea into a sequence of
instructions to avoid AGU stalls. */
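For reference, the rewrite ix86_use_lea_for_mov enables: on TARGET_OPT_AGU
CPUs (Atom), a register-to-register move can be emitted as lea so it
executes on the address-generation unit instead of competing for ALU
ports.  Both asm statements below copy src to dst; a GNU-inline-asm
sketch, not part of the patch:

  void
  lea_mov_demo (void)
  {
    unsigned long src = 42, dst;
    __asm__ ("movq %1, %0" : "=r" (dst) : "r" (src));   /* ALU-port move */
    __asm__ ("leaq (%1), %0" : "=r" (dst) : "r" (src)); /* AGU-port move */
  }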
@@ -17050,18 +17086,56 @@ ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
emit_move_insn (target, fp_hi);
}
+/* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
+   a vector of unsigned ints VAL to a vector of floats TARGET.  */
+
+void
+ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
+{
+ rtx tmp[8];
+ REAL_VALUE_TYPE TWO16r;
+ enum machine_mode intmode = GET_MODE (val);
+ enum machine_mode fltmode = GET_MODE (target);
+ rtx (*cvt) (rtx, rtx);
+
+ if (intmode == V4SImode)
+ cvt = gen_floatv4siv4sf2;
+ else
+ cvt = gen_floatv8siv8sf2;
+ tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
+ tmp[0] = force_reg (intmode, tmp[0]);
+ tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
+ OPTAB_DIRECT);
+ tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ tmp[3] = gen_reg_rtx (fltmode);
+ emit_insn (cvt (tmp[3], tmp[1]));
+ tmp[4] = gen_reg_rtx (fltmode);
+ emit_insn (cvt (tmp[4], tmp[2]));
+ real_ldexp (&TWO16r, &dconst1, 16);
+ tmp[5] = const_double_from_real_value (TWO16r, SFmode);
+ tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
+ tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
+ OPTAB_DIRECT);
+ tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
+ OPTAB_DIRECT);
+ if (tmp[7] != target)
+ emit_move_insn (target, tmp[7]);
+}
+
/* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
- This is done by subtracting 0x1p32 from VAL if VAL is greater or equal
- (non-signalling) than 0x1p31. */
+   This is done by using a plain signed conversion if VAL < 0x1p31, and otherwise by
+ subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
rtx
-ix86_expand_adjust_ufix_to_sfix_si (rtx val)
+ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
{
- REAL_VALUE_TYPE MTWO32r, TWO31r;
- rtx two31r, mtwo32r, tmp[3];
+ REAL_VALUE_TYPE TWO31r;
+ rtx two31r, tmp[4];
enum machine_mode mode = GET_MODE (val);
enum machine_mode scalarmode = GET_MODE_INNER (mode);
+ enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
rtx (*cmp) (rtx, rtx, rtx, rtx);
int i;
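A scalar model of what ix86_expand_vector_convert_uns_vsivsf emits per
lane: the unsigned value is split into 16-bit halves, each half goes
through the exact signed int-to-float conversion, and the result is
recombined as hi * 0x1p16 + lo.  A sketch, not part of the patch:

  #include <stdio.h>

  static float
  uns_to_float (unsigned int x)
  {
    float lo = (float) (int) (x & 0xffff); /* tmp[1] -> tmp[3] */
    float hi = (float) (int) (x >> 16);    /* tmp[2] -> tmp[4] */
    return hi * 65536.0f + lo;             /* tmp[6], then tmp[7] */
  }

  int
  main (void)
  {
    printf ("%f\n", uns_to_float (0xdeadbeefu)); /* 3735928576.000000 */
    return 0;
  }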
@@ -17071,22 +17145,33 @@ ix86_expand_adjust_ufix_to_sfix_si (rtx val)
two31r = const_double_from_real_value (TWO31r, scalarmode);
two31r = ix86_build_const_vector (mode, 1, two31r);
two31r = force_reg (mode, two31r);
- real_ldexp (&MTWO32r, &dconstm1, 32);
- mtwo32r = const_double_from_real_value (MTWO32r, scalarmode);
- mtwo32r = ix86_build_const_vector (mode, 1, mtwo32r);
- mtwo32r = force_reg (mode, mtwo32r);
switch (mode)
{
- case V8SFmode: cmp = gen_avx_cmpv8sf3; break;
- case V4SFmode: cmp = gen_avx_cmpv4sf3; break;
- case V4DFmode: cmp = gen_avx_cmpv4df3; break;
- case V2DFmode: cmp = gen_avx_cmpv2df3; break;
+ case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
+ case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
+ case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
+ case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
default: gcc_unreachable ();
}
- emit_insn (cmp (tmp[0], val, two31r, GEN_INT (29)));
- tmp[1] = expand_simple_binop (mode, AND, tmp[0], mtwo32r, tmp[1],
+ tmp[3] = gen_rtx_LE (mode, two31r, val);
+ emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
+ tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
0, OPTAB_DIRECT);
- return expand_simple_binop (mode, PLUS, val, tmp[1], tmp[2],
+ if (intmode == V4SImode || TARGET_AVX2)
+ *xorp = expand_simple_binop (intmode, ASHIFT,
+ gen_lowpart (intmode, tmp[0]),
+ GEN_INT (31), NULL_RTX, 0,
+ OPTAB_DIRECT);
+ else
+ {
+ rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
+ two31 = ix86_build_const_vector (intmode, 1, two31);
+ *xorp = expand_simple_binop (intmode, AND,
+ gen_lowpart (intmode, tmp[0]),
+ two31, NULL_RTX, 0,
+ OPTAB_DIRECT);
+ }
+ return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
0, OPTAB_DIRECT);
}
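Per lane, the adjusted conversion behaves like this scalar model: values
below 0x1p31 take the plain signed path, larger values are biased down by
0x1p31 before the signed conversion (the masked MINUS above) and have the
sign bit xored back in afterwards (the *XORP mask).  A sketch, not part of
the patch:

  #include <stdio.h>

  static unsigned int
  ufix_via_sfix (float v)
  {
    if (v < 2147483648.0f)                      /* < 0x1p31 */
      return (unsigned int) (int) v;
    int biased = (int) (v - 2147483648.0f);     /* subtract 0x1p31 */
    return (unsigned int) biased ^ 0x80000000u; /* xor in the sign bit */
  }

  int
  main (void)
  {
    printf ("%u\n", ufix_via_sfix (3000000000.0f)); /* 3000000000 */
    printf ("%u\n", ufix_via_sfix (123.5f));        /* 123 */
    return 0;
  }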
@@ -24779,6 +24864,7 @@ enum ix86_builtins
IX86_BUILTIN_VEC_SET_V16QI,
IX86_BUILTIN_VEC_PACK_SFIX,
+ IX86_BUILTIN_VEC_PACK_SFIX256,
/* SSE4.2. */
IX86_BUILTIN_CRC32QI,
@@ -25139,6 +25225,13 @@ enum ix86_builtins
IX86_BUILTIN_GATHERDIV4SI,
IX86_BUILTIN_GATHERDIV8SI,
+ /* Alternate 4 element gather for the vectorizer where
+ all operands are 32-byte wide. */
+ IX86_BUILTIN_GATHERALTSIV4DF,
+ IX86_BUILTIN_GATHERALTDIV8SF,
+ IX86_BUILTIN_GATHERALTSIV4DI,
+ IX86_BUILTIN_GATHERALTDIV8SI,
+
/* TFmode support builtins. */
IX86_BUILTIN_INFQ,
IX86_BUILTIN_HUGE_VALQ,
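The loop shape the IX86_BUILTIN_GATHERALT* entries exist for: double or
long long data indexed by int, where the 32-byte data vector (V4DF/V4DI)
pairs with only the low four elements of a V8SI index operand.
Illustrative only; with -O3 -mavx2 this may become vgatherdpd:

  void
  gatheralt_demo (double *restrict out, const double *src,
                  const int *idx, int n)
  {
    int i;
    for (i = 0; i < n; i++)
      out[i] = src[idx[i]];
  }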
@@ -26223,7 +26316,7 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
- { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
@@ -26300,6 +26393,8 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
+
/* AVX2 */
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
@@ -27065,6 +27160,22 @@ ix86_init_mmx_sse_builtins (void)
V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
IX86_BUILTIN_GATHERDIV8SI);
+  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df",
+ V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
+ IX86_BUILTIN_GATHERALTSIV4DF);
+
+  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256",
+ V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
+ IX86_BUILTIN_GATHERALTDIV8SF);
+
+  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di",
+ V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
+ IX86_BUILTIN_GATHERALTSIV4DI);
+
+  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256",
+ V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
+ IX86_BUILTIN_GATHERALTDIV8SI);
+
/* MMX access to the vec_init patterns. */
def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
@@ -28123,6 +28234,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V32QI_FTYPE_V32QI_V32QI:
case V16HI_FTYPE_V32QI_V32QI:
case V16HI_FTYPE_V16HI_V16HI:
+ case V8SI_FTYPE_V4DF_V4DF:
case V8SI_FTYPE_V8SI_V8SI:
case V8SI_FTYPE_V16HI_V16HI:
case V4DI_FTYPE_V4DI_V4DI:
@@ -29052,7 +29164,7 @@ rdrand_step:
icode = CODE_FOR_avx2_gatherdiv4sf;
goto gather_gen;
case IX86_BUILTIN_GATHERDIV8SF:
- icode = CODE_FOR_avx2_gatherdiv4sf256;
+ icode = CODE_FOR_avx2_gatherdiv8sf;
goto gather_gen;
case IX86_BUILTIN_GATHERSIV2DI:
icode = CODE_FOR_avx2_gathersiv2di;
@@ -29076,7 +29188,20 @@ rdrand_step:
icode = CODE_FOR_avx2_gatherdiv4si;
goto gather_gen;
case IX86_BUILTIN_GATHERDIV8SI:
- icode = CODE_FOR_avx2_gatherdiv4si256;
+ icode = CODE_FOR_avx2_gatherdiv8si;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHERALTSIV4DF:
+ icode = CODE_FOR_avx2_gathersiv4df;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHERALTDIV8SF:
+ icode = CODE_FOR_avx2_gatherdiv8sf;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHERALTSIV4DI:
+      icode = CODE_FOR_avx2_gathersiv4di;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHERALTDIV8SI:
+ icode = CODE_FOR_avx2_gatherdiv8si;
+ goto gather_gen;
gather_gen:
arg0 = CALL_EXPR_ARG (exp, 0);
@@ -29095,8 +29220,39 @@ rdrand_step:
mode3 = insn_data[icode].operand[4].mode;
mode4 = insn_data[icode].operand[5].mode;
- if (target == NULL_RTX)
- target = gen_reg_rtx (insn_data[icode].operand[0].mode);
+ if (target == NULL_RTX
+ || GET_MODE (target) != insn_data[icode].operand[0].mode)
+ subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
+ else
+ subtarget = target;
+
+ if (fcode == IX86_BUILTIN_GATHERALTSIV4DF
+ || fcode == IX86_BUILTIN_GATHERALTSIV4DI)
+ {
+ rtx half = gen_reg_rtx (V4SImode);
+ if (!nonimmediate_operand (op2, V8SImode))
+ op2 = copy_to_mode_reg (V8SImode, op2);
+ emit_insn (gen_vec_extract_lo_v8si (half, op2));
+ op2 = half;
+ }
+ else if (fcode == IX86_BUILTIN_GATHERALTDIV8SF
+ || fcode == IX86_BUILTIN_GATHERALTDIV8SI)
+ {
+ rtx (*gen) (rtx, rtx);
+ rtx half = gen_reg_rtx (mode0);
+ if (mode0 == V4SFmode)
+ gen = gen_vec_extract_lo_v8sf;
+ else
+ gen = gen_vec_extract_lo_v8si;
+ if (!nonimmediate_operand (op0, GET_MODE (op0)))
+ op0 = copy_to_mode_reg (GET_MODE (op0), op0);
+ emit_insn (gen (half, op0));
+ op0 = half;
+ if (!nonimmediate_operand (op3, GET_MODE (op3)))
+ op3 = copy_to_mode_reg (GET_MODE (op3), op3);
+ emit_insn (gen (half, op3));
+ op3 = half;
+ }
/* Force memory operand only with base register here. But we
don't want to do it on memory operand for other builtin
@@ -29118,10 +29274,91 @@ rdrand_step:
error ("last argument must be scale 1, 2, 4, 8");
return const0_rtx;
}
- pat = GEN_FCN (icode) (target, op0, op1, op2, op3, op4);
+
+ /* Optimize. If mask is known to have all high bits set,
+ replace op0 with pc_rtx to signal that the instruction
+ overwrites the whole destination and doesn't use its
+ previous contents. */
+ if (optimize)
+ {
+ if (TREE_CODE (arg3) == VECTOR_CST)
+ {
+ tree elt;
+ unsigned int negative = 0;
+ for (elt = TREE_VECTOR_CST_ELTS (arg3);
+ elt; elt = TREE_CHAIN (elt))
+ {
+ tree cst = TREE_VALUE (elt);
+ if (TREE_CODE (cst) == INTEGER_CST
+ && tree_int_cst_sign_bit (cst))
+ negative++;
+ else if (TREE_CODE (cst) == REAL_CST
+ && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
+ negative++;
+ }
+ if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
+ op0 = pc_rtx;
+ }
+ else if (TREE_CODE (arg3) == SSA_NAME)
+ {
+ /* Recognize also when mask is like:
+ __v2df src = _mm_setzero_pd ();
+ __v2df mask = _mm_cmpeq_pd (src, src);
+ or
+ __v8sf src = _mm256_setzero_ps ();
+ __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
+ as that is a cheaper way to load all ones into
+ a register than having to load a constant from
+ memory. */
+ gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
+ if (is_gimple_call (def_stmt))
+ {
+ tree fndecl = gimple_call_fndecl (def_stmt);
+ if (fndecl
+ && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
+ switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
+ {
+ case IX86_BUILTIN_CMPPD:
+ case IX86_BUILTIN_CMPPS:
+ case IX86_BUILTIN_CMPPD256:
+ case IX86_BUILTIN_CMPPS256:
+ if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
+ break;
+ /* FALLTHRU */
+ case IX86_BUILTIN_CMPEQPD:
+ case IX86_BUILTIN_CMPEQPS:
+ if (initializer_zerop (gimple_call_arg (def_stmt, 0))
+ && initializer_zerop (gimple_call_arg (def_stmt,
+ 1)))
+ op0 = pc_rtx;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ }
+
+ pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
if (! pat)
return const0_rtx;
emit_insn (pat);
+
+ if (fcode == IX86_BUILTIN_GATHERDIV8SF
+ || fcode == IX86_BUILTIN_GATHERDIV8SI)
+ {
+ enum machine_mode tmode = GET_MODE (subtarget) == V8SFmode
+ ? V4SFmode : V4SImode;
+ if (target == NULL_RTX)
+ target = gen_reg_rtx (tmode);
+ if (tmode == V4SFmode)
+ emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
+ else
+ emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
+ }
+ else
+ target = subtarget;
+
return target;
default:
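The cheap all-ones mask idiom the SSA_NAME case recognizes, written with
the AVX2 gather intrinsics (assumes immintrin.h; a sketch, not part of the
patch):

  #include <immintrin.h>

  __m256d
  full_gather (const double *base, __m128i idx)
  {
    __m256d src = _mm256_setzero_pd ();
    /* Comparing a zeroed register with itself makes every lane
       all-ones, so the mask covers the whole destination and the
       expander can pass pc_rtx instead of op0.  */
    __m256d mask = _mm256_cmp_pd (src, src, _CMP_EQ_OQ);
    return _mm256_mask_i32gather_pd (src, base, idx, mask, 8);
  }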
@@ -29218,13 +29455,21 @@ ix86_builtin_vectorized_function (tree fndecl, tree type_out,
}
break;
+ case BUILT_IN_IRINT:
case BUILT_IN_LRINT:
- if (out_mode == SImode && out_n == 4
- && in_mode == DFmode && in_n == 2)
- return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
+ case BUILT_IN_LLRINT:
+ if (out_mode == SImode && in_mode == DFmode)
+ {
+ if (out_n == 4 && in_n == 2)
+ return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
+ else if (out_n == 8 && in_n == 4)
+ return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX256];
+ }
break;
+ case BUILT_IN_IRINTF:
case BUILT_IN_LRINTF:
+ case BUILT_IN_LLRINTF:
if (out_mode == SImode && in_mode == SFmode)
{
if (out_n == 4 && in_n == 4)
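With the new entries, a rounding double-to-int loop can be vectorized via
the vec_pack_sfix builtins; for 256-bit vectors, two V4DF inputs pack into
one V8SI result.  A sketch (needs e.g. -O3 -mavx -fno-math-errno), not
part of the patch:

  #include <math.h>

  void
  lrint_demo (int *restrict out, const double *in, int n)
  {
    int i;
    for (i = 0; i < n; i++)
      out[i] = (int) lrint (in[i]);
  }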
@@ -29626,6 +29871,73 @@ ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
return new_fndecl;
}
+/* Returns a decl of a function that implements gather load with
+   memory type MEM_VECTYPE and index type INDEX_TYPE and SCALE.
+ Return NULL_TREE if it is not available. */
+
+static tree
+ix86_vectorize_builtin_gather (const_tree mem_vectype,
+ const_tree index_type, int scale)
+{
+ bool si;
+ enum ix86_builtins code;
+
+ if (! TARGET_AVX2)
+ return NULL_TREE;
+
+ if ((TREE_CODE (index_type) != INTEGER_TYPE
+ && !POINTER_TYPE_P (index_type))
+ || (TYPE_MODE (index_type) != SImode
+ && TYPE_MODE (index_type) != DImode))
+ return NULL_TREE;
+
+ if (TYPE_PRECISION (index_type) > POINTER_SIZE)
+ return NULL_TREE;
+
+  /* v*gather* insn sign-extends index to pointer mode.  */
+ if (TYPE_PRECISION (index_type) < POINTER_SIZE
+ && TYPE_UNSIGNED (index_type))
+ return NULL_TREE;
+
+ if (scale <= 0
+ || scale > 8
+ || (scale & (scale - 1)) != 0)
+ return NULL_TREE;
+
+ si = TYPE_MODE (index_type) == SImode;
+ switch (TYPE_MODE (mem_vectype))
+ {
+ case V2DFmode:
+ code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
+ break;
+ case V4DFmode:
+ code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
+ break;
+ case V2DImode:
+ code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
+ break;
+ case V4DImode:
+ code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
+ break;
+ case V4SFmode:
+ code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
+ break;
+ case V8SFmode:
+ code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
+ break;
+ case V4SImode:
+ code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
+ break;
+ case V8SImode:
+ code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
+ break;
+ default:
+ return NULL_TREE;
+ }
+
+ return ix86_builtins[code];
+}
+
/* Returns a code for a target-specific builtin that implements
reciprocal of the function, or NULL_TREE if not available. */
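One consequence of the index checks in ix86_vectorize_builtin_gather: a
narrow unsigned index type is rejected because the v*gather* instructions
sign-extend each index lane to pointer mode.  Illustrative only:

  void
  unsigned_index_demo (double *restrict out, const double *src,
                       const unsigned int *idx, int n)
  {
    int i;
    for (i = 0; i < n; i++)
      /* An idx[i] above 0x7fffffff would be sign-extended to a
         negative offset, so the hook returns NULL_TREE for this
         shape and no gather is used.  */
      out[i] = src[idx[i]];
  }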
@@ -37835,6 +38147,9 @@ ix86_autovectorize_vector_sizes (void)
#undef TARGET_VECTORIZE_BUILTIN_TM_STORE
#define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
+#undef TARGET_VECTORIZE_BUILTIN_GATHER
+#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
+
#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal