summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/cgraph.h6
-rw-r--r--gcc/config/aarch64/aarch64.c30
-rw-r--r--gcc/config/i386/i386.c8
-rw-r--r--gcc/gengtype.c1
-rw-r--r--gcc/omp-simd-clone.c70
-rw-r--r--gcc/poly-int-types.h8
-rw-r--r--gcc/poly-int.h57
-rw-r--r--gcc/tree-vect-stmts.c43
8 files changed, 159 insertions, 64 deletions
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index cf543705e13..cd22676ff9e 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -759,17 +759,17 @@ struct GTY(()) cgraph_simd_clone_arg {
struct GTY(()) cgraph_simd_clone {
/* Number of words in the SIMD lane associated with this clone. */
- unsigned int simdlen;
+ poly_uint64 simdlen;
/* Number of annotated function arguments in `args'. This is
usually the number of named arguments in FNDECL. */
unsigned int nargs;
/* Max hardware vector size in bits for integral vectors. */
- unsigned int vecsize_int;
+ poly_uint64 vecsize_int;
/* Max hardware vector size in bits for floating point vectors. */
- unsigned int vecsize_float;
+ poly_uint64 vecsize_float;
/* Machine mode of the mask argument(s), if they are to be passed
as bitmasks in integer argument(s). VOIDmode if masks are passed
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index db991e59cbe..27f587be7e7 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -22960,18 +22960,23 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
tree base_type, int num)
{
tree t, ret_type, arg_type;
- unsigned int elt_bits, vec_bits, count;
+ unsigned int elt_bits, count;
+ unsigned HOST_WIDE_INT const_simdlen;
+ poly_uint64 vec_bits;
if (!TARGET_SIMD)
return 0;
- if (clonei->simdlen
- && (clonei->simdlen < 2
- || clonei->simdlen > 1024
- || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
+ /* For now, SVE simdclones won't produce illegal simdlens, so only check
+ constant simdlens here. */
+ if (maybe_ne (clonei->simdlen, 0U)
+ && clonei->simdlen.is_constant (&const_simdlen)
+ && (const_simdlen < 2
+ || const_simdlen > 1024
+ || (const_simdlen & (const_simdlen - 1)) != 0))
{
warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
- "unsupported simdlen %d", clonei->simdlen);
+ "unsupported simdlen %wd", const_simdlen);
return 0;
}
@@ -23015,21 +23020,24 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
clonei->vecsize_mangle = 'n';
clonei->mask_mode = VOIDmode;
elt_bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type));
- if (clonei->simdlen == 0)
+ if (known_eq (clonei->simdlen, 0U))
{
count = 2;
vec_bits = (num == 0 ? 64 : 128);
- clonei->simdlen = vec_bits / elt_bits;
+ clonei->simdlen = exact_div (vec_bits, elt_bits);
}
else
{
count = 1;
vec_bits = clonei->simdlen * elt_bits;
- if (vec_bits != 64 && vec_bits != 128)
+ /* For now, SVE simdclones won't produce illegal simdlens, so only check
+ constant simdlens here. */
+ if (clonei->simdlen.is_constant (&const_simdlen)
+ && maybe_ne (vec_bits, 64U) && maybe_ne (vec_bits, 128U))
{
warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
- "GCC does not currently support simdlen %d for type %qT",
- clonei->simdlen, base_type);
+ "GCC does not currently support simdlen %wd for type %qT",
+ const_simdlen, base_type);
return 0;
}
}
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index caa9b9d5ac1..df47a53d701 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -22485,7 +22485,7 @@ ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
|| (clonei->simdlen & (clonei->simdlen - 1)) != 0))
{
warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
- "unsupported simdlen %d", clonei->simdlen);
+ "unsupported simdlen %wd", clonei->simdlen.to_constant ());
return 0;
}
@@ -22590,7 +22590,8 @@ ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
clonei->simdlen = clonei->vecsize_int;
else
clonei->simdlen = clonei->vecsize_float;
- clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
+ clonei->simdlen = clonei->simdlen
+ / GET_MODE_BITSIZE (TYPE_MODE (base_type));
}
else if (clonei->simdlen > 16)
{
@@ -22612,7 +22613,8 @@ ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
if (cnt > (TARGET_64BIT ? 16 : 8))
{
warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
- "unsupported simdlen %d", clonei->simdlen);
+ "unsupported simdlen %wd",
+ clonei->simdlen.to_constant ());
return 0;
}
}
diff --git a/gcc/gengtype.c b/gcc/gengtype.c
index a7cf5c25893..b21eeacdbe2 100644
--- a/gcc/gengtype.c
+++ b/gcc/gengtype.c
@@ -5200,6 +5200,7 @@ main (int argc, char **argv)
POS_HERE (do_scalar_typedef ("widest_int", &pos));
POS_HERE (do_scalar_typedef ("int64_t", &pos));
POS_HERE (do_scalar_typedef ("poly_int64", &pos));
+ POS_HERE (do_scalar_typedef ("poly_uint64", &pos));
POS_HERE (do_scalar_typedef ("uint64_t", &pos));
POS_HERE (do_scalar_typedef ("uint8", &pos));
POS_HERE (do_scalar_typedef ("uintptr_t", &pos));
diff --git a/gcc/omp-simd-clone.c b/gcc/omp-simd-clone.c
index 942fb971cb7..cbd58c8987b 100644
--- a/gcc/omp-simd-clone.c
+++ b/gcc/omp-simd-clone.c
@@ -338,16 +338,18 @@ simd_clone_mangle (struct cgraph_node *node,
{
char vecsize_mangle = clone_info->vecsize_mangle;
char mask = clone_info->inbranch ? 'M' : 'N';
- unsigned int simdlen = clone_info->simdlen;
+ poly_uint64 simdlen = clone_info->simdlen;
unsigned int n;
pretty_printer pp;
- gcc_assert (vecsize_mangle && simdlen);
+ gcc_assert (vecsize_mangle && maybe_ne (simdlen, 0U));
pp_string (&pp, "_ZGV");
pp_character (&pp, vecsize_mangle);
pp_character (&pp, mask);
- pp_decimal_int (&pp, simdlen);
+ /* simdlen is always constant for now; variable simdlen would print 'n'. */
+ unsigned int len = simdlen.to_constant ();
+ pp_decimal_int (&pp, (len));
for (n = 0; n < clone_info->nargs; ++n)
{
@@ -491,7 +493,7 @@ simd_clone_adjust_return_type (struct cgraph_node *node)
{
tree fndecl = node->decl;
tree orig_rettype = TREE_TYPE (TREE_TYPE (fndecl));
- unsigned int veclen;
+ poly_uint64 veclen;
tree t;
/* Adjust the function return type. */
@@ -502,17 +504,18 @@ simd_clone_adjust_return_type (struct cgraph_node *node)
veclen = node->simdclone->vecsize_int;
else
veclen = node->simdclone->vecsize_float;
- veclen /= GET_MODE_BITSIZE (SCALAR_TYPE_MODE (t));
- if (veclen > node->simdclone->simdlen)
+ veclen = exact_div (veclen, GET_MODE_BITSIZE (SCALAR_TYPE_MODE (t)));
+ if (multiple_p (veclen, node->simdclone->simdlen))
veclen = node->simdclone->simdlen;
if (POINTER_TYPE_P (t))
t = pointer_sized_int_node;
- if (veclen == node->simdclone->simdlen)
+ if (known_eq (veclen, node->simdclone->simdlen))
t = build_vector_type (t, node->simdclone->simdlen);
else
{
t = build_vector_type (t, veclen);
- t = build_array_type_nelts (t, node->simdclone->simdlen / veclen);
+ t = build_array_type_nelts (t, exact_div (node->simdclone->simdlen,
+ veclen));
}
TREE_TYPE (TREE_TYPE (fndecl)) = t;
if (!node->definition)
@@ -526,7 +529,7 @@ simd_clone_adjust_return_type (struct cgraph_node *node)
tree atype = build_array_type_nelts (orig_rettype,
node->simdclone->simdlen);
- if (veclen != node->simdclone->simdlen)
+ if (maybe_ne (veclen, node->simdclone->simdlen))
return build1 (VIEW_CONVERT_EXPR, atype, t);
/* Set up a SIMD array to use as the return value. */
@@ -546,7 +549,7 @@ simd_clone_adjust_return_type (struct cgraph_node *node)
SIMDLEN is the number of elements. */
static tree
-create_tmp_simd_array (const char *prefix, tree type, int simdlen)
+create_tmp_simd_array (const char *prefix, tree type, poly_uint64 simdlen)
{
tree atype = build_array_type_nelts (type, simdlen);
tree avar = create_tmp_var_raw (atype, prefix);
@@ -578,7 +581,8 @@ simd_clone_adjust_argument_types (struct cgraph_node *node)
struct cgraph_simd_clone *sc = node->simdclone;
vec<ipa_adjusted_param, va_gc> *new_params = NULL;
vec_safe_reserve (new_params, sc->nargs);
- unsigned i, j, veclen;
+ unsigned i, j, k;
+ poly_uint64 veclen;
for (i = 0; i < sc->nargs; ++i)
{
@@ -614,8 +618,9 @@ simd_clone_adjust_argument_types (struct cgraph_node *node)
veclen = sc->vecsize_int;
else
veclen = sc->vecsize_float;
- veclen /= GET_MODE_BITSIZE (SCALAR_TYPE_MODE (parm_type));
- if (veclen > sc->simdlen)
+ veclen = exact_div (veclen,
+ GET_MODE_BITSIZE (SCALAR_TYPE_MODE (parm_type)));
+ if (multiple_p (veclen, sc->simdlen))
veclen = sc->simdlen;
adj.op = IPA_PARAM_OP_NEW;
adj.param_prefix_index = IPA_PARAM_PREFIX_SIMD;
@@ -624,10 +629,11 @@ simd_clone_adjust_argument_types (struct cgraph_node *node)
else
adj.type = build_vector_type (parm_type, veclen);
sc->args[i].vector_type = adj.type;
- for (j = veclen; j < sc->simdlen; j += veclen)
+ k = vector_unroll_factor (sc->simdlen, veclen);
+ for (j = 1; j < k; j++)
{
vec_safe_push (new_params, adj);
- if (j == veclen)
+ if (j == 1)
{
memset (&adj, 0, sizeof (adj));
adj.op = IPA_PARAM_OP_NEW;
@@ -663,8 +669,9 @@ simd_clone_adjust_argument_types (struct cgraph_node *node)
veclen = sc->vecsize_int;
else
veclen = sc->vecsize_float;
- veclen /= GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type));
- if (veclen > sc->simdlen)
+ veclen = exact_div (veclen,
+ GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type)));
+ if (multiple_p (veclen, sc->simdlen))
veclen = sc->simdlen;
if (sc->mask_mode != VOIDmode)
adj.type
@@ -675,7 +682,8 @@ simd_clone_adjust_argument_types (struct cgraph_node *node)
adj.type = build_vector_type (base_type, veclen);
vec_safe_push (new_params, adj);
- for (j = veclen; j < sc->simdlen; j += veclen)
+ k = vector_unroll_factor (sc->simdlen, veclen);
+ for (j = 1; j < k; j++)
vec_safe_push (new_params, adj);
/* We have previously allocated one extra entry for the mask. Use
@@ -690,9 +698,9 @@ simd_clone_adjust_argument_types (struct cgraph_node *node)
if (sc->mask_mode == VOIDmode)
sc->args[i].simd_array
= create_tmp_simd_array ("mask", base_type, sc->simdlen);
- else if (veclen < sc->simdlen)
+ else if (k > 1)
sc->args[i].simd_array
- = create_tmp_simd_array ("mask", adj.type, sc->simdlen / veclen);
+ = create_tmp_simd_array ("mask", adj.type, k);
else
sc->args[i].simd_array = NULL_TREE;
}
@@ -783,7 +791,8 @@ simd_clone_init_simd_arrays (struct cgraph_node *node,
}
continue;
}
- if (simd_clone_subparts (TREE_TYPE (arg)) == node->simdclone->simdlen)
+ if (known_eq (simd_clone_subparts (TREE_TYPE (arg)),
+ node->simdclone->simdlen))
{
tree ptype = build_pointer_type (TREE_TYPE (TREE_TYPE (array)));
tree ptr = build_fold_addr_expr (array);
@@ -795,8 +804,10 @@ simd_clone_init_simd_arrays (struct cgraph_node *node,
else
{
unsigned int simdlen = simd_clone_subparts (TREE_TYPE (arg));
+ unsigned int times = vector_unroll_factor (node->simdclone->simdlen,
+ simdlen);
tree ptype = build_pointer_type (TREE_TYPE (TREE_TYPE (array)));
- for (k = 0; k < node->simdclone->simdlen; k += simdlen)
+ for (k = 0; k < times; k++)
{
tree ptr = build_fold_addr_expr (array);
int elemsize;
@@ -808,7 +819,7 @@ simd_clone_init_simd_arrays (struct cgraph_node *node,
tree elemtype = TREE_TYPE (TREE_TYPE (arg));
elemsize = GET_MODE_SIZE (SCALAR_TYPE_MODE (elemtype));
tree t = build2 (MEM_REF, TREE_TYPE (arg), ptr,
- build_int_cst (ptype, k * elemsize));
+ build_int_cst (ptype, k * elemsize * simdlen));
t = build2 (MODIFY_EXPR, TREE_TYPE (t), t, arg);
gimplify_and_add (t, &seq);
}
@@ -981,8 +992,9 @@ ipa_simd_modify_function_body (struct cgraph_node *node,
iter, NULL_TREE, NULL_TREE);
adjustments->register_replacement (&(*adjustments->m_adj_params)[j], r);
- if (simd_clone_subparts (vectype) < node->simdclone->simdlen)
- j += node->simdclone->simdlen / simd_clone_subparts (vectype) - 1;
+ if (multiple_p (node->simdclone->simdlen, simd_clone_subparts (vectype)))
+ j += vector_unroll_factor (node->simdclone->simdlen,
+ simd_clone_subparts (vectype)) - 1;
}
tree name;
@@ -1249,7 +1261,8 @@ simd_clone_adjust (struct cgraph_node *node)
below). */
loop = alloc_loop ();
cfun->has_force_vectorize_loops = true;
- loop->safelen = node->simdclone->simdlen;
+ /* For now, simdlen is always constant. */
+ loop->safelen = node->simdclone->simdlen.to_constant ();
loop->force_vectorize = true;
loop->header = body_bb;
}
@@ -1275,7 +1288,8 @@ simd_clone_adjust (struct cgraph_node *node)
{
tree maskt = TREE_TYPE (mask_array);
int c = tree_to_uhwi (TYPE_MAX_VALUE (TYPE_DOMAIN (maskt)));
- c = node->simdclone->simdlen / (c + 1);
+ /* For now, c must be constant here. */
+ c = exact_div (node->simdclone->simdlen, c + 1).to_constant ();
int s = exact_log2 (c);
gcc_assert (s > 0);
c--;
@@ -1683,7 +1697,7 @@ expand_simd_clones (struct cgraph_node *node)
if (clone_info == NULL)
continue;
- int orig_simdlen = clone_info->simdlen;
+ poly_uint64 orig_simdlen = clone_info->simdlen;
tree base_type = simd_clone_compute_base_data_type (node, clone_info);
/* The target can return 0 (no simd clones should be created),
1 (just one ISA of simd clones should be created) or higher
diff --git a/gcc/poly-int-types.h b/gcc/poly-int-types.h
index 5e04e63ebf2..302e5bfd97d 100644
--- a/gcc/poly-int-types.h
+++ b/gcc/poly-int-types.h
@@ -81,6 +81,14 @@ typedef poly_int<NUM_POLY_INT_COEFFS, widest_int> poly_widest_int;
#define vector_element_size(SIZE, NELTS) \
(exact_div (SIZE, NELTS).to_constant ())
+/* Return the unroll factor when a vector that has NELTS1 elements is
+ unrolled into vectors that have NELTS2 elements.
+
+ to_constant () is safe in this situation because the ratio between the
+ NELTS of two vectors is always a constant scalar. */
+#define vector_unroll_factor(NELTS1, NELTS2) \
+ (exact_div (NELTS1, NELTS2).to_constant ())
+
/* Wrapper for poly_int arguments to target macros, so that if a target
doesn't need polynomial-sized modes, its header file can continue to
treat the argument as a normal constant. This should go away once
diff --git a/gcc/poly-int.h b/gcc/poly-int.h
index b953ffacec4..96a763daedf 100644
--- a/gcc/poly-int.h
+++ b/gcc/poly-int.h
@@ -2044,6 +2044,63 @@ constant_multiple_p (const poly_int_pod<N, Ca> &a,
return true;
}
+/* Return true if A is a constant multiple of B. */
+
+template<unsigned int N, typename Ca, typename Cb>
+inline typename if_nonpoly<Cb, bool>::type
+constant_multiple_p (const poly_int_pod<N, Ca> &a, Cb b)
+{
+ typedef POLY_CAST (Ca, Cb) NCa;
+ typedef POLY_CAST (Cb, Ca) NCb;
+
+ /* Do the modulus before the constant check, to catch divide by
+ zero errors. */
+ if (NCa (a.coeffs[0]) % NCb (b) != 0 || !a.is_constant ())
+ return false;
+ return true;
+}
+
+template<unsigned int N, typename Ca, typename Cb>
+inline typename if_nonpoly<Ca, bool>::type
+constant_multiple_p (Ca a, const poly_int_pod<N, Cb> &b)
+{
+ typedef POLY_CAST (Ca, Cb) NCa;
+ typedef POLY_CAST (Cb, Ca) NCb;
+ typedef POLY_INT_TYPE (Ca) int_type;
+
+ /* Do the modulus before the constant check, to catch divide by
+ zero errors. */
+ if (NCa (a) % NCb (b.coeffs[0]) != 0
+ || (a != int_type (0) && !b.is_constant ()))
+ return false;
+ return true;
+}
+
+template<unsigned int N, typename Ca, typename Cb>
+inline bool
+constant_multiple_p (const poly_int_pod<N, Ca> &a,
+ const poly_int_pod<N, Cb> &b)
+{
+ typedef POLY_CAST (Ca, Cb) NCa;
+ typedef POLY_CAST (Cb, Ca) NCb;
+ typedef POLY_INT_TYPE (Ca) ICa;
+ typedef POLY_INT_TYPE (Cb) ICb;
+ typedef POLY_BINARY_COEFF (Ca, Cb) C;
+
+ if (NCa (a.coeffs[0]) % NCb (b.coeffs[0]) != 0)
+ return false;
+
+ C r = NCa (a.coeffs[0]) / NCb (b.coeffs[0]);
+ for (unsigned int i = 1; i < N; ++i)
+ if (b.coeffs[i] == ICb (0)
+ ? a.coeffs[i] != ICa (0)
+ : (NCa (a.coeffs[i]) % NCb (b.coeffs[i]) != 0
+ || NCa (a.coeffs[i]) / NCb (b.coeffs[i]) != r))
+ return false;
+ return true;
+}
+
+
/* Return true if A is a multiple of B. */
template<typename Ca, typename Cb>
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 1a0da0e84cc..9cf85a0cd51 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -3707,7 +3707,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
tree op, type;
tree vec_oprnd0 = NULL_TREE;
tree vectype;
- unsigned int nunits;
+ poly_uint64 nunits;
loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
@@ -3859,8 +3859,8 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
arginfo.quick_push (thisarginfo);
}
- unsigned HOST_WIDE_INT vf;
- if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
+ poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ if (!vf.is_constant ())
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -3878,12 +3878,12 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
n = n->simdclone->next_clone)
{
unsigned int this_badness = 0;
- if (n->simdclone->simdlen > vf
+ unsigned int num_calls;
+ if (!constant_multiple_p (vf, n->simdclone->simdlen, &num_calls)
|| n->simdclone->nargs != nargs)
continue;
- if (n->simdclone->simdlen < vf)
- this_badness += (exact_log2 (vf)
- - exact_log2 (n->simdclone->simdlen)) * 1024;
+ if (num_calls != 1)
+ this_badness += exact_log2 (num_calls) * 1024;
if (n->simdclone->inbranch)
this_badness += 2048;
int target_badness = targetm.simd_clone.usable (n);
@@ -3964,19 +3964,19 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
slp_node);
if (arginfo[i].vectype == NULL
- || (simd_clone_subparts (arginfo[i].vectype)
- > bestn->simdclone->simdlen))
+ || !constant_multiple_p (bestn->simdclone->simdlen,
+ simd_clone_subparts (arginfo[i].vectype)))
return false;
}
fndecl = bestn->decl;
nunits = bestn->simdclone->simdlen;
- ncopies = vf / nunits;
+ ncopies = vector_unroll_factor (vf, nunits);
/* If the function isn't const, only allow it in simd loops where user
has asserted that at least nunits consecutive iterations can be
performed using SIMD instructions. */
- if ((loop == NULL || (unsigned) loop->safelen < nunits)
+ if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
&& gimple_vuse (stmt))
return false;
@@ -4054,7 +4054,8 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
{
case SIMD_CLONE_ARG_TYPE_VECTOR:
atype = bestn->simdclone->args[i].vector_type;
- o = nunits / simd_clone_subparts (atype);
+ o = vector_unroll_factor (nunits,
+ simd_clone_subparts (atype));
for (m = j * o; m < (j + 1) * o; m++)
{
if (simd_clone_subparts (atype)
@@ -4179,7 +4180,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
? POINTER_PLUS_EXPR : PLUS_EXPR;
tree type = POINTER_TYPE_P (TREE_TYPE (op))
? sizetype : TREE_TYPE (op);
- widest_int cst
+ poly_widest_int cst
= wi::mul (bestn->simdclone->args[i].linear_step,
ncopies * nunits);
tree tcst = wide_int_to_tree (type, cst);
@@ -4200,7 +4201,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
? POINTER_PLUS_EXPR : PLUS_EXPR;
tree type = POINTER_TYPE_P (TREE_TYPE (op))
? sizetype : TREE_TYPE (op);
- widest_int cst
+ poly_widest_int cst
= wi::mul (bestn->simdclone->args[i].linear_step,
j * nunits);
tree tcst = wide_int_to_tree (type, cst);
@@ -4226,7 +4227,8 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
gcall *new_call = gimple_build_call_vec (fndecl, vargs);
if (vec_dest)
{
- gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
+ gcc_assert (ratype
+ || known_eq (simd_clone_subparts (rtype), nunits));
if (ratype)
new_temp = create_tmp_var (ratype);
else if (useless_type_conversion_p (vectype, rtype))
@@ -4240,12 +4242,13 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
if (vec_dest)
{
- if (simd_clone_subparts (vectype) < nunits)
+ if (!multiple_p (simd_clone_subparts (vectype), nunits))
{
unsigned int k, l;
poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
- k = nunits / simd_clone_subparts (vectype);
+ k = vector_unroll_factor (nunits,
+ simd_clone_subparts (vectype));
gcc_assert ((k & (k - 1)) == 0);
for (l = 0; l < k; l++)
{
@@ -4271,7 +4274,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
continue;
}
- else if (simd_clone_subparts (vectype) > nunits)
+ else if (!multiple_p (nunits, simd_clone_subparts (vectype)))
{
unsigned int k = (simd_clone_subparts (vectype)
/ simd_clone_subparts (rtype));
@@ -4280,7 +4283,9 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
vec_alloc (ret_ctor_elts, k);
if (ratype)
{
- unsigned int m, o = nunits / simd_clone_subparts (rtype);
+ unsigned int m, o;
+ o = vector_unroll_factor (nunits,
+ simd_clone_subparts (rtype));
for (m = 0; m < o; m++)
{
tree tem = build4 (ARRAY_REF, rtype, new_temp,