author     ienkovich <ienkovich@138bc75d-0d04-0410-961f-82ee72b054a4>  2015-11-10 11:57:34 +0000
committer  ienkovich <ienkovich@138bc75d-0d04-0410-961f-82ee72b054a4>  2015-11-10 11:57:34 +0000
commit     dab48979c4f2797cda7836f535c6a4924e4088c6 (patch)
tree       5b2a30146ccd97f4ba3781bec197d5f88c9ed198 /gcc
parent     e86ce8ed8c07f5c430bc2c8ec316e4818922e3a7 (diff)
gcc/
* expr.c (do_store_flag): Use expand_vec_cmp_expr for mask results.
(const_vector_mask_from_tree): New.
(const_vector_from_tree): Use const_vector_mask_from_tree
for boolean vectors.
* optabs-query.h (get_vec_cmp_icode): New.
* optabs-tree.c (expand_vec_cmp_expr_p): New.
* optabs-tree.h (expand_vec_cmp_expr_p): New.
* optabs.c (vector_compare_rtx): Add OPNO arg.
(expand_vec_cond_expr): Adjust to vector_compare_rtx change.
(expand_vec_cmp_expr): New.
* optabs.def (vec_cmp_optab): New.
(vec_cmpu_optab): New.
* optabs.h (expand_vec_cmp_expr): New.
* tree-vect-generic.c (expand_vector_comparison): Add vector
comparison optabs check.
* tree-vect-loop.c (vect_determine_vectorization_factor): Ignore mask
operations for VF. Add mask type computation.
* tree-vect-stmts.c (get_mask_type_for_scalar_type): New.
(vectorizable_comparison): New.
(vect_analyze_stmt): Add vectorizable_comparison.
(vect_transform_stmt): Likewise.
(vect_init_vector): Support boolean vector invariants.
(vect_get_vec_def_for_operand): Add VECTYPE arg.
(vectorizable_condition): Directly provide vectype for invariants
used in comparison.
* tree-vectorizer.h (get_mask_type_for_scalar_type): New.
(enum vect_var_kind): Add vect_mask_var.
(enum stmt_vec_info_type): Add comparison_vec_info_type.
(vectorizable_comparison): New.
(vect_get_vec_def_for_operand): Add VECTYPE arg.
* tree-vect-data-refs.c (vect_get_new_vect_var): Support vect_mask_var.
(vect_create_destination_var): Likewise.
* tree-vect-patterns.c (check_bool_pattern): Check fails
if we can vectorize comparison directly.
(search_type_for_mask): New.
(vect_recog_bool_pattern): Support cases when bool pattern
check fails.
* tree-vect-slp.c (vect_build_slp_tree_1): Allow
comparison statements.
(vect_get_constant_vectors): Support boolean vector
constants.
* config/i386/i386-protos.h (ix86_expand_mask_vec_cmp): New.
(ix86_expand_int_vec_cmp): New.
(ix86_expand_fp_vec_cmp): New.
* config/i386/i386.c (ix86_expand_sse_cmp): Allow NULL for
op_true and op_false.
(ix86_int_cmp_code_to_pcmp_immediate): New.
(ix86_fp_cmp_code_to_pcmp_immediate): New.
(ix86_cmp_code_to_pcmp_immediate): New.
(ix86_expand_mask_vec_cmp): New.
(ix86_expand_fp_vec_cmp): New.
(ix86_expand_int_sse_cmp): New.
(ix86_expand_int_vcond): Use ix86_expand_int_sse_cmp.
(ix86_expand_int_vec_cmp): New.
(ix86_get_mask_mode): New.
(TARGET_VECTORIZE_GET_MASK_MODE): New.
* config/i386/sse.md (avx512fmaskmodelower): New.
(vec_cmp<mode><avx512fmaskmodelower>): New.
(vec_cmp<mode><sseintvecmodelower>): New.
(vec_cmpv2div2di): New.
(vec_cmpu<mode><avx512fmaskmodelower>): New.
(vec_cmpu<mode><sseintvecmodelower>): New.
(vec_cmpuv2div2di): New.
gcc/testsuite/
* gcc.dg/vect/slp-cond-5.c: New test.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@230098 138bc75d-0d04-0410-961f-82ee72b054a4
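
For reference, the kind of scalar code this patch lets the vectorizer handle directly is a comparison whose boolean result feeds a selection, as exercised by the new slp-cond-5.c test. A minimal sketch (names and sizes here are illustrative, not taken from the patch):

  #define N 1024

  int a[N], b[N], c[N];

  /* With the new vec_cmp/vec_cmpu optabs the a[i] > b[i] comparison can
     be expanded as a vector comparison producing a boolean vector (an
     AVX-512 k-mask when available) instead of being rewritten into
     integer arithmetic by the bool pattern.  */
  void
  select_gt (void)
  {
    for (int i = 0; i < N; i++)
      c[i] = a[i] > b[i] ? a[i] : b[i];
  }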
Diffstat (limited to 'gcc')

 gcc/ChangeLog                          |  66
 gcc/config/i386/i386-protos.h          |   3
 gcc/config/i386/i386.c                 | 386
 gcc/config/i386/sse.md                 | 153
 gcc/expr.c                             |  49
 gcc/optabs-query.h                     |  10
 gcc/optabs-tree.c                      |  13
 gcc/optabs-tree.h                      |   1
 gcc/optabs.c                           |  44
 gcc/optabs.def                         |   2
 gcc/optabs.h                           |   3
 gcc/testsuite/ChangeLog                |   4
 gcc/testsuite/gcc.dg/vect/slp-cond-5.c |  81
 gcc/tree-vect-data-refs.c              |   9
 gcc/tree-vect-generic.c                |   3
 gcc/tree-vect-loop.c                   | 135
 gcc/tree-vect-patterns.c               | 184
 gcc/tree-vect-slp.c                    |  27
 gcc/tree-vect-stmts.c                  | 229
 gcc/tree-vectorizer.h                  |   9

20 files changed, 1280 insertions, 131 deletions
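
As a reading aid for the i386 changes below, this standalone sketch mirrors the comparison-code-to-immediate mapping introduced by ix86_int_cmp_code_to_pcmp_immediate for AVX-512 VPCMP; the enum is a stand-in for GCC's rtx_code (signed and unsigned codes share an immediate, signedness being carried by UNSPEC_PCMP vs. UNSPEC_UNSIGNED_PCMP):

  #include <stdio.h>

  /* Stand-in for the rtx codes the new helper handles.  */
  enum cmp_code { CMP_EQ, CMP_LT, CMP_LE, CMP_NE, CMP_GE, CMP_GT };

  /* Same mapping as ix86_int_cmp_code_to_pcmp_immediate in the patch:
     EQ -> 0, LT/LTU -> 1, LE/LEU -> 2, NE -> 4, GE/GEU -> 5, GT/GTU -> 6.  */
  static int
  pcmp_immediate (enum cmp_code code)
  {
    switch (code)
      {
      case CMP_EQ: return 0;
      case CMP_LT: return 1;
      case CMP_LE: return 2;
      case CMP_NE: return 4;
      case CMP_GE: return 5;
      case CMP_GT: return 6;
      }
    return -1;
  }

  int
  main (void)
  {
    printf ("GT encodes as VPCMP immediate %d\n", pcmp_immediate (CMP_GT));
    return 0;
  }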
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 3bc856546c6..0141553d307 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,69 @@ +2015-11-10 Ilya Enkovich <enkovich.gnu@gmail.com> + + * expr.c (do_store_flag): Use expand_vec_cmp_expr for mask results. + (const_vector_mask_from_tree): New. + (const_vector_from_tree): Use const_vector_mask_from_tree + for boolean vectors. + * optabs-query.h (get_vec_cmp_icode): New. + * optabs-tree.c (expand_vec_cmp_expr_p): New. + * optabs-tree.h (expand_vec_cmp_expr_p): New. + * optabs.c (vector_compare_rtx): Add OPNO arg. + (expand_vec_cond_expr): Adjust to vector_compare_rtx change. + (expand_vec_cmp_expr): New. + * optabs.def (vec_cmp_optab): New. + (vec_cmpu_optab): New. + * optabs.h (expand_vec_cmp_expr): New. + * tree-vect-generic.c (expand_vector_comparison): Add vector + comparison optabs check. + * tree-vect-loop.c (vect_determine_vectorization_factor): Ignore mask + operations for VF. Add mask type computation. + * tree-vect-stmts.c (get_mask_type_for_scalar_type): New. + (vectorizable_comparison): New. + (vect_analyze_stmt): Add vectorizable_comparison. + (vect_transform_stmt): Likewise. + (vect_init_vector): Support boolean vector invariants. + (vect_get_vec_def_for_operand): Add VECTYPE arg. + (vectorizable_condition): Directly provide vectype for invariants + used in comparison. + * tree-vectorizer.h (get_mask_type_for_scalar_type): New. + (enum vect_var_kind): Add vect_mask_var. + (enum stmt_vec_info_type): Add comparison_vec_info_type. + (vectorizable_comparison): New. + (vect_get_vec_def_for_operand): Add VECTYPE arg. + * tree-vect-data-refs.c (vect_get_new_vect_var): Support vect_mask_var. + (vect_create_destination_var): Likewise. + * tree-vect-patterns.c (check_bool_pattern): Check fails + if we can vectorize comparison directly. + (search_type_for_mask): New. + (vect_recog_bool_pattern): Support cases when bool pattern + check fails. + * tree-vect-slp.c (vect_build_slp_tree_1): Allow + comparison statements. + (vect_get_constant_vectors): Support boolean vector + constants. + * config/i386/i386-protos.h (ix86_expand_mask_vec_cmp): New. + (ix86_expand_int_vec_cmp): New. + (ix86_expand_fp_vec_cmp): New. + * config/i386/i386.c (ix86_expand_sse_cmp): Allow NULL for + op_true and op_false. + (ix86_int_cmp_code_to_pcmp_immediate): New. + (ix86_fp_cmp_code_to_pcmp_immediate): New. + (ix86_cmp_code_to_pcmp_immediate): New. + (ix86_expand_mask_vec_cmp): New. + (ix86_expand_fp_vec_cmp): New. + (ix86_expand_int_sse_cmp): New. + (ix86_expand_int_vcond): Use ix86_expand_int_sse_cmp. + (ix86_expand_int_vec_cmp): New. + (ix86_get_mask_mode): New. + (TARGET_VECTORIZE_GET_MASK_MODE): New. + * config/i386/sse.md (avx512fmaskmodelower): New. + (vec_cmp<mode><avx512fmaskmodelower>): New. + (vec_cmp<mode><sseintvecmodelower>): New. + (vec_cmpv2div2di): New. + (vec_cmpu<mode><avx512fmaskmodelower>): New. + (vec_cmpu<mode><sseintvecmodelower>): New. + (vec_cmpuv2div2di): New. 
+ 2015-11-10 Richard Biener <rguenther@suse.de> PR tree-optimization/68240 diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 026b778f4e3..9e20714099d 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -129,6 +129,9 @@ extern bool ix86_expand_fp_vcond (rtx[]); extern bool ix86_expand_int_vcond (rtx[]); extern void ix86_expand_vec_perm (rtx[]); extern bool ix86_expand_vec_perm_const (rtx[]); +extern bool ix86_expand_mask_vec_cmp (rtx[]); +extern bool ix86_expand_int_vec_cmp (rtx[]); +extern bool ix86_expand_fp_vec_cmp (rtx[]); extern void ix86_expand_sse_unpack (rtx, rtx, bool, bool); extern bool ix86_expand_int_addcc (rtx[]); extern rtx ix86_expand_call (rtx, rtx, rtx, rtx, rtx, bool); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index e93f7a0b41a..f6c17dfd405 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -22582,8 +22582,8 @@ ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1, cmp_op1 = force_reg (cmp_ops_mode, cmp_op1); if (optimize - || reg_overlap_mentioned_p (dest, op_true) - || reg_overlap_mentioned_p (dest, op_false)) + || (op_true && reg_overlap_mentioned_p (dest, op_true)) + || (op_false && reg_overlap_mentioned_p (dest, op_false))) dest = gen_reg_rtx (maskcmp ? cmp_mode : mode); /* Compare patterns for int modes are unspec in AVX512F only. */ @@ -22644,6 +22644,14 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) rtx t2, t3, x; + /* If we have an integer mask and FP value then we need + to cast mask to FP mode. */ + if (mode != cmpmode && VECTOR_MODE_P (cmpmode)) + { + cmp = force_reg (cmpmode, cmp); + cmp = gen_rtx_SUBREG (mode, cmp, 0); + } + if (vector_all_ones_operand (op_true, mode) && rtx_equal_p (op_false, CONST0_RTX (mode)) && !maskcmp) @@ -22855,34 +22863,127 @@ ix86_expand_fp_movcc (rtx operands[]) return true; } -/* Expand a floating-point vector conditional move; a vcond operation - rather than a movcc operation. */ +/* Helper for ix86_cmp_code_to_pcmp_immediate for int modes. */ + +static int +ix86_int_cmp_code_to_pcmp_immediate (enum rtx_code code) +{ + switch (code) + { + case EQ: + return 0; + case LT: + case LTU: + return 1; + case LE: + case LEU: + return 2; + case NE: + return 4; + case GE: + case GEU: + return 5; + case GT: + case GTU: + return 6; + default: + gcc_unreachable (); + } +} + +/* Helper for ix86_cmp_code_to_pcmp_immediate for fp modes. */ + +static int +ix86_fp_cmp_code_to_pcmp_immediate (enum rtx_code code) +{ + switch (code) + { + case EQ: + return 0x08; + case NE: + return 0x04; + case GT: + return 0x16; + case LE: + return 0x1a; + case GE: + return 0x15; + case LT: + return 0x19; + default: + gcc_unreachable (); + } +} + +/* Return immediate value to be used in UNSPEC_PCMP + for comparison CODE in MODE. */ + +static int +ix86_cmp_code_to_pcmp_immediate (enum rtx_code code, machine_mode mode) +{ + if (FLOAT_MODE_P (mode)) + return ix86_fp_cmp_code_to_pcmp_immediate (code); + return ix86_int_cmp_code_to_pcmp_immediate (code); +} + +/* Expand AVX-512 vector comparison. 
*/ bool -ix86_expand_fp_vcond (rtx operands[]) +ix86_expand_mask_vec_cmp (rtx operands[]) { - enum rtx_code code = GET_CODE (operands[3]); + machine_mode mask_mode = GET_MODE (operands[0]); + machine_mode cmp_mode = GET_MODE (operands[2]); + enum rtx_code code = GET_CODE (operands[1]); + rtx imm = GEN_INT (ix86_cmp_code_to_pcmp_immediate (code, cmp_mode)); + int unspec_code; + rtx unspec; + + switch (code) + { + case LEU: + case GTU: + case GEU: + case LTU: + unspec_code = UNSPEC_UNSIGNED_PCMP; + break; + default: + unspec_code = UNSPEC_PCMP; + } + + unspec = gen_rtx_UNSPEC (mask_mode, gen_rtvec (3, operands[2], + operands[3], imm), + unspec_code); + emit_insn (gen_rtx_SET (operands[0], unspec)); + + return true; +} + +/* Expand fp vector comparison. */ + +bool +ix86_expand_fp_vec_cmp (rtx operands[]) +{ + enum rtx_code code = GET_CODE (operands[1]); rtx cmp; code = ix86_prepare_sse_fp_compare_args (operands[0], code, - &operands[4], &operands[5]); + &operands[2], &operands[3]); if (code == UNKNOWN) { rtx temp; - switch (GET_CODE (operands[3])) + switch (GET_CODE (operands[1])) { case LTGT: - temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4], - operands[5], operands[0], operands[0]); - cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4], - operands[5], operands[1], operands[2]); + temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[2], + operands[3], NULL, NULL); + cmp = ix86_expand_sse_cmp (operands[0], NE, operands[2], + operands[3], NULL, NULL); code = AND; break; case UNEQ: - temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4], - operands[5], operands[0], operands[0]); - cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4], - operands[5], operands[1], operands[2]); + temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[2], + operands[3], NULL, NULL); + cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[2], + operands[3], NULL, NULL); code = IOR; break; default: @@ -22890,72 +22991,26 @@ ix86_expand_fp_vcond (rtx operands[]) } cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1, OPTAB_DIRECT); - ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]); - return true; } + else + cmp = ix86_expand_sse_cmp (operands[0], code, operands[2], operands[3], + operands[1], operands[2]); - if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4], - operands[5], operands[1], operands[2])) - return true; + if (operands[0] != cmp) + emit_move_insn (operands[0], cmp); - cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5], - operands[1], operands[2]); - ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]); return true; } -/* Expand a signed/unsigned integral vector conditional move. */ - -bool -ix86_expand_int_vcond (rtx operands[]) +static rtx +ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1, + rtx op_true, rtx op_false, bool *negate) { - machine_mode data_mode = GET_MODE (operands[0]); - machine_mode mode = GET_MODE (operands[4]); - enum rtx_code code = GET_CODE (operands[3]); - bool negate = false; - rtx x, cop0, cop1; + machine_mode data_mode = GET_MODE (dest); + machine_mode mode = GET_MODE (cop0); + rtx x; - cop0 = operands[4]; - cop1 = operands[5]; - - /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31 - and x < 0 ? 1 : 0 into (unsigned) x >> 31. 
*/ - if ((code == LT || code == GE) - && data_mode == mode - && cop1 == CONST0_RTX (mode) - && operands[1 + (code == LT)] == CONST0_RTX (data_mode) - && GET_MODE_UNIT_SIZE (data_mode) > 1 - && GET_MODE_UNIT_SIZE (data_mode) <= 8 - && (GET_MODE_SIZE (data_mode) == 16 - || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32))) - { - rtx negop = operands[2 - (code == LT)]; - int shift = GET_MODE_UNIT_BITSIZE (data_mode) - 1; - if (negop == CONST1_RTX (data_mode)) - { - rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift), - operands[0], 1, OPTAB_DIRECT); - if (res != operands[0]) - emit_move_insn (operands[0], res); - return true; - } - else if (GET_MODE_INNER (data_mode) != DImode - && vector_all_ones_operand (negop, data_mode)) - { - rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift), - operands[0], 0, OPTAB_DIRECT); - if (res != operands[0]) - emit_move_insn (operands[0], res); - return true; - } - } - - if (!nonimmediate_operand (cop1, mode)) - cop1 = force_reg (mode, cop1); - if (!general_operand (operands[1], data_mode)) - operands[1] = force_reg (data_mode, operands[1]); - if (!general_operand (operands[2], data_mode)) - operands[2] = force_reg (data_mode, operands[2]); + *negate = false; /* XOP supports all of the comparisons on all 128-bit vector int types. */ if (TARGET_XOP @@ -22976,13 +23031,13 @@ ix86_expand_int_vcond (rtx operands[]) case LE: case LEU: code = reverse_condition (code); - negate = true; + *negate = true; break; case GE: case GEU: code = reverse_condition (code); - negate = true; + *negate = true; /* FALLTHRU */ case LT: @@ -23003,14 +23058,14 @@ ix86_expand_int_vcond (rtx operands[]) case EQ: /* SSE4.1 supports EQ. */ if (!TARGET_SSE4_1) - return false; + return NULL; break; case GT: case GTU: /* SSE4.2 supports GT/GTU. */ if (!TARGET_SSE4_2) - return false; + return NULL; break; default: @@ -23071,12 +23126,13 @@ ix86_expand_int_vcond (rtx operands[]) case V8HImode: /* Perform a parallel unsigned saturating subtraction. */ x = gen_reg_rtx (mode); - emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0, cop1))); + emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0, + cop1))); cop0 = x; cop1 = CONST0_RTX (mode); code = EQ; - negate = !negate; + *negate = !*negate; break; default: @@ -23085,22 +23141,162 @@ ix86_expand_int_vcond (rtx operands[]) } } + if (*negate) + std::swap (op_true, op_false); + /* Allow the comparison to be done in one mode, but the movcc to happen in another mode. */ if (data_mode == mode) { - x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1, - operands[1+negate], operands[2-negate]); + x = ix86_expand_sse_cmp (dest, code, cop0, cop1, + op_true, op_false); } else { gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode)); x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1, - operands[1+negate], operands[2-negate]); + op_true, op_false); if (GET_MODE (x) == mode) x = gen_lowpart (data_mode, x); } + return x; +} + +/* Expand integer vector comparison. 
*/ + +bool +ix86_expand_int_vec_cmp (rtx operands[]) +{ + rtx_code code = GET_CODE (operands[1]); + bool negate = false; + rtx cmp = ix86_expand_int_sse_cmp (operands[0], code, operands[2], + operands[3], NULL, NULL, &negate); + + if (!cmp) + return false; + + if (negate) + cmp = ix86_expand_int_sse_cmp (operands[0], EQ, cmp, + CONST0_RTX (GET_MODE (cmp)), + NULL, NULL, &negate); + + gcc_assert (!negate); + + if (operands[0] != cmp) + emit_move_insn (operands[0], cmp); + + return true; +} + +/* Expand a floating-point vector conditional move; a vcond operation + rather than a movcc operation. */ + +bool +ix86_expand_fp_vcond (rtx operands[]) +{ + enum rtx_code code = GET_CODE (operands[3]); + rtx cmp; + + code = ix86_prepare_sse_fp_compare_args (operands[0], code, + &operands[4], &operands[5]); + if (code == UNKNOWN) + { + rtx temp; + switch (GET_CODE (operands[3])) + { + case LTGT: + temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4], + operands[5], operands[0], operands[0]); + cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4], + operands[5], operands[1], operands[2]); + code = AND; + break; + case UNEQ: + temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4], + operands[5], operands[0], operands[0]); + cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4], + operands[5], operands[1], operands[2]); + code = IOR; + break; + default: + gcc_unreachable (); + } + cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1, + OPTAB_DIRECT); + ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]); + return true; + } + + if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4], + operands[5], operands[1], operands[2])) + return true; + + cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5], + operands[1], operands[2]); + ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]); + return true; +} + +/* Expand a signed/unsigned integral vector conditional move. */ + +bool +ix86_expand_int_vcond (rtx operands[]) +{ + machine_mode data_mode = GET_MODE (operands[0]); + machine_mode mode = GET_MODE (operands[4]); + enum rtx_code code = GET_CODE (operands[3]); + bool negate = false; + rtx x, cop0, cop1; + + cop0 = operands[4]; + cop1 = operands[5]; + + /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31 + and x < 0 ? 1 : 0 into (unsigned) x >> 31. 
*/ + if ((code == LT || code == GE) + && data_mode == mode + && cop1 == CONST0_RTX (mode) + && operands[1 + (code == LT)] == CONST0_RTX (data_mode) + && GET_MODE_UNIT_SIZE (data_mode) > 1 + && GET_MODE_UNIT_SIZE (data_mode) <= 8 + && (GET_MODE_SIZE (data_mode) == 16 + || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32))) + { + rtx negop = operands[2 - (code == LT)]; + int shift = GET_MODE_UNIT_BITSIZE (data_mode) - 1; + if (negop == CONST1_RTX (data_mode)) + { + rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift), + operands[0], 1, OPTAB_DIRECT); + if (res != operands[0]) + emit_move_insn (operands[0], res); + return true; + } + else if (GET_MODE_INNER (data_mode) != DImode + && vector_all_ones_operand (negop, data_mode)) + { + rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift), + operands[0], 0, OPTAB_DIRECT); + if (res != operands[0]) + emit_move_insn (operands[0], res); + return true; + } + } + + if (!nonimmediate_operand (cop1, mode)) + cop1 = force_reg (mode, cop1); + if (!general_operand (operands[1], data_mode)) + operands[1] = force_reg (data_mode, operands[1]); + if (!general_operand (operands[2], data_mode)) + operands[2] = force_reg (data_mode, operands[2]); + + x = ix86_expand_int_sse_cmp (operands[0], code, cop0, cop1, + operands[1], operands[2], &negate); + + if (!x) + return false; + ix86_expand_sse_movcc (operands[0], x, operands[1+negate], operands[2-negate]); return true; @@ -53085,6 +53281,28 @@ ix86_autovectorize_vector_sizes (void) (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0; } +/* Implemenation of targetm.vectorize.get_mask_mode. */ + +static machine_mode +ix86_get_mask_mode (unsigned nunits, unsigned vector_size) +{ + unsigned elem_size = vector_size / nunits; + + /* Scalar mask case. 
*/ + if (TARGET_AVX512F && vector_size == 64) + { + if (elem_size == 4 || elem_size == 8 || TARGET_AVX512BW) + return smallest_mode_for_size (nunits, MODE_INT); + } + + machine_mode elem_mode + = smallest_mode_for_size (elem_size * BITS_PER_UNIT, MODE_INT); + + gcc_assert (elem_size * nunits == vector_size); + + return mode_for_vector (elem_mode, nunits); +} + /* Return class of registers which could be used for pseudo of MODE @@ -54096,6 +54314,8 @@ ix86_addr_space_zero_address_valid (addr_space_t as) #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \ ix86_autovectorize_vector_sizes +#undef TARGET_VECTORIZE_GET_MASK_MODE +#define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode #undef TARGET_VECTORIZE_INIT_COST #define TARGET_VECTORIZE_INIT_COST ix86_init_cost #undef TARGET_VECTORIZE_ADD_STMT_COST diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 43dcc6a17e2..57feb27798b 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -613,6 +613,15 @@ (V16SF "HI") (V8SF "QI") (V4SF "QI") (V8DF "QI") (V4DF "QI") (V2DF "QI")]) +;; Mapping of vector modes to corresponding mask size +(define_mode_attr avx512fmaskmodelower + [(V64QI "di") (V32QI "si") (V16QI "hi") + (V32HI "si") (V16HI "hi") (V8HI "qi") (V4HI "qi") + (V16SI "hi") (V8SI "qi") (V4SI "qi") + (V8DI "qi") (V4DI "qi") (V2DI "qi") + (V16SF "hi") (V8SF "qi") (V4SF "qi") + (V8DF "qi") (V4DF "qi") (V2DF "qi")]) + ;; Mapping of vector float modes to an integer mode of the same size (define_mode_attr sseintvecmode [(V16SF "V16SI") (V8DF "V8DI") @@ -2811,6 +2820,150 @@ (const_string "0"))) (set_attr "mode" "<MODE>")]) +(define_expand "vec_cmp<mode><avx512fmaskmodelower>" + [(set (match_operand:<avx512fmaskmode> 0 "register_operand") + (match_operator:<avx512fmaskmode> 1 "" + [(match_operand:V48_AVX512VL 2 "register_operand") + (match_operand:V48_AVX512VL 3 "nonimmediate_operand")]))] + "TARGET_AVX512F" +{ + bool ok = ix86_expand_mask_vec_cmp (operands); + gcc_assert (ok); + DONE; +}) + +(define_expand "vec_cmp<mode><avx512fmaskmodelower>" + [(set (match_operand:<avx512fmaskmode> 0 "register_operand") + (match_operator:<avx512fmaskmode> 1 "" + [(match_operand:VI12_AVX512VL 2 "register_operand") + (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))] + "TARGET_AVX512BW" +{ + bool ok = ix86_expand_mask_vec_cmp (operands); + gcc_assert (ok); + DONE; +}) + +(define_expand "vec_cmp<mode><sseintvecmodelower>" + [(set (match_operand:<sseintvecmode> 0 "register_operand") + (match_operator:<sseintvecmode> 1 "" + [(match_operand:VI_256 2 "register_operand") + (match_operand:VI_256 3 "nonimmediate_operand")]))] + "TARGET_AVX2" +{ + bool ok = ix86_expand_int_vec_cmp (operands); + gcc_assert (ok); + DONE; +}) + +(define_expand "vec_cmp<mode><sseintvecmodelower>" + [(set (match_operand:<sseintvecmode> 0 "register_operand") + (match_operator:<sseintvecmode> 1 "" + [(match_operand:VI124_128 2 "register_operand") + (match_operand:VI124_128 3 "nonimmediate_operand")]))] + "TARGET_SSE2" +{ + bool ok = ix86_expand_int_vec_cmp (operands); + gcc_assert (ok); + DONE; +}) + +(define_expand "vec_cmpv2div2di" + [(set (match_operand:V2DI 0 "register_operand") + (match_operator:V2DI 1 "" + [(match_operand:V2DI 2 "register_operand") + (match_operand:V2DI 3 "nonimmediate_operand")]))] + "TARGET_SSE4_2" +{ + bool ok = ix86_expand_int_vec_cmp (operands); + gcc_assert (ok); + DONE; +}) + +(define_expand "vec_cmp<mode><sseintvecmodelower>" + [(set (match_operand:<sseintvecmode> 0 
"register_operand") + (match_operator:<sseintvecmode> 1 "" + [(match_operand:VF_256 2 "register_operand") + (match_operand:VF_256 3 "nonimmediate_operand")]))] + "TARGET_AVX" +{ + bool ok = ix86_expand_fp_vec_cmp (operands); + gcc_assert (ok); + DONE; +}) + +(define_expand "vec_cmp<mode><sseintvecmodelower>" + [(set (match_operand:<sseintvecmode> 0 "register_operand") + (match_operator:<sseintvecmode> 1 "" + [(match_operand:VF_128 2 "register_operand") + (match_operand:VF_128 3 "nonimmediate_operand")]))] + "TARGET_SSE" +{ + bool ok = ix86_expand_fp_vec_cmp (operands); + gcc_assert (ok); + DONE; +}) + +(define_expand "vec_cmpu<mode><avx512fmaskmodelower>" + [(set (match_operand:<avx512fmaskmode> 0 "register_operand") + (match_operator:<avx512fmaskmode> 1 "" + [(match_operand:VI48_AVX512VL 2 "register_operand") + (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")]))] + "TARGET_AVX512F" +{ + bool ok = ix86_expand_mask_vec_cmp (operands); + gcc_assert (ok); + DONE; +}) + +(define_expand "vec_cmpu<mode><avx512fmaskmodelower>" + [(set (match_operand:<avx512fmaskmode> 0 "register_operand") + (match_operator:<avx512fmaskmode> 1 "" + [(match_operand:VI12_AVX512VL 2 "register_operand") + (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))] + "TARGET_AVX512BW" +{ + bool ok = ix86_expand_mask_vec_cmp (operands); + gcc_assert (ok); + DONE; +}) + +(define_expand "vec_cmpu<mode><sseintvecmodelower>" + [(set (match_operand:<sseintvecmode> 0 "register_operand") + (match_operator:<sseintvecmode> 1 "" + [(match_operand:VI_256 2 "register_operand") + (match_operand:VI_256 3 "nonimmediate_operand")]))] + "TARGET_AVX2" +{ + bool ok = ix86_expand_int_vec_cmp (operands); + gcc_assert (ok); + DONE; +}) + +(define_expand "vec_cmpu<mode><sseintvecmodelower>" + [(set (match_operand:<sseintvecmode> 0 "register_operand") + (match_operator:<sseintvecmode> 1 "" + [(match_operand:VI124_128 2 "register_operand") + (match_operand:VI124_128 3 "nonimmediate_operand")]))] + "TARGET_SSE2" +{ + bool ok = ix86_expand_int_vec_cmp (operands); + gcc_assert (ok); + DONE; +}) + +(define_expand "vec_cmpuv2div2di" + [(set (match_operand:V2DI 0 "register_operand") + (match_operator:V2DI 1 "" + [(match_operand:V2DI 2 "register_operand") + (match_operand:V2DI 3 "nonimmediate_operand")]))] + "TARGET_SSE4_2" +{ + bool ok = ix86_expand_int_vec_cmp (operands); + gcc_assert (ok); + DONE; +}) + (define_expand "vcond<V_512:mode><VF_512:mode>" [(set (match_operand:V_512 0 "register_operand") (if_then_else:V_512 diff --git a/gcc/expr.c b/gcc/expr.c index 050642daa4f..03936ee3ddb 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -11128,9 +11128,15 @@ do_store_flag (sepops ops, rtx target, machine_mode mode) if (TREE_CODE (ops->type) == VECTOR_TYPE) { tree ifexp = build2 (ops->code, ops->type, arg0, arg1); - tree if_true = constant_boolean_node (true, ops->type); - tree if_false = constant_boolean_node (false, ops->type); - return expand_vec_cond_expr (ops->type, ifexp, if_true, if_false, target); + if (VECTOR_BOOLEAN_TYPE_P (ops->type)) + return expand_vec_cmp_expr (ops->type, ifexp, target); + else + { + tree if_true = constant_boolean_node (true, ops->type); + tree if_false = constant_boolean_node (false, ops->type); + return expand_vec_cond_expr (ops->type, ifexp, if_true, + if_false, target); + } } /* Get the rtx comparison code to use. 
We know that EXP is a comparison @@ -11417,6 +11423,40 @@ try_tablejump (tree index_type, tree index_expr, tree minval, tree range, return 1; } +/* Return a CONST_VECTOR rtx representing vector mask for + a VECTOR_CST of booleans. */ +static rtx +const_vector_mask_from_tree (tree exp) +{ + rtvec v; + unsigned i; + int units; + tree elt; + machine_mode inner, mode; + + mode = TYPE_MODE (TREE_TYPE (exp)); + units = GET_MODE_NUNITS (mode); + inner = GET_MODE_INNER (mode); + + v = rtvec_alloc (units); + + for (i = 0; i < VECTOR_CST_NELTS (exp); ++i) + { + elt = VECTOR_CST_ELT (exp, i); + + gcc_assert (TREE_CODE (elt) == INTEGER_CST); + if (integer_zerop (elt)) + RTVEC_ELT (v, i) = CONST0_RTX (inner); + else if (integer_onep (elt) + || integer_minus_onep (elt)) + RTVEC_ELT (v, i) = CONSTM1_RTX (inner); + else + gcc_unreachable (); + } + + return gen_rtx_CONST_VECTOR (mode, v); +} + /* Return a CONST_VECTOR rtx for a VECTOR_CST tree. */ static rtx const_vector_from_tree (tree exp) @@ -11432,6 +11472,9 @@ const_vector_from_tree (tree exp) if (initializer_zerop (exp)) return CONST0_RTX (mode); + if (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (exp))) + return const_vector_mask_from_tree (exp); + units = GET_MODE_NUNITS (mode); inner = GET_MODE_INNER (mode); diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h index 73f272946a7..81ac3627c51 100644 --- a/gcc/optabs-query.h +++ b/gcc/optabs-query.h @@ -74,6 +74,16 @@ trapv_binoptab_p (optab binoptab) || binoptab == smulv_optab); } +/* Return insn code for a comparison operator with VMODE + resultin MASK_MODE, unsigned if UNS is true. */ + +static inline enum insn_code +get_vec_cmp_icode (machine_mode vmode, machine_mode mask_mode, bool uns) +{ + optab tab = uns ? vec_cmpu_optab : vec_cmp_optab; + return convert_optab_handler (tab, vmode, mask_mode); +} + /* Return insn code for a conditional operator with a comparison in mode CMODE, unsigned if UNS is true, resulting in a value of mode VMODE. */ diff --git a/gcc/optabs-tree.c b/gcc/optabs-tree.c index 3b033382c18..aa863cfb941 100644 --- a/gcc/optabs-tree.c +++ b/gcc/optabs-tree.c @@ -320,6 +320,19 @@ supportable_convert_operation (enum tree_code code, return false; } +/* Return TRUE if appropriate vector insn is available + for vector comparison expr with vector type VALUE_TYPE + and resulting mask with MASK_TYPE. */ + +bool +expand_vec_cmp_expr_p (tree value_type, tree mask_type) +{ + enum insn_code icode = get_vec_cmp_icode (TYPE_MODE (value_type), + TYPE_MODE (mask_type), + TYPE_UNSIGNED (value_type)); + return (icode != CODE_FOR_nothing); +} + /* Return TRUE iff, appropriate vector insns are available for vector cond expr with vector type VALUE_TYPE and a comparison with operand vector types in CMP_OP_TYPE. */ diff --git a/gcc/optabs-tree.h b/gcc/optabs-tree.h index bf6c9e31495..5b966ca92b8 100644 --- a/gcc/optabs-tree.h +++ b/gcc/optabs-tree.h @@ -39,6 +39,7 @@ optab optab_for_tree_code (enum tree_code, const_tree, enum optab_subtype); optab scalar_reduc_to_vector (optab, const_tree); bool supportable_convert_operation (enum tree_code, tree, tree, tree *, enum tree_code *); +bool expand_vec_cmp_expr_p (tree, tree); bool expand_vec_cond_expr_p (tree, tree); void init_tree_optimization_optabs (tree); diff --git a/gcc/optabs.c b/gcc/optabs.c index 1e328a66f91..f9fbfde967d 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -5180,11 +5180,13 @@ get_rtx_code (enum tree_code tcode, bool unsignedp) } /* Return comparison rtx for COND. Use UNSIGNEDP to select signed or - unsigned operators. 
Do not generate compare instruction. */ + unsigned operators. OPNO holds an index of the first comparison + operand in insn with code ICODE. Do not generate compare instruction. */ static rtx vector_compare_rtx (enum tree_code tcode, tree t_op0, tree t_op1, - bool unsignedp, enum insn_code icode) + bool unsignedp, enum insn_code icode, + unsigned int opno) { struct expand_operand ops[2]; rtx rtx_op0, rtx_op1; @@ -5210,7 +5212,7 @@ vector_compare_rtx (enum tree_code tcode, tree t_op0, tree t_op1, create_input_operand (&ops[0], rtx_op0, m0); create_input_operand (&ops[1], rtx_op1, m1); - if (!maybe_legitimize_operands (icode, 4, 2, ops)) + if (!maybe_legitimize_operands (icode, opno, 2, ops)) gcc_unreachable (); return gen_rtx_fmt_ee (rcode, VOIDmode, ops[0].value, ops[1].value); } @@ -5465,7 +5467,7 @@ expand_vec_cond_expr (tree vec_cond_type, tree op0, tree op1, tree op2, if (icode == CODE_FOR_nothing) return 0; - comparison = vector_compare_rtx (tcode, op0a, op0b, unsignedp, icode); + comparison = vector_compare_rtx (tcode, op0a, op0b, unsignedp, icode, 4); rtx_op1 = expand_normal (op1); rtx_op2 = expand_normal (op2); @@ -5479,6 +5481,40 @@ expand_vec_cond_expr (tree vec_cond_type, tree op0, tree op1, tree op2, return ops[0].value; } +/* Generate insns for a vector comparison into a mask. */ + +rtx +expand_vec_cmp_expr (tree type, tree exp, rtx target) +{ + struct expand_operand ops[4]; + enum insn_code icode; + rtx comparison; + machine_mode mask_mode = TYPE_MODE (type); + machine_mode vmode; + bool unsignedp; + tree op0a, op0b; + enum tree_code tcode; + + op0a = TREE_OPERAND (exp, 0); + op0b = TREE_OPERAND (exp, 1); + tcode = TREE_CODE (exp); + + unsignedp = TYPE_UNSIGNED (TREE_TYPE (op0a)); + vmode = TYPE_MODE (TREE_TYPE (op0a)); + + icode = get_vec_cmp_icode (vmode, mask_mode, unsignedp); + if (icode == CODE_FOR_nothing) + return 0; + + comparison = vector_compare_rtx (tcode, op0a, op0b, unsignedp, icode, 2); + create_output_operand (&ops[0], target, mask_mode); + create_fixed_operand (&ops[1], comparison); + create_fixed_operand (&ops[2], XEXP (comparison, 0)); + create_fixed_operand (&ops[3], XEXP (comparison, 1)); + expand_insn (icode, 4, ops); + return ops[0].value; +} + /* Expand a highpart multiply. */ rtx diff --git a/gcc/optabs.def b/gcc/optabs.def index 6fad6d9711e..c057186c07a 100644 --- a/gcc/optabs.def +++ b/gcc/optabs.def @@ -61,6 +61,8 @@ OPTAB_CD(vec_load_lanes_optab, "vec_load_lanes$a$b") OPTAB_CD(vec_store_lanes_optab, "vec_store_lanes$a$b") OPTAB_CD(vcond_optab, "vcond$a$b") OPTAB_CD(vcondu_optab, "vcondu$a$b") +OPTAB_CD(vec_cmp_optab, "vec_cmp$a$b") +OPTAB_CD(vec_cmpu_optab, "vec_cmpu$a$b") OPTAB_NL(add_optab, "add$P$a3", PLUS, "add", '3', gen_int_fp_fixed_libfunc) OPTAB_NX(add_optab, "add$F$a3") diff --git a/gcc/optabs.h b/gcc/optabs.h index 5e6fe11ff00..78666f63b45 100644 --- a/gcc/optabs.h +++ b/gcc/optabs.h @@ -304,6 +304,9 @@ extern rtx_insn *gen_cond_trap (enum rtx_code, rtx, rtx, rtx); /* Generate code for VEC_PERM_EXPR. */ extern rtx expand_vec_perm (machine_mode, rtx, rtx, rtx, rtx); +/* Generate code for vector comparison. */ +extern rtx expand_vec_cmp_expr (tree, tree, rtx); + /* Generate code for VEC_COND_EXPR. */ extern rtx expand_vec_cond_expr (tree, tree, tree, tree, rtx); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index f2a7d3f6f96..cc81cc409f8 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2015-11-10 Ilya Enkovich <enkovich.gnu@gmail.com> + + * gcc.dg/vect/slp-cond-5.c: New test. 
+ 2015-11-10 Richard Biener <rguenther@suse.de> PR tree-optimization/68240 diff --git a/gcc/testsuite/gcc.dg/vect/slp-cond-5.c b/gcc/testsuite/gcc.dg/vect/slp-cond-5.c new file mode 100644 index 00000000000..5ade7d1fbad --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/slp-cond-5.c @@ -0,0 +1,81 @@ +/* { dg-require-effective-target vect_condition } */ + +#include "tree-vect.h" + +#define N 128 + +static inline int +foo (int x, int y, int a, int b) +{ + if (x >= y && a > b) + return a; + else + return b; +} + +__attribute__((noinline, noclone)) void +bar (int * __restrict__ a, int * __restrict__ b, + int * __restrict__ c, int * __restrict__ d, + int * __restrict__ e, int w) +{ + int i; + for (i = 0; i < N/16; i++, a += 16, b += 16, c += 16, d += 16, e += 16) + { + e[0] = foo (c[0], d[0], a[0] * w, b[0] * w); + e[1] = foo (c[1], d[1], a[1] * w, b[1] * w); + e[2] = foo (c[2], d[2], a[2] * w, b[2] * w); + e[3] = foo (c[3], d[3], a[3] * w, b[3] * w); + e[4] = foo (c[4], d[4], a[4] * w, b[4] * w); + e[5] = foo (c[5], d[5], a[5] * w, b[5] * w); + e[6] = foo (c[6], d[6], a[6] * w, b[6] * w); + e[7] = foo (c[7], d[7], a[7] * w, b[7] * w); + e[8] = foo (c[8], d[8], a[8] * w, b[8] * w); + e[9] = foo (c[9], d[9], a[9] * w, b[9] * w); + e[10] = foo (c[10], d[10], a[10] * w, b[10] * w); + e[11] = foo (c[11], d[11], a[11] * w, b[11] * w); + e[12] = foo (c[12], d[12], a[12] * w, b[12] * w); + e[13] = foo (c[13], d[13], a[13] * w, b[13] * w); + e[14] = foo (c[14], d[14], a[14] * w, b[14] * w); + e[15] = foo (c[15], d[15], a[15] * w, b[15] * w); + } +} + + +int a[N], b[N], c[N], d[N], e[N]; + +int main () +{ + int i; + + check_vect (); + + for (i = 0; i < N; i++) + { + a[i] = i; + b[i] = 5; + e[i] = 0; + + switch (i % 9) + { + case 0: asm (""); c[i] = i; d[i] = i + 1; break; + case 1: c[i] = 0; d[i] = 0; break; + case 2: c[i] = i + 1; d[i] = i - 1; break; + case 3: c[i] = i; d[i] = i + 7; break; + case 4: c[i] = i; d[i] = i; break; + case 5: c[i] = i + 16; d[i] = i + 3; break; + case 6: c[i] = i - 5; d[i] = i; break; + case 7: c[i] = i; d[i] = i; break; + case 8: c[i] = i; d[i] = i - 7; break; + } + } + + bar (a, b, c, d, e, 2); + for (i = 0; i < N; i++) + if (e[i] != ((i % 3) == 0 || i <= 5 ? 10 : 2 * i)) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { i?86-*-* x86_64-*-* } } } } */ + diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index 77313ec4e1a..87936ddee1d 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -3863,6 +3863,9 @@ vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name) case vect_scalar_var: prefix = "stmp"; break; + case vect_mask_var: + prefix = "mask"; + break; case vect_pointer_var: prefix = "vectp"; break; @@ -4452,7 +4455,11 @@ vect_create_destination_var (tree scalar_dest, tree vectype) tree type; enum vect_var_kind kind; - kind = vectype ? vect_simple_var : vect_scalar_var; + kind = vectype + ? VECTOR_BOOLEAN_TYPE_P (vectype) + ? vect_mask_var + : vect_simple_var + : vect_scalar_var; type = vectype ? vectype : TREE_TYPE (scalar_dest); gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME); diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c index f82b6696a34..5883db8abd6 100644 --- a/gcc/tree-vect-generic.c +++ b/gcc/tree-vect-generic.c @@ -346,7 +346,8 @@ expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0, tree op1, enum tree_code code) { tree t; - if (! 
expand_vec_cond_expr_p (type, TREE_TYPE (op0))) + if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type) + && !expand_vec_cond_expr_p (type, TREE_TYPE (op0))) t = expand_vector_piecewise (gsi, do_compare, type, TREE_TYPE (TREE_TYPE (op0)), op0, op1, code); else diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 43ada18ac7c..cbf0073ffcf 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -178,19 +178,21 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo) { struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); - int nbbs = loop->num_nodes; + unsigned nbbs = loop->num_nodes; unsigned int vectorization_factor = 0; tree scalar_type; gphi *phi; tree vectype; unsigned int nunits; stmt_vec_info stmt_info; - int i; + unsigned i; HOST_WIDE_INT dummy; gimple *stmt, *pattern_stmt = NULL; gimple_seq pattern_def_seq = NULL; gimple_stmt_iterator pattern_def_si = gsi_none (); bool analyze_pattern_stmt = false; + bool bool_result; + auto_vec<stmt_vec_info> mask_producers; if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, @@ -409,6 +411,8 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo) return false; } + bool_result = false; + if (STMT_VINFO_VECTYPE (stmt_info)) { /* The only case when a vectype had been already set is for stmts @@ -429,6 +433,32 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo) scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3)); else scalar_type = TREE_TYPE (gimple_get_lhs (stmt)); + + /* Bool ops don't participate in vectorization factor + computation. For comparison use compared types to + compute a factor. */ + if (TREE_CODE (scalar_type) == BOOLEAN_TYPE) + { + mask_producers.safe_push (stmt_info); + bool_result = true; + + if (gimple_code (stmt) == GIMPLE_ASSIGN + && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) + == tcc_comparison + && TREE_CODE (TREE_TYPE (gimple_assign_rhs1 (stmt))) + != BOOLEAN_TYPE) + scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt)); + else + { + if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si)) + { + pattern_def_seq = NULL; + gsi_next (&si); + } + continue; + } + } + if (dump_enabled_p ()) { dump_printf_loc (MSG_NOTE, vect_location, @@ -451,7 +481,8 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo) return false; } - STMT_VINFO_VECTYPE (stmt_info) = vectype; + if (!bool_result) + STMT_VINFO_VECTYPE (stmt_info) = vectype; if (dump_enabled_p ()) { @@ -464,8 +495,9 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo) /* The vectorization factor is according to the smallest scalar type (or the largest vector size, but we only support one vector size per loop). 
*/ - scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, - &dummy); + if (!bool_result) + scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, + &dummy); if (dump_enabled_p ()) { dump_printf_loc (MSG_NOTE, vect_location, @@ -540,6 +572,99 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo) } LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor; + for (i = 0; i < mask_producers.length (); i++) + { + tree mask_type = NULL; + + stmt = STMT_VINFO_STMT (mask_producers[i]); + + if (gimple_code (stmt) == GIMPLE_ASSIGN + && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison + && TREE_CODE (TREE_TYPE (gimple_assign_rhs1 (stmt))) != BOOLEAN_TYPE) + { + scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt)); + mask_type = get_mask_type_for_scalar_type (scalar_type); + + if (!mask_type) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "not vectorized: unsupported mask\n"); + return false; + } + } + else + { + tree rhs; + ssa_op_iter iter; + gimple *def_stmt; + enum vect_def_type dt; + + FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE) + { + if (!vect_is_simple_use (rhs, mask_producers[i]->vinfo, + &def_stmt, &dt, &vectype)) + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "not vectorized: can't compute mask type " + "for statement, "); + dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, + 0); + dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); + } + return false; + } + + /* No vectype probably means external definition. + Allow it in case there is another operand which + allows to determine mask type. */ + if (!vectype) + continue; + + if (!mask_type) + mask_type = vectype; + else if (TYPE_VECTOR_SUBPARTS (mask_type) + != TYPE_VECTOR_SUBPARTS (vectype)) + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "not vectorized: different sized masks " + "types in statement, "); + dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, + mask_type); + dump_printf (MSG_MISSED_OPTIMIZATION, " and "); + dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, + vectype); + dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); + } + return false; + } + } + } + + /* No mask_type should mean loop invariant predicate. + This is probably a subject for optimization in + if-conversion. */ + if (!mask_type) + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "not vectorized: can't compute mask type " + "for statement, "); + dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, + 0); + dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); + } + return false; + } + + STMT_VINFO_VECTYPE (mask_producers[i]) = mask_type; + } + return true; } diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c index d003d335f5d..e91c6e008a0 100644 --- a/gcc/tree-vect-patterns.c +++ b/gcc/tree-vect-patterns.c @@ -2849,7 +2849,9 @@ vect_recog_mixed_size_cond_pattern (vec<gimple *> *stmts, tree *type_in, /* Helper function of vect_recog_bool_pattern. Called recursively, return - true if bool VAR can be optimized that way. */ + true if bool VAR can and should be optimized that way. Assume it shouldn't + in case it's a result of a comparison which can be directly vectorized into + a vector comparison. 
*/ static bool check_bool_pattern (tree var, vec_info *vinfo) @@ -2898,7 +2900,7 @@ check_bool_pattern (tree var, vec_info *vinfo) default: if (TREE_CODE_CLASS (rhs_code) == tcc_comparison) { - tree vecitype, comp_vectype; + tree vecitype, comp_vectype, mask_type; /* If the comparison can throw, then is_gimple_condexpr will be false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */ @@ -2909,6 +2911,11 @@ check_bool_pattern (tree var, vec_info *vinfo) if (comp_vectype == NULL_TREE) return false; + mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1)); + if (mask_type + && expand_vec_cmp_expr_p (comp_vectype, mask_type)) + return false; + if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE) { machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1)); @@ -3133,6 +3140,73 @@ adjust_bool_pattern (tree var, tree out_type, tree trueval, } +/* Return the proper type for converting bool VAR into + an integer value or NULL_TREE if no such type exists. + The type is chosen so that converted value has the + same number of elements as VAR's vector type. */ + +static tree +search_type_for_mask (tree var, vec_info *vinfo) +{ + gimple *def_stmt; + enum vect_def_type dt; + tree rhs1; + enum tree_code rhs_code; + tree res = NULL_TREE; + + if (TREE_CODE (var) != SSA_NAME) + return NULL_TREE; + + if ((TYPE_PRECISION (TREE_TYPE (var)) != 1 + || !TYPE_UNSIGNED (TREE_TYPE (var))) + && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE) + return NULL_TREE; + + if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt)) + return NULL_TREE; + + if (dt != vect_internal_def) + return NULL_TREE; + + if (!is_gimple_assign (def_stmt)) + return NULL_TREE; + + rhs_code = gimple_assign_rhs_code (def_stmt); + rhs1 = gimple_assign_rhs1 (def_stmt); + + switch (rhs_code) + { + case SSA_NAME: + case BIT_NOT_EXPR: + CASE_CONVERT: + res = search_type_for_mask (rhs1, vinfo); + break; + + case BIT_AND_EXPR: + case BIT_IOR_EXPR: + case BIT_XOR_EXPR: + if (!(res = search_type_for_mask (rhs1, vinfo))) + res = search_type_for_mask (gimple_assign_rhs2 (def_stmt), vinfo); + break; + + default: + if (TREE_CODE_CLASS (rhs_code) == tcc_comparison) + { + if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE + || !TYPE_UNSIGNED (TREE_TYPE (rhs1))) + { + machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1)); + res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1); + } + else + res = TREE_TYPE (rhs1); + } + } + + return res; +} + + /* Function vect_recog_bool_pattern Try to find pattern like following: @@ -3190,6 +3264,7 @@ vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in, enum tree_code rhs_code; tree var, lhs, rhs, vectype; stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); + stmt_vec_info new_stmt_info; vec_info *vinfo = stmt_vinfo->vinfo; gimple *pattern_stmt; @@ -3214,16 +3289,52 @@ vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in, if (vectype == NULL_TREE) return NULL; - if (!check_bool_pattern (var, vinfo)) - return NULL; - - rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts); - lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); - if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs))) - pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs); + if (check_bool_pattern (var, vinfo)) + { + rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts); + lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); + if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs))) + pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs); + else + pattern_stmt + = gimple_build_assign (lhs, 
NOP_EXPR, rhs); + } else - pattern_stmt - = gimple_build_assign (lhs, NOP_EXPR, rhs); + { + tree type = search_type_for_mask (var, vinfo); + tree cst0, cst1, cmp, tmp; + + if (!type) + return NULL; + + /* We may directly use cond with narrowed type to avoid + multiple cond exprs with following result packing and + perform single cond with packed mask instead. In case + of widening we better make cond first and then extract + results. */ + if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs))) + type = TREE_TYPE (lhs); + + cst0 = build_int_cst (type, 0); + cst1 = build_int_cst (type, 1); + tmp = vect_recog_temp_ssa_var (type, NULL); + cmp = build2 (NE_EXPR, boolean_type_node, + var, build_int_cst (TREE_TYPE (var), 0)); + pattern_stmt = gimple_build_assign (tmp, COND_EXPR, cmp, cst1, cst0); + + if (!useless_type_conversion_p (type, TREE_TYPE (lhs))) + { + tree new_vectype = get_vectype_for_scalar_type (type); + new_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo); + set_vinfo_for_stmt (pattern_stmt, new_stmt_info); + STMT_VINFO_VECTYPE (new_stmt_info) = new_vectype; + new_pattern_def_seq (stmt_vinfo, pattern_stmt); + + lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); + pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp); + } + } + *type_out = vectype; *type_in = vectype; stmts->safe_push (last_stmt); @@ -3252,15 +3363,19 @@ vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in, if (get_vectype_for_scalar_type (type) == NULL_TREE) return NULL; - if (!check_bool_pattern (var, vinfo)) - return NULL; + if (check_bool_pattern (var, vinfo)) + { + rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts); + rhs = build2 (NE_EXPR, boolean_type_node, + rhs, build_int_cst (type, 0)); + } + else + rhs = build2 (NE_EXPR, boolean_type_node, + var, build_int_cst (TREE_TYPE (var), 0)), - rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts); lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); pattern_stmt - = gimple_build_assign (lhs, COND_EXPR, - build2 (NE_EXPR, boolean_type_node, - rhs, build_int_cst (type, 0)), + = gimple_build_assign (lhs, COND_EXPR, rhs, gimple_assign_rhs2 (last_stmt), gimple_assign_rhs3 (last_stmt)); *type_out = vectype; @@ -3280,16 +3395,43 @@ vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in, gcc_assert (vectype != NULL_TREE); if (!VECTOR_MODE_P (TYPE_MODE (vectype))) return NULL; - if (!check_bool_pattern (var, vinfo)) - return NULL; - rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts); + if (check_bool_pattern (var, vinfo)) + rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), + NULL_TREE, stmts); + else + { + tree type = search_type_for_mask (var, vinfo); + tree cst0, cst1, cmp, new_vectype; + + if (!type) + return NULL; + + if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype))) + type = TREE_TYPE (vectype); + + cst0 = build_int_cst (type, 0); + cst1 = build_int_cst (type, 1); + new_vectype = get_vectype_for_scalar_type (type); + + rhs = vect_recog_temp_ssa_var (type, NULL); + cmp = build2 (NE_EXPR, boolean_type_node, + var, build_int_cst (TREE_TYPE (var), 0)); + pattern_stmt = gimple_build_assign (rhs, COND_EXPR, + cmp, cst1, cst0); + + pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo); + set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info); + STMT_VINFO_VECTYPE (pattern_stmt_info) = new_vectype; + append_pattern_def_seq (stmt_vinfo, pattern_stmt); + } + lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs); if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs))) { tree rhs2 = 
vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs); - new_pattern_def_seq (stmt_vinfo, cast_stmt); + append_pattern_def_seq (stmt_vinfo, cast_stmt); rhs = rhs2; } pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs); diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 6dda2ec9eae..fe838282777 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -792,6 +792,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, if (TREE_CODE_CLASS (rhs_code) != tcc_binary && TREE_CODE_CLASS (rhs_code) != tcc_unary && TREE_CODE_CLASS (rhs_code) != tcc_expression + && TREE_CODE_CLASS (rhs_code) != tcc_comparison && rhs_code != CALL_EXPR) { if (dump_enabled_p ()) @@ -2640,7 +2641,14 @@ vect_get_constant_vectors (tree op, slp_tree slp_node, struct loop *loop; gimple_seq ctor_seq = NULL; - vector_type = get_vectype_for_scalar_type (TREE_TYPE (op)); + /* Check if vector type is a boolean vector. */ + if (TREE_CODE (TREE_TYPE (op)) == BOOLEAN_TYPE + && (VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo)) + || (code == COND_EXPR && op_num < 2))) + vector_type + = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo)); + else + vector_type = get_vectype_for_scalar_type (TREE_TYPE (op)); nunits = TYPE_VECTOR_SUBPARTS (vector_type); if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def @@ -2812,8 +2820,21 @@ vect_get_constant_vectors (tree op, slp_tree slp_node, { if (CONSTANT_CLASS_P (op)) { - op = fold_unary (VIEW_CONVERT_EXPR, - TREE_TYPE (vector_type), op); + if (VECTOR_BOOLEAN_TYPE_P (vector_type)) + { + /* Can't use VIEW_CONVERT_EXPR for booleans because + of possibly different sizes of scalar value and + vector element. */ + if (integer_zerop (op)) + op = build_int_cst (TREE_TYPE (vector_type), 0); + else if (integer_onep (op)) + op = build_int_cst (TREE_TYPE (vector_type), 1); + else + gcc_unreachable (); + } + else + op = fold_unary (VIEW_CONVERT_EXPR, + TREE_TYPE (vector_type), op); gcc_assert (op && CONSTANT_CLASS_P (op)); } else diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 51dff9eb19e..af203ab438c 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -1300,7 +1300,7 @@ vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi) if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val))) { if (CONSTANT_CLASS_P (val)) - val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val); + val = fold_convert (TREE_TYPE (type), val); else { new_temp = make_ssa_name (TREE_TYPE (type)); @@ -1328,16 +1328,18 @@ vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi) STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def. In case OP is an invariant or constant, a new stmt that creates a vector def - needs to be introduced. */ + needs to be introduced. VECTYPE may be used to specify a required type for + vector invariant. 
*/ tree -vect_get_vec_def_for_operand (tree op, gimple *stmt) +vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype) { tree vec_oprnd; gimple *vec_stmt; gimple *def_stmt; stmt_vec_info def_stmt_info = NULL; stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); + tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); enum vect_def_type dt; bool is_simple_use; @@ -1372,7 +1374,14 @@ vect_get_vec_def_for_operand (tree op, gimple *stmt) case vect_constant_def: case vect_external_def: { - vector_type = get_vectype_for_scalar_type (TREE_TYPE (op)); + if (vectype) + vector_type = vectype; + else if (TREE_CODE (TREE_TYPE (op)) == BOOLEAN_TYPE + && VECTOR_BOOLEAN_TYPE_P (stmt_vectype)) + vector_type = build_same_sized_truth_vector_type (stmt_vectype); + else + vector_type = get_vectype_for_scalar_type (TREE_TYPE (op)); + gcc_assert (vector_type); return vect_init_vector (stmt, op, vector_type, NULL); } @@ -7329,13 +7338,14 @@ vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi, { gimple *gtemp; vec_cond_lhs = - vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt); + vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), + stmt, comp_vectype); vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo, >emp, &dts[0]); vec_cond_rhs = vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), - stmt); + stmt, comp_vectype); vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo, >emp, &dts[1]); if (reduc_index == 1) @@ -7416,6 +7426,185 @@ vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi, return true; } +/* vectorizable_comparison. + + Check if STMT is comparison expression that can be vectorized. + If VEC_STMT is also passed, vectorize the STMT: create a vectorized + comparison, put it in VEC_STMT, and insert it at GSI. + + Return FALSE if not a vectorizable STMT, TRUE otherwise. 
*/ + +bool +vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi, + gimple **vec_stmt, tree reduc_def, + slp_tree slp_node) +{ + tree lhs, rhs1, rhs2; + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + tree vectype1 = NULL_TREE, vectype2 = NULL_TREE; + tree vectype = STMT_VINFO_VECTYPE (stmt_info); + tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE; + tree new_temp; + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type}; + unsigned nunits; + int ncopies; + enum tree_code code; + stmt_vec_info prev_stmt_info = NULL; + int i, j; + bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); + vec<tree> vec_oprnds0 = vNULL; + vec<tree> vec_oprnds1 = vNULL; + gimple *def_stmt; + tree mask_type; + tree mask; + + if (!VECTOR_BOOLEAN_TYPE_P (vectype)) + return false; + + mask_type = vectype; + nunits = TYPE_VECTOR_SUBPARTS (vectype); + + if (slp_node || PURE_SLP_STMT (stmt_info)) + ncopies = 1; + else + ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; + + gcc_assert (ncopies >= 1); + if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) + return false; + + if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def + && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle + && reduc_def)) + return false; + + if (STMT_VINFO_LIVE_P (stmt_info)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "value used after loop.\n"); + return false; + } + + if (!is_gimple_assign (stmt)) + return false; + + code = gimple_assign_rhs_code (stmt); + + if (TREE_CODE_CLASS (code) != tcc_comparison) + return false; + + rhs1 = gimple_assign_rhs1 (stmt); + rhs2 = gimple_assign_rhs2 (stmt); + + if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt, + &dts[0], &vectype1)) + return false; + + if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt, + &dts[1], &vectype2)) + return false; + + if (vectype1 && vectype2 + && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2)) + return false; + + vectype = vectype1 ? vectype1 : vectype2; + + /* Invariant comparison. */ + if (!vectype) + { + vectype = build_vector_type (TREE_TYPE (rhs1), nunits); + if (tree_to_shwi (TYPE_SIZE_UNIT (vectype)) != current_vector_size) + return false; + } + else if (nunits != TYPE_VECTOR_SUBPARTS (vectype)) + return false; + + if (!vec_stmt) + { + STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type; + vect_model_simple_cost (stmt_info, ncopies, dts, NULL, NULL); + return expand_vec_cmp_expr_p (vectype, mask_type); + } + + /* Transform. */ + if (!slp_node) + { + vec_oprnds0.create (1); + vec_oprnds1.create (1); + } + + /* Handle def. */ + lhs = gimple_assign_lhs (stmt); + mask = vect_create_destination_var (lhs, mask_type); + + /* Handle cmp expr. 
@@ -7619,7 +7808,8 @@ vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
	  || vectorizable_call (stmt, NULL, NULL, node)
	  || vectorizable_store (stmt, NULL, NULL, node)
	  || vectorizable_reduction (stmt, NULL, NULL, node)
-	  || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
+	  || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
+	  || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
   else
     {
       if (bb_vinfo)
@@ -7631,7 +7821,8 @@ vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
	    || vectorizable_load (stmt, NULL, NULL, node, NULL)
	    || vectorizable_call (stmt, NULL, NULL, node)
	    || vectorizable_store (stmt, NULL, NULL, node)
-	    || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
+	    || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
+	    || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
     }
 
   if (!ok)
@@ -7747,6 +7938,11 @@ vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
       gcc_assert (done);
       break;
 
+    case comparison_vec_info_type:
+      done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
+      gcc_assert (done);
+      break;
+
     case call_vec_info_type:
       done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
       stmt = gsi_stmt (*gsi);
@@ -8080,6 +8276,23 @@ get_vectype_for_scalar_type (tree scalar_type)
   return vectype;
 }
 
+/* Function get_mask_type_for_scalar_type.
+
+   Returns the mask type corresponding to a result of comparison
+   of vectors of specified SCALAR_TYPE as supported by target.  */
+
+tree
+get_mask_type_for_scalar_type (tree scalar_type)
+{
+  tree vectype = get_vectype_for_scalar_type (scalar_type);
+
+  if (!vectype)
+    return NULL;
+
+  return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
+				  current_vector_size);
+}
+
 /* Function get_same_sized_vectype
 
    Returns a vector type corresponding to SCALAR_TYPE of size
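A sketch of how the two type queries pair up inside vectorizer code; get_vectype_for_scalar_type and the new get_mask_type_for_scalar_type are the real functions declared in tree-vectorizer.h, while the wrapper itself is hypothetical and purely illustrative:

    /* Hypothetical helper (not part of the patch): fetch the data
       vector type for SCALAR_TYPE and the boolean vector type that a
       comparison of two such vectors would produce.  */
    static bool
    get_data_and_mask_types (tree scalar_type, tree *vectype, tree *mask_type)
    {
      *vectype = get_vectype_for_scalar_type (scalar_type);
      *mask_type = get_mask_type_for_scalar_type (scalar_type);
      return *vectype != NULL && *mask_type != NULL;
    }

The returned mask type is a VECTOR_BOOLEAN_TYPE_P type whose layout is chosen by the target, so it need not have the same size as the data vector type.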
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index fed80c81c3f..d890eb11d48 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -28,7 +28,8 @@ along with GCC; see the file COPYING3.  If not see
 enum vect_var_kind {
   vect_simple_var,
   vect_pointer_var,
-  vect_scalar_var
+  vect_scalar_var,
+  vect_mask_var
 };
 
 /* Defines type of operation.  */
@@ -420,6 +421,7 @@ enum stmt_vec_info_type {
   call_simd_clone_vec_info_type,
   assignment_vec_info_type,
   condition_vec_info_type,
+  comparison_vec_info_type,
   reduc_vec_info_type,
   induc_vec_info_type,
   type_promotion_vec_info_type,
@@ -944,6 +946,7 @@ extern bool vect_can_advance_ivs_p (loop_vec_info);
 /* In tree-vect-stmts.c.  */
 extern unsigned int current_vector_size;
 extern tree get_vectype_for_scalar_type (tree);
+extern tree get_mask_type_for_scalar_type (tree);
 extern tree get_same_sized_vectype (tree, tree);
 extern bool vect_is_simple_use (tree, vec_info *, gimple **,
				enum vect_def_type *);
@@ -975,7 +978,7 @@ extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
 extern void vect_finish_stmt_generation (gimple *, gimple *,
					 gimple_stmt_iterator *);
 extern bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
-extern tree vect_get_vec_def_for_operand (tree, gimple *);
+extern tree vect_get_vec_def_for_operand (tree, gimple *, tree = NULL);
 extern tree vect_init_vector (gimple *, tree, tree,
			      gimple_stmt_iterator *);
 extern tree vect_get_vec_def_for_stmt_copy (enum vect_def_type, tree);
@@ -985,6 +988,8 @@ extern void vect_remove_stores (gimple *);
 extern bool vect_analyze_stmt (gimple *, bool *, slp_tree);
 extern bool vectorizable_condition (gimple *, gimple_stmt_iterator *,
				    gimple **, tree, int, slp_tree);
+extern bool vectorizable_comparison (gimple *, gimple_stmt_iterator *,
+				     gimple **, tree, slp_tree);
 extern void vect_get_load_cost (struct data_reference *, int, bool,
				unsigned int *, unsigned int *,
				stmt_vector_for_cost *,
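Since vect_analyze_stmt now tries vectorizable_comparison on the basic-block (SLP) path as well as the loop path, straight-line groups of comparisons also become candidates. A hypothetical example of the shape involved (illustrative only, not taken from the patch):

    /* Hypothetical straight-line group: four independent selects whose
       comparisons the SLP path may now treat as vectorizable statements
       feeding a vector condition.  */
    void
    foo (int *a, int *b, int *r)
    {
      r[0] = a[0] > b[0] ? a[0] : b[0];
      r[1] = a[1] > b[1] ? a[1] : b[1];
      r[2] = a[2] > b[2] ? a[2] : b[2];
      r[3] = a[3] > b[3] ? a[3] : b[3];
    }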