/* Lower complex number and vector operations to scalar operations.
   Copyright (C) 2004, 2005 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree.h"
#include "tm.h"
#include "rtl.h"
#include "expr.h"
#include "insn-codes.h"
#include "diagnostic.h"
#include "optabs.h"
#include "machmode.h"
#include "langhooks.h"
#include "tree-flow.h"
#include "tree-gimple.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "flags.h"
#include "ggc.h"


/* Return the real part of the complex value T, or its imaginary part
   when IMAGPART_P is true.  T must be a COMPLEX_CST, a COMPLEX_EXPR,
   a VAR_DECL or a PARM_DECL; any other tree code is treated as
   unreachable.  The selected part is handed to gimplify_val so that
   the caller receives a proper gimple value; any new code is emitted
   before BSI.  */

static tree
extract_component (block_stmt_iterator *bsi, tree t, bool imagpart_p)
{
  tree scalar_type = TREE_TYPE (TREE_TYPE (t));
  tree part;

  switch (TREE_CODE (t))
    {
    case COMPLEX_CST:
      if (imagpart_p)
        part = TREE_IMAGPART (t);
      else
        part = TREE_REALPART (t);
      break;

    case COMPLEX_EXPR:
      /* The flag doubles as the operand index: operand 0 is used for
         the real part and operand 1 for the imaginary part.  */
      part = TREE_OPERAND (t, imagpart_p);
      break;

    case VAR_DECL:
    case PARM_DECL:
      {
        enum tree_code part_code
          = imagpart_p ? IMAGPART_EXPR : REALPART_EXPR;

        part = build1 (part_code, scalar_type, t);
      }
      break;

    default:
      gcc_unreachable ();
    }

  return gimplify_val (bsi, scalar_type, part);
}

/* Update an assignment to a complex variable in place.
*/ static void update_complex_assignment (block_stmt_iterator *bsi, tree r, tree i) { tree stmt = bsi_stmt (*bsi); tree type; if (TREE_CODE (stmt) == RETURN_EXPR) stmt = TREE_OPERAND (stmt, 0); type = TREE_TYPE (TREE_OPERAND (stmt, 1)); TREE_OPERAND (stmt, 1) = build (COMPLEX_EXPR, type, r, i); mark_stmt_modified (stmt); } /* Expand complex addition to scalars: a + b = (ar + br) + i(ai + bi) a - b = (ar - br) + i(ai + bi) */ static void expand_complex_addition (block_stmt_iterator *bsi, tree inner_type, tree ar, tree ai, tree br, tree bi, enum tree_code code) { tree rr, ri; rr = gimplify_build2 (bsi, code, inner_type, ar, br); ri = gimplify_build2 (bsi, code, inner_type, ai, bi); update_complex_assignment (bsi, rr, ri); } /* Expand a complex multiplication or division to a libcall to the c99 compliant routines. */ static void expand_complex_libcall (block_stmt_iterator *bsi, tree ar, tree ai, tree br, tree bi, enum tree_code code) { enum machine_mode mode; enum built_in_function bcode; tree args, fn, stmt, type; args = tree_cons (NULL, bi, NULL); args = tree_cons (NULL, br, args); args = tree_cons (NULL, ai, args); args = tree_cons (NULL, ar, args); stmt = bsi_stmt (*bsi); type = TREE_TYPE (TREE_OPERAND (stmt, 1)); mode = TYPE_MODE (type); gcc_assert (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT); if (code == MULT_EXPR) bcode = BUILT_IN_COMPLEX_MUL_MIN + mode - MIN_MODE_COMPLEX_FLOAT; else if (code == RDIV_EXPR) bcode = BUILT_IN_COMPLEX_DIV_MIN + mode - MIN_MODE_COMPLEX_FLOAT; else gcc_unreachable (); fn = built_in_decls[bcode]; TREE_OPERAND (stmt, 1) = build3 (CALL_EXPR, type, build_fold_addr_expr (fn), args, NULL); update_stmt (stmt); } /* Expand complex multiplication to scalars: a * b = (ar*br - ai*bi) + i(ar*bi + br*ai) */ static void expand_complex_multiplication (block_stmt_iterator *bsi, tree inner_type, tree ar, tree ai, tree br, tree bi) { tree t1, t2, t3, t4, rr, ri; if (flag_complex_method == 2 && SCALAR_FLOAT_TYPE_P (inner_type)) { 
expand_complex_libcall (bsi, ar, ai, br, bi, MULT_EXPR); return; } t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, br); t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, bi); t3 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, bi); /* Avoid expanding redundant multiplication for the common case of squaring a complex number. */ if (ar == br && ai == bi) t4 = t3; else t4 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, br); rr = gimplify_build2 (bsi, MINUS_EXPR, inner_type, t1, t2); ri = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t3, t4); update_complex_assignment (bsi, rr, ri); } /* Expand complex division to scalars, straightforward algorithm. a / b = ((ar*br + ai*bi)/t) + i((ai*br - ar*bi)/t) t = br*br + bi*bi */ static void expand_complex_div_straight (block_stmt_iterator *bsi, tree inner_type, tree ar, tree ai, tree br, tree bi, enum tree_code code) { tree rr, ri, div, t1, t2, t3; t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, br, br); t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, bi, bi); div = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, t2); t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, br); t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, bi); t3 = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, t2); rr = gimplify_build2 (bsi, code, inner_type, t3, div); t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, br); t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, bi); t3 = gimplify_build2 (bsi, MINUS_EXPR, inner_type, t1, t2); ri = gimplify_build2 (bsi, code, inner_type, t3, div); update_complex_assignment (bsi, rr, ri); } /* Expand complex division to scalars, modified algorithm to minimize overflow with wide input ranges. */ static void expand_complex_div_wide (block_stmt_iterator *bsi, tree inner_type, tree ar, tree ai, tree br, tree bi, enum tree_code code) { tree rr, ri, ratio, div, t1, t2, tr, ti, cond; basic_block bb_cond, bb_true, bb_false, bb_join; /* Examine |br| < |bi|, and branch. 
*/ t1 = gimplify_build1 (bsi, ABS_EXPR, inner_type, br); t2 = gimplify_build1 (bsi, ABS_EXPR, inner_type, bi); cond = fold (build (LT_EXPR, boolean_type_node, t1, t2)); STRIP_NOPS (cond); bb_cond = bb_true = bb_false = bb_join = NULL; rr = ri = tr = ti = NULL; if (!TREE_CONSTANT (cond)) { edge e; cond = build (COND_EXPR, void_type_node, cond, NULL, NULL); bsi_insert_before (bsi, cond, BSI_SAME_STMT); /* Split the original block, and create the TRUE and FALSE blocks. */ e = split_block (bsi->bb, cond); bb_cond = e->src; bb_join = e->dest; bb_true = create_empty_bb (bb_cond); bb_false = create_empty_bb (bb_true); t1 = build (GOTO_EXPR, void_type_node, tree_block_label (bb_true)); t2 = build (GOTO_EXPR, void_type_node, tree_block_label (bb_false)); COND_EXPR_THEN (cond) = t1; COND_EXPR_ELSE (cond) = t2; /* Wire the blocks together. */ e->flags = EDGE_TRUE_VALUE; redirect_edge_succ (e, bb_true); make_edge (bb_cond, bb_false, EDGE_FALSE_VALUE); make_edge (bb_true, bb_join, EDGE_FALLTHRU); make_edge (bb_false, bb_join, EDGE_FALLTHRU); /* Update dominance info. Note that bb_join's data was updated by split_block. 
*/ if (dom_info_available_p (CDI_DOMINATORS)) { set_immediate_dominator (CDI_DOMINATORS, bb_true, bb_cond); set_immediate_dominator (CDI_DOMINATORS, bb_false, bb_cond); } rr = make_rename_temp (inner_type, NULL); ri = make_rename_temp (inner_type, NULL); } /* In the TRUE branch, we compute ratio = br/bi; div = (br * ratio) + bi; tr = (ar * ratio) + ai; ti = (ai * ratio) - ar; tr = tr / div; ti = ti / div; */ if (bb_true || integer_nonzerop (cond)) { if (bb_true) { *bsi = bsi_last (bb_true); bsi_insert_after (bsi, build_empty_stmt (), BSI_NEW_STMT); } ratio = gimplify_build2 (bsi, code, inner_type, br, bi); t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, br, ratio); div = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, bi); t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, ratio); tr = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, ai); t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, ratio); ti = gimplify_build2 (bsi, MINUS_EXPR, inner_type, t1, ar); tr = gimplify_build2 (bsi, code, inner_type, tr, div); ti = gimplify_build2 (bsi, code, inner_type, ti, div); if (bb_true) { t1 = build (MODIFY_EXPR, inner_type, rr, tr); bsi_insert_before (bsi, t1, BSI_SAME_STMT); t1 = build (MODIFY_EXPR, inner_type, ri, ti); bsi_insert_before (bsi, t1, BSI_SAME_STMT); bsi_remove (bsi); } } /* In the FALSE branch, we compute ratio = d/c; divisor = (d * ratio) + c; tr = (b * ratio) + a; ti = b - (a * ratio); tr = tr / div; ti = ti / div; */ if (bb_false || integer_zerop (cond)) { if (bb_false) { *bsi = bsi_last (bb_false); bsi_insert_after (bsi, build_empty_stmt (), BSI_NEW_STMT); } ratio = gimplify_build2 (bsi, code, inner_type, bi, br); t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, bi, ratio); div = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, br); t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, ratio); tr = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, ar); t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, ratio); ti = gimplify_build2 (bsi, 
MINUS_EXPR, inner_type, ai, t1); tr = gimplify_build2 (bsi, code, inner_type, tr, div); ti = gimplify_build2 (bsi, code, inner_type, ti, div); if (bb_false) { t1 = build (MODIFY_EXPR, inner_type, rr, tr); bsi_insert_before (bsi, t1, BSI_SAME_STMT); t1 = build (MODIFY_EXPR, inner_type, ri, ti); bsi_insert_before (bsi, t1, BSI_SAME_STMT); bsi_remove (bsi); } } if (bb_join) *bsi = bsi_start (bb_join); else rr = tr, ri = ti; update_complex_assignment (bsi, rr, ri); } /* Expand complex division to scalars. */ static void expand_complex_division (block_stmt_iterator *bsi, tree inner_type, tree ar, tree ai, tree br, tree bi, enum tree_code code) { switch (flag_complex_method) { case 0: /* straightforward implementation of complex divide acceptable. */ expand_complex_div_straight (bsi, inner_type, ar, ai, br, bi, code); break; case 2: if (SCALAR_FLOAT_TYPE_P (inner_type)) { expand_complex_libcall (bsi, ar, ai, br, bi, code); return; } /* FALLTHRU */ case 1: /* wide ranges of inputs must work for complex divide. */ expand_complex_div_wide (bsi, inner_type, ar, ai, br, bi, code); break; default: gcc_unreachable (); } } /* Expand complex negation to scalars: -a = (-ar) + i(-ai) */ static void expand_complex_negation (block_stmt_iterator *bsi, tree inner_type, tree ar, tree ai) { tree rr, ri; rr = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ar); ri = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ai); update_complex_assignment (bsi, rr, ri); } /* Expand complex conjugate to scalars: ~a = (ar) + i(-ai) */ static void expand_complex_conjugate (block_stmt_iterator *bsi, tree inner_type, tree ar, tree ai) { tree ri; ri = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ai); update_complex_assignment (bsi, ar, ri); } /* Expand complex comparison (EQ or NE only). 
*/

/* The real parts and the imaginary parts are compared separately with
   CODE; the two results are combined with TRUTH_AND_EXPR for EQ_EXPR
   and with TRUTH_OR_EXPR otherwise.  The combined value replaces the
   condition of a COND_EXPR, or the right-hand side of an assignment
   (possibly wrapped in a RETURN_EXPR) after conversion to the type of
   the old right-hand side.  The statement held by BSI is then marked
   as modified.  */

static void
expand_complex_comparison (block_stmt_iterator *bsi, tree ar, tree ai,
                           tree br, tree bi, enum tree_code code)
{
  tree real_cmp, imag_cmp, combined, stmt;
  enum tree_code combine_code;

  real_cmp = gimplify_build2 (bsi, code, boolean_type_node, ar, br);
  imag_cmp = gimplify_build2 (bsi, code, boolean_type_node, ai, bi);

  if (code == EQ_EXPR)
    combine_code = TRUTH_AND_EXPR;
  else
    combine_code = TRUTH_OR_EXPR;
  combined = gimplify_build2 (bsi, combine_code, boolean_type_node,
                              real_cmp, imag_cmp);

  stmt = bsi_stmt (*bsi);

  if (TREE_CODE (stmt) == COND_EXPR)
    TREE_OPERAND (stmt, 0) = combined;
  else if (TREE_CODE (stmt) == RETURN_EXPR
           || TREE_CODE (stmt) == MODIFY_EXPR)
    {
      /* For a return statement the assignment is its operand 0.  */
      tree assign = stmt;
      tree rhs_type;

      if (TREE_CODE (stmt) == RETURN_EXPR)
        assign = TREE_OPERAND (stmt, 0);

      rhs_type = TREE_TYPE (TREE_OPERAND (assign, 1));
      TREE_OPERAND (assign, 1) = fold_convert (rhs_type, combined);
    }
  else
    gcc_unreachable ();

  mark_stmt_modified (stmt);
}

/* Process one statement.  If we identify a complex operation, expand it.

   Only assignments, return statements wrapping an assignment, and
   COND_EXPRs are looked at.  The expression examined is the right-hand
   side of the assignment or the condition of the COND_EXPR.  Arithmetic
   is expanded when the expression has complex type; EQ_EXPR and NE_EXPR
   are expanded when their second operand has complex type.  */

static void
expand_complex_operations_1 (block_stmt_iterator *bsi)
{
  tree stmt = bsi_stmt (*bsi);
  tree expr, expr_type, inner_type;
  tree op_a, ar, ai, op_b, br, bi;
  enum tree_code code;

  /* Look through a return statement to the assignment inside it.  */
  if (TREE_CODE (stmt) == RETURN_EXPR)
    {
      stmt = TREE_OPERAND (stmt, 0);
      if (stmt == NULL_TREE || TREE_CODE (stmt) != MODIFY_EXPR)
        return;
    }

  if (TREE_CODE (stmt) == MODIFY_EXPR)
    expr = TREE_OPERAND (stmt, 1);
  else if (TREE_CODE (stmt) == COND_EXPR)
    expr = TREE_OPERAND (stmt, 0);
  else
    return;

  expr_type = TREE_TYPE (expr);
  code = TREE_CODE (expr);

  /* Initial filter for operations we handle.  */
  switch (code)
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
    case TRUNC_DIV_EXPR:
    case CEIL_DIV_EXPR:
    case FLOOR_DIV_EXPR:
    case ROUND_DIV_EXPR:
    case RDIV_EXPR:
    case NEGATE_EXPR:
    case CONJ_EXPR:
      if (TREE_CODE (expr_type) != COMPLEX_TYPE)
        return;
      inner_type = TREE_TYPE (expr_type);
      break;

    case EQ_EXPR:
    case NE_EXPR:
      /* Here INNER_TYPE holds the operand's own (complex) type; it is
         only used for this check, as the comparison expander takes no
         type argument.  */
      inner_type = TREE_TYPE (TREE_OPERAND (expr, 1));
      if (TREE_CODE (inner_type) != COMPLEX_TYPE)
        return;
      break;

    default:
      return;
    }

  /* Extract the components of the two complex values.  Make sure and
     handle the common case of the same value used twice specially.  */
  op_a = TREE_OPERAND (expr, 0);
  ar = extract_component (bsi, op_a, 0);
  ai = extract_component (bsi, op_a, 1);

  if (TREE_CODE_CLASS (code) == tcc_unary)
    {
      op_b = NULL;
      br = NULL;
      bi = NULL;
    }
  else
    {
      op_b = TREE_OPERAND (expr, 1);
      if (op_a == op_b)
        {
          br = ar;
          bi = ai;
        }
      else
        {
          br = extract_component (bsi, op_b, 0);
          bi = extract_component (bsi, op_b, 1);
        }
    }

  switch (code)
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
      expand_complex_addition (bsi, inner_type, ar, ai, br, bi, code);
      break;

    case MULT_EXPR:
      expand_complex_multiplication (bsi, inner_type, ar, ai, br, bi);
      break;

    case TRUNC_DIV_EXPR:
    case CEIL_DIV_EXPR:
    case FLOOR_DIV_EXPR:
    case ROUND_DIV_EXPR:
    case RDIV_EXPR:
      expand_complex_division (bsi, inner_type, ar, ai, br, bi, code);
      break;

    case NEGATE_EXPR:
      expand_complex_negation (bsi, inner_type, ar, ai);
      break;

    case CONJ_EXPR:
      expand_complex_conjugate (bsi, inner_type, ar, ai);
      break;

    case EQ_EXPR:
    case NE_EXPR:
      expand_complex_comparison (bsi, ar, ai, br, bi, code);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt_if_modified (stmt);
}

/* Build a constant of type TYPE, made of VALUE's bits replicated
   every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision.
*/ static tree build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value) { int width = tree_low_cst (TYPE_SIZE (inner_type), 1); int n = HOST_BITS_PER_WIDE_INT / width; unsigned HOST_WIDE_INT low, high, mask; tree ret; gcc_assert (n); if (width == HOST_BITS_PER_WIDE_INT) low = value; else { mask = ((HOST_WIDE_INT)1 << width) - 1; low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask); } if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT) low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0; else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT) high = 0; else if (TYPE_PRECISION (type) == 2 * HOST_BITS_PER_WIDE_INT) high = low; else gcc_unreachable (); ret = build_int_cst_wide (type, low, high); return ret; } static GTY(()) tree vector_inner_type; static GTY(()) tree vector_last_type; static GTY(()) int vector_last_nunits; /* Return a suitable vector types made of SUBPARTS units each of mode "word_mode" (the global variable). */ static tree build_word_mode_vector_type (int nunits) { if (!vector_inner_type) vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1); else if (vector_last_nunits == nunits) { gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE); return vector_last_type; } /* We build a new type, but we canonicalize it nevertheless, because it still saves some memory. */ vector_last_nunits = nunits; vector_last_type = type_hash_canon (nunits, build_vector_type (vector_inner_type, nunits)); return vector_last_type; } typedef tree (*elem_op_func) (block_stmt_iterator *, tree, tree, tree, tree, tree, enum tree_code); static inline tree tree_vec_extract (block_stmt_iterator *bsi, tree type, tree t, tree bitsize, tree bitpos) { if (bitpos) return gimplify_build3 (bsi, BIT_FIELD_REF, type, t, bitsize, bitpos); /* Build a conversion; VIEW_CONVERT_EXPR is very expensive unless T will anyway be stored in memory, so prefer NOP_EXPR. 
*/ else if (TYPE_MODE (type) == BLKmode) return gimplify_build1 (bsi, VIEW_CONVERT_EXPR, type, t); else return gimplify_build1 (bsi, NOP_EXPR, type, t); } static tree do_unop (block_stmt_iterator *bsi, tree inner_type, tree a, tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize, enum tree_code code) { a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos); return gimplify_build1 (bsi, code, inner_type, a); } static tree do_binop (block_stmt_iterator *bsi, tree inner_type, tree a, tree b, tree bitpos, tree bitsize, enum tree_code code) { a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos); b = tree_vec_extract (bsi, inner_type, b, bitsize, bitpos); return gimplify_build2 (bsi, code, inner_type, a, b); } /* Expand vector addition to scalars. This does bit twiddling in order to increase parallelism: a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^ (a ^ b) & 0x80808080 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^ (a ^ ~b) & 0x80808080 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080) This optimization should be done only if 4 vector items or more fit into a word. 
*/ static tree do_plus_minus (block_stmt_iterator *bsi, tree word_type, tree a, tree b, tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED, enum tree_code code) { tree inner_type = TREE_TYPE (TREE_TYPE (a)); unsigned HOST_WIDE_INT max; tree low_bits, high_bits, a_low, b_low, result_low, signs; max = GET_MODE_MASK (TYPE_MODE (inner_type)); low_bits = build_replicated_const (word_type, inner_type, max >> 1); high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1)); a = tree_vec_extract (bsi, word_type, a, bitsize, bitpos); b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos); signs = gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, a, b); b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits); if (code == PLUS_EXPR) a_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, a, low_bits); else { a_low = gimplify_build2 (bsi, BIT_IOR_EXPR, word_type, a, high_bits); signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, signs); } signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits); result_low = gimplify_build2 (bsi, code, word_type, a_low, b_low); return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs); } static tree do_negate (block_stmt_iterator *bsi, tree word_type, tree b, tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED, enum tree_code code ATTRIBUTE_UNUSED) { tree inner_type = TREE_TYPE (TREE_TYPE (b)); HOST_WIDE_INT max; tree low_bits, high_bits, b_low, result_low, signs; max = GET_MODE_MASK (TYPE_MODE (inner_type)); low_bits = build_replicated_const (word_type, inner_type, max >> 1); high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1)); b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos); b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits); signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, b); signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits); result_low = 
gimplify_build2 (bsi, MINUS_EXPR, word_type, high_bits, b_low); return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs); } /* Expand a vector operation to scalars, by using many operations whose type is the vector type's inner type. */ static tree expand_vector_piecewise (block_stmt_iterator *bsi, elem_op_func f, tree type, tree inner_type, tree a, tree b, enum tree_code code) { tree head, *chain = &head; tree part_width = TYPE_SIZE (inner_type); tree index = bitsize_int (0); int nunits = TYPE_VECTOR_SUBPARTS (type); int delta = tree_low_cst (part_width, 1) / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1); int i; for (i = 0; i < nunits; i += delta, index = int_const_binop (PLUS_EXPR, index, part_width, 0)) { tree result = f (bsi, inner_type, a, b, index, part_width, code); *chain = tree_cons (NULL_TREE, result, NULL_TREE); chain = &TREE_CHAIN (*chain); } return build1 (CONSTRUCTOR, type, head); } /* Expand a vector operation to scalars with the freedom to use a scalar integer type, or to use a different size for the items in the vector type. */ static tree expand_vector_parallel (block_stmt_iterator *bsi, elem_op_func f, tree type, tree a, tree b, enum tree_code code) { tree result, compute_type; enum machine_mode mode; int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD; /* We have three strategies. If the type is already correct, just do the operation an element at a time. Else, if the vector is wider than one word, do it a word at a time; finally, if the vector is smaller than one word, do it as a scalar. */ if (TYPE_MODE (TREE_TYPE (type)) == word_mode) return expand_vector_piecewise (bsi, f, type, TREE_TYPE (type), a, b, code); else if (n_words > 1) { tree word_type = build_word_mode_vector_type (n_words); result = expand_vector_piecewise (bsi, f, word_type, TREE_TYPE (word_type), a, b, code); result = gimplify_val (bsi, word_type, result); } else { /* Use a single scalar operation with a mode no wider than word_mode. 
*/ mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0); compute_type = lang_hooks.types.type_for_mode (mode, 1); result = f (bsi, compute_type, a, b, NULL_TREE, NULL_TREE, code); } return result; } /* Expand a vector operation to scalars; for integer types we can use special bit twiddling tricks to do the sums a word at a time, using function F_PARALLEL instead of F. These tricks are done only if they can process at least four items, that is, only if the vector holds at least four items and if a word can hold four items. */ static tree expand_vector_addition (block_stmt_iterator *bsi, elem_op_func f, elem_op_func f_parallel, tree type, tree a, tree b, enum tree_code code) { int parts_per_word = UNITS_PER_WORD / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1); if (INTEGRAL_TYPE_P (TREE_TYPE (type)) && parts_per_word >= 4 && TYPE_VECTOR_SUBPARTS (type) >= 4) return expand_vector_parallel (bsi, f_parallel, type, a, b, code); else return expand_vector_piecewise (bsi, f, type, TREE_TYPE (type), a, b, code); } static tree expand_vector_operation (block_stmt_iterator *bsi, tree type, tree compute_type, tree rhs, enum tree_code code) { enum machine_mode compute_mode = TYPE_MODE (compute_type); /* If the compute mode is not a vector mode (hence we are not decomposing a BLKmode vector to smaller, hardware-supported vectors), we may want to expand the operations in parallel. 
*/ if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT) switch (code) { case PLUS_EXPR: case MINUS_EXPR: if (!TYPE_TRAP_SIGNED (type)) return expand_vector_addition (bsi, do_binop, do_plus_minus, type, TREE_OPERAND (rhs, 0), TREE_OPERAND (rhs, 1), code); break; case NEGATE_EXPR: if (!TYPE_TRAP_SIGNED (type)) return expand_vector_addition (bsi, do_unop, do_negate, type, TREE_OPERAND (rhs, 0), NULL_TREE, code); break; case BIT_AND_EXPR: case BIT_IOR_EXPR: case BIT_XOR_EXPR: return expand_vector_parallel (bsi, do_binop, type, TREE_OPERAND (rhs, 0), TREE_OPERAND (rhs, 1), code); case BIT_NOT_EXPR: return expand_vector_parallel (bsi, do_unop, type, TREE_OPERAND (rhs, 0), NULL_TREE, code); default: break; } if (TREE_CODE_CLASS (code) == tcc_unary) return expand_vector_piecewise (bsi, do_unop, type, compute_type, TREE_OPERAND (rhs, 0), NULL_TREE, code); else return expand_vector_piecewise (bsi, do_binop, type, compute_type, TREE_OPERAND (rhs, 0), TREE_OPERAND (rhs, 1), code); } /* Return a type for the widest vector mode whose components are of mode INNER_MODE, or NULL_TREE if none is found. */ static tree type_for_widest_vector_mode (enum machine_mode inner_mode, optab op) { enum machine_mode best_mode = VOIDmode, mode; int best_nunits = 0; if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT) mode = MIN_MODE_VECTOR_FLOAT; else mode = MIN_MODE_VECTOR_INT; for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode)) if (GET_MODE_INNER (mode) == inner_mode && GET_MODE_NUNITS (mode) > best_nunits && op->handlers[mode].insn_code != CODE_FOR_nothing) best_mode = mode, best_nunits = GET_MODE_NUNITS (mode); if (best_mode == VOIDmode) return NULL_TREE; else return lang_hooks.types.type_for_mode (best_mode, 1); } /* Process one statement. If we identify a vector operation, expand it. 
*/ static void expand_vector_operations_1 (block_stmt_iterator *bsi) { tree stmt = bsi_stmt (*bsi); tree *p_lhs, *p_rhs, lhs, rhs, type, compute_type; enum tree_code code; enum machine_mode compute_mode; optab op; switch (TREE_CODE (stmt)) { case RETURN_EXPR: stmt = TREE_OPERAND (stmt, 0); if (!stmt || TREE_CODE (stmt) != MODIFY_EXPR) return; /* FALLTHRU */ case MODIFY_EXPR: p_lhs = &TREE_OPERAND (stmt, 0); p_rhs = &TREE_OPERAND (stmt, 1); lhs = *p_lhs; rhs = *p_rhs; break; default: return; } type = TREE_TYPE (rhs); if (TREE_CODE (type) != VECTOR_TYPE) return; code = TREE_CODE (rhs); if (TREE_CODE_CLASS (code) != tcc_unary && TREE_CODE_CLASS (code) != tcc_binary) return; if (code == NOP_EXPR || code == VIEW_CONVERT_EXPR) return; gcc_assert (code != CONVERT_EXPR); op = optab_for_tree_code (code, type); /* Optabs will try converting a negation into a subtraction, so look for it as well. TODO: negation of floating-point vectors might be turned into an exclusive OR toggling the sign bit. */ if (op == NULL && code == NEGATE_EXPR && INTEGRAL_TYPE_P (TREE_TYPE (type))) op = optab_for_tree_code (MINUS_EXPR, type); /* For very wide vectors, try using a smaller vector mode. */ compute_type = type; if (TYPE_MODE (type) == BLKmode && op) { tree vector_compute_type = type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type)), op); if (vector_compute_type != NULL_TREE) compute_type = vector_compute_type; } /* If we are breaking a BLKmode vector into smaller pieces, type_for_widest_vector_mode has already looked into the optab, so skip these checks. */ if (compute_type == type) { compute_mode = TYPE_MODE (compute_type); if ((GET_MODE_CLASS (compute_mode) == MODE_VECTOR_INT || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_FLOAT) && op != NULL && op->handlers[compute_mode].insn_code != CODE_FOR_nothing) return; else /* There is no operation in hardware, so fall back to scalars. 
*/ compute_type = TREE_TYPE (type); } rhs = expand_vector_operation (bsi, type, compute_type, rhs, code); if (lang_hooks.types_compatible_p (TREE_TYPE (lhs), TREE_TYPE (rhs))) *p_rhs = rhs; else { /* Build a conversion; VIEW_CONVERT_EXPR is very expensive unless T will be stored in memory anyway, so prefer NOP_EXPR. We should also try performing the VIEW_CONVERT_EXPR on the left side of the assignment. */ if (TYPE_MODE (TREE_TYPE (rhs)) == BLKmode) *p_rhs = gimplify_build1 (bsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), rhs); else *p_rhs = gimplify_build1 (bsi, NOP_EXPR, TREE_TYPE (lhs), rhs); } mark_stmt_modified (bsi_stmt (*bsi)); } /* Use this to lower vector operations introduced by the vectorizer, if it may need the bit-twiddling tricks implemented in this file. */ static bool gate_expand_vector_operations (void) { return flag_tree_vectorize != 0; } static void expand_vector_operations (void) { block_stmt_iterator bsi; basic_block bb; FOR_EACH_BB (bb) { for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi)) { expand_vector_operations_1 (&bsi); update_stmt_if_modified (bsi_stmt (bsi)); } } } static void tree_lower_operations (void) { int old_last_basic_block = last_basic_block; block_stmt_iterator bsi; basic_block bb; FOR_EACH_BB (bb) { if (bb->index >= old_last_basic_block) continue; for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi)) { expand_complex_operations_1 (&bsi); expand_vector_operations_1 (&bsi); } } } struct tree_opt_pass pass_lower_vector_ssa = { "veclower", /* name */ gate_expand_vector_operations, /* gate */ expand_vector_operations, /* execute */ NULL, /* sub */ NULL, /* next */ 0, /* static_pass_number */ 0, /* tv_id */ PROP_cfg, /* properties_required */ 0, /* properties_provided */ 0, /* properties_destroyed */ 0, /* todo_flags_start */ TODO_dump_func | TODO_update_ssa /* todo_flags_finish */ | TODO_ggc_collect | TODO_verify_ssa | TODO_verify_stmts | TODO_verify_flow, 0 /* letter */ }; struct tree_opt_pass pass_pre_expand = { 
"oplower", /* name */ 0, /* gate */ tree_lower_operations, /* execute */ NULL, /* sub */ NULL, /* next */ 0, /* static_pass_number */ 0, /* tv_id */ PROP_cfg, /* properties_required */ 0, /* properties_provided */ 0, /* properties_destroyed */ 0, /* todo_flags_start */ TODO_dump_func | TODO_ggc_collect | TODO_verify_stmts, /* todo_flags_finish */ 0 /* letter */ }; #include "gt-tree-complex.h"