author     bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>  2016-02-10 15:13:54 +0000
committer  bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>  2016-02-10 15:13:54 +0000
commit     8cff878b277f9af6c2827a87581baac5f768e12a (patch)
tree       d4e178503efd243eed24ff3b753cd998370d75d4 /gcc/config/s390/s390.c
parent     9610b14f8599a9db94822d3f0923b58b2f1177dc (diff)
[./]
2016-02-10  Basile Starynkevitch  <basile@starynkevitch.net>

    {{merging with some of GCC 6, using
      svn merge -r222130:226090 ^/trunk ; UNSTABLE}}

[gcc/]
2016-02-10  Basile Starynkevitch  <basile@starynkevitch.net>

    {{merging with trunk 226090 ; UNSTABLE}}

    * melt-run.proto.h: Include tree-ssa-scopedtables.h.
    * tree-ssa-dom.c: Skip second record_edge_info.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/melt-branch@233272 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config/s390/s390.c')
-rw-r--r--    gcc/config/s390/s390.c    2511
1 file changed, 2084 insertions, 427 deletions
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 7d16048db8e..861dfb2bb35 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -23,18 +23,13 @@ along with GCC; see the file COPYING3.  If not see
 #include "config.h"
 #include "system.h"
 #include "coretypes.h"
-#include "tm.h"
+#include "backend.h"
+#include "cfghooks.h"
+#include "tree.h"
+#include "gimple.h"
 #include "rtl.h"
-#include "hash-set.h"
-#include "machmode.h"
-#include "vec.h"
-#include "double-int.h"
-#include "input.h"
+#include "df.h"
 #include "alias.h"
-#include "symtab.h"
-#include "wide-int.h"
-#include "inchash.h"
-#include "tree.h"
 #include "fold-const.h"
 #include "print-tree.h"
 #include "stringpool.h"
@@ -43,19 +38,13 @@ along with GCC; see the file COPYING3.  If not see
 #include "calls.h"
 #include "tm_p.h"
 #include "regs.h"
-#include "hard-reg-set.h"
 #include "insn-config.h"
 #include "conditions.h"
 #include "output.h"
 #include "insn-attr.h"
 #include "flags.h"
 #include "except.h"
-#include "function.h"
 #include "recog.h"
-#include "hashtab.h"
-#include "statistics.h"
-#include "real.h"
-#include "fixed-value.h"
 #include "expmed.h"
 #include "dojump.h"
 #include "explow.h"
@@ -64,32 +53,20 @@ along with GCC; see the file COPYING3.  If not see
 #include "expr.h"
 #include "reload.h"
 #include "diagnostic-core.h"
-#include "predict.h"
-#include "dominance.h"
-#include "cfg.h"
 #include "cfgrtl.h"
 #include "cfganal.h"
 #include "lcm.h"
 #include "cfgbuild.h"
 #include "cfgcleanup.h"
-#include "basic-block.h"
-#include "ggc.h"
 #include "target.h"
-#include "target-def.h"
 #include "debug.h"
 #include "langhooks.h"
 #include "insn-codes.h"
 #include "optabs.h"
-#include "hash-table.h"
-#include "tree-ssa-alias.h"
 #include "internal-fn.h"
 #include "gimple-fold.h"
 #include "tree-eh.h"
-#include "gimple-expr.h"
-#include "is-a.h"
-#include "gimple.h"
 #include "gimplify.h"
-#include "df.h"
 #include "params.h"
 #include "cfgloop.h"
 #include "opts.h"
@@ -97,6 +74,11 @@ along with GCC; see the file COPYING3.  If not see
 #include "context.h"
 #include "builtins.h"
 #include "rtl-iter.h"
+#include "intl.h"
+#include "cgraph.h"
+
+/* This file should be included last.  */
+#include "target-def.h"
 
 /* Define the specific costs for a given cpu.  */
 
@@ -440,6 +422,7 @@ struct GTY(()) machine_function
 /* Number of GPRs and FPRs used for argument passing.  */
 #define GP_ARG_NUM_REG 5
 #define FP_ARG_NUM_REG (TARGET_64BIT? 4 : 2)
+#define VEC_ARG_NUM_REG 8
 
 /* A couple of shortcuts.  */
 #define CONST_OK_FOR_J(x) \
@@ -460,6 +443,525 @@ struct GTY(()) machine_function
    bytes on a z10 (or higher) CPU.  */
 #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
 
+
+/* Indicate which ABI has been used for passing vector args.
+   0 - no vector type arguments have been passed where the ABI is relevant
+   1 - the old ABI has been used
+   2 - a vector type argument has been passed either in a vector register
+       or on the stack by value  */
+static int s390_vector_abi = 0;
+
+/* Set the vector ABI marker if TYPE is subject to the vector ABI
+   switch.  The vector ABI affects only vector data types.  There are
+   two aspects of the vector ABI relevant here:
+
+   1. vectors >= 16 bytes have an alignment of 8 bytes with the new
+   ABI and natural alignment with the old.
+
+   2. vector <= 16 bytes are passed in VRs or by value on the stack
+   with the new ABI but by reference on the stack with the old.
+
+   If ARG_P is true TYPE is used for a function argument or return
+   value.  The ABI marker then is set for all vector data types.  If
+   ARG_P is false only type 1 vectors are being checked.  */
+
+static void
+s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
+{
+  static hash_set<const_tree> visited_types_hash;
+
+  if (s390_vector_abi)
+    return;
+
+  if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK)
+    return;
+
+  if (visited_types_hash.contains (type))
+    return;
+
+  visited_types_hash.add (type);
+
+  if (VECTOR_TYPE_P (type))
+    {
+      int type_size = int_size_in_bytes (type);
+
+      /* Outside arguments only the alignment is changing and this
+         only happens for vector types >= 16 bytes.  */
+      if (!arg_p && type_size < 16)
+        return;
+
+      /* In arguments vector types > 16 are passed as before (GCC
+         never enforced the bigger alignment for arguments which was
+         required by the old vector ABI).  However, it might still be
+         ABI relevant due to the changed alignment if it is a struct
+         member.  */
+      if (arg_p && type_size > 16 && !in_struct_p)
+        return;
+
+      s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
+    }
+  else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
+    {
+      /* ARRAY_TYPE: Since with neither of the ABIs we have more than
+         natural alignment there will never be ABI dependent padding
+         in an array type.  That's why we do not set in_struct_p to
+         true here.  */
+      s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
+    }
+  else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
+    {
+      tree arg_chain;
+
+      /* Check the return type.  */
+      s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);
+
+      for (arg_chain = TYPE_ARG_TYPES (type);
+           arg_chain;
+           arg_chain = TREE_CHAIN (arg_chain))
+        s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false);
+    }
+  else if (RECORD_OR_UNION_TYPE_P (type))
+    {
+      tree field;
+
+      for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
+        {
+          if (TREE_CODE (field) != FIELD_DECL)
+            continue;
+
+          s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true);
+        }
+    }
+}
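For context, here is a minimal user-level sketch of the situation the marker above tracks (the typedef and function names are illustrative, not part of the patch):

    /* Under the new (z13) vector ABI a 16-byte vector argument like this
       travels in a vector register; under the old ABI it is passed by
       reference on the stack.  Vectors of 16 bytes or more also drop from
       natural alignment to 8-byte alignment with the new ABI, which is
       the "type 1" case checked outside of argument passing.  */
    typedef int v4si __attribute__ ((vector_size (16)));

    v4si
    add_v4si (v4si a, v4si b)
    {
      return a + b;   /* passing/returning v4si makes the ABI choice visible */
    }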
+
+
+/* System z builtins.  */
+
+#include "s390-builtins.h"
+
+const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
+  {
+#undef B_DEF
+#undef OB_DEF
+#undef OB_DEF_VAR
+#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
+#define OB_DEF(...)
+#define OB_DEF_VAR(...)
+#include "s390-builtins.def"
+    0
+  };
+
+const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
+  {
+#undef B_DEF
+#undef OB_DEF
+#undef OB_DEF_VAR
+#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
+#define OB_DEF(...)
+#define OB_DEF_VAR(...)
+#include "s390-builtins.def"
+    0
+  };
+
+const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
+  {
+#undef B_DEF
+#undef OB_DEF
+#undef OB_DEF_VAR
+#define B_DEF(...)
+#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
+#define OB_DEF_VAR(...)
+#include "s390-builtins.def"
+    0
+  };
+
+const unsigned int
+opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
+  {
+#undef B_DEF
+#undef OB_DEF
+#undef OB_DEF_VAR
+#define B_DEF(...)
+#define OB_DEF(...)
+#define OB_DEF_VAR(NAME, PATTERN, FLAGS, FNTYPE) FLAGS,
+#include "s390-builtins.def"
+    0
+  };
+
+tree s390_builtin_types[BT_MAX];
+tree s390_builtin_fn_types[BT_FN_MAX];
+tree s390_builtin_decls[S390_BUILTIN_MAX +
+                        S390_OVERLOADED_BUILTIN_MAX +
+                        S390_OVERLOADED_BUILTIN_VAR_MAX];
+
+static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
+#undef B_DEF
+#undef OB_DEF
+#undef OB_DEF_VAR
+#define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
+#define OB_DEF(...)
+#define OB_DEF_VAR(...)
+
+#include "s390-builtins.def"
+  CODE_FOR_nothing
+};
+
+static void
+s390_init_builtins (void)
+{
+  /* These definitions are being used in s390-builtins.def.  */
+  tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
+                                       NULL, NULL);
+  tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
+  tree c_uint64_type_node;
+  unsigned int bflags_mask = (BFLAGS_MASK_INIT);
+
+  bflags_mask |= (TARGET_VX) ? B_VX : 0;
+  bflags_mask |= (TARGET_HTM) ? B_HTM : 0;
+
+  /* The uint64_type_node from tree.c is not compatible to the C99
+     uint64_t data type.  What we want is c_uint64_type_node from
+     c-common.c.  But since backend code is not supposed to interface
+     with the frontend we recreate it here.  */
+  if (TARGET_64BIT)
+    c_uint64_type_node = long_unsigned_type_node;
+  else
+    c_uint64_type_node = long_long_unsigned_type_node;
+
+#undef DEF_TYPE
+#define DEF_TYPE(INDEX, BFLAGS, NODE, CONST_P)                  \
+  if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask))                \
+    s390_builtin_types[INDEX] = (!CONST_P) ?                    \
+      (NODE) : build_type_variant ((NODE), 1, 0);
+
+#undef DEF_POINTER_TYPE
+#define DEF_POINTER_TYPE(INDEX, BFLAGS, INDEX_BASE)             \
+  if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask))                \
+    s390_builtin_types[INDEX] =                                 \
+      build_pointer_type (s390_builtin_types[INDEX_BASE]);
+
+#undef DEF_DISTINCT_TYPE
+#define DEF_DISTINCT_TYPE(INDEX, BFLAGS, INDEX_BASE)            \
+  if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask))                \
+    s390_builtin_types[INDEX] =                                 \
+      build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);
+
+#undef DEF_VECTOR_TYPE
+#define DEF_VECTOR_TYPE(INDEX, BFLAGS, INDEX_BASE, ELEMENTS)    \
+  if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask))                \
+    s390_builtin_types[INDEX] =                                 \
+      build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
+
+#undef DEF_OPAQUE_VECTOR_TYPE
+#define DEF_OPAQUE_VECTOR_TYPE(INDEX, BFLAGS, INDEX_BASE, ELEMENTS) \
+  if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask))                \
+    s390_builtin_types[INDEX] =                                 \
+      build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
+
+#undef DEF_FN_TYPE
+#define DEF_FN_TYPE(INDEX, BFLAGS, args...)                     \
+  if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask))                \
+    s390_builtin_fn_types[INDEX] =                              \
+      build_function_type_list (args, NULL_TREE);
+#undef DEF_OV_TYPE
+#define DEF_OV_TYPE(...)
+#include "s390-builtin-types.def"
+
+#undef B_DEF
+#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE)    \
+  if (((BFLAGS) & ~bflags_mask) == 0)                           \
+    s390_builtin_decls[S390_BUILTIN_##NAME] =                   \
+      add_builtin_function ("__builtin_" #NAME,                 \
+                            s390_builtin_fn_types[FNTYPE],      \
+                            S390_BUILTIN_##NAME,                \
+                            BUILT_IN_MD,                        \
+                            NULL,                               \
+                            ATTRS);
+#undef OB_DEF
+#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE) \
+  if (((BFLAGS) & ~bflags_mask) == 0)                           \
+    s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
+      add_builtin_function ("__builtin_" #NAME,                 \
+                            s390_builtin_fn_types[FNTYPE],      \
+                            S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
+                            BUILT_IN_MD,                        \
+                            NULL,                               \
+                            0);
+#undef OB_DEF_VAR
+#define OB_DEF_VAR(...)
+#include "s390-builtins.def"
+
+}
+
+/* Return true if ARG is appropriate as argument number ARGNUM of
+   builtin DECL.  The operand flags from s390-builtins.def have to
+   be passed as OP_FLAGS.  */
+bool
+s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
+{
+  if (O_UIMM_P (op_flags))
+    {
+      int bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 };
+      int bitwidth = bitwidths[op_flags - O_U1];
+
+      if (!tree_fits_uhwi_p (arg)
+          || tree_to_uhwi (arg) > ((unsigned HOST_WIDE_INT)1 << bitwidth) - 1)
+        {
+          error("constant argument %d for builtin %qF is out of range (0.."
+                HOST_WIDE_INT_PRINT_UNSIGNED ")",
+                argnum, decl,
+                ((unsigned HOST_WIDE_INT)1 << bitwidth) - 1);
+          return false;
+        }
+    }
+
+  if (O_SIMM_P (op_flags))
+    {
+      int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
+      int bitwidth = bitwidths[op_flags - O_S2];
+
+      if (!tree_fits_shwi_p (arg)
+          || tree_to_shwi (arg) < -((HOST_WIDE_INT)1 << (bitwidth - 1))
+          || tree_to_shwi (arg) > (((HOST_WIDE_INT)1 << (bitwidth - 1)) - 1))
+        {
+          error("constant argument %d for builtin %qF is out of range ("
+                HOST_WIDE_INT_PRINT_DEC ".."
+                HOST_WIDE_INT_PRINT_DEC ")",
+                argnum, decl,
+                -(HOST_WIDE_INT)1 << (bitwidth - 1),
+                ((HOST_WIDE_INT)1 << (bitwidth - 1)) - 1);
+          return false;
+        }
+    }
+  return true;
+}
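The accepted ranges computed above are the usual two's-complement bounds; a stand-alone illustration for a 4-bit operand (plain C, not part of the patch):

    #include <stdio.h>

    /* An N-bit unsigned immediate accepts 0 .. 2^N - 1; an N-bit signed
       immediate accepts -2^(N-1) .. 2^(N-1) - 1.  For N = 4 these are
       exactly the bounds the error messages above would print.  */
    int
    main (void)
    {
      int n = 4;
      unsigned long long umax = (1ULL << n) - 1;   /* 15 */
      long long smin = -(1LL << (n - 1));          /* -8 */
      long long smax = (1LL << (n - 1)) - 1;       /*  7 */
      printf ("0..%llu and %lld..%lld\n", umax, smin, smax);
      return 0;
    }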
+
+/* Expand an expression EXP that calls a built-in function,
+   with result going to TARGET if that's convenient
+   (and in mode MODE if that's convenient).
+   SUBTARGET may be used as the target for computing one of EXP's operands.
+   IGNORE is nonzero if the value is to be ignored.  */
+
+static rtx
+s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
+                     machine_mode mode ATTRIBUTE_UNUSED,
+                     int ignore ATTRIBUTE_UNUSED)
+{
+#define MAX_ARGS 5
+
+  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+  enum insn_code icode;
+  rtx op[MAX_ARGS], pat;
+  int arity;
+  bool nonvoid;
+  tree arg;
+  call_expr_arg_iterator iter;
+  unsigned int all_op_flags = opflags_for_builtin (fcode);
+  machine_mode last_vec_mode = VOIDmode;
+
+  if (TARGET_DEBUG_ARG)
+    {
+      fprintf (stderr,
+               "s390_expand_builtin, code = %4d, %s\n",
+               (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)));
+    }
+
+  if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
+      && fcode < S390_ALL_BUILTIN_MAX)
+    {
+      gcc_unreachable ();
+    }
+  else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
+    {
+      icode = code_for_builtin[fcode];
+      /* Set a flag in the machine specific cfun part in order to support
+         saving/restoring of FPRs.  */
+      if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
+        cfun->machine->tbegin_p = true;
+    }
+  else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
+    {
+      error ("Unresolved overloaded builtin");
+      return const0_rtx;
+    }
+  else
+    internal_error ("bad builtin fcode");
+
+  if (icode == 0)
+    internal_error ("bad builtin icode");
+
+  nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
+
+  if (nonvoid)
+    {
+      machine_mode tmode = insn_data[icode].operand[0].mode;
+      if (!target
+          || GET_MODE (target) != tmode
+          || !(*insn_data[icode].operand[0].predicate) (target, tmode))
+        target = gen_reg_rtx (tmode);
+
+      /* There are builtins (e.g. vec_promote) with no vector
+         arguments but an element selector.  So we have to also look
+         at the vector return type when emitting the modulo
+         operation.  */
+      if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
+        last_vec_mode = insn_data[icode].operand[0].mode;
+    }
+
+  arity = 0;
+  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
+    {
+      const struct insn_operand_data *insn_op;
+      unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);
+
+      all_op_flags = all_op_flags >> O_SHIFT;
+
+      if (arg == error_mark_node)
+        return NULL_RTX;
+      if (arity >= MAX_ARGS)
+        return NULL_RTX;
+
+      if (O_IMM_P (op_flags)
+          && TREE_CODE (arg) != INTEGER_CST)
+        {
+          error ("constant value required for builtin %qF argument %d",
+                 fndecl, arity + 1);
+          return const0_rtx;
+        }
+
+      if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
+        return const0_rtx;
+
+      insn_op = &insn_data[icode].operand[arity + nonvoid];
+      op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
+
+      /* Wrap the expanded RTX for pointer types into a MEM expr with
+         the proper mode.  This allows us to use e.g. (match_operand
+         "memory_operand"..) in the insn patterns instead of (mem
+         (match_operand "address_operand)).  This is helpful for
+         patterns not just accepting MEMs.  */
+      if (POINTER_TYPE_P (TREE_TYPE (arg))
+          && insn_op->predicate != address_operand)
+        op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);
+
+      /* Expand the modulo operation required on element selectors.  */
+      if (op_flags == O_ELEM)
+        {
+          gcc_assert (last_vec_mode != VOIDmode);
+          op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
+                                             op[arity],
+                                             GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
+                                             NULL_RTX, 1, OPTAB_DIRECT);
+        }
+
+      /* Record the vector mode used for an element selector.  This assumes:
+         1. There is no builtin with two different vector modes and an element selector
+         2. The element selector comes after the vector type it is referring to.
+         This is currently true for all the builtins but FIXME we
+         should better check for that.  */
+      if (VECTOR_MODE_P (insn_op->mode))
+        last_vec_mode = insn_op->mode;
+
+      if (insn_op->predicate (op[arity], insn_op->mode))
+        {
+          arity++;
+          continue;
+        }
+
+      if (MEM_P (op[arity])
+          && insn_op->predicate == memory_operand
+          && (GET_MODE (XEXP (op[arity], 0)) == Pmode
+              || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
+        {
+          op[arity] = replace_equiv_address (op[arity],
+                                             copy_to_mode_reg (Pmode,
+                                               XEXP (op[arity], 0)));
+        }
+      else if (GET_MODE (op[arity]) == insn_op->mode
+               || GET_MODE (op[arity]) == VOIDmode
+               || (insn_op->predicate == address_operand
+                   && GET_MODE (op[arity]) == Pmode))
+        {
+          /* An address_operand usually has VOIDmode in the expander
+             so we cannot use this.  */
+          machine_mode target_mode =
+            (insn_op->predicate == address_operand
+             ? Pmode : insn_op->mode);
+          op[arity] = copy_to_mode_reg (target_mode, op[arity]);
+        }
+
+      if (!insn_op->predicate (op[arity], insn_op->mode))
+        {
+          error ("Invalid argument %d for builtin %qF", arity + 1, fndecl);
+          return const0_rtx;
+        }
+      arity++;
+    }
+
+  if (last_vec_mode != VOIDmode && !TARGET_VX)
+    {
+      error ("Vector type builtin %qF is not supported without -mvx "
+             "(default with -march=z13).",
+             fndecl);
+      return const0_rtx;
+    }
+
+  switch (arity)
+    {
+    case 0:
+      pat = GEN_FCN (icode) (target);
+      break;
+    case 1:
+      if (nonvoid)
+        pat = GEN_FCN (icode) (target, op[0]);
+      else
+        pat = GEN_FCN (icode) (op[0]);
+      break;
+    case 2:
+      if (nonvoid)
+        pat = GEN_FCN (icode) (target, op[0], op[1]);
+      else
+        pat = GEN_FCN (icode) (op[0], op[1]);
+      break;
+    case 3:
+      if (nonvoid)
+        pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
+      else
+        pat = GEN_FCN (icode) (op[0], op[1], op[2]);
+      break;
+    case 4:
+      if (nonvoid)
+        pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
+      else
+        pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
+      break;
+    case 5:
+      if (nonvoid)
+        pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
+      else
+        pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
+      break;
+    case 6:
+      if (nonvoid)
+        pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
+      else
+        pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
+      break;
+    default:
+      gcc_unreachable ();
+    }
+  if (!pat)
+    return NULL_RTX;
+  emit_insn (pat);
+
+  if (nonvoid)
+    return target;
+  else
+    return const0_rtx;
+}
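As a usage-level illustration of this expander, the transactional-execution builtins it special-cases (setting tbegin_p above) can be driven like this; a sketch assuming a compiler and CPU with the HTM facility (-mhtm):

    /* __builtin_tbegin starts a transaction and returns 0 when the
       transaction began successfully; __builtin_tend commits it.  */
    int
    try_transaction (int *counter)
    {
      if (__builtin_tbegin ((void *) 0) == 0)
        {
          ++*counter;            /* transactional body */
          __builtin_tend ();
          return 1;
        }
      return 0;                  /* aborted or not started */
    }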
+
+
 static const int s390_hotpatch_hw_max = 1000000;
 static int s390_hotpatch_hw_before_label = 0;
 static int s390_hotpatch_hw_after_label = 0;
@@ -509,9 +1011,43 @@ s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
   return NULL_TREE;
 }
 
+/* Expand the s390_vector_bool type attribute.  */
+
+static tree
+s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
+                                  tree args ATTRIBUTE_UNUSED,
+                                  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
+{
+  tree type = *node, result = NULL_TREE;
+  machine_mode mode;
+
+  while (POINTER_TYPE_P (type)
+         || TREE_CODE (type) == FUNCTION_TYPE
+         || TREE_CODE (type) == METHOD_TYPE
+         || TREE_CODE (type) == ARRAY_TYPE)
+    type = TREE_TYPE (type);
+
+  mode = TYPE_MODE (type);
+  switch (mode)
+    {
+    case DImode: case V2DImode: result = s390_builtin_types[BT_BV2DI]; break;
+    case SImode: case V4SImode: result = s390_builtin_types[BT_BV4SI]; break;
+    case HImode: case V8HImode: result = s390_builtin_types[BT_BV8HI]; break;
+    case QImode: case V16QImode: result = s390_builtin_types[BT_BV16QI];
+    default: break;
+    }
+
+  *no_add_attrs = true;  /* No need to hang on to the attribute.  */
+
+  if (result)
+    *node = lang_hooks.types.reconstruct_complex_type (*node, result);
+
+  return NULL_TREE;
+}
+
 static const struct attribute_spec s390_attribute_table[] = {
-  { "hotpatch", 2, 2, true, false, false, s390_handle_hotpatch_attribute, false
-  },
+  { "hotpatch", 2, 2, true, false, false, s390_handle_hotpatch_attribute, false },
+  { "s390_vector_bool", 0, 0, false, true, false, s390_handle_vectorbool_attribute, true },
   /* End element.  */
   { NULL, 0, 0, false, false, false, NULL, false }
 };
@@ -576,6 +1112,35 @@ s390_scalar_mode_supported_p (machine_mode mode)
   return default_scalar_mode_supported_p (mode);
 }
 
+/* Return true if the back end supports vector mode MODE.  */
+
+static bool
+s390_vector_mode_supported_p (machine_mode mode)
+{
+  machine_mode inner;
+
+  if (!VECTOR_MODE_P (mode)
+      || !TARGET_VX
+      || GET_MODE_SIZE (mode) > 16)
+    return false;
+
+  inner = GET_MODE_INNER (mode);
+
+  switch (inner)
+    {
+    case QImode:
+    case HImode:
+    case SImode:
+    case DImode:
+    case TImode:
+    case SFmode:
+    case DFmode:
+    case TFmode:
+      return true;
+    default:
+      return false;
+    }
+}
+
 /* Set the has_landing_pad_p flag in struct machine_function to VALUE.  */
 
 void
@@ -647,6 +1212,11 @@ s390_match_ccmode_set (rtx set, machine_mode req_mode)
     case CCT1mode:
     case CCT2mode:
     case CCT3mode:
+    case CCVEQmode:
+    case CCVHmode:
+    case CCVHUmode:
+    case CCVFHmode:
+    case CCVFHEmode:
       if (req_mode != set_mode)
         return 0;
       break;
@@ -747,6 +1317,29 @@ s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
 machine_mode
 s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
 {
+  if (TARGET_VX
+      && register_operand (op0, DFmode)
+      && register_operand (op1, DFmode))
+    {
+      /* LT, LE, UNGT, UNGE require swapping OP0 and OP1.  Either
+         s390_emit_compare or s390_canonicalize_comparison will take
+         care of it.  */
+      switch (code)
+        {
+        case EQ:
+        case NE:
+          return CCVEQmode;
+        case GT:
+        case UNLE:
+          return CCVFHmode;
+        case GE:
+        case UNLT:
+          return CCVFHEmode;
+        default:
+          ;
+        }
+    }
+
   switch (code)
     {
     case EQ:
@@ -1024,8 +1617,73 @@ s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
       rtx tem = *op0; *op0 = *op1; *op1 = tem;
       *code = (int)swap_condition ((enum rtx_code)*code);
     }
+
+  /* Using the scalar variants of vector instructions for 64 bit FP
+     comparisons might require swapping the operands.  */
+  if (TARGET_VX
+      && register_operand (*op0, DFmode)
+      && register_operand (*op1, DFmode)
+      && (*code == LT || *code == LE || *code == UNGT || *code == UNGE))
+    {
+      rtx tmp;
+
+      switch (*code)
+        {
+        case LT:   *code = GT;   break;
+        case LE:   *code = GE;   break;
+        case UNGT: *code = UNLE; break;
+        case UNGE: *code = UNLT; break;
+        default: ;
+        }
+      tmp = *op0; *op0 = *op1; *op1 = tmp;
+    }
+}
+
+/* Helper function for s390_emit_compare.  If possible emit a 64 bit
+   FP compare using the single element variant of vector instructions.
+   Replace CODE with the comparison code to be used in the CC reg
+   compare and return the condition code register RTX in CC.  */
+
+static bool
+s390_expand_vec_compare_scalar (enum rtx_code *code, rtx cmp1, rtx cmp2,
+                                rtx *cc)
+{
+  machine_mode cmp_mode;
+  bool swap_p = false;
+
+  switch (*code)
+    {
+    case EQ:   cmp_mode = CCVEQmode;  break;
+    case NE:   cmp_mode = CCVEQmode;  break;
+    case GT:   cmp_mode = CCVFHmode;  break;
+    case GE:   cmp_mode = CCVFHEmode; break;
+    case UNLE: cmp_mode = CCVFHmode;  break;
+    case UNLT: cmp_mode = CCVFHEmode; break;
+    case LT:   cmp_mode = CCVFHmode;  *code = GT;   swap_p = true; break;
+    case LE:   cmp_mode = CCVFHEmode; *code = GE;   swap_p = true; break;
+    case UNGE: cmp_mode = CCVFHmode;  *code = UNLE; swap_p = true; break;
+    case UNGT: cmp_mode = CCVFHEmode; *code = UNLT; swap_p = true; break;
+    default: return false;
+    }
+
+  if (swap_p)
+    {
+      rtx tmp = cmp2;
+      cmp2 = cmp1;
+      cmp1 = tmp;
+    }
+  *cc = gen_rtx_REG (cmp_mode, CC_REGNUM);
+  emit_insn (gen_rtx_PARALLEL (VOIDmode,
+               gen_rtvec (2,
+                          gen_rtx_SET (*cc,
+                                       gen_rtx_COMPARE (cmp_mode, cmp1,
+                                                        cmp2)),
+                          gen_rtx_CLOBBER (VOIDmode,
+                                           gen_rtx_SCRATCH (V2DImode)))));
+  return true;
 }
 
+
 /* Emit a compare instruction suitable to implement the comparison
    OP0 CODE OP1.  Return the correct condition RTL to be placed in
    the IF_THEN_ELSE of the conditional branch testing the result.  */
@@ -1036,10 +1694,18 @@ s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
   machine_mode mode = s390_select_ccmode (code, op0, op1);
   rtx cc;
 
-  /* Do not output a redundant compare instruction if a compare_and_swap
-     pattern already computed the result and the machine modes are compatible.  */
-  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
+  if (TARGET_VX
+      && register_operand (op0, DFmode)
+      && register_operand (op1, DFmode)
+      && s390_expand_vec_compare_scalar (&code, op0, op1, &cc))
+    {
+      /* Work has been done by s390_expand_vec_compare_scalar already.  */
+    }
+  else if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
     {
+      /* Do not output a redundant compare instruction if a
+         compare_and_swap pattern already computed the result and the
+         machine modes are compatible.  */
       gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
                   == GET_MODE (op0));
       cc = op0;
@@ -1047,7 +1713,7 @@
   else
     {
       cc = gen_rtx_REG (mode, CC_REGNUM);
-      emit_insn (gen_rtx_SET (VOIDmode, cc, gen_rtx_COMPARE (mode, op0, op1)));
+      emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
     }
 
   return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
@@ -1080,7 +1746,7 @@ s390_emit_jump (rtx target, rtx cond)
   if (cond)
     target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
 
-  insn = gen_rtx_SET (VOIDmode, pc_rtx, target);
+  insn = gen_rtx_SET (pc_rtx, target);
   return emit_jump_insn (insn);
 }
 
@@ -1274,6 +1940,93 @@ s390_branch_condition_mask (rtx code)
         }
       break;
 
+    /* Vector comparison modes.  */
+
+    case CCVEQmode:
+      switch (GET_CODE (code))
+        {
+        case EQ: return CC0;
+        case NE: return CC3;
+        default: return -1;
+        }
+
+    case CCVEQANYmode:
+      switch (GET_CODE (code))
+        {
+        case EQ: return CC0 | CC1;
+        case NE: return CC3 | CC1;
+        default: return -1;
+        }
+
+    /* Integer vector compare modes.  */
+
+    case CCVHmode:
+      switch (GET_CODE (code))
+        {
+        case GT: return CC0;
+        case LE: return CC3;
+        default: return -1;
+        }
+
+    case CCVHANYmode:
+      switch (GET_CODE (code))
+        {
+        case GT: return CC0 | CC1;
+        case LE: return CC3 | CC1;
+        default: return -1;
+        }
+
+    case CCVHUmode:
+      switch (GET_CODE (code))
+        {
+        case GTU: return CC0;
+        case LEU: return CC3;
+        default: return -1;
+        }
+
+    case CCVHUANYmode:
+      switch (GET_CODE (code))
+        {
+        case GTU: return CC0 | CC1;
+        case LEU: return CC3 | CC1;
+        default: return -1;
+        }
+
+    /* FP vector compare modes.  */
+
+    case CCVFHmode:
+      switch (GET_CODE (code))
+        {
+        case GT: return CC0;
+        case UNLE: return CC3;
+        default: return -1;
+        }
+
+    case CCVFHANYmode:
+      switch (GET_CODE (code))
+        {
+        case GT: return CC0 | CC1;
+        case UNLE: return CC3 | CC1;
+        default: return -1;
+        }
+
+    case CCVFHEmode:
+      switch (GET_CODE (code))
+        {
+        case GE: return CC0;
+        case UNLT: return CC3;
+        default: return -1;
+        }
+
+    case CCVFHEANYmode:
+      switch (GET_CODE (code))
+        {
+        case GE: return CC0 | CC1;
+        case UNLT: return CC3 | CC1;
+        default: return -1;
+        }
+
+
     case CCRAWmode:
       switch (GET_CODE (code))
         {
@@ -1473,6 +2226,9 @@ s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, int size,
   /* Calculate a mask for all bits beyond the contiguous bits.  */
   mask = (-1LL & ~(((1ULL << (tmp_length + tmp_pos - 1)) << 1) - 1));
 
+  if ((unsigned)size < sizeof (HOST_WIDE_INT) * BITS_PER_UNIT)
+    mask &= (HOST_WIDE_INT_1U << size) - 1;
+
   if (mask & in)
     return false;
 
@@ -1488,6 +2244,101 @@
   return true;
 }
 
+/* Return true if OP contains the same contiguous bitfield in *all*
+   its elements.  START and END can be used to obtain the start and
+   end position of the bitfield.
+
+   START/STOP give the position of the first/last bit of the bitfield
+   counting from the lowest order bit starting with zero.  In order to
+   use these values for S/390 instructions this has to be converted to
+   "bits big endian" style.  */
+
+bool
+s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
+{
+  unsigned HOST_WIDE_INT mask;
+  int length, size;
+
+  if (!VECTOR_MODE_P (GET_MODE (op))
+      || GET_CODE (op) != CONST_VECTOR
+      || !CONST_INT_P (XVECEXP (op, 0, 0)))
+    return false;
+
+  if (GET_MODE_NUNITS (GET_MODE (op)) > 1)
+    {
+      int i;
+
+      for (i = 1; i < GET_MODE_NUNITS (GET_MODE (op)); ++i)
+        if (!rtx_equal_p (XVECEXP (op, 0, i), XVECEXP (op, 0, 0)))
+          return false;
+    }
+
+  size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));
+  mask = UINTVAL (XVECEXP (op, 0, 0));
+  if (s390_contiguous_bitmask_p (mask, size, start,
+                                 end != NULL ? &length : NULL))
+    {
+      if (end != NULL)
+        *end = *start + length - 1;
+      return true;
+    }
+  /* 0xff00000f style immediates can be covered by swapping start and
+     end indices in vgm.  */
+  if (s390_contiguous_bitmask_p (~mask, size, start,
+                                 end != NULL ? &length : NULL))
+    {
+      if (end != NULL)
+        *end = *start - 1;
+      if (start != NULL)
+        *start = *start + length;
+      return true;
+    }
+  return false;
+}
+
+/* Return true if C consists only of byte chunks being either 0 or
+   0xff.  If MASK is !=NULL a byte mask is generated which is
+   appropriate for the vector generate byte mask instruction.  */
+
+bool
+s390_bytemask_vector_p (rtx op, unsigned *mask)
+{
+  int i;
+  unsigned tmp_mask = 0;
+  int nunit, unit_size;
+
+  if (!VECTOR_MODE_P (GET_MODE (op))
+      || GET_CODE (op) != CONST_VECTOR
+      || !CONST_INT_P (XVECEXP (op, 0, 0)))
+    return false;
+
+  nunit = GET_MODE_NUNITS (GET_MODE (op));
+  unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));
+
+  for (i = 0; i < nunit; i++)
+    {
+      unsigned HOST_WIDE_INT c;
+      int j;
+
+      if (!CONST_INT_P (XVECEXP (op, 0, i)))
+        return false;
+
+      c = UINTVAL (XVECEXP (op, 0, i));
+      for (j = 0; j < unit_size; j++)
+        {
+          if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
+            return false;
+          tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
+          c = c >> BITS_PER_UNIT;
+        }
+    }
+
+  if (mask != NULL)
+    *mask = tmp_mask;
+
+  return true;
+}
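A stand-alone sketch of the "single contiguous run of ones" property these predicates test (plain C; the helper name is made up):

    #include <stdbool.h>
    #include <stdint.h>

    /* A mask is contiguous if, after dropping its trailing zeros, the
       remaining ones form a single run, i.e. adding 1 clears them all.
       0x00ffff00 qualifies directly; 0xff00000f qualifies only in
       complemented form, which is why start and end are swapped for
       such vgm immediates above.  */
    static bool
    contiguous_p (uint64_t m)
    {
      if (m == 0)
        return false;
      m >>= __builtin_ctzll (m);     /* drop trailing zeros */
      return (m & (m + 1)) == 0;     /* a single run remains?  */
    }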
+
 /* Check whether a rotate of ROTL followed by an AND of CONTIG
    is equivalent to a shift followed by the AND.  In particular,
    CONTIG should not overlap the (rotated) bit 0/bit 63 gap.  Negative values
@@ -1513,8 +2364,8 @@ s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
 bool
 s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
 {
-  /* Floating point registers cannot be split.  */
-  if (FP_REG_P (src) || FP_REG_P (dst))
+  /* Floating point and vector registers cannot be split.  */
+  if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
     return false;
 
   /* We don't need to split if operands are directly accessible.  */
@@ -1648,7 +2499,7 @@ s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
     }
 
   /* Emit the instruction.  */
-  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
+  op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
   clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
   emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
 
@@ -1705,16 +2556,20 @@ s390_init_machine_status (void)
 /* Map for smallest class containing reg regno.  */
 
 const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
-{ GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
-  ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
-  ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
-  ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
-  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
-  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
-  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
-  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
-  ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS,
-  ACCESS_REGS, ACCESS_REGS
+{ GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,       /*  0 */
+  ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,          /*  4 */
+  ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,          /*  8 */
+  ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,          /* 12 */
+  FP_REGS, FP_REGS, FP_REGS, FP_REGS,                  /* 16 */
+  FP_REGS, FP_REGS, FP_REGS, FP_REGS,                  /* 20 */
+  FP_REGS, FP_REGS, FP_REGS, FP_REGS,                  /* 24 */
+  FP_REGS, FP_REGS, FP_REGS, FP_REGS,                  /* 28 */
+  ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS,            /* 32 */
+  ACCESS_REGS, ACCESS_REGS, VEC_REGS, VEC_REGS,        /* 36 */
+  VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS,              /* 40 */
+  VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS,              /* 44 */
+  VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS,              /* 48 */
+  VEC_REGS, VEC_REGS                                   /* 52 */
 };
 
 /* Return attribute type of insn.  */
@@ -2447,13 +3302,14 @@ s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
 /* Compute a (partial) cost for rtx X.  Return true if the complete
    cost has been computed, and false if subexpressions should be
    scanned.  In either case, *TOTAL contains the cost result.
-   CODE contains GET_CODE (x), OUTER_CODE contains the code
-   of the superexpression of x.  */
+   OUTER_CODE contains the code of the superexpression of x.  */
 
 static bool
-s390_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
+s390_rtx_costs (rtx x, machine_mode mode, int outer_code,
+                int opno ATTRIBUTE_UNUSED,
                 int *total, bool speed ATTRIBUTE_UNUSED)
 {
+  int code = GET_CODE (x);
   switch (code)
     {
     case CONST:
@@ -2484,7 +3340,7 @@
       return false;
 
     case MULT:
-      switch (GET_MODE (x))
+      switch (mode)
        {
        case SImode:
          {
@@ -2543,7 +3399,7 @@
       return false;
 
     case FMA:
-      switch (GET_MODE (x))
+      switch (mode)
       {
      case DFmode:
        *total = s390_cost->madbr;
@@ -2557,18 +3413,18 @@
       /* Negate in the third argument is free: FMSUB.  */
       if (GET_CODE (XEXP (x, 2)) == NEG)
       {
-         *total += (rtx_cost (XEXP (x, 0), FMA, 0, speed)
-                    + rtx_cost (XEXP (x, 1), FMA, 1, speed)
-                    + rtx_cost (XEXP (XEXP (x, 2), 0), FMA, 2, speed));
+         *total += (rtx_cost (XEXP (x, 0), mode, FMA, 0, speed)
+                    + rtx_cost (XEXP (x, 1), mode, FMA, 1, speed)
+                    + rtx_cost (XEXP (XEXP (x, 2), 0), mode, FMA, 2, speed));
          return true;
       }
       return false;
 
     case UDIV:
     case UMOD:
-      if (GET_MODE (x) == TImode)              /* 128 bit division */
+      if (mode == TImode)              /* 128 bit division */
        *total = s390_cost->dlgr;
-      else if (GET_MODE (x) == DImode)
+      else if (mode == DImode)
       {
        rtx right = XEXP (x, 1);
        if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
@@ -2576,13 +3432,13 @@
        else                                 /* 64 by 64 bit division */
          *total = s390_cost->dlgr;
       }
-      else if (GET_MODE (x) == SImode)         /* 32 bit division */
+      else if (mode == SImode)         /* 32 bit division */
       *total = s390_cost->dlr;
       return false;
 
     case DIV:
     case MOD:
-      if (GET_MODE (x) == DImode)
+      if (mode == DImode)
       {
        rtx right = XEXP (x, 1);
        if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
@@ -2593,26 +3449,26 @@
        else                                 /* 64 by 64 bit division */
          *total = s390_cost->dsgr;
       }
-      else if (GET_MODE (x) == SImode)         /* 32 bit division */
+      else if (mode == SImode)         /* 32 bit division */
       *total = s390_cost->dlr;
-      else if (GET_MODE (x) == SFmode)
+      else if (mode == SFmode)
       {
        *total = s390_cost->debr;
       }
-      else if (GET_MODE (x) == DFmode)
+      else if (mode == DFmode)
      {
        *total = s390_cost->ddbr;
      }
-      else if (GET_MODE (x) == TFmode)
+      else if (mode == TFmode)
      {
        *total = s390_cost->dxbr;
      }
      return false;
 
     case SQRT:
-      if (GET_MODE (x) == SFmode)
+      if (mode == SFmode)
       *total = s390_cost->sqebr;
-      else if (GET_MODE (x) == DFmode)
+      else if (mode == DFmode)
       *total = s390_cost->sqdbr;
      else /* TFmode */
       *total = s390_cost->sqxbr;
@@ -2775,6 +3631,17 @@ legitimate_pic_operand_p (rtx op)
 static bool
 s390_legitimate_constant_p (machine_mode mode, rtx op)
 {
+  if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
+    {
+      if (GET_MODE_SIZE (mode) != 16)
+        return 0;
+
+      if (!const0_operand (op, mode)
+          && !s390_contiguous_bitmask_vector_p (op, NULL, NULL)
+          && !s390_bytemask_vector_p (op, NULL))
+        return 0;
+    }
+
   /* Accept all non-symbolic constants.  */
   if (!SYMBOLIC_CONST (op))
     return 1;
@@ -2811,6 +3678,7 @@ s390_cannot_force_const_mem (machine_mode mode, rtx x)
     {
     case CONST_INT:
    case CONST_DOUBLE:
+    case CONST_VECTOR:
      /* Accept all non-symbolic constants.  */
      return false;
 
@@ -2943,6 +3811,27 @@ legitimate_reload_fp_constant_p (rtx op)
   return false;
 }
 
+/* Returns true if the constant value OP is a legitimate vector operand
+   during and after reload.
+   This function accepts all constants which can be loaded directly
+   into an VR.  */
+
+static bool
+legitimate_reload_vector_constant_p (rtx op)
+{
+  /* FIXME: Support constant vectors with all the same 16 bit unsigned
+     operands.  These can be loaded with vrepi.  */
+
+  if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
+      && (const0_operand (op, GET_MODE (op))
+          || constm1_operand (op, GET_MODE (op))
+          || s390_contiguous_bitmask_vector_p (op, NULL, NULL)
+          || s390_bytemask_vector_p (op, NULL)))
+    return true;
+
+  return false;
+}
+
 /* Given an rtx OP being reloaded into a reg required to be in class
    RCLASS, return the class of reg to actually use.  */
 
@@ -2953,6 +3842,7 @@ s390_preferred_reload_class (rtx op, reg_class_t rclass)
     {
       /* Constants we cannot reload into general registers
         must be forced into the literal pool.  */
+    case CONST_VECTOR:
     case CONST_DOUBLE:
     case CONST_INT:
       if (reg_class_subset_p (GENERAL_REGS, rclass)
@@ -2964,6 +3854,10 @@
       else if (reg_class_subset_p (FP_REGS, rclass)
               && legitimate_reload_fp_constant_p (op))
        return FP_REGS;
+      else if (reg_class_subset_p (VEC_REGS, rclass)
+              && legitimate_reload_vector_constant_p (op))
+       return VEC_REGS;
+
       return NO_REGS;
 
       /* If a symbolic constant or a PLUS is reloaded,
@@ -3087,6 +3981,7 @@ s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
   /* Reload might have pulled a constant out of the literal pool.
      Force it back in.  */
   if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
+      || GET_CODE (mem) == CONST_VECTOR
      || GET_CODE (mem) == CONST)
     mem = force_const_mem (GET_MODE (reg), mem);
 
@@ -3126,6 +4021,30 @@ s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
   if (reg_classes_intersect_p (CC_REGS, rclass))
     return GENERAL_REGS;
 
+  if (TARGET_VX)
+    {
+      /* The vst/vl vector move instructions allow only for short
+         displacements.  */
+      if (MEM_P (x)
+          && GET_CODE (XEXP (x, 0)) == PLUS
+          && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
+          && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x, 0), 1)))
+          && reg_class_subset_p (rclass, VEC_REGS)
+          && (!reg_class_subset_p (rclass, FP_REGS)
+              || (GET_MODE_SIZE (mode) > 8
+                  && s390_class_max_nregs (FP_REGS, mode) == 1)))
+        {
+          if (in_p)
+            sri->icode = (TARGET_64BIT ?
+                          CODE_FOR_reloaddi_la_in :
+                          CODE_FOR_reloadsi_la_in);
+          else
+            sri->icode = (TARGET_64BIT ?
+                          CODE_FOR_reloaddi_la_out :
+                          CODE_FOR_reloadsi_la_out);
+        }
+    }
+
   if (TARGET_Z10)
     {
       HOST_WIDE_INT offset;
@@ -3141,17 +4060,15 @@
        sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
                      : CODE_FOR_reloadsi_larl_odd_addend_z10);
 
-      /* On z10 we need a scratch register when moving QI, TI or floating
-        point mode values from or to a memory location with a SYMBOL_REF
-        or if the symref addend of a SI or DI move is not aligned to the
-        width of the access.  */
+      /* Handle all the (mem (symref)) accesses we cannot use the z10
+        instructions for.  */
       if (MEM_P (x)
          && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
-         && (mode == QImode || mode == TImode || FLOAT_MODE_P (mode)
-             || (!TARGET_ZARCH && mode == DImode)
-             || ((mode == HImode || mode == SImode || mode == DImode)
-                 && (!s390_check_symref_alignment (XEXP (x, 0),
-                                                   GET_MODE_SIZE (mode))))))
+         && (mode == QImode
+             || !reg_class_subset_p (rclass, GENERAL_REGS)
+             || GET_MODE_SIZE (mode) > UNITS_PER_WORD
+             || !s390_check_symref_alignment (XEXP (x, 0),
+                                              GET_MODE_SIZE (mode))))
        {
#define __SECONDARY_RELOAD_CASE(M,m)            \
         case M##mode:                           \
@@ -3176,7 +4093,27 @@
           __SECONDARY_RELOAD_CASE (SD, sd);
           __SECONDARY_RELOAD_CASE (DD, dd);
           __SECONDARY_RELOAD_CASE (TD, td);
-
+           __SECONDARY_RELOAD_CASE (V1QI, v1qi);
+           __SECONDARY_RELOAD_CASE (V2QI, v2qi);
+           __SECONDARY_RELOAD_CASE (V4QI, v4qi);
+           __SECONDARY_RELOAD_CASE (V8QI, v8qi);
+           __SECONDARY_RELOAD_CASE (V16QI, v16qi);
+           __SECONDARY_RELOAD_CASE (V1HI, v1hi);
+           __SECONDARY_RELOAD_CASE (V2HI, v2hi);
+           __SECONDARY_RELOAD_CASE (V4HI, v4hi);
+           __SECONDARY_RELOAD_CASE (V8HI, v8hi);
+           __SECONDARY_RELOAD_CASE (V1SI, v1si);
+           __SECONDARY_RELOAD_CASE (V2SI, v2si);
+           __SECONDARY_RELOAD_CASE (V4SI, v4si);
+           __SECONDARY_RELOAD_CASE (V1DI, v1di);
+           __SECONDARY_RELOAD_CASE (V2DI, v2di);
+           __SECONDARY_RELOAD_CASE (V1TI, v1ti);
+           __SECONDARY_RELOAD_CASE (V1SF, v1sf);
+           __SECONDARY_RELOAD_CASE (V2SF, v2sf);
+           __SECONDARY_RELOAD_CASE (V4SF, v4sf);
+           __SECONDARY_RELOAD_CASE (V1DF, v1df);
+           __SECONDARY_RELOAD_CASE (V2DF, v2df);
+           __SECONDARY_RELOAD_CASE (V1TF, v1tf);
         default:
           gcc_unreachable ();
         }
@@ -3215,12 +4152,12 @@
       {
         if (in_p)
           sri->icode = (TARGET_64BIT ?
-                        CODE_FOR_reloaddi_nonoffmem_in :
-                        CODE_FOR_reloadsi_nonoffmem_in);
+                        CODE_FOR_reloaddi_la_in :
+                        CODE_FOR_reloadsi_la_in);
        else
           sri->icode = (TARGET_64BIT ?
-                        CODE_FOR_reloaddi_nonoffmem_out :
-                        CODE_FOR_reloadsi_nonoffmem_out);
+                        CODE_FOR_reloaddi_la_out :
+                        CODE_FOR_reloadsi_la_out);
       }
     }
 
@@ -3847,7 +4784,7 @@ legitimize_tls_address (rtx addr, rtx reg)
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr),
                                    UNSPEC_TLS_LOAD);
          temp = gen_reg_rtx (Pmode);
-         emit_insn (gen_rtx_SET (Pmode, temp, new_rtx));
+         emit_insn (gen_rtx_SET (temp, new_rtx));
        }
      else
       {
@@ -3864,7 +4801,7 @@
         new_rtx = gen_const_mem (Pmode, new_rtx);
         new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
         temp = gen_reg_rtx (Pmode);
-         emit_insn (gen_rtx_SET (Pmode, temp, new_rtx));
+         emit_insn (gen_rtx_SET (temp, new_rtx));
       }
 
      new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
@@ -4426,7 +5363,7 @@ s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
       temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
       temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
                 gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
-      temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
+      temp = gen_rtx_SET (pc_rtx, temp);
       emit_jump_insn (temp);
 
       s390_load_address (addr0,
@@ -4454,6 +5391,138 @@
   return true;
 }
 
+/* Emit a conditional jump to LABEL for condition code mask MASK using
+   comparison operator COMPARISON.  Return the emitted jump insn.  */
+
+static rtx
+s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
+{
+  rtx temp;
+
+  gcc_assert (comparison == EQ || comparison == NE);
+  gcc_assert (mask > 0 && mask < 15);
+
+  temp = gen_rtx_fmt_ee (comparison, VOIDmode,
+                         gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
+  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
+                               gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
+  temp = gen_rtx_SET (pc_rtx, temp);
+  return emit_jump_insn (temp);
+}
+
+/* Emit the instructions to implement strlen of STRING and store the
+   result in TARGET.  The string has the known ALIGNMENT.  This
+   version uses vector instructions and is therefore not appropriate
+   for targets prior to z13.  */
+
+void
+s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
+{
+  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
+  int very_likely = REG_BR_PROB_BASE - 1;
+  rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
+  rtx str_reg = gen_reg_rtx (V16QImode);
+  rtx str_addr_base_reg = gen_reg_rtx (Pmode);
+  rtx str_idx_reg = gen_reg_rtx (Pmode);
+  rtx result_reg = gen_reg_rtx (V16QImode);
+  rtx is_aligned_label = gen_label_rtx ();
+  rtx into_loop_label = NULL_RTX;
+  rtx loop_start_label = gen_label_rtx ();
+  rtx temp;
+  rtx len = gen_reg_rtx (QImode);
+  rtx cond;
+
+  s390_load_address (str_addr_base_reg, XEXP (string, 0));
+  emit_move_insn (str_idx_reg, const0_rtx);
+
+  if (INTVAL (alignment) < 16)
+    {
+      /* Check whether the address happens to be aligned properly so
+         jump directly to the aligned loop.  */
+      emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
+                                            str_addr_base_reg, GEN_INT (15)),
+                               const0_rtx, EQ, NULL_RTX,
+                               Pmode, 1, is_aligned_label);
+
+      temp = gen_reg_rtx (Pmode);
+      temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
+                           GEN_INT (15), temp, 1, OPTAB_DIRECT);
+      gcc_assert (REG_P (temp));
+      highest_index_to_load_reg =
+        expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
+                      highest_index_to_load_reg, 1, OPTAB_DIRECT);
+      gcc_assert (REG_P (highest_index_to_load_reg));
+      emit_insn (gen_vllv16qi (str_reg,
+                   convert_to_mode (SImode, highest_index_to_load_reg, 1),
+                   gen_rtx_MEM (BLKmode, str_addr_base_reg)));
+
+      into_loop_label = gen_label_rtx ();
+      s390_emit_jump (into_loop_label, NULL_RTX);
+      emit_barrier ();
+    }
+
+  emit_label (is_aligned_label);
+  LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;
+
+  /* Reaching this point we are only performing 16 bytes aligned
+     loads.  */
+  emit_move_insn (highest_index_to_load_reg, GEN_INT (15));
+
+  emit_label (loop_start_label);
+  LABEL_NUSES (loop_start_label) = 1;
+
+  /* Load 16 bytes of the string into VR.  */
+  emit_move_insn (str_reg,
+                  gen_rtx_MEM (V16QImode,
+                               gen_rtx_PLUS (Pmode, str_idx_reg,
+                                             str_addr_base_reg)));
+  if (into_loop_label != NULL_RTX)
+    {
+      emit_label (into_loop_label);
+      LABEL_NUSES (into_loop_label) = 1;
+    }
+
+  /* Increment string index by 16 bytes.  */
+  expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
+                str_idx_reg, 1, OPTAB_DIRECT);
+
+  emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
+                                  GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
+
+  add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
+                    REG_BR_PROB, very_likely);
+  emit_insn (gen_vec_extractv16qi (len, result_reg, GEN_INT (7)));
+
+  /* If the string pointer wasn't aligned we have loaded less than 16
+     bytes and the remaining bytes got filled with zeros (by vll).
+     Now we have to check whether the resulting index lies within the
+     bytes actually part of the string.  */
+
+  cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
+                            highest_index_to_load_reg);
+  s390_load_address (highest_index_to_load_reg,
+                     gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
+                                   const1_rtx));
+  if (TARGET_64BIT)
+    emit_insn (gen_movdicc (str_idx_reg, cond,
+                            highest_index_to_load_reg, str_idx_reg));
+  else
+    emit_insn (gen_movsicc (str_idx_reg, cond,
+                            highest_index_to_load_reg, str_idx_reg));
+
+  add_int_reg_note (s390_emit_jump (is_aligned_label, cond), REG_BR_PROB,
+                    very_unlikely);
+
+  expand_binop (Pmode, add_optab, str_idx_reg,
+                GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
+  /* FIXME: len is already zero extended - so avoid the llgcr emitted
+     here.  */
+  temp = expand_binop (Pmode, add_optab, str_idx_reg,
+                       convert_to_mode (Pmode, len, 1),
+                       target, 1, OPTAB_DIRECT);
+  if (temp != target)
+    emit_move_insn (target, temp);
+}
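The emitted control flow corresponds roughly to the following scalar model (illustrative C only; the real loop examines 16 bytes per iteration with vll/vfene instead of a byte loop):

    #include <stddef.h>

    static size_t
    strlen_blockwise (const char *s)
    {
      /* The first block only reaches up to the next 16-byte boundary
         (the vll load above); all following blocks are full.  */
      size_t first = 16 - ((size_t) s & 15);
      size_t i = 0;
      for (;;)
        {
          size_t n = (i == 0) ? first : 16;
          for (size_t j = 0; j < n; j++)
            if (s[i + j] == '\0')
              return i + j;
          i += n;
        }
    }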
 
 /* Expand conditional increment or decrement using alc/slb instructions.
    Should generate code setting DST to either SRC or SRC + INCREMENT,
@@ -4536,7 +5605,7 @@ s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
       if (!register_operand (cmp_op0, cmp_mode))
        cmp_op0 = force_reg (cmp_mode, cmp_op0);
 
-      insn = gen_rtx_SET (VOIDmode, gen_rtx_REG (cc_mode, CC_REGNUM),
+      insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
                          gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
       /* We use insn_invalid_p here to add clobbers if required.  */
       ret = insn_invalid_p (emit_insn (insn), false);
@@ -4558,7 +5627,7 @@
 
       p = rtvec_alloc (2);
       RTVEC_ELT (p, 0) =
-        gen_rtx_SET (VOIDmode, dst, op_res);
+        gen_rtx_SET (dst, op_res);
       RTVEC_ELT (p, 1) =
        gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
       emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
@@ -4608,7 +5677,7 @@
       if (!register_operand (cmp_op0, cmp_mode))
        cmp_op0 = force_reg (cmp_mode, cmp_op0);
 
-      insn = gen_rtx_SET (VOIDmode, gen_rtx_REG (cc_mode, CC_REGNUM),
+      insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
                          gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
       /* We use insn_invalid_p here to add clobbers if required.  */
       ret = insn_invalid_p (emit_insn (insn), false);
@@ -4625,7 +5694,7 @@
                                       const0_rtx));
       p = rtvec_alloc (2);
       RTVEC_ELT (p, 0) =
-        gen_rtx_SET (VOIDmode, dst, op_res);
+        gen_rtx_SET (dst, op_res);
       RTVEC_ELT (p, 1) =
        gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
       emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
@@ -4753,7 +5822,7 @@ s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
   if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
     {
       op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
-      op = gen_rtx_SET (VOIDmode, op, gen_lowpart (smode, src));
+      op = gen_rtx_SET (op, gen_lowpart (smode, src));
       clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
       return true;
@@ -4781,7 +5850,7 @@
     }
 
   op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2),
-  op = gen_rtx_SET (VOIDmode, op, src);
+  op = gen_rtx_SET (op, src);
 
   if (!TARGET_ZEC12)
     {
@@ -4808,6 +5877,302 @@ s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
                         NULL_RTX, 1, OPTAB_DIRECT);
 }
 
+/* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
+   the result in TARGET.  */
+
+void
+s390_expand_vec_compare (rtx target, enum rtx_code cond,
+                         rtx cmp_op1, rtx cmp_op2)
+{
+  machine_mode mode = GET_MODE (target);
+  bool neg_p = false, swap_p = false;
+  rtx tmp;
+
+  if (GET_MODE (cmp_op1) == V2DFmode)
+    {
+      switch (cond)
+        {
+          /* NE a != b -> !(a == b) */
+        case NE:   cond = EQ; neg_p = true;                break;
+          /* UNGT a u> b -> !(b >= a) */
+        case UNGT: cond = GE; neg_p = true; swap_p = true; break;
+          /* UNGE a u>= b -> !(b > a) */
+        case UNGE: cond = GT; neg_p = true; swap_p = true; break;
+          /* LE: a <= b -> b >= a */
+        case LE:   cond = GE;               swap_p = true; break;
+          /* UNLE: a u<= b -> !(a > b) */
+        case UNLE: cond = GT; neg_p = true;                break;
+          /* LT: a < b -> b > a */
+        case LT:   cond = GT;               swap_p = true; break;
+          /* UNLT: a u< b -> !(a >= b) */
+        case UNLT: cond = GE; neg_p = true;                break;
+        case UNEQ:
+          emit_insn (gen_vec_cmpuneqv2df (target, cmp_op1, cmp_op2));
+          return;
+        case LTGT:
+          emit_insn (gen_vec_cmpltgtv2df (target, cmp_op1, cmp_op2));
+          return;
+        case ORDERED:
+          emit_insn (gen_vec_orderedv2df (target, cmp_op1, cmp_op2));
+          return;
+        case UNORDERED:
+          emit_insn (gen_vec_unorderedv2df (target, cmp_op1, cmp_op2));
+          return;
+        default: break;
+        }
+    }
+  else
+    {
+      switch (cond)
+        {
+          /* NE: a != b -> !(a == b) */
+        case NE:  cond = EQ;  neg_p = true;                break;
+          /* GE: a >= b -> !(b > a) */
+        case GE:  cond = GT;  neg_p = true; swap_p = true; break;
+          /* GEU: a >= b -> !(b > a) */
+        case GEU: cond = GTU; neg_p = true; swap_p = true; break;
+          /* LE: a <= b -> !(a > b) */
+        case LE:  cond = GT;  neg_p = true;                break;
+          /* LEU: a <= b -> !(a > b) */
+        case LEU: cond = GTU; neg_p = true;                break;
+          /* LT: a < b -> b > a */
+        case LT:  cond = GT;                swap_p = true; break;
+          /* LTU: a < b -> b > a */
+        case LTU: cond = GTU;               swap_p = true; break;
+        default: break;
+        }
+    }
+
+  if (swap_p)
+    {
+      tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
+    }
+
+  emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (cond,
+                                                  mode,
+                                                  cmp_op1, cmp_op2)));
+  if (neg_p)
+    emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mode, target)));
+}
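The rewrites tabulated above can be observed directly with GNU C vector extensions, where element-wise compares yield -1 for true and 0 for false (an illustrative sketch):

    typedef long long v2di __attribute__ ((vector_size (16)));

    /* NE is formed as NOT (EQ) (the neg_p path), and LT as GT with
       swapped operands (the swap_p path).  */
    v2di ne_v2di (v2di a, v2di b) { return ~(a == b); }
    v2di lt_v2di (v2di a, v2di b) { return b > a; }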
+
+/* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
+   TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
+   elements in CMP1 and CMP2 fulfill the comparison.  */
+void
+s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
+                            rtx cmp1, rtx cmp2, bool all_p)
+{
+  enum rtx_code new_code = code;
+  machine_mode cmp_mode, full_cmp_mode, scratch_mode;
+  rtx tmp_reg = gen_reg_rtx (SImode);
+  bool swap_p = false;
+
+  if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT)
+    {
+      switch (code)
+        {
+        case EQ:  cmp_mode = CCVEQmode; break;
+        case NE:  cmp_mode = CCVEQmode; break;
+        case GT:  cmp_mode = CCVHmode;  break;
+        case GE:  cmp_mode = CCVHmode;  new_code = LE;  swap_p = true; break;
+        case LT:  cmp_mode = CCVHmode;  new_code = GT;  swap_p = true; break;
+        case LE:  cmp_mode = CCVHmode;  new_code = LE;  break;
+        case GTU: cmp_mode = CCVHUmode; break;
+        case GEU: cmp_mode = CCVHUmode; new_code = LEU; swap_p = true; break;
+        case LTU: cmp_mode = CCVHUmode; new_code = GTU; swap_p = true; break;
+        case LEU: cmp_mode = CCVHUmode; new_code = LEU; break;
+        default: gcc_unreachable ();
+        }
+      scratch_mode = GET_MODE (cmp1);
+    }
+  else if (GET_MODE (cmp1) == V2DFmode)
+    {
+      switch (code)
+        {
+        case EQ:   cmp_mode = CCVEQmode;  break;
+        case NE:   cmp_mode = CCVEQmode;  break;
+        case GT:   cmp_mode = CCVFHmode;  break;
+        case GE:   cmp_mode = CCVFHEmode; break;
+        case UNLE: cmp_mode = CCVFHmode;  break;
+        case UNLT: cmp_mode = CCVFHEmode; break;
+        case LT:   cmp_mode = CCVFHmode;  new_code = GT; swap_p = true; break;
+        case LE:   cmp_mode = CCVFHEmode; new_code = GE; swap_p = true; break;
+        default: gcc_unreachable ();
+        }
+      scratch_mode = V2DImode;
+    }
+  else
+    gcc_unreachable ();
+
+  if (!all_p)
+    switch (cmp_mode)
+      {
+      case CCVEQmode:  full_cmp_mode = CCVEQANYmode;  break;
+      case CCVHmode:   full_cmp_mode = CCVHANYmode;   break;
+      case CCVHUmode:  full_cmp_mode = CCVHUANYmode;  break;
+      case CCVFHmode:  full_cmp_mode = CCVFHANYmode;  break;
+      case CCVFHEmode: full_cmp_mode = CCVFHEANYmode; break;
+      default: gcc_unreachable ();
+      }
+  else
+    /* The modes without ANY match the ALL modes.  */
+    full_cmp_mode = cmp_mode;
+
+  if (swap_p)
+    {
+      rtx tmp = cmp2;
+      cmp2 = cmp1;
+      cmp1 = tmp;
+    }
+
+  emit_insn (gen_rtx_PARALLEL (VOIDmode,
+               gen_rtvec (2, gen_rtx_SET (
+                               gen_rtx_REG (cmp_mode, CC_REGNUM),
+                               gen_rtx_COMPARE (cmp_mode, cmp1, cmp2)),
+                          gen_rtx_CLOBBER (VOIDmode,
+                                           gen_rtx_SCRATCH (scratch_mode)))));
+  emit_move_insn (target, const0_rtx);
+  emit_move_insn (tmp_reg, const1_rtx);
+
+  emit_move_insn (target,
+                  gen_rtx_IF_THEN_ELSE (SImode,
+                    gen_rtx_fmt_ee (new_code, VOIDmode,
+                                    gen_rtx_REG (full_cmp_mode, CC_REGNUM),
+                                    const0_rtx),
+                    target, tmp_reg));
+}
+
+/* Generate a vector comparison expression loading either elements of
+   THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
+   and CMP_OP2.  */
+
+void
+s390_expand_vcond (rtx target, rtx then, rtx els,
+                   enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
+{
+  rtx tmp;
+  machine_mode result_mode;
+  rtx result_target;
+
+  /* We always use an integral type vector to hold the comparison
+     result.  */
+  result_mode = GET_MODE (cmp_op1) == V2DFmode ? V2DImode : GET_MODE (cmp_op1);
+  result_target = gen_reg_rtx (result_mode);
+
+  /* Alternatively this could be done by reload by lowering the cmp*
+     predicates.  But it appears to be better for scheduling etc. to
+     have that in early.  */
+  if (!REG_P (cmp_op1))
+    cmp_op1 = force_reg (GET_MODE (target), cmp_op1);
+
+  if (!REG_P (cmp_op2))
+    cmp_op2 = force_reg (GET_MODE (target), cmp_op2);
+
+  s390_expand_vec_compare (result_target, cond,
+                           cmp_op1, cmp_op2);
+
+  /* If the results are supposed to be either -1 or 0 we are done
+     since this is what our compare instructions generate anyway.  */
+  if (constm1_operand (then, GET_MODE (then))
+      && const0_operand (els, GET_MODE (els)))
+    {
+      emit_move_insn (target, gen_rtx_SUBREG (GET_MODE (target),
+                                              result_target, 0));
+      return;
+    }
+
+  /* Otherwise we will do a vsel afterwards.  */
+  /* This gets triggered e.g.
+     with gcc.c-torture/compile/pr53410-1.c */
+  if (!REG_P (then))
+    then = force_reg (GET_MODE (target), then);
+
+  if (!REG_P (els))
+    els = force_reg (GET_MODE (target), els);
+
+  tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
+                        result_target,
+                        CONST0_RTX (result_mode));
+
+  /* We compared the result against zero above so we have to swap then
+     and els here.  */
+  tmp = gen_rtx_IF_THEN_ELSE (GET_MODE (target), tmp, els, then);
+
+  gcc_assert (GET_MODE (target) == GET_MODE (then));
+  emit_insn (gen_rtx_SET (target, tmp));
+}
+
+/* Emit the RTX necessary to initialize the vector TARGET with values
+   in VALS.  */
+void
+s390_expand_vec_init (rtx target, rtx vals)
+{
+  machine_mode mode = GET_MODE (target);
+  machine_mode inner_mode = GET_MODE_INNER (mode);
+  int n_elts = GET_MODE_NUNITS (mode);
+  bool all_same = true, all_regs = true, all_const_int = true;
+  rtx x;
+  int i;
+
+  for (i = 0; i < n_elts; ++i)
+    {
+      x = XVECEXP (vals, 0, i);
+
+      if (!CONST_INT_P (x))
+        all_const_int = false;
+
+      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
+        all_same = false;
+
+      if (!REG_P (x))
+        all_regs = false;
+    }
+
+  /* Use vector gen mask or vector gen byte mask if possible.  */
+  if (all_same && all_const_int
+      && (XVECEXP (vals, 0, 0) == const0_rtx
+          || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
+                                               NULL, NULL)
+          || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
+    {
+      emit_insn (gen_rtx_SET (target,
+                              gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))));
+      return;
+    }
+
+  if (all_same)
+    {
+      emit_insn (gen_rtx_SET (target,
+                              gen_rtx_VEC_DUPLICATE (mode,
+                                                     XVECEXP (vals, 0, 0))));
+      return;
+    }
+
+  if (all_regs && REG_P (target) && n_elts == 2 && inner_mode == DImode)
+    {
+      /* Use vector load pair.  */
+      emit_insn (gen_rtx_SET (target,
+                              gen_rtx_VEC_CONCAT (mode,
+                                                  XVECEXP (vals, 0, 0),
+                                                  XVECEXP (vals, 0, 1))));
+      return;
+    }
+
+  /* We are about to set the vector elements one by one.  Zero out the
+     full register first in order to help the data flow framework to
+     detect it as full VR set.  */
+  emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
+
+  /* Unfortunately the vec_init expander is not allowed to fail.  So
+     we have to implement the fallback ourselves.  */
+  for (i = 0; i < n_elts; i++)
+    emit_insn (gen_rtx_SET (target,
+                            gen_rtx_UNSPEC (mode,
+                                            gen_rtvec (3, XVECEXP (vals, 0, i),
+                                                       GEN_INT (i), target),
+                                            UNSPEC_VEC_SET)));
+}
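The classification above maps directly onto GNU C vector initializers (illustrative):

    typedef long long v2di __attribute__ ((vector_size (16)));

    /* all_same: a duplicated element becomes a vec_duplicate.  */
    v2di splat (long long x) { return (v2di) { x, x }; }

    /* all_regs with two DImode elements: a vector load pair
       (vec_concat), the n_elts == 2 case above.  */
    v2di pair (long long a, long long b) { return (v2di) { a, b }; }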
*/ static const char * s390_mangle_type (const_tree type) { + type = TYPE_MAIN_VARIANT (type); + + if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE + && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE) + return NULL; + + if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc"; + if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools"; + if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli"; + if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll"; + if (TYPE_MAIN_VARIANT (type) == long_double_type_node && TARGET_LONG_DOUBLE_128) return "g"; @@ -5429,24 +6819,26 @@ print_operand_address (FILE *file, rtx addr) 'J': print tls_load/tls_gdcall/tls_ldcall suffix 'M': print the second word of a TImode operand. 'N': print the second word of a DImode operand. - 'O': print only the displacement of a memory reference. - 'R': print only the base register of a memory reference. + 'O': print only the displacement of a memory reference or address. + 'R': print only the base register of a memory reference or address. 'S': print S-type memory reference (base+displacement). 'Y': print shift count operand. 'b': print integer X as if it's an unsigned byte. 'c': print integer X as if it's an signed byte. - 'e': "end" of DImode contiguous bitmask X. - 'f': "end" of SImode contiguous bitmask X. + 'e': "end" contiguous bitmask X in either DImode or vector inner mode. + 'f': "end" contiguous bitmask X in SImode. 'h': print integer X as if it's a signed halfword. 'i': print the first nonzero HImode part of X. 'j': print the first HImode part unequal to -1 of X. 'k': print the first nonzero SImode part of X. 'm': print the first SImode part unequal to -1 of X. 'o': print integer X as if it's an unsigned 32bit word. - 's': "start" of DImode contiguous bitmask X. - 't': "start" of SImode contiguous bitmask X. + 's': "start" of contiguous bitmask X in either DImode or vector inner mode. + 't': CONST_INT: "start" of contiguous bitmask X in SImode. + CONST_VECTOR: Generate a bitmask for vgbm instruction. 'x': print integer X as if it's an unsigned halfword. + 'v': print register number as vector register (v1 instead of f1). */ void @@ -5505,14 +6897,7 @@ print_operand (FILE *file, rtx x, int code) struct s390_address ad; int ret; - if (!MEM_P (x)) - { - output_operand_lossage ("memory reference expected for " - "'O' output modifier"); - return; - } - - ret = s390_decompose_address (XEXP (x, 0), &ad); + ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad); if (!ret || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base))) @@ -5534,14 +6919,7 @@ print_operand (FILE *file, rtx x, int code) struct s390_address ad; int ret; - if (!MEM_P (x)) - { - output_operand_lossage ("memory reference expected for " - "'R' output modifier"); - return; - } - - ret = s390_decompose_address (XEXP (x, 0), &ad); + ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad); if (!ret || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base))) @@ -5619,7 +6997,17 @@ print_operand (FILE *file, rtx x, int code) switch (GET_CODE (x)) { case REG: - fprintf (file, "%s", reg_names[REGNO (x)]); + /* Print FP regs as fx instead of vx when they are accessed + through non-vector mode. 
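   For the 'b' and 'c' modifiers documented in the table above, the
   effect on an integer operand is ordinary C narrowing.  A tiny
   self-contained illustration (editorial, not patch code):

     #include <stdio.h>

     int
     main (void)
     {
       int x = 0x1234ff80;
       // 'b': low byte, zero-extended; 'c': low byte, sign-extended.
       printf ("b: %u\n", (unsigned char) x);   // 128
       printf ("c: %d\n", (signed char) x);     // -128
       return 0;
     }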
*/ + if (code == 'v' + || VECTOR_NOFP_REG_P (x) + || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x))) + || (VECTOR_REG_P (x) + && (GET_MODE_SIZE (GET_MODE (x)) / + s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8)) + fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2); + else + fprintf (file, "%s", reg_names[REGNO (x)]); break; case MEM: @@ -5706,6 +7094,39 @@ print_operand (FILE *file, rtx x, int code) code); } break; + case CONST_VECTOR: + switch (code) + { + case 'e': + case 's': + { + int start, stop, inner_len; + bool ok; + + inner_len = GET_MODE_UNIT_BITSIZE (GET_MODE (x)); + ok = s390_contiguous_bitmask_vector_p (x, &start, &stop); + gcc_assert (ok); + if (code == 's' || code == 't') + ival = inner_len - stop - 1; + else + ival = inner_len - start - 1; + fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival); + } + break; + case 't': + { + unsigned mask; + bool ok = s390_bytemask_vector_p (x, &mask); + gcc_assert (ok); + fprintf (file, "%u", mask); + } + break; + + default: + output_operand_lossage ("invalid constant vector for output " + "modifier '%c'", code); + } + break; default: if (code == 0) @@ -5853,7 +7274,8 @@ s390_adjust_priority (rtx_insn *insn, int priority) && s390_tune != PROCESSOR_2094_Z9_109 && s390_tune != PROCESSOR_2097_Z10 && s390_tune != PROCESSOR_2817_Z196 - && s390_tune != PROCESSOR_2827_ZEC12) + && s390_tune != PROCESSOR_2827_ZEC12 + && s390_tune != PROCESSOR_2964_Z13) return priority; switch (s390_safe_attr_type (insn)) @@ -5885,8 +7307,12 @@ s390_issue_rate (void) case PROCESSOR_2817_Z196: return 3; case PROCESSOR_2097_Z10: - case PROCESSOR_2827_ZEC12: return 2; + /* Starting with EC12 we use the sched_reorder hook to take care + of instruction dispatch constraints. The algorithm only + picks the best instruction and assumes only a single + instruction gets issued per cycle. */ + case PROCESSOR_2827_ZEC12: default: return 1; } @@ -6057,7 +7483,8 @@ s390_split_branches (void) { new_literal = 1; rtx mem = force_const_mem (Pmode, *label); - rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (Pmode, temp_reg, mem), insn); + rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, mem), + insn); INSN_ADDRESSES_NEW (set_insn, -1); annotate_constant_pool_refs (&PATTERN (set_insn)); @@ -6070,7 +7497,8 @@ s390_split_branches (void) UNSPEC_LTREL_OFFSET); target = gen_rtx_CONST (Pmode, target); target = force_const_mem (Pmode, target); - rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (Pmode, temp_reg, target), insn); + rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, target), + insn); INSN_ADDRESSES_NEW (set_insn, -1); annotate_constant_pool_refs (&PATTERN (set_insn)); @@ -6258,14 +7686,19 @@ replace_ltrel_base (rtx *x) /* We keep a list of constants which we have to add to internal constant tables in the middle of large functions. 
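   The CONST_VECTOR handling for 's'/'e' above boils down to locating
   the first and last set bit of a contiguous run, numbered from the
   MSB.  A standalone sketch of that computation; the helper name and
   the exact bit-numbering convention are assumptions here (the real
   work is done by s390_contiguous_bitmask_vector_p):

     #include <stdio.h>

     static int
     contiguous_bitmask (unsigned long long v, int bits,
                         int *start, int *end)
     {
       if (v == 0)
         return 0;
       int lsb = __builtin_ctzll (v);
       int msb = 63 - __builtin_clzll (v);
       int len = msb - lsb + 1;
       unsigned long long run
         = (len == 64) ? ~0ULL : ((1ULL << len) - 1) << lsb;
       if (v != run)
         return 0;                 // set bits are not one block
       *start = bits - 1 - msb;    // s390 counts bit 0 at the MSB
       *end = bits - 1 - lsb;
       return 1;
     }

     int
     main (void)
     {
       int s, e;
       if (contiguous_bitmask (0x00ffff00ULL, 32, &s, &e))
         printf ("start=%d end=%d\n", s, e);   // start=8 end=23
       return 0;
     }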
*/ -#define NR_C_MODES 11 +#define NR_C_MODES 31 machine_mode constant_modes[NR_C_MODES] = { TFmode, TImode, TDmode, + V16QImode, V8HImode, V4SImode, V2DImode, V4SFmode, V2DFmode, V1TFmode, DFmode, DImode, DDmode, + V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode, SFmode, SImode, SDmode, + V4QImode, V2HImode, V1SImode, V1SFmode, HImode, - QImode + V2QImode, V1HImode, + QImode, + V1QImode }; struct constant @@ -7280,6 +8713,23 @@ s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align) mark_symbol_refs_as_used (exp); break; + case MODE_VECTOR_INT: + case MODE_VECTOR_FLOAT: + { + int i; + machine_mode inner_mode; + gcc_assert (GET_CODE (exp) == CONST_VECTOR); + + inner_mode = GET_MODE_INNER (GET_MODE (exp)); + for (i = 0; i < XVECLEN (exp, 0); i++) + s390_output_pool_entry (XVECEXP (exp, 0, i), + inner_mode, + i == 0 + ? align + : GET_MODE_BITSIZE (inner_mode)); + } + break; + default: gcc_unreachable (); } @@ -7894,7 +9344,10 @@ s390_init_frame_layout (void) HOST_WIDE_INT frame_size; int base_used; - gcc_assert (!reload_completed); + /* After LRA the frame layout is supposed to be read-only and should + not be re-computed. */ + if (reload_completed) + return; /* On S/390 machines, we may need to perform branch splitting, which will require both base and return address register. We have no @@ -8091,9 +9544,25 @@ s390_optimize_nonescaping_tx (void) bool s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode) { + if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno)) + return false; + switch (REGNO_REG_CLASS (regno)) { + case VEC_REGS: + return ((GET_MODE_CLASS (mode) == MODE_INT + && s390_class_max_nregs (VEC_REGS, mode) == 1) + || mode == DFmode + || s390_vector_mode_supported_p (mode)); + break; case FP_REGS: + if (TARGET_VX + && ((GET_MODE_CLASS (mode) == MODE_INT + && s390_class_max_nregs (FP_REGS, mode) == 1) + || mode == DFmode + || s390_vector_mode_supported_p (mode))) + return true; + if (REGNO_PAIR_OK (regno, mode)) { if (mode == SImode || mode == DImode) @@ -8180,19 +9649,86 @@ s390_hard_regno_scratch_ok (unsigned int regno) int s390_class_max_nregs (enum reg_class rclass, machine_mode mode) { + int reg_size; + bool reg_pair_required_p = false; + switch (rclass) { case FP_REGS: + case VEC_REGS: + reg_size = TARGET_VX ? 16 : 8; + + /* TF and TD modes would fit into a VR but we put them into a + register pair since we do not have 128bit FP instructions on + full VRs. */ + if (TARGET_VX + && SCALAR_FLOAT_MODE_P (mode) + && GET_MODE_SIZE (mode) >= 16) + reg_pair_required_p = true; + + /* Even if complex types would fit into a single FPR/VR we force + them into a register pair to deal with the parts more easily. + (FIXME: What about complex ints?) */ if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT) - return 2 * ((GET_MODE_SIZE (mode) / 2 + 8 - 1) / 8); - else - return (GET_MODE_SIZE (mode) + 8 - 1) / 8; + reg_pair_required_p = true; + break; case ACCESS_REGS: - return (GET_MODE_SIZE (mode) + 4 - 1) / 4; + reg_size = 4; + break; default: + reg_size = UNITS_PER_WORD; break; } - return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + + if (reg_pair_required_p) + return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size); + + return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size; +} + +/* Return TRUE if changing mode from FROM to TO should not be allowed + for register class CLASS. 
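   The register-count arithmetic in s390_class_max_nregs above is easy
   to sanity-check in isolation.  A small standalone model with the
   logic copied from the hunk (parameter names invented):

     #include <stdio.h>

     // With pair_p, each half of the mode is rounded up to a full
     // register separately, exactly as in the hunk above.
     static int
     class_max_nregs (int mode_size, int reg_size, int pair_p)
     {
       if (pair_p)
         return 2 * ((mode_size / 2 + reg_size - 1) / reg_size);
       return (mode_size + reg_size - 1) / reg_size;
     }

     int
     main (void)
     {
       // TFmode (16 bytes) with TARGET_VX: a pair of VRs, not one.
       printf ("%d\n", class_max_nregs (16, 16, 1));   // 2
       // V4SImode (16 bytes): a single VR.
       printf ("%d\n", class_max_nregs (16, 16, 0));   // 1
       return 0;
     }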
*/ + +int +s390_cannot_change_mode_class (machine_mode from_mode, + machine_mode to_mode, + enum reg_class rclass) +{ + machine_mode small_mode; + machine_mode big_mode; + + if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode)) + return 0; + + if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode)) + { + small_mode = from_mode; + big_mode = to_mode; + } + else + { + small_mode = to_mode; + big_mode = from_mode; + } + + /* Values residing in VRs are little-endian style. All modes are + placed left-aligned in an VR. This means that we cannot allow + switching between modes with differing sizes. Also if the vector + facility is available we still place TFmode values in VR register + pairs, since the only instructions we have operating on TFmodes + only deal with register pairs. Therefore we have to allow DFmode + subregs of TFmodes to enable the TFmode splitters. */ + if (reg_classes_intersect_p (VEC_REGS, rclass) + && (GET_MODE_SIZE (small_mode) < 8 + || s390_class_max_nregs (VEC_REGS, big_mode) == 1)) + return 1; + + /* Likewise for access registers, since they have only half the + word size on 64-bit. */ + if (reg_classes_intersect_p (ACCESS_REGS, rclass)) + return 1; + + return 0; } /* Return true if we use LRA instead of reload pass. */ @@ -8844,7 +10380,7 @@ s390_emit_prologue (void) if (DISP_IN_RANGE (INTVAL (frame_off))) { - insn = gen_rtx_SET (VOIDmode, stack_pointer_rtx, + insn = gen_rtx_SET (stack_pointer_rtx, gen_rtx_PLUS (Pmode, stack_pointer_rtx, frame_off)); insn = emit_insn (insn); @@ -8861,7 +10397,7 @@ s390_emit_prologue (void) RTX_FRAME_RELATED_P (insn) = 1; real_frame_off = GEN_INT (-cfun_frame_layout.frame_size); add_reg_note (insn, REG_FRAME_RELATED_EXPR, - gen_rtx_SET (VOIDmode, stack_pointer_rtx, + gen_rtx_SET (stack_pointer_rtx, gen_rtx_PLUS (Pmode, stack_pointer_rtx, real_frame_off))); @@ -8915,8 +10451,7 @@ s390_emit_prologue (void) offset += 8; RTX_FRAME_RELATED_P (insn) = 1; add_reg_note (insn, REG_FRAME_RELATED_EXPR, - gen_rtx_SET (VOIDmode, - gen_rtx_MEM (DFmode, addr), + gen_rtx_SET (gen_rtx_MEM (DFmode, addr), gen_rtx_REG (DFmode, i))); } } @@ -9006,11 +10541,11 @@ s390_emit_epilogue (bool sibcall) offset = area_bottom < 0 ? -area_bottom : 0; frame_off = GEN_INT (cfun_frame_layout.frame_size - offset); - cfa = gen_rtx_SET (VOIDmode, frame_pointer, + cfa = gen_rtx_SET (frame_pointer, gen_rtx_PLUS (Pmode, frame_pointer, frame_off)); if (DISP_IN_RANGE (INTVAL (frame_off))) { - insn = gen_rtx_SET (VOIDmode, frame_pointer, + insn = gen_rtx_SET (frame_pointer, gen_rtx_PLUS (Pmode, frame_pointer, frame_off)); insn = emit_insn (insn); } @@ -9224,6 +10759,23 @@ s390_can_use_return_insn (void) return cfun_frame_layout.frame_size == 0; } +/* The VX ABI differs for vararg functions. Therefore we need the + prototype of the callee to be available when passing vector type + values. */ +static const char * +s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val) +{ + return ((TARGET_VX_ABI + && typelist == 0 + && VECTOR_TYPE_P (TREE_TYPE (val)) + && (funcdecl == NULL_TREE + || (TREE_CODE (funcdecl) == FUNCTION_DECL + && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD))) + ? N_("Vector argument passed to unprototyped function") + : NULL); +} + + /* Return the size in bytes of a function argument of type TYPE and/or mode MODE. At least one of TYPE or MODE must be specified. 
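   The record-unwrapping rule implemented just below in
   s390_function_arg_vector can be seen at the source level: a struct
   whose only field is a vector has exactly the member's size and is
   passed like the member, while any extra (padding) bytes disqualify
   it.  A hedged sketch, type names invented:

     #include <stdio.h>

     typedef int v4si __attribute__ ((vector_size (16)));

     struct wrapped { v4si x; };           // sizeof == 16: passed
                                           // like a plain v4si
     struct padded  { v4si x; char tag; }; // sizeof == 32: extra
                                           // bytes, not a vector arg

     int
     main (void)
     {
       printf ("%zu %zu\n",
               sizeof (struct wrapped), sizeof (struct padded));
       // prints: 16 32
       return 0;
     }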
*/ @@ -9243,13 +10795,61 @@ s390_function_arg_size (machine_mode mode, const_tree type) } /* Return true if a function argument of type TYPE and mode MODE + is to be passed in a vector register, if available. */ + +bool +s390_function_arg_vector (machine_mode mode, const_tree type) +{ + if (!TARGET_VX_ABI) + return false; + + if (s390_function_arg_size (mode, type) > 16) + return false; + + /* No type info available for some library calls ... */ + if (!type) + return VECTOR_MODE_P (mode); + + /* The ABI says that record types with a single member are treated + just like that member would be. */ + while (TREE_CODE (type) == RECORD_TYPE) + { + tree field, single = NULL_TREE; + + for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) + { + if (TREE_CODE (field) != FIELD_DECL) + continue; + + if (single == NULL_TREE) + single = TREE_TYPE (field); + else + return false; + } + + if (single == NULL_TREE) + return false; + else + { + /* If the field declaration adds extra byte due to + e.g. padding this is not accepted as vector type. */ + if (int_size_in_bytes (single) <= 0 + || int_size_in_bytes (single) != int_size_in_bytes (type)) + return false; + type = single; + } + } + + return VECTOR_TYPE_P (type); +} + +/* Return true if a function argument of type TYPE and mode MODE is to be passed in a floating-point register, if available. */ static bool s390_function_arg_float (machine_mode mode, const_tree type) { - int size = s390_function_arg_size (mode, type); - if (size > 8) + if (s390_function_arg_size (mode, type) > 8) return false; /* Soft-float changes the ABI: no floating-point registers are used. */ @@ -9332,20 +10932,24 @@ s390_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED, bool named ATTRIBUTE_UNUSED) { int size = s390_function_arg_size (mode, type); + + if (s390_function_arg_vector (mode, type)) + return false; + if (size > 8) return true; if (type) { if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0) - return 1; + return true; if (TREE_CODE (type) == COMPLEX_TYPE || TREE_CODE (type) == VECTOR_TYPE) - return 1; + return true; } - return 0; + return false; } /* Update the data in CUM to advance over an argument of mode MODE and @@ -9356,11 +10960,21 @@ s390_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED, static void s390_function_arg_advance (cumulative_args_t cum_v, machine_mode mode, - const_tree type, bool named ATTRIBUTE_UNUSED) + const_tree type, bool named) { CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); - if (s390_function_arg_float (mode, type)) + if (s390_function_arg_vector (mode, type)) + { + /* We are called for unnamed vector stdarg arguments which are + passed on the stack. In this case this hook does not have to + do anything since stack arguments are tracked by common + code. */ + if (!named) + return; + cum->vrs += 1; + } + else if (s390_function_arg_float (mode, type)) { cum->fprs += 1; } @@ -9394,14 +11008,26 @@ s390_function_arg_advance (cumulative_args_t cum_v, machine_mode mode, static rtx s390_function_arg (cumulative_args_t cum_v, machine_mode mode, - const_tree type, bool named ATTRIBUTE_UNUSED) + const_tree type, bool named) { CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); - if (s390_function_arg_float (mode, type)) + if (!named) + s390_check_type_for_vector_abi (type, true, false); + + if (s390_function_arg_vector (mode, type)) + { + /* Vector arguments being part of the ellipsis are passed on the + stack. 
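   In source-level terms (an editorial sketch, not patch code): named
   vector arguments travel in vector registers, up to VEC_ARG_NUM_REG
   of them, while anything matched by the ellipsis goes to the stack,
   which is why the code below returns NULL_RTX for unnamed vectors.

     #include <stdarg.h>

     typedef int v4si __attribute__ ((vector_size (16)));

     void
     take (v4si named, ...)       // 'named' arrives in a VR
     {
       va_list ap;
       va_start (ap, named);
       v4si unnamed = va_arg (ap, v4si);  // fetched from the stack
       (void) unnamed;
       va_end (ap);
     }

     int
     main (void)
     {
       v4si a = { 1, 2, 3, 4 };
       take (a, a);
       return 0;
     }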
*/ + if (!named || (cum->vrs + 1 > VEC_ARG_NUM_REG)) + return NULL_RTX; + + return gen_rtx_REG (mode, cum->vrs + FIRST_VEC_ARG_REGNO); + } + else if (s390_function_arg_float (mode, type)) { if (cum->fprs + 1 > FP_ARG_NUM_REG) - return 0; + return NULL_RTX; else return gen_rtx_REG (mode, cum->fprs + 16); } @@ -9411,7 +11037,7 @@ s390_function_arg (cumulative_args_t cum_v, machine_mode mode, int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG; if (cum->gprs + n_gprs > GP_ARG_NUM_REG) - return 0; + return NULL_RTX; else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG) return gen_rtx_REG (mode, cum->gprs + 2); else if (n_gprs == 2) @@ -9454,11 +11080,17 @@ s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED) || TREE_CODE (type) == REAL_TYPE) return int_size_in_bytes (type) > 8; + /* vector types which fit into a VR. */ + if (TARGET_VX_ABI + && VECTOR_TYPE_P (type) + && int_size_in_bytes (type) <= 16) + return false; + /* Aggregates and similar constructs are always returned in memory. */ if (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE - || TREE_CODE (type) == VECTOR_TYPE) + || VECTOR_TYPE_P (type)) return true; /* ??? We get called on all sorts of random stuff from @@ -9496,6 +11128,12 @@ s390_function_and_libcall_value (machine_mode mode, const_tree fntype_or_decl, bool outgoing ATTRIBUTE_UNUSED) { + /* For vector return types it is important to use the RET_TYPE + argument whenever available since the middle-end might have + changed the mode to a scalar mode. */ + bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type)) + || (!ret_type && VECTOR_MODE_P (mode))); + /* For normal functions perform the promotion as promote_function_mode would do. */ if (ret_type) @@ -9505,10 +11143,14 @@ s390_function_and_libcall_value (machine_mode mode, fntype_or_decl, 1); } - gcc_assert (GET_MODE_CLASS (mode) == MODE_INT || SCALAR_FLOAT_MODE_P (mode)); - gcc_assert (GET_MODE_SIZE (mode) <= 8); + gcc_assert (GET_MODE_CLASS (mode) == MODE_INT + || SCALAR_FLOAT_MODE_P (mode) + || (TARGET_VX_ABI && vector_ret_type_p)); + gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8)); - if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode)) + if (TARGET_VX_ABI && vector_ret_type_p) + return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO); + else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode)) return gen_rtx_REG (mode, 16); else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG || UNITS_PER_LONG == UNITS_PER_WORD) @@ -9672,9 +11314,13 @@ s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED) expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); } - /* Find the overflow area. */ + /* Find the overflow area. + FIXME: This currently is too pessimistic when the vector ABI is + enabled. In that case we *always* set up the overflow area + pointer. 
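   This is also the situation behind s390_invalid_arg_for_unprototyped_fn
   earlier: without a prototype the caller cannot know whether the callee
   expects the vector ABI.  A minimal sketch of the rejected pattern
   (editorial, pre-C23 semantics where empty parentheses declare no
   prototype; names invented):

     typedef int v4si __attribute__ ((vector_size (16)));

     void callee ();        // no prototype: callee's vector ABI unknown

     void
     caller (v4si x)
     {
       callee (x);          // diagnosed under the VX ABI
     }

     void callee () {}      // defined, still without a prototype

     int
     main (void)
     {
       return 0;
     }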
*/ if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG - || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG) + || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG + || TARGET_VX_ABI) { t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx); @@ -9716,6 +11362,9 @@ s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED) ret = args.reg_save_area[args.gpr+8] else ret = *args.overflow_arg_area++; + } else if (vector value) { + ret = *args.overflow_arg_area; + args.overflow_arg_area += size / 8; } else if (float value) { if (args.fgpr < 2) ret = args.reg_save_area[args.fpr+64] @@ -9735,14 +11384,16 @@ s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, tree f_gpr, f_fpr, f_ovf, f_sav; tree gpr, fpr, ovf, sav, reg, t, u; int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg; - tree lab_false, lab_over, addr; + tree lab_false, lab_over; + tree addr = create_tmp_var (ptr_type_node, "addr"); + bool left_align_p; /* How a value < UNITS_PER_LONG is aligned within + a stack slot. */ f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node)); f_fpr = DECL_CHAIN (f_gpr); f_ovf = DECL_CHAIN (f_fpr); f_sav = DECL_CHAIN (f_ovf); - valist = build_va_arg_indirect_ref (valist); gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE); fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE); sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE); @@ -9754,6 +11405,8 @@ s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, size = int_size_in_bytes (type); + s390_check_type_for_vector_abi (type, true, false); + if (pass_by_reference (NULL, TYPE_MODE (type), type, false)) { if (TARGET_DEBUG_ARG) @@ -9774,6 +11427,23 @@ s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, sav_scale = UNITS_PER_LONG; size = UNITS_PER_LONG; max_reg = GP_ARG_NUM_REG - n_reg; + left_align_p = false; + } + else if (s390_function_arg_vector (TYPE_MODE (type), type)) + { + if (TARGET_DEBUG_ARG) + { + fprintf (stderr, "va_arg: vector type"); + debug_tree (type); + } + + indirect_p = 0; + reg = NULL_TREE; + n_reg = 0; + sav_ofs = 0; + sav_scale = 8; + max_reg = 0; + left_align_p = true; } else if (s390_function_arg_float (TYPE_MODE (type), type)) { @@ -9790,6 +11460,7 @@ s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, sav_ofs = 16 * UNITS_PER_LONG; sav_scale = 8; max_reg = FP_ARG_NUM_REG - n_reg; + left_align_p = false; } else { @@ -9814,53 +11485,74 @@ s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, sav_scale = UNITS_PER_LONG; max_reg = GP_ARG_NUM_REG - n_reg; + left_align_p = false; } /* Pull the value out of the saved registers ... 
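   The overflow-area branch below applies two alignment rules worth
   spelling out: scalars smaller than a slot sit right-aligned in their
   8-byte slot, while left_align_p values start at the slot base and
   the pointer still advances by a full slot.  A standalone model of
   that address arithmetic (function name invented):

     #include <stdio.h>

     #define UNITS_PER_LONG 8

     static long
     arg_offset (long ovf, int size, int left_align_p)
     {
       if (size < UNITS_PER_LONG && !left_align_p)
         return ovf + (UNITS_PER_LONG - size);
       return ovf;
     }

     int
     main (void)
     {
       printf ("%ld\n", arg_offset (0, 4, 0));   // 4: right-aligned
       printf ("%ld\n", arg_offset (0, 4, 1));   // 0: left-aligned
       return 0;
     }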
*/ - lab_false = create_artificial_label (UNKNOWN_LOCATION); - lab_over = create_artificial_label (UNKNOWN_LOCATION); - addr = create_tmp_var (ptr_type_node, "addr"); + if (reg != NULL_TREE) + { + /* + if (reg > ((typeof (reg))max_reg)) + goto lab_false; - t = fold_convert (TREE_TYPE (reg), size_int (max_reg)); - t = build2 (GT_EXPR, boolean_type_node, reg, t); - u = build1 (GOTO_EXPR, void_type_node, lab_false); - t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE); - gimplify_and_add (t, pre_p); + addr = sav + sav_ofs + reg * save_scale; - t = fold_build_pointer_plus_hwi (sav, sav_ofs); - u = build2 (MULT_EXPR, TREE_TYPE (reg), reg, - fold_convert (TREE_TYPE (reg), size_int (sav_scale))); - t = fold_build_pointer_plus (t, u); + goto lab_over; - gimplify_assign (addr, t, pre_p); + lab_false: + */ + + lab_false = create_artificial_label (UNKNOWN_LOCATION); + lab_over = create_artificial_label (UNKNOWN_LOCATION); + + t = fold_convert (TREE_TYPE (reg), size_int (max_reg)); + t = build2 (GT_EXPR, boolean_type_node, reg, t); + u = build1 (GOTO_EXPR, void_type_node, lab_false); + t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE); + gimplify_and_add (t, pre_p); + + t = fold_build_pointer_plus_hwi (sav, sav_ofs); + u = build2 (MULT_EXPR, TREE_TYPE (reg), reg, + fold_convert (TREE_TYPE (reg), size_int (sav_scale))); + t = fold_build_pointer_plus (t, u); - gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over)); + gimplify_assign (addr, t, pre_p); - gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false)); + gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over)); + gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false)); + } /* ... Otherwise out of the overflow area. */ t = ovf; - if (size < UNITS_PER_LONG) + if (size < UNITS_PER_LONG && !left_align_p) t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size); gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue); gimplify_assign (addr, t, pre_p); - t = fold_build_pointer_plus_hwi (t, size); + if (size < UNITS_PER_LONG && left_align_p) + t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG); + else + t = fold_build_pointer_plus_hwi (t, size); + gimplify_assign (ovf, t, pre_p); - gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over)); + if (reg != NULL_TREE) + gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over)); /* Increment register save count. */ - u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg, - fold_convert (TREE_TYPE (reg), size_int (n_reg))); - gimplify_and_add (u, pre_p); + if (n_reg > 0) + { + u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg, + fold_convert (TREE_TYPE (reg), size_int (n_reg))); + gimplify_and_add (u, pre_p); + } if (indirect_p) { @@ -9943,241 +11635,6 @@ s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p) } } -/* Builtins. 
*/ - -enum s390_builtin -{ - S390_BUILTIN_TBEGIN, - S390_BUILTIN_TBEGIN_NOFLOAT, - S390_BUILTIN_TBEGIN_RETRY, - S390_BUILTIN_TBEGIN_RETRY_NOFLOAT, - S390_BUILTIN_TBEGINC, - S390_BUILTIN_TEND, - S390_BUILTIN_TABORT, - S390_BUILTIN_NON_TX_STORE, - S390_BUILTIN_TX_NESTING_DEPTH, - S390_BUILTIN_TX_ASSIST, - - S390_BUILTIN_S390_SFPC, - S390_BUILTIN_S390_EFPC, - - S390_BUILTIN_MAX -}; - -tree s390_builtin_decls[S390_BUILTIN_MAX]; - -static enum insn_code const code_for_builtin[S390_BUILTIN_MAX] = { - CODE_FOR_tbegin, - CODE_FOR_tbegin_nofloat, - CODE_FOR_tbegin_retry, - CODE_FOR_tbegin_retry_nofloat, - CODE_FOR_tbeginc, - CODE_FOR_tend, - CODE_FOR_tabort, - CODE_FOR_ntstg, - CODE_FOR_etnd, - CODE_FOR_tx_assist, - - CODE_FOR_s390_sfpc, - CODE_FOR_s390_efpc -}; - -static void -s390_init_builtins (void) -{ - tree ftype, uint64_type; - tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"), - NULL, NULL); - tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL); - - /* void foo (void) */ - ftype = build_function_type_list (void_type_node, NULL_TREE); - s390_builtin_decls[S390_BUILTIN_TBEGINC] = - add_builtin_function ("__builtin_tbeginc", ftype, S390_BUILTIN_TBEGINC, - BUILT_IN_MD, NULL, NULL_TREE); - - /* void foo (int) */ - ftype = build_function_type_list (void_type_node, integer_type_node, - NULL_TREE); - s390_builtin_decls[S390_BUILTIN_TABORT] = - add_builtin_function ("__builtin_tabort", ftype, - S390_BUILTIN_TABORT, BUILT_IN_MD, NULL, - noreturn_attr); - s390_builtin_decls[S390_BUILTIN_TX_ASSIST] = - add_builtin_function ("__builtin_tx_assist", ftype, - S390_BUILTIN_TX_ASSIST, BUILT_IN_MD, NULL, NULL_TREE); - - /* void foo (unsigned) */ - ftype = build_function_type_list (void_type_node, unsigned_type_node, - NULL_TREE); - s390_builtin_decls[S390_BUILTIN_S390_SFPC] = - add_builtin_function ("__builtin_s390_sfpc", ftype, - S390_BUILTIN_S390_SFPC, BUILT_IN_MD, NULL, NULL_TREE); - - /* int foo (void *) */ - ftype = build_function_type_list (integer_type_node, ptr_type_node, - NULL_TREE); - s390_builtin_decls[S390_BUILTIN_TBEGIN] = - add_builtin_function ("__builtin_tbegin", ftype, S390_BUILTIN_TBEGIN, - BUILT_IN_MD, NULL, returns_twice_attr); - s390_builtin_decls[S390_BUILTIN_TBEGIN_NOFLOAT] = - add_builtin_function ("__builtin_tbegin_nofloat", ftype, - S390_BUILTIN_TBEGIN_NOFLOAT, - BUILT_IN_MD, NULL, returns_twice_attr); - - /* int foo (void *, int) */ - ftype = build_function_type_list (integer_type_node, ptr_type_node, - integer_type_node, NULL_TREE); - s390_builtin_decls[S390_BUILTIN_TBEGIN_RETRY] = - add_builtin_function ("__builtin_tbegin_retry", ftype, - S390_BUILTIN_TBEGIN_RETRY, - BUILT_IN_MD, - NULL, returns_twice_attr); - s390_builtin_decls[S390_BUILTIN_TBEGIN_RETRY_NOFLOAT] = - add_builtin_function ("__builtin_tbegin_retry_nofloat", ftype, - S390_BUILTIN_TBEGIN_RETRY_NOFLOAT, - BUILT_IN_MD, - NULL, returns_twice_attr); - - /* int foo (void) */ - ftype = build_function_type_list (integer_type_node, NULL_TREE); - s390_builtin_decls[S390_BUILTIN_TX_NESTING_DEPTH] = - add_builtin_function ("__builtin_tx_nesting_depth", ftype, - S390_BUILTIN_TX_NESTING_DEPTH, - BUILT_IN_MD, NULL, NULL_TREE); - s390_builtin_decls[S390_BUILTIN_TEND] = - add_builtin_function ("__builtin_tend", ftype, - S390_BUILTIN_TEND, BUILT_IN_MD, NULL, NULL_TREE); - - /* unsigned foo (void) */ - ftype = build_function_type_list (unsigned_type_node, NULL_TREE); - s390_builtin_decls[S390_BUILTIN_S390_EFPC] = - add_builtin_function ("__builtin_s390_efpc", ftype, - S390_BUILTIN_S390_EFPC, 
BUILT_IN_MD, NULL, NULL_TREE); - - /* void foo (uint64_t *, uint64_t) */ - if (TARGET_64BIT) - uint64_type = long_unsigned_type_node; - else - uint64_type = long_long_unsigned_type_node; - - ftype = build_function_type_list (void_type_node, - build_pointer_type (uint64_type), - uint64_type, NULL_TREE); - s390_builtin_decls[S390_BUILTIN_NON_TX_STORE] = - add_builtin_function ("__builtin_non_tx_store", ftype, - S390_BUILTIN_NON_TX_STORE, - BUILT_IN_MD, NULL, NULL_TREE); -} - -/* Expand an expression EXP that calls a built-in function, - with result going to TARGET if that's convenient - (and in mode MODE if that's convenient). - SUBTARGET may be used as the target for computing one of EXP's operands. - IGNORE is nonzero if the value is to be ignored. */ - -static rtx -s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, - machine_mode mode ATTRIBUTE_UNUSED, - int ignore ATTRIBUTE_UNUSED) -{ -#define MAX_ARGS 2 - - tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); - unsigned int fcode = DECL_FUNCTION_CODE (fndecl); - enum insn_code icode; - rtx op[MAX_ARGS], pat; - int arity; - bool nonvoid; - tree arg; - call_expr_arg_iterator iter; - - if (fcode >= S390_BUILTIN_MAX) - internal_error ("bad builtin fcode"); - icode = code_for_builtin[fcode]; - if (icode == 0) - internal_error ("bad builtin fcode"); - - if (!TARGET_HTM && fcode <= S390_BUILTIN_TX_ASSIST) - error ("Transactional execution builtins not enabled (-mhtm)\n"); - - /* Set a flag in the machine specific cfun part in order to support - saving/restoring of FPRs. */ - if (fcode == S390_BUILTIN_TBEGIN || fcode == S390_BUILTIN_TBEGIN_RETRY) - cfun->machine->tbegin_p = true; - - nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node; - - arity = 0; - FOR_EACH_CALL_EXPR_ARG (arg, iter, exp) - { - const struct insn_operand_data *insn_op; - - if (arg == error_mark_node) - return NULL_RTX; - if (arity >= MAX_ARGS) - return NULL_RTX; - - insn_op = &insn_data[icode].operand[arity + nonvoid]; - - op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL); - - if (!(*insn_op->predicate) (op[arity], insn_op->mode)) - { - if (insn_op->predicate == memory_operand) - { - /* Don't move a NULL pointer into a register. Otherwise - we have to rely on combine being able to move it back - in order to get an immediate 0 in the instruction. */ - if (op[arity] != const0_rtx) - op[arity] = copy_to_mode_reg (Pmode, op[arity]); - op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]); - } - else - op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]); - } - - arity++; - } - - if (nonvoid) - { - machine_mode tmode = insn_data[icode].operand[0].mode; - if (!target - || GET_MODE (target) != tmode - || !(*insn_data[icode].operand[0].predicate) (target, tmode)) - target = gen_reg_rtx (tmode); - } - - switch (arity) - { - case 0: - pat = GEN_FCN (icode) (target); - break; - case 1: - if (nonvoid) - pat = GEN_FCN (icode) (target, op[0]); - else - pat = GEN_FCN (icode) (op[0]); - break; - case 2: - if (nonvoid) - pat = GEN_FCN (icode) (target, op[0], op[1]); - else - pat = GEN_FCN (icode) (op[0], op[1]); - break; - default: - gcc_unreachable (); - } - if (!pat) - return NULL_RTX; - emit_insn (pat); - - if (nonvoid) - return target; - else - return const0_rtx; -} /* Return the decl for the target specific builtin with the function code FCODE. */ @@ -10661,15 +12118,18 @@ s390_call_saved_register_used (tree call_expr) mode = TYPE_MODE (type); gcc_assert (mode); + /* We assume that in the target function all parameters are + named. 
This only has an impact on vector argument register + usage none of which is call-saved. */ if (pass_by_reference (&cum_v, mode, type, true)) { mode = Pmode; type = build_pointer_type (type); } - parm_rtx = s390_function_arg (cum, mode, type, 0); + parm_rtx = s390_function_arg (cum, mode, type, true); - s390_function_arg_advance (cum, mode, type, 0); + s390_function_arg_advance (cum, mode, type, true); if (!parm_rtx) continue; @@ -10813,7 +12273,7 @@ s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg, call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx); if (result_reg != NULL_RTX) - call = gen_rtx_SET (VOIDmode, result_reg, call); + call = gen_rtx_SET (result_reg, call); if (retaddr_reg != NULL_RTX) { @@ -10876,6 +12336,13 @@ s390_conditional_register_usage (void) for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++) call_used_regs[i] = fixed_regs[i] = 1; } + + /* Disable v16 - v31 for non-vector target. */ + if (!TARGET_VX) + { + for (i = VR16_REGNUM; i <= VR31_REGNUM; i++) + fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1; + } } /* Corresponding function to eh_return expander. */ @@ -11155,7 +12622,7 @@ s390_fix_long_loop_prediction (rtx_insn *insn) new_label = gen_label_rtx (); uncond_jump = emit_jump_insn_after ( - gen_rtx_SET (VOIDmode, pc_rtx, + gen_rtx_SET (pc_rtx, gen_rtx_LABEL_REF (VOIDmode, code_label)), insn); emit_label_after (new_label, uncond_jump); @@ -11453,7 +12920,8 @@ s390_reorg (void) /* Walk over the insns and do some >=z10 specific changes. */ if (s390_tune == PROCESSOR_2097_Z10 || s390_tune == PROCESSOR_2817_Z196 - || s390_tune == PROCESSOR_2827_ZEC12) + || s390_tune == PROCESSOR_2827_ZEC12 + || s390_tune == PROCESSOR_2964_Z13) { rtx_insn *insn; bool insn_added_p = false; @@ -11488,31 +12956,37 @@ s390_reorg (void) /* Insert NOPs for hotpatching. */ for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) - { - if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_FUNCTION_BEG) - break; - } - gcc_assert (insn); - /* Output a series of NOPs after the NOTE_INSN_FUNCTION_BEG. */ - while (hw_after > 0) + /* Emit NOPs + 1. inside the area covered by debug information to allow setting + breakpoints at the NOPs, + 2. before any insn which results in an asm instruction, + 3. before in-function labels to avoid jumping to the NOPs, for + example as part of a loop, + 4. before any barrier in case the function is completely empty + (__builtin_unreachable ()) and has neither internal labels nor + active insns. + */ + if (active_insn_p (insn) || BARRIER_P (insn) || LABEL_P (insn)) + break; + /* Output a series of NOPs before the first active insn. 
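   The loop that follows decomposes hw_after greedily: prefer 6-byte
   NOPs, then 4-byte, then 2-byte.  A standalone model of just that
   decomposition (editorial; the TARGET_CPU_ZARCH condition is elided):

     #include <stdio.h>

     int
     main (void)
     {
       int hw_after = 7;   // halfwords (2 bytes each), e.g. 14 bytes
       while (hw_after > 0)
         {
           if (hw_after >= 3)
             { puts ("nop_6_byte"); hw_after -= 3; }
           else if (hw_after >= 2)
             { puts ("nop_4_byte"); hw_after -= 2; }
           else
             { puts ("nop_2_byte"); hw_after -= 1; }
         }
       // prints: nop_6_byte nop_6_byte nop_2_byte
       return 0;
     }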
*/ + while (insn && hw_after > 0) { if (hw_after >= 3 && TARGET_CPU_ZARCH) { - insn = emit_insn_after (gen_nop_6_byte (), insn); + emit_insn_before (gen_nop_6_byte (), insn); hw_after -= 3; } else if (hw_after >= 2) { - insn = emit_insn_after (gen_nop_4_byte (), insn); + emit_insn_before (gen_nop_4_byte (), insn); hw_after -= 2; } else { - insn = emit_insn_after (gen_nop_2_byte (), insn); + emit_insn_before (gen_nop_2_byte (), insn); hw_after -= 1; } } - gcc_assert (hw_after == 0); } } @@ -11702,7 +13176,8 @@ s390_sched_reorder (FILE *file, int verbose, if (reload_completed && *nreadyp > 1) s390_z10_prevent_earlyload_conflicts (ready, nreadyp); - if (s390_tune == PROCESSOR_2827_ZEC12 + if ((s390_tune == PROCESSOR_2827_ZEC12 + || s390_tune == PROCESSOR_2964_Z13) && reload_completed && *nreadyp > 1) { @@ -11785,7 +13260,8 @@ s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more) { last_scheduled_insn = insn; - if (s390_tune == PROCESSOR_2827_ZEC12 + if ((s390_tune == PROCESSOR_2827_ZEC12 + || s390_tune == PROCESSOR_2964_Z13) && reload_completed && recog_memoized (insn) >= 0) { @@ -11865,7 +13341,8 @@ s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop) if (s390_tune != PROCESSOR_2097_Z10 && s390_tune != PROCESSOR_2817_Z196 - && s390_tune != PROCESSOR_2827_ZEC12) + && s390_tune != PROCESSOR_2827_ZEC12 + && s390_tune != PROCESSOR_2964_Z13) return nunroll; /* Count the number of memory references within the loop body. */ @@ -11981,6 +13458,8 @@ s390_option_override (void) } /* Sanity checks. */ + if (s390_arch == PROCESSOR_NATIVE || s390_tune == PROCESSOR_NATIVE) + gcc_unreachable (); if (TARGET_ZARCH && !TARGET_CPU_ZARCH) error ("z/Architecture mode not supported on %s", s390_arch_string); if (TARGET_64BIT && !TARGET_ZARCH) @@ -11996,6 +13475,22 @@ s390_option_override (void) if (!(target_flags_explicit & MASK_OPT_HTM) && TARGET_CPU_HTM && TARGET_ZARCH) target_flags |= MASK_OPT_HTM; + if (target_flags_explicit & MASK_OPT_VX) + { + if (TARGET_OPT_VX) + { + if (!TARGET_CPU_VX) + error ("hardware vector support not available on %s", + s390_arch_string); + if (TARGET_SOFT_FLOAT) + error ("hardware vector support not available with -msoft-float"); + } + } + else if (TARGET_CPU_VX) + /* Enable vector support if available and not explicitly disabled + by user. E.g. 
with -m31 -march=z13 -mzarch */ + target_flags |= MASK_OPT_VX; + if (TARGET_HARD_DFP && !TARGET_DFP) { if (target_flags_explicit & MASK_HARD_DFP) @@ -12035,6 +13530,7 @@ s390_option_override (void) s390_cost = &z196_cost; break; case PROCESSOR_2827_ZEC12: + case PROCESSOR_2964_Z13: s390_cost = &zEC12_cost; break; default: @@ -12062,7 +13558,8 @@ s390_option_override (void) if (s390_tune == PROCESSOR_2097_Z10 || s390_tune == PROCESSOR_2817_Z196 - || s390_tune == PROCESSOR_2827_ZEC12) + || s390_tune == PROCESSOR_2827_ZEC12 + || s390_tune == PROCESSOR_2964_Z13) { maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100, global_options.x_param_values, @@ -12155,8 +13652,8 @@ s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size, static void s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) { - tree sfpc = s390_builtin_decls[S390_BUILTIN_S390_SFPC]; - tree efpc = s390_builtin_decls[S390_BUILTIN_S390_EFPC]; + tree sfpc = s390_builtin_decls[S390_BUILTIN_s390_sfpc]; + tree efpc = s390_builtin_decls[S390_BUILTIN_s390_efpc]; tree call_efpc = build_call_expr (efpc, 0); tree fenv_var = create_tmp_var (unsigned_type_node); @@ -12224,6 +13721,140 @@ s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) #undef FPC_DXC_SHIFT } +/* Return the vector mode to be used for inner mode MODE when doing + vectorization. */ +static machine_mode +s390_preferred_simd_mode (machine_mode mode) +{ + if (TARGET_VX) + switch (mode) + { + case DFmode: + return V2DFmode; + case DImode: + return V2DImode; + case SImode: + return V4SImode; + case HImode: + return V8HImode; + case QImode: + return V16QImode; + default:; + } + return word_mode; +} + +/* Our hardware does not require vectors to be strictly aligned. */ +static bool +s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED, + const_tree type ATTRIBUTE_UNUSED, + int misalignment ATTRIBUTE_UNUSED, + bool is_packed ATTRIBUTE_UNUSED) +{ + if (TARGET_VX) + return true; + + return default_builtin_support_vector_misalignment (mode, type, misalignment, + is_packed); +} + +/* The vector ABI requires vector types to be aligned on an 8 byte + boundary (our stack alignment). However, we allow this to be + overriden by the user, while this definitely breaks the ABI. */ +static HOST_WIDE_INT +s390_vector_alignment (const_tree type) +{ + if (!TARGET_VX_ABI) + return default_vector_alignment (type); + + if (TYPE_USER_ALIGN (type)) + return TYPE_ALIGN (type); + + return MIN (64, tree_to_shwi (TYPE_SIZE (type))); +} + +/* Implement TARGET_ASM_FILE_END. */ +static void +s390_asm_file_end (void) +{ +#ifdef HAVE_AS_GNU_ATTRIBUTE + varpool_node *vnode; + cgraph_node *cnode; + + FOR_EACH_VARIABLE (vnode) + if (TREE_PUBLIC (vnode->decl)) + s390_check_type_for_vector_abi (TREE_TYPE (vnode->decl), false, false); + + FOR_EACH_FUNCTION (cnode) + if (TREE_PUBLIC (cnode->decl)) + s390_check_type_for_vector_abi (TREE_TYPE (cnode->decl), false, false); + + + if (s390_vector_abi != 0) + fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n", + s390_vector_abi); +#endif + file_end_indicate_exec_stack (); +} + +/* Return true if TYPE is a vector bool type. */ +static inline bool +s390_vector_bool_type_p (const_tree type) +{ + return TYPE_VECTOR_OPAQUE (type); +} + +/* Return the diagnostic message string if the binary operation OP is + not permitted on TYPE1 and TYPE2, NULL otherwise. 
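   The alignment rule in s390_vector_alignment above, modeled in
   isolation (function name invented): natural alignment, capped at
   64 bits, unless the user overrides it.

     #include <stdio.h>

     static int
     vec_align_bits (int type_size_bits)
     {
       return type_size_bits < 64 ? type_size_bits : 64;
     }

     int
     main (void)
     {
       printf ("%d\n", vec_align_bits (32));    // 32: small vectors
       printf ("%d\n", vec_align_bits (128));   // 64: 16-byte vectors
       return 0;
     }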
*/ +static const char* +s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2) +{ + bool bool1_p, bool2_p; + bool plusminus_p; + bool muldiv_p; + bool compare_p; + machine_mode mode1, mode2; + + if (!TARGET_ZVECTOR) + return NULL; + + if (!VECTOR_TYPE_P (type1) || !VECTOR_TYPE_P (type2)) + return NULL; + + bool1_p = s390_vector_bool_type_p (type1); + bool2_p = s390_vector_bool_type_p (type2); + + /* Mixing signed and unsigned types is forbidden for all + operators. */ + if (!bool1_p && !bool2_p + && TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2)) + return N_("types differ in signedness"); + + plusminus_p = (op == PLUS_EXPR || op == MINUS_EXPR); + muldiv_p = (op == MULT_EXPR || op == RDIV_EXPR || op == TRUNC_DIV_EXPR + || op == CEIL_DIV_EXPR || op == FLOOR_DIV_EXPR + || op == ROUND_DIV_EXPR); + compare_p = (op == LT_EXPR || op == LE_EXPR || op == GT_EXPR || op == GE_EXPR + || op == EQ_EXPR || op == NE_EXPR); + + if (bool1_p && bool2_p && (plusminus_p || muldiv_p)) + return N_("binary operator does not support two vector bool operands"); + + if (bool1_p != bool2_p && (muldiv_p || compare_p)) + return N_("binary operator does not support vector bool operand"); + + mode1 = TYPE_MODE (type1); + mode2 = TYPE_MODE (type2); + + if (bool1_p != bool2_p && plusminus_p + && (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT + || GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT)) + return N_("binary operator does not support mixing vector " + "bool with floating point vector operands"); + + return NULL; +} + /* Initialize GCC target structure. */ #undef TARGET_ASM_ALIGNED_HI_OP @@ -12332,6 +13963,8 @@ s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) #define TARGET_FUNCTION_VALUE s390_function_value #undef TARGET_LIBCALL_VALUE #define TARGET_LIBCALL_VALUE s390_libcall_value +#undef TARGET_STRICT_ARGUMENT_NAMING +#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true #undef TARGET_KEEP_LEAF_WHEN_PROFILED #define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled @@ -12350,6 +13983,9 @@ s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) #define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel #endif +#undef TARGET_DWARF_FRAME_REG_MODE +#define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode + #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING #undef TARGET_MANGLE_TYPE #define TARGET_MANGLE_TYPE s390_mangle_type @@ -12358,6 +13994,9 @@ s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) #undef TARGET_SCALAR_MODE_SUPPORTED_P #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p +#undef TARGET_VECTOR_MODE_SUPPORTED_P +#define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p + #undef TARGET_PREFERRED_RELOAD_CLASS #define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class @@ -12418,6 +14057,24 @@ s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv +#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN +#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn + +#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE +#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode + +#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT +#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment + +#undef TARGET_VECTOR_ALIGNMENT +#define TARGET_VECTOR_ALIGNMENT s390_vector_alignment + +#undef 
TARGET_INVALID_BINARY_OP +#define TARGET_INVALID_BINARY_OP s390_invalid_binary_op + +#undef TARGET_ASM_FILE_END +#define TARGET_ASM_FILE_END s390_asm_file_end + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-s390.h"