author     bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>  2016-02-10 15:13:54 +0000
committer  bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>  2016-02-10 15:13:54 +0000
commit     8cff878b277f9af6c2827a87581baac5f768e12a (patch)
tree       d4e178503efd243eed24ff3b753cd998370d75d4 /gcc/config/s390/s390.c
parent     9610b14f8599a9db94822d3f0923b58b2f1177dc (diff)
download   gcc-8cff878b277f9af6c2827a87581baac5f768e12a.tar.gz
[./]
2016-02-10  Basile Starynkevitch  <basile@starynkevitch.net>

	{{merging with some of GCC 6, using svn merge -r222130:226090 ^/trunk ; UNSTABLE}}

[gcc/]
2016-02-10  Basile Starynkevitch  <basile@starynkevitch.net>

	{{merging with trunk 226090 ; UNSTABLE}}

	* melt-run.proto.h: include tree-ssa-scopedtables.h
	* tree-ssa-dom.c: skip second record_edge_info

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/melt-branch@233272 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config/s390/s390.c')
-rw-r--r--  gcc/config/s390/s390.c  2511
1 file changed, 2084 insertions(+), 427 deletions(-)
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 7d16048db8e..861dfb2bb35 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -23,18 +23,13 @@ along with GCC; see the file COPYING3. If not see
#include "config.h"
#include "system.h"
#include "coretypes.h"
-#include "tm.h"
+#include "backend.h"
+#include "cfghooks.h"
+#include "tree.h"
+#include "gimple.h"
#include "rtl.h"
-#include "hash-set.h"
-#include "machmode.h"
-#include "vec.h"
-#include "double-int.h"
-#include "input.h"
+#include "df.h"
#include "alias.h"
-#include "symtab.h"
-#include "wide-int.h"
-#include "inchash.h"
-#include "tree.h"
#include "fold-const.h"
#include "print-tree.h"
#include "stringpool.h"
@@ -43,19 +38,13 @@ along with GCC; see the file COPYING3. If not see
#include "calls.h"
#include "tm_p.h"
#include "regs.h"
-#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
-#include "function.h"
#include "recog.h"
-#include "hashtab.h"
-#include "statistics.h"
-#include "real.h"
-#include "fixed-value.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
@@ -64,32 +53,20 @@ along with GCC; see the file COPYING3. If not see
#include "expr.h"
#include "reload.h"
#include "diagnostic-core.h"
-#include "predict.h"
-#include "dominance.h"
-#include "cfg.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
-#include "basic-block.h"
-#include "ggc.h"
#include "target.h"
-#include "target-def.h"
#include "debug.h"
#include "langhooks.h"
#include "insn-codes.h"
#include "optabs.h"
-#include "hash-table.h"
-#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
-#include "gimple-expr.h"
-#include "is-a.h"
-#include "gimple.h"
#include "gimplify.h"
-#include "df.h"
#include "params.h"
#include "cfgloop.h"
#include "opts.h"
@@ -97,6 +74,11 @@ along with GCC; see the file COPYING3. If not see
#include "context.h"
#include "builtins.h"
#include "rtl-iter.h"
+#include "intl.h"
+#include "cgraph.h"
+
+/* This file should be included last. */
+#include "target-def.h"
/* Define the specific costs for a given cpu. */
@@ -440,6 +422,7 @@ struct GTY(()) machine_function
/* Number of GPRs and FPRs used for argument passing. */
#define GP_ARG_NUM_REG 5
#define FP_ARG_NUM_REG (TARGET_64BIT? 4 : 2)
+#define VEC_ARG_NUM_REG 8
/* A couple of shortcuts. */
#define CONST_OK_FOR_J(x) \
@@ -460,6 +443,525 @@ struct GTY(()) machine_function
bytes on a z10 (or higher) CPU. */
#define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
+
+/* Indicate which ABI has been used for passing vector args.
+ 0 - no vector type arguments have been passed where the ABI is relevant
+ 1 - the old ABI has been used
+ 2 - a vector type argument has been passed either in a vector register
+ or on the stack by value */
+static int s390_vector_abi = 0;
+
+/* Set the vector ABI marker if TYPE is subject to the vector ABI
+ switch. The vector ABI affects only vector data types. There are
+ two aspects of the vector ABI relevant here:
+
+ 1. vectors >= 16 bytes have an alignment of 8 bytes with the new
+ ABI and natural alignment with the old.
+
+ 2. vectors <= 16 bytes are passed in VRs or by value on the stack
+ with the new ABI but by reference on the stack with the old.
+
+ If ARG_P is true TYPE is used for a function argument or return
+ value. The ABI marker then is set for all vector data types. If
+ ARG_P is false only type 1 vectors are being checked. */
+
+static void
+s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
+{
+ static hash_set<const_tree> visited_types_hash;
+
+ if (s390_vector_abi)
+ return;
+
+ if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK)
+ return;
+
+ if (visited_types_hash.contains (type))
+ return;
+
+ visited_types_hash.add (type);
+
+ if (VECTOR_TYPE_P (type))
+ {
+ int type_size = int_size_in_bytes (type);
+
+ /* Outside arguments only the alignment is changing and this
+ only happens for vector types >= 16 bytes. */
+ if (!arg_p && type_size < 16)
+ return;
+
+ /* In arguments vector types > 16 bytes are passed as before (GCC
+ never enforced the bigger alignment for arguments which was
+ required by the old vector ABI). However, it might still be
+ ABI relevant due to the changed alignment if it is a struct
+ member. */
+ if (arg_p && type_size > 16 && !in_struct_p)
+ return;
+
+ s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
+ }
+ else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
+ {
+ /* ARRAY_TYPE: Since with neither of the ABIs we have more than
+ natural alignment there will never be ABI dependent padding
+ in an array type. That's why we do not set in_struct_p to
+ true here. */
+ s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
+ }
+ else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
+ {
+ tree arg_chain;
+
+ /* Check the return type. */
+ s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);
+
+ for (arg_chain = TYPE_ARG_TYPES (type);
+ arg_chain;
+ arg_chain = TREE_CHAIN (arg_chain))
+ s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false);
+ }
+ else if (RECORD_OR_UNION_TYPE_P (type))
+ {
+ tree field;
+
+ for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
+ {
+ if (TREE_CODE (field) != FIELD_DECL)
+ continue;
+
+ s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true);
+ }
+ }
+}
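
To make the two ABI aspects above concrete, here is a minimal sketch of user-level declarations (hypothetical code, not part of the patch) and how the function above would treat them:

  typedef int v4si __attribute__ ((vector_size (16)));  /* 16 bytes */
  typedef int v8si __attribute__ ((vector_size (32)));  /* 32 bytes */

  void f (v4si x);       /* argument <= 16 bytes: in a VR or by value
                            with the new ABI, so the marker is set */
  void g (v8si x);       /* plain argument > 16 bytes: passed as
                            before, no marker */
  struct s { v8si m; };  /* same vector as a struct member: padding
                            may differ, so the marker is set when
                            struct s is used as an argument */
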
+
+
+/* System z builtins. */
+
+#include "s390-builtins.h"
+
+const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
+ {
+#undef B_DEF
+#undef OB_DEF
+#undef OB_DEF_VAR
+#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
+#define OB_DEF(...)
+#define OB_DEF_VAR(...)
+#include "s390-builtins.def"
+ 0
+ };
+
+const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
+ {
+#undef B_DEF
+#undef OB_DEF
+#undef OB_DEF_VAR
+#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
+#define OB_DEF(...)
+#define OB_DEF_VAR(...)
+#include "s390-builtins.def"
+ 0
+ };
+
+const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
+ {
+#undef B_DEF
+#undef OB_DEF
+#undef OB_DEF_VAR
+#define B_DEF(...)
+#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
+#define OB_DEF_VAR(...)
+#include "s390-builtins.def"
+ 0
+ };
+
+const unsigned int
+opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
+ {
+#undef B_DEF
+#undef OB_DEF
+#undef OB_DEF_VAR
+#define B_DEF(...)
+#define OB_DEF(...)
+#define OB_DEF_VAR(NAME, PATTERN, FLAGS, FNTYPE) FLAGS,
+#include "s390-builtins.def"
+ 0
+ };
+
+tree s390_builtin_types[BT_MAX];
+tree s390_builtin_fn_types[BT_FN_MAX];
+tree s390_builtin_decls[S390_BUILTIN_MAX +
+ S390_OVERLOADED_BUILTIN_MAX +
+ S390_OVERLOADED_BUILTIN_VAR_MAX];
+
+static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
+#undef B_DEF
+#undef OB_DEF
+#undef OB_DEF_VAR
+#define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
+#define OB_DEF(...)
+#define OB_DEF_VAR(...)
+
+#include "s390-builtins.def"
+ CODE_FOR_nothing
+};
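
The tables above use the X-macro idiom: s390-builtins.def is included once per table, with B_DEF/OB_DEF/OB_DEF_VAR redefined each time to extract a single column. As a sketch, a hypothetical entry (invented for illustration, not a real one)

  B_DEF (s390_foo, foov16qi, 0, B_VX, O1_U4, BT_FN_V16QI_V16QI_INT)

would contribute B_VX to bflags_builtin[], O1_U4 to opflags_builtin[] and CODE_FOR_foov16qi to code_for_builtin[], while the OB_* macros expand to nothing for these three tables.
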
+
+static void
+s390_init_builtins (void)
+{
+ /* These definitions are being used in s390-builtins.def. */
+ tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
+ NULL, NULL);
+ tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
+ tree c_uint64_type_node;
+ unsigned int bflags_mask = (BFLAGS_MASK_INIT);
+
+ bflags_mask |= (TARGET_VX) ? B_VX : 0;
+ bflags_mask |= (TARGET_HTM) ? B_HTM : 0;
+
+ /* The uint64_type_node from tree.c is not compatible to the C99
+ uint64_t data type. What we want is c_uint64_type_node from
+ c-common.c. But since backend code is not supposed to interface
+ with the frontend we recreate it here. */
+ if (TARGET_64BIT)
+ c_uint64_type_node = long_unsigned_type_node;
+ else
+ c_uint64_type_node = long_long_unsigned_type_node;
+
+#undef DEF_TYPE
+#define DEF_TYPE(INDEX, BFLAGS, NODE, CONST_P) \
+ if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \
+ s390_builtin_types[INDEX] = (!CONST_P) ? \
+ (NODE) : build_type_variant ((NODE), 1, 0);
+
+#undef DEF_POINTER_TYPE
+#define DEF_POINTER_TYPE(INDEX, BFLAGS, INDEX_BASE) \
+ if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \
+ s390_builtin_types[INDEX] = \
+ build_pointer_type (s390_builtin_types[INDEX_BASE]);
+
+#undef DEF_DISTINCT_TYPE
+#define DEF_DISTINCT_TYPE(INDEX, BFLAGS, INDEX_BASE) \
+ if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \
+ s390_builtin_types[INDEX] = \
+ build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);
+
+#undef DEF_VECTOR_TYPE
+#define DEF_VECTOR_TYPE(INDEX, BFLAGS, INDEX_BASE, ELEMENTS) \
+ if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \
+ s390_builtin_types[INDEX] = \
+ build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
+
+#undef DEF_OPAQUE_VECTOR_TYPE
+#define DEF_OPAQUE_VECTOR_TYPE(INDEX, BFLAGS, INDEX_BASE, ELEMENTS) \
+ if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \
+ s390_builtin_types[INDEX] = \
+ build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
+
+#undef DEF_FN_TYPE
+#define DEF_FN_TYPE(INDEX, BFLAGS, args...) \
+ if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \
+ s390_builtin_fn_types[INDEX] = \
+ build_function_type_list (args, NULL_TREE);
+#undef DEF_OV_TYPE
+#define DEF_OV_TYPE(...)
+#include "s390-builtin-types.def"
+
+#undef B_DEF
+#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE) \
+ if (((BFLAGS) & ~bflags_mask) == 0) \
+ s390_builtin_decls[S390_BUILTIN_##NAME] = \
+ add_builtin_function ("__builtin_" #NAME, \
+ s390_builtin_fn_types[FNTYPE], \
+ S390_BUILTIN_##NAME, \
+ BUILT_IN_MD, \
+ NULL, \
+ ATTRS);
+#undef OB_DEF
+#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE) \
+ if (((BFLAGS) & ~bflags_mask) == 0) \
+ s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
+ add_builtin_function ("__builtin_" #NAME, \
+ s390_builtin_fn_types[FNTYPE], \
+ S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
+ BUILT_IN_MD, \
+ NULL, \
+ 0);
+#undef OB_DEF_VAR
+#define OB_DEF_VAR(...)
+#include "s390-builtins.def"
+
+}
+
+/* Return true if ARG is appropriate as argument number ARGNUM of
+ builtin DECL. The operand flags from s390-builtins.def have to be
+ passed as OP_FLAGS. */
+bool
+s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
+{
+ if (O_UIMM_P (op_flags))
+ {
+ int bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 };
+ int bitwidth = bitwidths[op_flags - O_U1];
+
+ if (!tree_fits_uhwi_p (arg)
+ || tree_to_uhwi (arg) > ((unsigned HOST_WIDE_INT)1 << bitwidth) - 1)
+ {
+ error("constant argument %d for builtin %qF is out of range (0.."
+ HOST_WIDE_INT_PRINT_UNSIGNED ")",
+ argnum, decl,
+ ((unsigned HOST_WIDE_INT)1 << bitwidth) - 1);
+ return false;
+ }
+ }
+
+ if (O_SIMM_P (op_flags))
+ {
+ int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
+ int bitwidth = bitwidths[op_flags - O_S2];
+
+ if (!tree_fits_shwi_p (arg)
+ || tree_to_shwi (arg) < -((HOST_WIDE_INT)1 << (bitwidth - 1))
+ || tree_to_shwi (arg) > (((HOST_WIDE_INT)1 << (bitwidth - 1)) - 1))
+ {
+ error("constant argument %d for builtin %qF is out of range ("
+ HOST_WIDE_INT_PRINT_DEC ".."
+ HOST_WIDE_INT_PRINT_DEC ")",
+ argnum, decl,
+ -(HOST_WIDE_INT)1 << (bitwidth - 1),
+ ((HOST_WIDE_INT)1 << (bitwidth - 1)) - 1);
+ return false;
+ }
+ }
+ return true;
+}
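
A worked example of the range checks, assuming the O_Un/O_Sn enumerators are consecutive as the table indexing implies:

  /* O_U4 operand: bitwidths[O_U4 - O_U1] == 4
       -> accepted range 0 .. (1 << 4) - 1 == 15
     O_S8 operand: bitwidths[O_S8 - O_S2] == 8
       -> accepted range -(1 << 7) .. (1 << 7) - 1 == -128 .. 127 */
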
+
+/* Expand an expression EXP that calls a built-in function,
+ with result going to TARGET if that's convenient
+ (and in mode MODE if that's convenient).
+ SUBTARGET may be used as the target for computing one of EXP's operands.
+ IGNORE is nonzero if the value is to be ignored. */
+
+static rtx
+s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
+ machine_mode mode ATTRIBUTE_UNUSED,
+ int ignore ATTRIBUTE_UNUSED)
+{
+#define MAX_ARGS 5
+
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+ enum insn_code icode;
+ rtx op[MAX_ARGS], pat;
+ int arity;
+ bool nonvoid;
+ tree arg;
+ call_expr_arg_iterator iter;
+ unsigned int all_op_flags = opflags_for_builtin (fcode);
+ machine_mode last_vec_mode = VOIDmode;
+
+ if (TARGET_DEBUG_ARG)
+ {
+ fprintf (stderr,
+ "s390_expand_builtin, code = %4d, %s\n",
+ (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)));
+ }
+
+ if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
+ && fcode < S390_ALL_BUILTIN_MAX)
+ {
+ gcc_unreachable ();
+ }
+ else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
+ {
+ icode = code_for_builtin[fcode];
+ /* Set a flag in the machine specific cfun part in order to support
+ saving/restoring of FPRs. */
+ if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
+ cfun->machine->tbegin_p = true;
+ }
+ else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
+ {
+ error ("Unresolved overloaded builtin");
+ return const0_rtx;
+ }
+ else
+ internal_error ("bad builtin fcode");
+
+ if (icode == 0)
+ internal_error ("bad builtin icode");
+
+ nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
+
+ if (nonvoid)
+ {
+ machine_mode tmode = insn_data[icode].operand[0].mode;
+ if (!target
+ || GET_MODE (target) != tmode
+ || !(*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ /* There are builtins (e.g. vec_promote) with no vector
+ arguments but an element selector. So we have to also look
+ at the vector return type when emitting the modulo
+ operation. */
+ if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
+ last_vec_mode = insn_data[icode].operand[0].mode;
+ }
+
+ arity = 0;
+ FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
+ {
+ const struct insn_operand_data *insn_op;
+ unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);
+
+ all_op_flags = all_op_flags >> O_SHIFT;
+
+ if (arg == error_mark_node)
+ return NULL_RTX;
+ if (arity >= MAX_ARGS)
+ return NULL_RTX;
+
+ if (O_IMM_P (op_flags)
+ && TREE_CODE (arg) != INTEGER_CST)
+ {
+ error ("constant value required for builtin %qF argument %d",
+ fndecl, arity + 1);
+ return const0_rtx;
+ }
+
+ if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
+ return const0_rtx;
+
+ insn_op = &insn_data[icode].operand[arity + nonvoid];
+ op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
+
+ /* Wrap the expanded RTX for pointer types into a MEM expr with
+ the proper mode. This allows us to use e.g. (match_operand
+ "memory_operand"..) in the insn patterns instead of (mem
+ (match_operand "address_operand)). This is helpful for
+ patterns not just accepting MEMs. */
+ if (POINTER_TYPE_P (TREE_TYPE (arg))
+ && insn_op->predicate != address_operand)
+ op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);
+
+ /* Expand the modulo operation required on element selectors. */
+ if (op_flags == O_ELEM)
+ {
+ gcc_assert (last_vec_mode != VOIDmode);
+ op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
+ op[arity],
+ GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ }
+
+ /* Record the vector mode used for an element selector. This assumes:
+ 1. There is no builtin with two different vector modes and an element selector
+ 2. The element selector comes after the vector type it is referring to.
+ This is currently true for all the builtins but FIXME we
+ should better check for that. */
+ if (VECTOR_MODE_P (insn_op->mode))
+ last_vec_mode = insn_op->mode;
+
+ if (insn_op->predicate (op[arity], insn_op->mode))
+ {
+ arity++;
+ continue;
+ }
+
+ if (MEM_P (op[arity])
+ && insn_op->predicate == memory_operand
+ && (GET_MODE (XEXP (op[arity], 0)) == Pmode
+ || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
+ {
+ op[arity] = replace_equiv_address (op[arity],
+ copy_to_mode_reg (Pmode,
+ XEXP (op[arity], 0)));
+ }
+ else if (GET_MODE (op[arity]) == insn_op->mode
+ || GET_MODE (op[arity]) == VOIDmode
+ || (insn_op->predicate == address_operand
+ && GET_MODE (op[arity]) == Pmode))
+ {
+ /* An address_operand usually has VOIDmode in the expander
+ so we cannot use this. */
+ machine_mode target_mode =
+ (insn_op->predicate == address_operand
+ ? Pmode : insn_op->mode);
+ op[arity] = copy_to_mode_reg (target_mode, op[arity]);
+ }
+
+ if (!insn_op->predicate (op[arity], insn_op->mode))
+ {
+ error ("Invalid argument %d for builtin %qF", arity + 1, fndecl);
+ return const0_rtx;
+ }
+ arity++;
+ }
+
+ if (last_vec_mode != VOIDmode && !TARGET_VX)
+ {
+ error ("Vector type builtin %qF is not supported without -mvx "
+ "(default with -march=z13).",
+ fndecl);
+ return const0_rtx;
+ }
+
+ switch (arity)
+ {
+ case 0:
+ pat = GEN_FCN (icode) (target);
+ break;
+ case 1:
+ if (nonvoid)
+ pat = GEN_FCN (icode) (target, op[0]);
+ else
+ pat = GEN_FCN (icode) (op[0]);
+ break;
+ case 2:
+ if (nonvoid)
+ pat = GEN_FCN (icode) (target, op[0], op[1]);
+ else
+ pat = GEN_FCN (icode) (op[0], op[1]);
+ break;
+ case 3:
+ if (nonvoid)
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
+ else
+ pat = GEN_FCN (icode) (op[0], op[1], op[2]);
+ break;
+ case 4:
+ if (nonvoid)
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
+ else
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
+ break;
+ case 5:
+ if (nonvoid)
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
+ else
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
+ break;
+ case 6:
+ if (nonvoid)
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
+ else
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ if (!pat)
+ return NULL_RTX;
+ emit_insn (pat);
+
+ if (nonvoid)
+ return target;
+ else
+ return const0_rtx;
+}
+
+
static const int s390_hotpatch_hw_max = 1000000;
static int s390_hotpatch_hw_before_label = 0;
static int s390_hotpatch_hw_after_label = 0;
@@ -509,9 +1011,43 @@ s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
return NULL_TREE;
}
+/* Expand the s390_vector_bool type attribute. */
+
+static tree
+s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
+{
+ tree type = *node, result = NULL_TREE;
+ machine_mode mode;
+
+ while (POINTER_TYPE_P (type)
+ || TREE_CODE (type) == FUNCTION_TYPE
+ || TREE_CODE (type) == METHOD_TYPE
+ || TREE_CODE (type) == ARRAY_TYPE)
+ type = TREE_TYPE (type);
+
+ mode = TYPE_MODE (type);
+ switch (mode)
+ {
+ case DImode: case V2DImode: result = s390_builtin_types[BT_BV2DI]; break;
+ case SImode: case V4SImode: result = s390_builtin_types[BT_BV4SI]; break;
+ case HImode: case V8HImode: result = s390_builtin_types[BT_BV8HI]; break;
+ case QImode: case V16QImode: result = s390_builtin_types[BT_BV16QI];
+ default: break;
+ }
+
+ *no_add_attrs = true; /* No need to hang on to the attribute. */
+
+ if (result)
+ *node = lang_hooks.types.reconstruct_complex_type (*node, result);
+
+ return NULL_TREE;
+}
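
A minimal usage sketch for the attribute (hypothetical user code; the BT_BV* types it maps to are the ones initialized in s390_init_builtins):

  /* TYPE_MODE is V16QImode, so the handler rewrites the type to the
     BT_BV16QI boolean vector and drops the attribute itself
     (*no_add_attrs = true). */
  typedef unsigned char bv16qi
    __attribute__ ((vector_size (16), s390_vector_bool));
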
+
static const struct attribute_spec s390_attribute_table[] = {
- { "hotpatch", 2, 2, true, false, false, s390_handle_hotpatch_attribute, false
- },
+ { "hotpatch", 2, 2, true, false, false, s390_handle_hotpatch_attribute, false },
+ { "s390_vector_bool", 0, 0, false, true, false, s390_handle_vectorbool_attribute, true },
/* End element. */
{ NULL, 0, 0, false, false, false, NULL, false }
};
@@ -576,6 +1112,35 @@ s390_scalar_mode_supported_p (machine_mode mode)
return default_scalar_mode_supported_p (mode);
}
+/* Return true if the back end supports vector mode MODE. */
+static bool
+s390_vector_mode_supported_p (machine_mode mode)
+{
+ machine_mode inner;
+
+ if (!VECTOR_MODE_P (mode)
+ || !TARGET_VX
+ || GET_MODE_SIZE (mode) > 16)
+ return false;
+
+ inner = GET_MODE_INNER (mode);
+
+ switch (inner)
+ {
+ case QImode:
+ case HImode:
+ case SImode:
+ case DImode:
+ case TImode:
+ case SFmode:
+ case DFmode:
+ case TFmode:
+ return true;
+ default:
+ return false;
+ }
+}
+
/* Set the has_landing_pad_p flag in struct machine_function to VALUE. */
void
@@ -647,6 +1212,11 @@ s390_match_ccmode_set (rtx set, machine_mode req_mode)
case CCT1mode:
case CCT2mode:
case CCT3mode:
+ case CCVEQmode:
+ case CCVHmode:
+ case CCVHUmode:
+ case CCVFHmode:
+ case CCVFHEmode:
if (req_mode != set_mode)
return 0;
break;
@@ -747,6 +1317,29 @@ s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
machine_mode
s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
{
+ if (TARGET_VX
+ && register_operand (op0, DFmode)
+ && register_operand (op1, DFmode))
+ {
+ /* LT, LE, UNGT, UNGE require swapping OP0 and OP1. Either
+ s390_emit_compare or s390_canonicalize_comparison will take
+ care of it. */
+ switch (code)
+ {
+ case EQ:
+ case NE:
+ return CCVEQmode;
+ case GT:
+ case UNLE:
+ return CCVFHmode;
+ case GE:
+ case UNLT:
+ return CCVFHEmode;
+ default:
+ ;
+ }
+ }
+
switch (code)
{
case EQ:
@@ -1024,8 +1617,73 @@ s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
rtx tem = *op0; *op0 = *op1; *op1 = tem;
*code = (int)swap_condition ((enum rtx_code)*code);
}
+
+ /* Using the scalar variants of vector instructions for 64 bit FP
+ comparisons might require swapping the operands. */
+ if (TARGET_VX
+ && register_operand (*op0, DFmode)
+ && register_operand (*op1, DFmode)
+ && (*code == LT || *code == LE || *code == UNGT || *code == UNGE))
+ {
+ rtx tmp;
+
+ switch (*code)
+ {
+ case LT: *code = GT; break;
+ case LE: *code = GE; break;
+ case UNGT: *code = UNLE; break;
+ case UNGE: *code = UNLT; break;
+ default: ;
+ }
+ tmp = *op0; *op0 = *op1; *op1 = tmp;
+ }
+}
+
+/* Helper function for s390_emit_compare. If possible emit a 64 bit
+ FP compare using the single element variant of vector instructions.
+ Replace CODE with the comparison code to be used in the CC reg
+ compare and return the condition code register RTX in CC. */
+
+static bool
+s390_expand_vec_compare_scalar (enum rtx_code *code, rtx cmp1, rtx cmp2,
+ rtx *cc)
+{
+ machine_mode cmp_mode;
+ bool swap_p = false;
+
+ switch (*code)
+ {
+ case EQ: cmp_mode = CCVEQmode; break;
+ case NE: cmp_mode = CCVEQmode; break;
+ case GT: cmp_mode = CCVFHmode; break;
+ case GE: cmp_mode = CCVFHEmode; break;
+ case UNLE: cmp_mode = CCVFHmode; break;
+ case UNLT: cmp_mode = CCVFHEmode; break;
+ case LT: cmp_mode = CCVFHmode; *code = GT; swap_p = true; break;
+ case LE: cmp_mode = CCVFHEmode; *code = GE; swap_p = true; break;
+ case UNGE: cmp_mode = CCVFHmode; *code = UNLE; swap_p = true; break;
+ case UNGT: cmp_mode = CCVFHEmode; *code = UNLT; swap_p = true; break;
+ default: return false;
+ }
+
+ if (swap_p)
+ {
+ rtx tmp = cmp2;
+ cmp2 = cmp1;
+ cmp1 = tmp;
+ }
+ *cc = gen_rtx_REG (cmp_mode, CC_REGNUM);
+ emit_insn (gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (2,
+ gen_rtx_SET (*cc,
+ gen_rtx_COMPARE (cmp_mode, cmp1,
+ cmp2)),
+ gen_rtx_CLOBBER (VOIDmode,
+ gen_rtx_SCRATCH (V2DImode)))));
+ return true;
}
+
/* Emit a compare instruction suitable to implement the comparison
OP0 CODE OP1. Return the correct condition RTL to be placed in
the IF_THEN_ELSE of the conditional branch testing the result. */
@@ -1036,10 +1694,18 @@ s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
machine_mode mode = s390_select_ccmode (code, op0, op1);
rtx cc;
- /* Do not output a redundant compare instruction if a compare_and_swap
- pattern already computed the result and the machine modes are compatible. */
- if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
+ if (TARGET_VX
+ && register_operand (op0, DFmode)
+ && register_operand (op1, DFmode)
+ && s390_expand_vec_compare_scalar (&code, op0, op1, &cc))
+ {
+ /* Work has been done by s390_expand_vec_compare_scalar already. */
+ }
+ else if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
{
+ /* Do not output a redundant compare instruction if a
+ compare_and_swap pattern already computed the result and the
+ machine modes are compatible. */
gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
== GET_MODE (op0));
cc = op0;
@@ -1047,7 +1713,7 @@ s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
else
{
cc = gen_rtx_REG (mode, CC_REGNUM);
- emit_insn (gen_rtx_SET (VOIDmode, cc, gen_rtx_COMPARE (mode, op0, op1)));
+ emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
}
return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
@@ -1080,7 +1746,7 @@ s390_emit_jump (rtx target, rtx cond)
if (cond)
target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
- insn = gen_rtx_SET (VOIDmode, pc_rtx, target);
+ insn = gen_rtx_SET (pc_rtx, target);
return emit_jump_insn (insn);
}
@@ -1274,6 +1940,93 @@ s390_branch_condition_mask (rtx code)
}
break;
+ /* Vector comparison modes. */
+
+ case CCVEQmode:
+ switch (GET_CODE (code))
+ {
+ case EQ: return CC0;
+ case NE: return CC3;
+ default: return -1;
+ }
+
+ case CCVEQANYmode:
+ switch (GET_CODE (code))
+ {
+ case EQ: return CC0 | CC1;
+ case NE: return CC3 | CC1;
+ default: return -1;
+ }
+
+ /* Integer vector compare modes. */
+
+ case CCVHmode:
+ switch (GET_CODE (code))
+ {
+ case GT: return CC0;
+ case LE: return CC3;
+ default: return -1;
+ }
+
+ case CCVHANYmode:
+ switch (GET_CODE (code))
+ {
+ case GT: return CC0 | CC1;
+ case LE: return CC3 | CC1;
+ default: return -1;
+ }
+
+ case CCVHUmode:
+ switch (GET_CODE (code))
+ {
+ case GTU: return CC0;
+ case LEU: return CC3;
+ default: return -1;
+ }
+
+ case CCVHUANYmode:
+ switch (GET_CODE (code))
+ {
+ case GTU: return CC0 | CC1;
+ case LEU: return CC3 | CC1;
+ default: return -1;
+ }
+
+ /* FP vector compare modes. */
+
+ case CCVFHmode:
+ switch (GET_CODE (code))
+ {
+ case GT: return CC0;
+ case UNLE: return CC3;
+ default: return -1;
+ }
+
+ case CCVFHANYmode:
+ switch (GET_CODE (code))
+ {
+ case GT: return CC0 | CC1;
+ case UNLE: return CC3 | CC1;
+ default: return -1;
+ }
+
+ case CCVFHEmode:
+ switch (GET_CODE (code))
+ {
+ case GE: return CC0;
+ case UNLT: return CC3;
+ default: return -1;
+ }
+
+ case CCVFHEANYmode:
+ switch (GET_CODE (code))
+ {
+ case GE: return CC0 | CC1;
+ case UNLT: return CC3 | CC1;
+ default: return -1;
+ }
+
+
case CCRAWmode:
switch (GET_CODE (code))
{
@@ -1473,6 +2226,9 @@ s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, int size,
/* Calculate a mask for all bits beyond the contiguous bits. */
mask = (-1LL & ~(((1ULL << (tmp_length + tmp_pos - 1)) << 1) - 1));
+ if ((unsigned)size < sizeof (HOST_WIDE_INT) * BITS_PER_UNIT)
+ mask &= (HOST_WIDE_INT_1U << size) - 1;
+
if (mask & in)
return false;
@@ -1488,6 +2244,101 @@ s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, int size,
return true;
}
+/* Return true if OP contains the same contiguous bitfield in *all*
+ its elements. START and END can be used to obtain the start and
+ end position of the bitfield.
+
+ START/END give the position of the first/last bit of the bitfield
+ counting from the lowest order bit starting with zero. In order to
+ use these values for S/390 instructions this has to be converted to
+ "bits big endian" style. */
+
+bool
+s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
+{
+ unsigned HOST_WIDE_INT mask;
+ int length, size;
+
+ if (!VECTOR_MODE_P (GET_MODE (op))
+ || GET_CODE (op) != CONST_VECTOR
+ || !CONST_INT_P (XVECEXP (op, 0, 0)))
+ return false;
+
+ if (GET_MODE_NUNITS (GET_MODE (op)) > 1)
+ {
+ int i;
+
+ for (i = 1; i < GET_MODE_NUNITS (GET_MODE (op)); ++i)
+ if (!rtx_equal_p (XVECEXP (op, 0, i), XVECEXP (op, 0, 0)))
+ return false;
+ }
+
+ size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));
+ mask = UINTVAL (XVECEXP (op, 0, 0));
+ if (s390_contiguous_bitmask_p (mask, size, start,
+ end != NULL ? &length : NULL))
+ {
+ if (end != NULL)
+ *end = *start + length - 1;
+ return true;
+ }
+ /* 0xff00000f style immediates can be covered by swapping start and
+ end indices in vgm. */
+ if (s390_contiguous_bitmask_p (~mask, size, start,
+ end != NULL ? &length : NULL))
+ {
+ if (end != NULL)
+ *end = *start - 1;
+ if (start != NULL)
+ *start = *start + length;
+ return true;
+ }
+ return false;
+}
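
A worked example of the swapped case, assuming 32-bit vector elements:

  /* mask  = 0xff00000f  -> not a contiguous bitfield
     ~mask = 0x00fffff0  -> contiguous: start 4, length 20
     result: *end = 4 - 1 = 3, *start = 4 + 20 = 24, i.e. the
     wrap-around range 24..31 plus 0..3 that vgm can encode. */
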
+
+/* Return true if OP consists only of byte chunks that are either 0 or
+ 0xff. If MASK is != NULL a byte mask is generated which is
+ appropriate for the vector generate byte mask instruction. */
+
+bool
+s390_bytemask_vector_p (rtx op, unsigned *mask)
+{
+ int i;
+ unsigned tmp_mask = 0;
+ int nunit, unit_size;
+
+ if (!VECTOR_MODE_P (GET_MODE (op))
+ || GET_CODE (op) != CONST_VECTOR
+ || !CONST_INT_P (XVECEXP (op, 0, 0)))
+ return false;
+
+ nunit = GET_MODE_NUNITS (GET_MODE (op));
+ unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));
+
+ for (i = 0; i < nunit; i++)
+ {
+ unsigned HOST_WIDE_INT c;
+ int j;
+
+ if (!CONST_INT_P (XVECEXP (op, 0, i)))
+ return false;
+
+ c = UINTVAL (XVECEXP (op, 0, i));
+ for (j = 0; j < unit_size; j++)
+ {
+ if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
+ return false;
+ tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
+ c = c >> BITS_PER_UNIT;
+ }
+ }
+
+ if (mask != NULL)
+ *mask = tmp_mask;
+
+ return true;
+}
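
A worked example, assuming a V4SI constant with every element 0x00ff00ff:

  /* Each element contributes one mask bit per byte, most significant
     byte first: 00 ff 00 ff -> 0101.  Four identical elements yield
     *mask = 0x5555, a suitable immediate for the vector generate
     byte mask instruction mentioned above. */
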
+
/* Check whether a rotate of ROTL followed by an AND of CONTIG is
equivalent to a shift followed by the AND. In particular, CONTIG
should not overlap the (rotated) bit 0/bit 63 gap. Negative values
@@ -1513,8 +2364,8 @@ s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
bool
s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
{
- /* Floating point registers cannot be split. */
- if (FP_REG_P (src) || FP_REG_P (dst))
+ /* Floating point and vector registers cannot be split. */
+ if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
return false;
/* We don't need to split if operands are directly accessible. */
@@ -1648,7 +2499,7 @@ s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
}
/* Emit the instruction. */
- op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
+ op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
@@ -1705,16 +2556,20 @@ s390_init_machine_status (void)
/* Map for smallest class containing reg regno. */
const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
-{ GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
- ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
- ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
- ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
- FP_REGS, FP_REGS, FP_REGS, FP_REGS,
- FP_REGS, FP_REGS, FP_REGS, FP_REGS,
- FP_REGS, FP_REGS, FP_REGS, FP_REGS,
- FP_REGS, FP_REGS, FP_REGS, FP_REGS,
- ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS,
- ACCESS_REGS, ACCESS_REGS
+{ GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 0 */
+ ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 4 */
+ ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 8 */
+ ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 12 */
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 16 */
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 20 */
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 24 */
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 28 */
+ ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS, /* 32 */
+ ACCESS_REGS, ACCESS_REGS, VEC_REGS, VEC_REGS, /* 36 */
+ VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 40 */
+ VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 44 */
+ VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 48 */
+ VEC_REGS, VEC_REGS /* 52 */
};
/* Return attribute type of insn. */
@@ -2447,13 +3302,14 @@ s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
/* Compute a (partial) cost for rtx X. Return true if the complete
cost has been computed, and false if subexpressions should be
scanned. In either case, *TOTAL contains the cost result.
- CODE contains GET_CODE (x), OUTER_CODE contains the code
- of the superexpression of x. */
+ OUTER_CODE contains the code of the superexpression of x. */
static bool
-s390_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
+s390_rtx_costs (rtx x, machine_mode mode, int outer_code,
+ int opno ATTRIBUTE_UNUSED,
int *total, bool speed ATTRIBUTE_UNUSED)
{
+ int code = GET_CODE (x);
switch (code)
{
case CONST:
@@ -2484,7 +3340,7 @@ s390_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
return false;
case MULT:
- switch (GET_MODE (x))
+ switch (mode)
{
case SImode:
{
@@ -2543,7 +3399,7 @@ s390_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
return false;
case FMA:
- switch (GET_MODE (x))
+ switch (mode)
{
case DFmode:
*total = s390_cost->madbr;
@@ -2557,18 +3413,18 @@ s390_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
/* Negate in the third argument is free: FMSUB. */
if (GET_CODE (XEXP (x, 2)) == NEG)
{
- *total += (rtx_cost (XEXP (x, 0), FMA, 0, speed)
- + rtx_cost (XEXP (x, 1), FMA, 1, speed)
- + rtx_cost (XEXP (XEXP (x, 2), 0), FMA, 2, speed));
+ *total += (rtx_cost (XEXP (x, 0), mode, FMA, 0, speed)
+ + rtx_cost (XEXP (x, 1), mode, FMA, 1, speed)
+ + rtx_cost (XEXP (XEXP (x, 2), 0), mode, FMA, 2, speed));
return true;
}
return false;
case UDIV:
case UMOD:
- if (GET_MODE (x) == TImode) /* 128 bit division */
+ if (mode == TImode) /* 128 bit division */
*total = s390_cost->dlgr;
- else if (GET_MODE (x) == DImode)
+ else if (mode == DImode)
{
rtx right = XEXP (x, 1);
if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
@@ -2576,13 +3432,13 @@ s390_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
else /* 64 by 64 bit division */
*total = s390_cost->dlgr;
}
- else if (GET_MODE (x) == SImode) /* 32 bit division */
+ else if (mode == SImode) /* 32 bit division */
*total = s390_cost->dlr;
return false;
case DIV:
case MOD:
- if (GET_MODE (x) == DImode)
+ if (mode == DImode)
{
rtx right = XEXP (x, 1);
if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
@@ -2593,26 +3449,26 @@ s390_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
else /* 64 by 64 bit division */
*total = s390_cost->dsgr;
}
- else if (GET_MODE (x) == SImode) /* 32 bit division */
+ else if (mode == SImode) /* 32 bit division */
*total = s390_cost->dlr;
- else if (GET_MODE (x) == SFmode)
+ else if (mode == SFmode)
{
*total = s390_cost->debr;
}
- else if (GET_MODE (x) == DFmode)
+ else if (mode == DFmode)
{
*total = s390_cost->ddbr;
}
- else if (GET_MODE (x) == TFmode)
+ else if (mode == TFmode)
{
*total = s390_cost->dxbr;
}
return false;
case SQRT:
- if (GET_MODE (x) == SFmode)
+ if (mode == SFmode)
*total = s390_cost->sqebr;
- else if (GET_MODE (x) == DFmode)
+ else if (mode == DFmode)
*total = s390_cost->sqdbr;
else /* TFmode */
*total = s390_cost->sqxbr;
@@ -2775,6 +3631,17 @@ legitimate_pic_operand_p (rtx op)
static bool
s390_legitimate_constant_p (machine_mode mode, rtx op)
{
+ if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
+ {
+ if (GET_MODE_SIZE (mode) != 16)
+ return 0;
+
+ if (!const0_operand (op, mode)
+ && !s390_contiguous_bitmask_vector_p (op, NULL, NULL)
+ && !s390_bytemask_vector_p (op, NULL))
+ return 0;
+ }
+
/* Accept all non-symbolic constants. */
if (!SYMBOLIC_CONST (op))
return 1;
@@ -2811,6 +3678,7 @@ s390_cannot_force_const_mem (machine_mode mode, rtx x)
{
case CONST_INT:
case CONST_DOUBLE:
+ case CONST_VECTOR:
/* Accept all non-symbolic constants. */
return false;
@@ -2943,6 +3811,27 @@ legitimate_reload_fp_constant_p (rtx op)
return false;
}
+/* Returns true if the constant value OP is a legitimate vector operand
+ during and after reload.
+ This function accepts all constants which can be loaded directly
+ into a VR. */
+
+static bool
+legitimate_reload_vector_constant_p (rtx op)
+{
+ /* FIXME: Support constant vectors with all the same 16 bit unsigned
+ operands. These can be loaded with vrepi. */
+
+ if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
+ && (const0_operand (op, GET_MODE (op))
+ || constm1_operand (op, GET_MODE (op))
+ || s390_contiguous_bitmask_vector_p (op, NULL, NULL)
+ || s390_bytemask_vector_p (op, NULL)))
+ return true;
+
+ return false;
+}
+
/* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
return the class of reg to actually use. */
@@ -2953,6 +3842,7 @@ s390_preferred_reload_class (rtx op, reg_class_t rclass)
{
/* Constants we cannot reload into general registers
must be forced into the literal pool. */
+ case CONST_VECTOR:
case CONST_DOUBLE:
case CONST_INT:
if (reg_class_subset_p (GENERAL_REGS, rclass)
@@ -2964,6 +3854,10 @@ s390_preferred_reload_class (rtx op, reg_class_t rclass)
else if (reg_class_subset_p (FP_REGS, rclass)
&& legitimate_reload_fp_constant_p (op))
return FP_REGS;
+ else if (reg_class_subset_p (VEC_REGS, rclass)
+ && legitimate_reload_vector_constant_p (op))
+ return VEC_REGS;
+
return NO_REGS;
/* If a symbolic constant or a PLUS is reloaded,
@@ -3087,6 +3981,7 @@ s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
/* Reload might have pulled a constant out of the literal pool.
Force it back in. */
if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
+ || GET_CODE (mem) == CONST_VECTOR
|| GET_CODE (mem) == CONST)
mem = force_const_mem (GET_MODE (reg), mem);
@@ -3126,6 +4021,30 @@ s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
if (reg_classes_intersect_p (CC_REGS, rclass))
return GENERAL_REGS;
+ if (TARGET_VX)
+ {
+ /* The vst/vl vector move instructions allow only for short
+ displacements. */
+ if (MEM_P (x)
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
+ && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x, 0), 1)))
+ && reg_class_subset_p (rclass, VEC_REGS)
+ && (!reg_class_subset_p (rclass, FP_REGS)
+ || (GET_MODE_SIZE (mode) > 8
+ && s390_class_max_nregs (FP_REGS, mode) == 1)))
+ {
+ if (in_p)
+ sri->icode = (TARGET_64BIT ?
+ CODE_FOR_reloaddi_la_in :
+ CODE_FOR_reloadsi_la_in);
+ else
+ sri->icode = (TARGET_64BIT ?
+ CODE_FOR_reloaddi_la_out :
+ CODE_FOR_reloadsi_la_out);
+ }
+ }
+
if (TARGET_Z10)
{
HOST_WIDE_INT offset;
@@ -3141,17 +4060,15 @@ s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
: CODE_FOR_reloadsi_larl_odd_addend_z10);
- /* On z10 we need a scratch register when moving QI, TI or floating
- point mode values from or to a memory location with a SYMBOL_REF
- or if the symref addend of a SI or DI move is not aligned to the
- width of the access. */
+ /* Handle all the (mem (symref)) accesses we cannot use the z10
+ instructions for. */
if (MEM_P (x)
&& s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
- && (mode == QImode || mode == TImode || FLOAT_MODE_P (mode)
- || (!TARGET_ZARCH && mode == DImode)
- || ((mode == HImode || mode == SImode || mode == DImode)
- && (!s390_check_symref_alignment (XEXP (x, 0),
- GET_MODE_SIZE (mode))))))
+ && (mode == QImode
+ || !reg_class_subset_p (rclass, GENERAL_REGS)
+ || GET_MODE_SIZE (mode) > UNITS_PER_WORD
+ || !s390_check_symref_alignment (XEXP (x, 0),
+ GET_MODE_SIZE (mode))))
{
#define __SECONDARY_RELOAD_CASE(M,m) \
case M##mode: \
@@ -3176,7 +4093,27 @@ s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
__SECONDARY_RELOAD_CASE (SD, sd);
__SECONDARY_RELOAD_CASE (DD, dd);
__SECONDARY_RELOAD_CASE (TD, td);
-
+ __SECONDARY_RELOAD_CASE (V1QI, v1qi);
+ __SECONDARY_RELOAD_CASE (V2QI, v2qi);
+ __SECONDARY_RELOAD_CASE (V4QI, v4qi);
+ __SECONDARY_RELOAD_CASE (V8QI, v8qi);
+ __SECONDARY_RELOAD_CASE (V16QI, v16qi);
+ __SECONDARY_RELOAD_CASE (V1HI, v1hi);
+ __SECONDARY_RELOAD_CASE (V2HI, v2hi);
+ __SECONDARY_RELOAD_CASE (V4HI, v4hi);
+ __SECONDARY_RELOAD_CASE (V8HI, v8hi);
+ __SECONDARY_RELOAD_CASE (V1SI, v1si);
+ __SECONDARY_RELOAD_CASE (V2SI, v2si);
+ __SECONDARY_RELOAD_CASE (V4SI, v4si);
+ __SECONDARY_RELOAD_CASE (V1DI, v1di);
+ __SECONDARY_RELOAD_CASE (V2DI, v2di);
+ __SECONDARY_RELOAD_CASE (V1TI, v1ti);
+ __SECONDARY_RELOAD_CASE (V1SF, v1sf);
+ __SECONDARY_RELOAD_CASE (V2SF, v2sf);
+ __SECONDARY_RELOAD_CASE (V4SF, v4sf);
+ __SECONDARY_RELOAD_CASE (V1DF, v1df);
+ __SECONDARY_RELOAD_CASE (V2DF, v2df);
+ __SECONDARY_RELOAD_CASE (V1TF, v1tf);
default:
gcc_unreachable ();
}
@@ -3215,12 +4152,12 @@ s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
{
if (in_p)
sri->icode = (TARGET_64BIT ?
- CODE_FOR_reloaddi_nonoffmem_in :
- CODE_FOR_reloadsi_nonoffmem_in);
+ CODE_FOR_reloaddi_la_in :
+ CODE_FOR_reloadsi_la_in);
else
sri->icode = (TARGET_64BIT ?
- CODE_FOR_reloaddi_nonoffmem_out :
- CODE_FOR_reloadsi_nonoffmem_out);
+ CODE_FOR_reloaddi_la_out :
+ CODE_FOR_reloadsi_la_out);
}
}
@@ -3847,7 +4784,7 @@ legitimize_tls_address (rtx addr, rtx reg)
new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
temp = gen_reg_rtx (Pmode);
- emit_insn (gen_rtx_SET (Pmode, temp, new_rtx));
+ emit_insn (gen_rtx_SET (temp, new_rtx));
}
else
{
@@ -3864,7 +4801,7 @@ legitimize_tls_address (rtx addr, rtx reg)
new_rtx = gen_const_mem (Pmode, new_rtx);
new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
temp = gen_reg_rtx (Pmode);
- emit_insn (gen_rtx_SET (Pmode, temp, new_rtx));
+ emit_insn (gen_rtx_SET (temp, new_rtx));
}
new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
@@ -4426,7 +5363,7 @@ s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
- temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
+ temp = gen_rtx_SET (pc_rtx, temp);
emit_jump_insn (temp);
s390_load_address (addr0,
@@ -4454,6 +5391,138 @@ s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
return true;
}
+/* Emit a conditional jump to LABEL for condition code mask MASK using
+ comparison operator COMPARISON. Return the emitted jump insn. */
+
+static rtx
+s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
+{
+ rtx temp;
+
+ gcc_assert (comparison == EQ || comparison == NE);
+ gcc_assert (mask > 0 && mask < 15);
+
+ temp = gen_rtx_fmt_ee (comparison, VOIDmode,
+ gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
+ temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
+ gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
+ temp = gen_rtx_SET (pc_rtx, temp);
+ return emit_jump_insn (temp);
+}
+
+/* Emit the instructions to implement strlen of STRING and store the
+ result in TARGET. The string has the known ALIGNMENT. This
+ version uses vector instructions and is therefore not appropriate
+ for targets prior to z13. */
+
+void
+s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
+{
+ int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
+ int very_likely = REG_BR_PROB_BASE - 1;
+ rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
+ rtx str_reg = gen_reg_rtx (V16QImode);
+ rtx str_addr_base_reg = gen_reg_rtx (Pmode);
+ rtx str_idx_reg = gen_reg_rtx (Pmode);
+ rtx result_reg = gen_reg_rtx (V16QImode);
+ rtx is_aligned_label = gen_label_rtx ();
+ rtx into_loop_label = NULL_RTX;
+ rtx loop_start_label = gen_label_rtx ();
+ rtx temp;
+ rtx len = gen_reg_rtx (QImode);
+ rtx cond;
+
+ s390_load_address (str_addr_base_reg, XEXP (string, 0));
+ emit_move_insn (str_idx_reg, const0_rtx);
+
+ if (INTVAL (alignment) < 16)
+ {
+ /* Check whether the address happens to be aligned properly so
+ jump directly to the aligned loop. */
+ emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
+ str_addr_base_reg, GEN_INT (15)),
+ const0_rtx, EQ, NULL_RTX,
+ Pmode, 1, is_aligned_label);
+
+ temp = gen_reg_rtx (Pmode);
+ temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
+ GEN_INT (15), temp, 1, OPTAB_DIRECT);
+ gcc_assert (REG_P (temp));
+ highest_index_to_load_reg =
+ expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
+ highest_index_to_load_reg, 1, OPTAB_DIRECT);
+ gcc_assert (REG_P (highest_index_to_load_reg));
+ emit_insn (gen_vllv16qi (str_reg,
+ convert_to_mode (SImode, highest_index_to_load_reg, 1),
+ gen_rtx_MEM (BLKmode, str_addr_base_reg)));
+
+ into_loop_label = gen_label_rtx ();
+ s390_emit_jump (into_loop_label, NULL_RTX);
+ emit_barrier ();
+ }
+
+ emit_label (is_aligned_label);
+ LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;
+
+ /* Reaching this point we are only performing 16-byte aligned
+ loads. */
+ emit_move_insn (highest_index_to_load_reg, GEN_INT (15));
+
+ emit_label (loop_start_label);
+ LABEL_NUSES (loop_start_label) = 1;
+
+ /* Load 16 bytes of the string into VR. */
+ emit_move_insn (str_reg,
+ gen_rtx_MEM (V16QImode,
+ gen_rtx_PLUS (Pmode, str_idx_reg,
+ str_addr_base_reg)));
+ if (into_loop_label != NULL_RTX)
+ {
+ emit_label (into_loop_label);
+ LABEL_NUSES (into_loop_label) = 1;
+ }
+
+ /* Increment string index by 16 bytes. */
+ expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
+ str_idx_reg, 1, OPTAB_DIRECT);
+
+ emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
+ GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
+
+ add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
+ REG_BR_PROB, very_likely);
+ emit_insn (gen_vec_extractv16qi (len, result_reg, GEN_INT (7)));
+
+ /* If the string pointer wasn't aligned we have loaded less than 16
+ bytes and the remaining bytes got filled with zeros (by vll).
+ Now we have to check whether the resulting index lies within the
+ bytes actually part of the string. */
+
+ cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
+ highest_index_to_load_reg);
+ s390_load_address (highest_index_to_load_reg,
+ gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
+ const1_rtx));
+ if (TARGET_64BIT)
+ emit_insn (gen_movdicc (str_idx_reg, cond,
+ highest_index_to_load_reg, str_idx_reg));
+ else
+ emit_insn (gen_movsicc (str_idx_reg, cond,
+ highest_index_to_load_reg, str_idx_reg));
+
+ add_int_reg_note (s390_emit_jump (is_aligned_label, cond), REG_BR_PROB,
+ very_unlikely);
+
+ expand_binop (Pmode, add_optab, str_idx_reg,
+ GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
+ /* FIXME: len is already zero extended - so avoid the llgcr emitted
+ here. */
+ temp = expand_binop (Pmode, add_optab, str_idx_reg,
+ convert_to_mode (Pmode, len, 1),
+ target, 1, OPTAB_DIRECT);
+ if (temp != target)
+ emit_move_insn (target, temp);
+}
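
The emitted control flow corresponds roughly to the following scalar model (a sketch only, not the generated code; vll stands for the zero-filling load up to a byte boundary and vfene for the search for the first zero byte):

  #include <stddef.h>
  #include <stdint.h>

  static size_t
  vec_strlen_model (const char *str)
  {
    size_t idx = 0, highest = 15;
    int head = ((uintptr_t) str & 15) != 0;   /* unaligned start? */

    if (head)
      highest = 15 - ((uintptr_t) str & 15);  /* vll loads 0..highest */

    for (;;)
      {
        size_t len = 16;                      /* vfene: 16 if no zero */
        for (size_t j = 0; j < 16; j++)
          if (((head && j > highest) ? 0 : str[idx + j]) == 0)
            {
              len = j;
              break;
            }
        idx += 16;
        if (len == 16)                        /* no zero: next chunk */
          {
            head = 0;
            continue;
          }
        if (head && len > highest)            /* zero only in the fill */
          {
            idx = highest + 1;
            head = 0;
            continue;
          }
        return idx - 16 + len;
      }
  }
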
/* Expand conditional increment or decrement using alc/slb instructions.
Should generate code setting DST to either SRC or SRC + INCREMENT,
@@ -4536,7 +5605,7 @@ s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
if (!register_operand (cmp_op0, cmp_mode))
cmp_op0 = force_reg (cmp_mode, cmp_op0);
- insn = gen_rtx_SET (VOIDmode, gen_rtx_REG (cc_mode, CC_REGNUM),
+ insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
/* We use insn_invalid_p here to add clobbers if required. */
ret = insn_invalid_p (emit_insn (insn), false);
@@ -4558,7 +5627,7 @@ s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
p = rtvec_alloc (2);
RTVEC_ELT (p, 0) =
- gen_rtx_SET (VOIDmode, dst, op_res);
+ gen_rtx_SET (dst, op_res);
RTVEC_ELT (p, 1) =
gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
@@ -4608,7 +5677,7 @@ s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
if (!register_operand (cmp_op0, cmp_mode))
cmp_op0 = force_reg (cmp_mode, cmp_op0);
- insn = gen_rtx_SET (VOIDmode, gen_rtx_REG (cc_mode, CC_REGNUM),
+ insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
/* We use insn_invalid_p here to add clobbers if required. */
ret = insn_invalid_p (emit_insn (insn), false);
@@ -4625,7 +5694,7 @@ s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
const0_rtx));
p = rtvec_alloc (2);
RTVEC_ELT (p, 0) =
- gen_rtx_SET (VOIDmode, dst, op_res);
+ gen_rtx_SET (dst, op_res);
RTVEC_ELT (p, 1) =
gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
@@ -4753,7 +5822,7 @@ s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
{
op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
- op = gen_rtx_SET (VOIDmode, op, gen_lowpart (smode, src));
+ op = gen_rtx_SET (op, gen_lowpart (smode, src));
clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
return true;
@@ -4781,7 +5850,7 @@ s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
}
op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2),
- op = gen_rtx_SET (VOIDmode, op, src);
+ op = gen_rtx_SET (op, src);
if (!TARGET_ZEC12)
{
@@ -4808,6 +5877,302 @@ s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
NULL_RTX, 1, OPTAB_DIRECT);
}
+/* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
+ the result in TARGET. */
+
+void
+s390_expand_vec_compare (rtx target, enum rtx_code cond,
+ rtx cmp_op1, rtx cmp_op2)
+{
+ machine_mode mode = GET_MODE (target);
+ bool neg_p = false, swap_p = false;
+ rtx tmp;
+
+ if (GET_MODE (cmp_op1) == V2DFmode)
+ {
+ switch (cond)
+ {
+ /* NE a != b -> !(a == b) */
+ case NE: cond = EQ; neg_p = true; break;
+ /* UNGT a u> b -> !(b >= a) */
+ case UNGT: cond = GE; neg_p = true; swap_p = true; break;
+ /* UNGE a u>= b -> !(b > a) */
+ case UNGE: cond = GT; neg_p = true; swap_p = true; break;
+ /* LE: a <= b -> b >= a */
+ case LE: cond = GE; swap_p = true; break;
+ /* UNLE: a u<= b -> !(a > b) */
+ case UNLE: cond = GT; neg_p = true; break;
+ /* LT: a < b -> b > a */
+ case LT: cond = GT; swap_p = true; break;
+ /* UNLT: a u< b -> !(a >= b) */
+ case UNLT: cond = GE; neg_p = true; break;
+ case UNEQ:
+ emit_insn (gen_vec_cmpuneqv2df (target, cmp_op1, cmp_op2));
+ return;
+ case LTGT:
+ emit_insn (gen_vec_cmpltgtv2df (target, cmp_op1, cmp_op2));
+ return;
+ case ORDERED:
+ emit_insn (gen_vec_orderedv2df (target, cmp_op1, cmp_op2));
+ return;
+ case UNORDERED:
+ emit_insn (gen_vec_unorderedv2df (target, cmp_op1, cmp_op2));
+ return;
+ default: break;
+ }
+ }
+ else
+ {
+ switch (cond)
+ {
+ /* NE: a != b -> !(a == b) */
+ case NE: cond = EQ; neg_p = true; break;
+ /* GE: a >= b -> !(b > a) */
+ case GE: cond = GT; neg_p = true; swap_p = true; break;
+ /* GEU: a >= b -> !(b > a) */
+ case GEU: cond = GTU; neg_p = true; swap_p = true; break;
+ /* LE: a <= b -> !(a > b) */
+ case LE: cond = GT; neg_p = true; break;
+ /* LEU: a <= b -> !(a > b) */
+ case LEU: cond = GTU; neg_p = true; break;
+ /* LT: a < b -> b > a */
+ case LT: cond = GT; swap_p = true; break;
+ /* LTU: a < b -> b > a */
+ case LTU: cond = GTU; swap_p = true; break;
+ default: break;
+ }
+ }
+
+ if (swap_p)
+ {
+ tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
+ }
+
+ emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (cond,
+ mode,
+ cmp_op1, cmp_op2)));
+ if (neg_p)
+ emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mode, target)));
+}
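
For example, an integer vector GE has no direct compare instruction, so the table above rewrites it (cond = GT, swap_p = true, neg_p = true) into a swapped compare plus a complement:

  /* a >= b  becomes:  target = (b > a);  target = ~target;  */
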
+
+/* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
+ TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
+ elements in CMP1 and CMP2 fulfill the comparison. */
+void
+s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
+ rtx cmp1, rtx cmp2, bool all_p)
+{
+ enum rtx_code new_code = code;
+ machine_mode cmp_mode, full_cmp_mode, scratch_mode;
+ rtx tmp_reg = gen_reg_rtx (SImode);
+ bool swap_p = false;
+
+ if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT)
+ {
+ switch (code)
+ {
+ case EQ: cmp_mode = CCVEQmode; break;
+ case NE: cmp_mode = CCVEQmode; break;
+ case GT: cmp_mode = CCVHmode; break;
+ case GE: cmp_mode = CCVHmode; new_code = LE; swap_p = true; break;
+ case LT: cmp_mode = CCVHmode; new_code = GT; swap_p = true; break;
+ case LE: cmp_mode = CCVHmode; new_code = LE; break;
+ case GTU: cmp_mode = CCVHUmode; break;
+ case GEU: cmp_mode = CCVHUmode; new_code = LEU; swap_p = true; break;
+ case LTU: cmp_mode = CCVHUmode; new_code = GTU; swap_p = true; break;
+ case LEU: cmp_mode = CCVHUmode; new_code = LEU; break;
+ default: gcc_unreachable ();
+ }
+ scratch_mode = GET_MODE (cmp1);
+ }
+ else if (GET_MODE (cmp1) == V2DFmode)
+ {
+ switch (code)
+ {
+ case EQ: cmp_mode = CCVEQmode; break;
+ case NE: cmp_mode = CCVEQmode; break;
+ case GT: cmp_mode = CCVFHmode; break;
+ case GE: cmp_mode = CCVFHEmode; break;
+ case UNLE: cmp_mode = CCVFHmode; break;
+ case UNLT: cmp_mode = CCVFHEmode; break;
+ case LT: cmp_mode = CCVFHmode; new_code = GT; swap_p = true; break;
+ case LE: cmp_mode = CCVFHEmode; new_code = GE; swap_p = true; break;
+ default: gcc_unreachable ();
+ }
+ scratch_mode = V2DImode;
+ }
+ else
+ gcc_unreachable ();
+
+ if (!all_p)
+ switch (cmp_mode)
+ {
+ case CCVEQmode: full_cmp_mode = CCVEQANYmode; break;
+ case CCVHmode: full_cmp_mode = CCVHANYmode; break;
+ case CCVHUmode: full_cmp_mode = CCVHUANYmode; break;
+ case CCVFHmode: full_cmp_mode = CCVFHANYmode; break;
+ case CCVFHEmode: full_cmp_mode = CCVFHEANYmode; break;
+ default: gcc_unreachable ();
+ }
+ else
+ /* The modes without ANY match the ALL modes. */
+ full_cmp_mode = cmp_mode;
+
+ if (swap_p)
+ {
+ rtx tmp = cmp2;
+ cmp2 = cmp1;
+ cmp1 = tmp;
+ }
+
+ emit_insn (gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (2, gen_rtx_SET (
+ gen_rtx_REG (cmp_mode, CC_REGNUM),
+ gen_rtx_COMPARE (cmp_mode, cmp1, cmp2)),
+ gen_rtx_CLOBBER (VOIDmode,
+ gen_rtx_SCRATCH (scratch_mode)))));
+ emit_move_insn (target, const0_rtx);
+ emit_move_insn (tmp_reg, const1_rtx);
+
+ emit_move_insn (target,
+ gen_rtx_IF_THEN_ELSE (SImode,
+ gen_rtx_fmt_ee (new_code, VOIDmode,
+ gen_rtx_REG (full_cmp_mode, CC_REGNUM),
+ const0_rtx),
+ target, tmp_reg));
+}
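
A short example of the ALL_P distinction, grounded in the branch mask table earlier in this patch:

  /* all_p  (e.g. vec_all_eq): full_cmp_mode = CCVEQmode and EQ is
       satisfied by CC0 only ("all elements equal").
     !all_p (e.g. vec_any_eq): full_cmp_mode = CCVEQANYmode and EQ is
       satisfied by CC0 | CC1, so "some but not all elements equal"
       counts as a match as well. */
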
+
+/* Generate a vector comparison expression loading either elements of
+ THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
+ and CMP_OP2. */
+
+void
+s390_expand_vcond (rtx target, rtx then, rtx els,
+ enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
+{
+ rtx tmp;
+ machine_mode result_mode;
+ rtx result_target;
+
+ /* We always use an integral type vector to hold the comparison
+ result. */
+ result_mode = GET_MODE (cmp_op1) == V2DFmode ? V2DImode : GET_MODE (cmp_op1);
+ result_target = gen_reg_rtx (result_mode);
+
+ /* Alternatively this could be done by reload by lowering the cmp*
+ predicates. But it appears to be better for scheduling etc. to
+ have that done early. */
+ if (!REG_P (cmp_op1))
+ cmp_op1 = force_reg (GET_MODE (target), cmp_op1);
+
+ if (!REG_P (cmp_op2))
+ cmp_op2 = force_reg (GET_MODE (target), cmp_op2);
+
+ s390_expand_vec_compare (result_target, cond,
+ cmp_op1, cmp_op2);
+
+ /* If the results are supposed to be either -1 or 0 we are done
+ since this is what our compare instructions generate anyway. */
+ if (constm1_operand (then, GET_MODE (then))
+ && const0_operand (els, GET_MODE (els)))
+ {
+ emit_move_insn (target, gen_rtx_SUBREG (GET_MODE (target),
+ result_target, 0));
+ return;
+ }
+
+ /* Otherwise we will do a vsel afterwards. */
+ /* This gets triggered e.g.
+ with gcc.c-torture/compile/pr53410-1.c */
+ if (!REG_P (then))
+ then = force_reg (GET_MODE (target), then);
+
+ if (!REG_P (els))
+ els = force_reg (GET_MODE (target), els);
+
+ tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
+ result_target,
+ CONST0_RTX (result_mode));
+
+ /* We compared the result against zero above so we have to swap then
+ and els here. */
+ tmp = gen_rtx_IF_THEN_ELSE (GET_MODE (target), tmp, els, then);
+
+ gcc_assert (GET_MODE (target) == GET_MODE (then));
+ emit_insn (gen_rtx_SET (target, tmp));
+}
+
+/* Emit the RTX necessary to initialize the vector TARGET with values
+ in VALS. */
+void
+s390_expand_vec_init (rtx target, rtx vals)
+{
+ machine_mode mode = GET_MODE (target);
+ machine_mode inner_mode = GET_MODE_INNER (mode);
+ int n_elts = GET_MODE_NUNITS (mode);
+ bool all_same = true, all_regs = true, all_const_int = true;
+ rtx x;
+ int i;
+
+ for (i = 0; i < n_elts; ++i)
+ {
+ x = XVECEXP (vals, 0, i);
+
+ if (!CONST_INT_P (x))
+ all_const_int = false;
+
+ if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
+ all_same = false;
+
+ if (!REG_P (x))
+ all_regs = false;
+ }
+
+ /* Use vector gen mask or vector gen byte mask if possible. */
+ if (all_same && all_const_int
+ && (XVECEXP (vals, 0, 0) == const0_rtx
+ || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
+ NULL, NULL)
+ || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
+ {
+ emit_insn (gen_rtx_SET (target,
+ gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))));
+ return;
+ }
+
+ if (all_same)
+ {
+ emit_insn (gen_rtx_SET (target,
+ gen_rtx_VEC_DUPLICATE (mode,
+ XVECEXP (vals, 0, 0))));
+ return;
+ }
+
+ if (all_regs && REG_P (target) && n_elts == 2 && inner_mode == DImode)
+ {
+ /* Use vector load pair. */
+ emit_insn (gen_rtx_SET (target,
+ gen_rtx_VEC_CONCAT (mode,
+ XVECEXP (vals, 0, 0),
+ XVECEXP (vals, 0, 1))));
+ return;
+ }
+
+ /* We are about to set the vector elements one by one. Zero out the
+ full register first in order to help the data flow framework to
+ detect it as a full VR set. */
+ emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
+
+ /* Unfortunately the vec_init expander is not allowed to fail. So
+ we have to implement the fallback ourselves. */
+ for (i = 0; i < n_elts; i++)
+ emit_insn (gen_rtx_SET (target,
+ gen_rtx_UNSPEC (mode,
+ gen_rtvec (3, XVECEXP (vals, 0, i),
+ GEN_INT (i), target),
+ UNSPEC_VEC_SET)));
+}
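+
+/* A worked example of the strategy selection above, where a, b and x
+ stand for arbitrary values: for a V4SImode target,
+ { 0, 0, 0, 0 } is emitted as a single constant-vector load,
+ { x, x, x, x } becomes a VEC_DUPLICATE of x,
+ { a, b } in V2DImode with both elements in registers takes the
+ vector load pair path, and
+ anything else zeroes the VR and falls back to the element-wise
+ UNSPEC_VEC_SET loop. */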
+
/* Structure to hold the initial parameters for a compare_and_swap operation
in HImode and QImode. */
@@ -5103,12 +6468,37 @@ s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
fputs ("@DTPOFF", file);
}
+/* Return the mode to be used when representing REGNO in the DWARF
+ unwind table. */
+machine_mode
+s390_dwarf_frame_reg_mode (int regno)
+{
+ machine_mode save_mode = default_dwarf_frame_reg_mode (regno);
+
+ /* The rightmost 64 bits of vector registers are call-clobbered. */
+ if (GET_MODE_SIZE (save_mode) > 8)
+ save_mode = DImode;
+
+ return save_mode;
+}
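+
+/* For example, a vector register tracked in V16QImode (16 bytes) is
+ represented as DImode here: since the rightmost 64 bits are
+ call-clobbered, only the leftmost 64 bits can meaningfully be
+ described in the unwind table. */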
+
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
/* Implement TARGET_MANGLE_TYPE. */
static const char *
s390_mangle_type (const_tree type)
{
+ type = TYPE_MAIN_VARIANT (type);
+
+ if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
+ && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
+ return NULL;
+
+ if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc";
+ if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools";
+ if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli";
+ if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll";
+
if (TYPE_MAIN_VARIANT (type) == long_double_type_node
&& TARGET_LONG_DOUBLE_128)
return "g";
@@ -5429,24 +6819,26 @@ print_operand_address (FILE *file, rtx addr)
'J': print tls_load/tls_gdcall/tls_ldcall suffix
'M': print the second word of a TImode operand.
'N': print the second word of a DImode operand.
- 'O': print only the displacement of a memory reference.
- 'R': print only the base register of a memory reference.
+ 'O': print only the displacement of a memory reference or address.
+ 'R': print only the base register of a memory reference or address.
'S': print S-type memory reference (base+displacement).
'Y': print shift count operand.
'b': print integer X as if it's an unsigned byte.
 'c': print integer X as if it's a signed byte.
- 'e': "end" of DImode contiguous bitmask X.
- 'f': "end" of SImode contiguous bitmask X.
+ 'e': "end" of contiguous bitmask X in either DImode or vector inner mode.
+ 'f': "end" of contiguous bitmask X in SImode.
'h': print integer X as if it's a signed halfword.
'i': print the first nonzero HImode part of X.
'j': print the first HImode part unequal to -1 of X.
'k': print the first nonzero SImode part of X.
'm': print the first SImode part unequal to -1 of X.
'o': print integer X as if it's an unsigned 32bit word.
- 's': "start" of DImode contiguous bitmask X.
- 't': "start" of SImode contiguous bitmask X.
+ 's': "start" of contiguous bitmask X in either DImode or vector inner mode.
+ 't': CONST_INT: "start" of contiguous bitmask X in SImode.
+ CONST_VECTOR: Generate a bitmask for vgbm instruction.
'x': print integer X as if it's an unsigned halfword.
+ 'v': print register number as vector register (v1 instead of f1).
*/
void
@@ -5505,14 +6897,7 @@ print_operand (FILE *file, rtx x, int code)
struct s390_address ad;
int ret;
- if (!MEM_P (x))
- {
- output_operand_lossage ("memory reference expected for "
- "'O' output modifier");
- return;
- }
-
- ret = s390_decompose_address (XEXP (x, 0), &ad);
+ ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
if (!ret
|| (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
@@ -5534,14 +6919,7 @@ print_operand (FILE *file, rtx x, int code)
struct s390_address ad;
int ret;
- if (!MEM_P (x))
- {
- output_operand_lossage ("memory reference expected for "
- "'R' output modifier");
- return;
- }
-
- ret = s390_decompose_address (XEXP (x, 0), &ad);
+ ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
if (!ret
|| (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
@@ -5619,7 +6997,17 @@ print_operand (FILE *file, rtx x, int code)
switch (GET_CODE (x))
{
case REG:
- fprintf (file, "%s", reg_names[REGNO (x)]);
+ /* Print FP regs as fx instead of vx when they are accessed
+ through non-vector mode. */
+ if (code == 'v'
+ || VECTOR_NOFP_REG_P (x)
+ || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
+ || (VECTOR_REG_P (x)
+ && (GET_MODE_SIZE (GET_MODE (x)) /
+ s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
+ fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2);
+ else
+ fprintf (file, "%s", reg_names[REGNO (x)]);
break;
case MEM:
@@ -5706,6 +7094,39 @@ print_operand (FILE *file, rtx x, int code)
code);
}
break;
+ case CONST_VECTOR:
+ switch (code)
+ {
+ case 'e':
+ case 's':
+ {
+ int start, stop, inner_len;
+ bool ok;
+
+ inner_len = GET_MODE_UNIT_BITSIZE (GET_MODE (x));
+ ok = s390_contiguous_bitmask_vector_p (x, &start, &stop);
+ gcc_assert (ok);
+ if (code == 's' || code == 't')
+ ival = inner_len - stop - 1;
+ else
+ ival = inner_len - start - 1;
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
+ }
+ break;
+ case 't':
+ {
+ unsigned mask;
+ bool ok = s390_bytemask_vector_p (x, &mask);
+ gcc_assert (ok);
+ fprintf (file, "%u", mask);
+ }
+ break;
+
+ default:
+ output_operand_lossage ("invalid constant vector for output "
+ "modifier '%c'", code);
+ }
+ break;
default:
if (code == 0)
@@ -5853,7 +7274,8 @@ s390_adjust_priority (rtx_insn *insn, int priority)
&& s390_tune != PROCESSOR_2094_Z9_109
&& s390_tune != PROCESSOR_2097_Z10
&& s390_tune != PROCESSOR_2817_Z196
- && s390_tune != PROCESSOR_2827_ZEC12)
+ && s390_tune != PROCESSOR_2827_ZEC12
+ && s390_tune != PROCESSOR_2964_Z13)
return priority;
switch (s390_safe_attr_type (insn))
@@ -5885,8 +7307,12 @@ s390_issue_rate (void)
case PROCESSOR_2817_Z196:
return 3;
case PROCESSOR_2097_Z10:
- case PROCESSOR_2827_ZEC12:
return 2;
+ /* Starting with EC12 we use the sched_reorder hook to take care
+ of instruction dispatch constraints. The algorithm picks the
+ best instruction and assumes that only a single instruction
+ gets issued per cycle. */
+ case PROCESSOR_2827_ZEC12:
default:
return 1;
}
@@ -6057,7 +7483,8 @@ s390_split_branches (void)
{
new_literal = 1;
rtx mem = force_const_mem (Pmode, *label);
- rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (Pmode, temp_reg, mem), insn);
+ rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, mem),
+ insn);
INSN_ADDRESSES_NEW (set_insn, -1);
annotate_constant_pool_refs (&PATTERN (set_insn));
@@ -6070,7 +7497,8 @@ s390_split_branches (void)
UNSPEC_LTREL_OFFSET);
target = gen_rtx_CONST (Pmode, target);
target = force_const_mem (Pmode, target);
- rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (Pmode, temp_reg, target), insn);
+ rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, target),
+ insn);
INSN_ADDRESSES_NEW (set_insn, -1);
annotate_constant_pool_refs (&PATTERN (set_insn));
@@ -6258,14 +7686,19 @@ replace_ltrel_base (rtx *x)
/* We keep a list of constants which we have to add to internal
constant tables in the middle of large functions. */
-#define NR_C_MODES 11
+#define NR_C_MODES 31
machine_mode constant_modes[NR_C_MODES] =
{
TFmode, TImode, TDmode,
+ V16QImode, V8HImode, V4SImode, V2DImode, V4SFmode, V2DFmode, V1TFmode,
DFmode, DImode, DDmode,
+ V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode,
SFmode, SImode, SDmode,
+ V4QImode, V2HImode, V1SImode, V1SFmode,
HImode,
- QImode
+ V2QImode, V1HImode,
+ QImode,
+ V1QImode
};
struct constant
@@ -7280,6 +8713,23 @@ s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
mark_symbol_refs_as_used (exp);
break;
+ case MODE_VECTOR_INT:
+ case MODE_VECTOR_FLOAT:
+ {
+ int i;
+ machine_mode inner_mode;
+ gcc_assert (GET_CODE (exp) == CONST_VECTOR);
+
+ inner_mode = GET_MODE_INNER (GET_MODE (exp));
+ for (i = 0; i < XVECLEN (exp, 0); i++)
+ s390_output_pool_entry (XVECEXP (exp, 0, i),
+ inner_mode,
+ i == 0
+ ? align
+ : GET_MODE_BITSIZE (inner_mode));
+ }
+ break;
+
default:
gcc_unreachable ();
}
@@ -7894,7 +9344,10 @@ s390_init_frame_layout (void)
HOST_WIDE_INT frame_size;
int base_used;
- gcc_assert (!reload_completed);
+ /* After LRA the frame layout is supposed to be read-only and should
+ not be re-computed. */
+ if (reload_completed)
+ return;
/* On S/390 machines, we may need to perform branch splitting, which
will require both base and return address register. We have no
@@ -8091,9 +9544,25 @@ s390_optimize_nonescaping_tx (void)
bool
s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
+ if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
+ return false;
+
switch (REGNO_REG_CLASS (regno))
{
+ case VEC_REGS:
+ return ((GET_MODE_CLASS (mode) == MODE_INT
+ && s390_class_max_nregs (VEC_REGS, mode) == 1)
+ || mode == DFmode
+ || s390_vector_mode_supported_p (mode));
+ break;
case FP_REGS:
+ if (TARGET_VX
+ && ((GET_MODE_CLASS (mode) == MODE_INT
+ && s390_class_max_nregs (FP_REGS, mode) == 1)
+ || mode == DFmode
+ || s390_vector_mode_supported_p (mode)))
+ return true;
+
if (REGNO_PAIR_OK (regno, mode))
{
if (mode == SImode || mode == DImode)
@@ -8180,19 +9649,86 @@ s390_hard_regno_scratch_ok (unsigned int regno)
int
s390_class_max_nregs (enum reg_class rclass, machine_mode mode)
{
+ int reg_size;
+ bool reg_pair_required_p = false;
+
switch (rclass)
{
case FP_REGS:
+ case VEC_REGS:
+ reg_size = TARGET_VX ? 16 : 8;
+
+ /* TF and TD modes would fit into a VR but we put them into a
+ register pair since we do not have 128bit FP instructions on
+ full VRs. */
+ if (TARGET_VX
+ && SCALAR_FLOAT_MODE_P (mode)
+ && GET_MODE_SIZE (mode) >= 16)
+ reg_pair_required_p = true;
+
+ /* Even if complex types would fit into a single FPR/VR we force
+ them into a register pair to deal with the parts more easily.
+ (FIXME: What about complex ints?) */
if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
- return 2 * ((GET_MODE_SIZE (mode) / 2 + 8 - 1) / 8);
- else
- return (GET_MODE_SIZE (mode) + 8 - 1) / 8;
+ reg_pair_required_p = true;
+ break;
case ACCESS_REGS:
- return (GET_MODE_SIZE (mode) + 4 - 1) / 4;
+ reg_size = 4;
+ break;
default:
+ reg_size = UNITS_PER_WORD;
break;
}
- return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+
+ if (reg_pair_required_p)
+ return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);
+
+ return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
+}
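+
+/* A worked example of the above: TFmode (16 bytes) in VEC_REGS with
+ TARGET_VX has reg_size 16 and requires a register pair, giving
+ 2 * ((16/2 + 15) / 16) = 2 registers; without TARGET_VX, reg_size
+ is 8 and plain rounding gives (16 + 7) / 8 = 2 as well. */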
+
+/* Return TRUE if changing mode from FROM to TO should not be allowed
+ for register class CLASS. */
+
+int
+s390_cannot_change_mode_class (machine_mode from_mode,
+ machine_mode to_mode,
+ enum reg_class rclass)
+{
+ machine_mode small_mode;
+ machine_mode big_mode;
+
+ if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
+ return 0;
+
+ if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
+ {
+ small_mode = from_mode;
+ big_mode = to_mode;
+ }
+ else
+ {
+ small_mode = to_mode;
+ big_mode = from_mode;
+ }
+
+ /* Values residing in VRs are little-endian style. All modes are
+ placed left-aligned in a VR. This means that we cannot allow
+ switching between modes with differing sizes. Also if the vector
+ facility is available we still place TFmode values in VR register
+ pairs, since the only instructions we have operating on TFmodes
+ only deal with register pairs. Therefore we have to allow DFmode
+ subregs of TFmodes to enable the TFmode splitters. */
+ if (reg_classes_intersect_p (VEC_REGS, rclass)
+ && (GET_MODE_SIZE (small_mode) < 8
+ || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
+ return 1;
+
+ /* Likewise for access registers, since they have only half the
+ word size on 64-bit. */
+ if (reg_classes_intersect_p (ACCESS_REGS, rclass))
+ return 1;
+
+ return 0;
}
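+
+/* Concretely: a DFmode subreg of a TFmode value in VEC_REGS remains
+ valid since TFmode occupies a register pair, while e.g. an SImode
+ subreg of a V4SImode value is rejected because the smaller mode is
+ narrower than 8 bytes. */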
/* Return true if we use LRA instead of reload pass. */
@@ -8844,7 +10380,7 @@ s390_emit_prologue (void)
if (DISP_IN_RANGE (INTVAL (frame_off)))
{
- insn = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ insn = gen_rtx_SET (stack_pointer_rtx,
gen_rtx_PLUS (Pmode, stack_pointer_rtx,
frame_off));
insn = emit_insn (insn);
@@ -8861,7 +10397,7 @@ s390_emit_prologue (void)
RTX_FRAME_RELATED_P (insn) = 1;
real_frame_off = GEN_INT (-cfun_frame_layout.frame_size);
add_reg_note (insn, REG_FRAME_RELATED_EXPR,
- gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ gen_rtx_SET (stack_pointer_rtx,
gen_rtx_PLUS (Pmode, stack_pointer_rtx,
real_frame_off)));
@@ -8915,8 +10451,7 @@ s390_emit_prologue (void)
offset += 8;
RTX_FRAME_RELATED_P (insn) = 1;
add_reg_note (insn, REG_FRAME_RELATED_EXPR,
- gen_rtx_SET (VOIDmode,
- gen_rtx_MEM (DFmode, addr),
+ gen_rtx_SET (gen_rtx_MEM (DFmode, addr),
gen_rtx_REG (DFmode, i)));
}
}
@@ -9006,11 +10541,11 @@ s390_emit_epilogue (bool sibcall)
offset = area_bottom < 0 ? -area_bottom : 0;
frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
- cfa = gen_rtx_SET (VOIDmode, frame_pointer,
+ cfa = gen_rtx_SET (frame_pointer,
gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
if (DISP_IN_RANGE (INTVAL (frame_off)))
{
- insn = gen_rtx_SET (VOIDmode, frame_pointer,
+ insn = gen_rtx_SET (frame_pointer,
gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
insn = emit_insn (insn);
}
@@ -9224,6 +10759,23 @@ s390_can_use_return_insn (void)
return cfun_frame_layout.frame_size == 0;
}
+/* The VX ABI differs for vararg functions. Therefore we need the
+ prototype of the callee to be available when passing vector type
+ values. */
+static const char *
+s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
+{
+ return ((TARGET_VX_ABI
+ && typelist == 0
+ && VECTOR_TYPE_P (TREE_TYPE (val))
+ && (funcdecl == NULL_TREE
+ || (TREE_CODE (funcdecl) == FUNCTION_DECL
+ && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
+ ? N_("Vector argument passed to unprototyped function")
+ : NULL);
+}
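+
+/* A minimal sketch of the case being diagnosed, using a hypothetical
+ vector typedef:
+ typedef int v4si __attribute__ ((vector_size (16)));
+ extern int bar (); // no prototype
+ int foo (v4si x) { return bar (x); } // diagnosed under the VX ABI */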
+
/* Return the size in bytes of a function argument of
type TYPE and/or mode MODE. At least one of TYPE or
MODE must be specified. */
@@ -9243,13 +10795,61 @@ s390_function_arg_size (machine_mode mode, const_tree type)
}
/* Return true if a function argument of type TYPE and mode MODE
+ is to be passed in a vector register, if available. */
+
+bool
+s390_function_arg_vector (machine_mode mode, const_tree type)
+{
+ if (!TARGET_VX_ABI)
+ return false;
+
+ if (s390_function_arg_size (mode, type) > 16)
+ return false;
+
+ /* No type info available for some library calls ... */
+ if (!type)
+ return VECTOR_MODE_P (mode);
+
+ /* The ABI says that record types with a single member are treated
+ just like that member would be. */
+ while (TREE_CODE (type) == RECORD_TYPE)
+ {
+ tree field, single = NULL_TREE;
+
+ for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
+ {
+ if (TREE_CODE (field) != FIELD_DECL)
+ continue;
+
+ if (single == NULL_TREE)
+ single = TREE_TYPE (field);
+ else
+ return false;
+ }
+
+ if (single == NULL_TREE)
+ return false;
+ else
+ {
+ /* If the field declaration adds extra bytes due to
+ e.g. padding, this is not accepted as a vector type. */
+ if (int_size_in_bytes (single) <= 0
+ || int_size_in_bytes (single) != int_size_in_bytes (type))
+ return false;
+ type = single;
+ }
+ }
+
+ return VECTOR_TYPE_P (type);
+}
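+
+/* For example, with a hypothetical
+ typedef int v4si __attribute__ ((vector_size (16)));
+ struct s { v4si x; };
+ a struct s argument is passed exactly like a plain v4si, whereas a
+ record with a second member or extra padding bytes is not treated
+ as a vector argument. */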
+
+/* Return true if a function argument of type TYPE and mode MODE
is to be passed in a floating-point register, if available. */
static bool
s390_function_arg_float (machine_mode mode, const_tree type)
{
- int size = s390_function_arg_size (mode, type);
- if (size > 8)
+ if (s390_function_arg_size (mode, type) > 8)
return false;
/* Soft-float changes the ABI: no floating-point registers are used. */
@@ -9332,20 +10932,24 @@ s390_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
bool named ATTRIBUTE_UNUSED)
{
int size = s390_function_arg_size (mode, type);
+
+ if (s390_function_arg_vector (mode, type))
+ return false;
+
if (size > 8)
return true;
if (type)
{
if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
- return 1;
+ return true;
if (TREE_CODE (type) == COMPLEX_TYPE
|| TREE_CODE (type) == VECTOR_TYPE)
- return 1;
+ return true;
}
- return 0;
+ return false;
}
/* Update the data in CUM to advance over an argument of mode MODE and
@@ -9356,11 +10960,21 @@ s390_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
static void
s390_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
- const_tree type, bool named ATTRIBUTE_UNUSED)
+ const_tree type, bool named)
{
CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
- if (s390_function_arg_float (mode, type))
+ if (s390_function_arg_vector (mode, type))
+ {
+ /* We are called for unnamed vector stdarg arguments which are
+ passed on the stack. In this case this hook does not have to
+ do anything since stack arguments are tracked by common
+ code. */
+ if (!named)
+ return;
+ cum->vrs += 1;
+ }
+ else if (s390_function_arg_float (mode, type))
{
cum->fprs += 1;
}
@@ -9394,14 +11008,26 @@ s390_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
static rtx
s390_function_arg (cumulative_args_t cum_v, machine_mode mode,
- const_tree type, bool named ATTRIBUTE_UNUSED)
+ const_tree type, bool named)
{
CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
- if (s390_function_arg_float (mode, type))
+ if (!named)
+ s390_check_type_for_vector_abi (type, true, false);
+
+ if (s390_function_arg_vector (mode, type))
+ {
+ /* Vector arguments being part of the ellipsis are passed on the
+ stack. */
+ if (!named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
+ return NULL_RTX;
+
+ return gen_rtx_REG (mode, cum->vrs + FIRST_VEC_ARG_REGNO);
+ }
+ else if (s390_function_arg_float (mode, type))
{
if (cum->fprs + 1 > FP_ARG_NUM_REG)
- return 0;
+ return NULL_RTX;
else
return gen_rtx_REG (mode, cum->fprs + 16);
}
@@ -9411,7 +11037,7 @@ s390_function_arg (cumulative_args_t cum_v, machine_mode mode,
int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
- return 0;
+ return NULL_RTX;
else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
return gen_rtx_REG (mode, cum->gprs + 2);
else if (n_gprs == 2)
@@ -9454,11 +11080,17 @@ s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
|| TREE_CODE (type) == REAL_TYPE)
return int_size_in_bytes (type) > 8;
+ /* vector types which fit into a VR. */
+ if (TARGET_VX_ABI
+ && VECTOR_TYPE_P (type)
+ && int_size_in_bytes (type) <= 16)
+ return false;
+
/* Aggregates and similar constructs are always returned
in memory. */
if (AGGREGATE_TYPE_P (type)
|| TREE_CODE (type) == COMPLEX_TYPE
- || TREE_CODE (type) == VECTOR_TYPE)
+ || VECTOR_TYPE_P (type))
return true;
/* ??? We get called on all sorts of random stuff from
@@ -9496,6 +11128,12 @@ s390_function_and_libcall_value (machine_mode mode,
const_tree fntype_or_decl,
bool outgoing ATTRIBUTE_UNUSED)
{
+ /* For vector return types it is important to use the RET_TYPE
+ argument whenever available since the middle-end might have
+ changed the mode to a scalar mode. */
+ bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type))
+ || (!ret_type && VECTOR_MODE_P (mode)));
+
/* For normal functions perform the promotion as
promote_function_mode would do. */
if (ret_type)
@@ -9505,10 +11143,14 @@ s390_function_and_libcall_value (machine_mode mode,
fntype_or_decl, 1);
}
- gcc_assert (GET_MODE_CLASS (mode) == MODE_INT || SCALAR_FLOAT_MODE_P (mode));
- gcc_assert (GET_MODE_SIZE (mode) <= 8);
+ gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
+ || SCALAR_FLOAT_MODE_P (mode)
+ || (TARGET_VX_ABI && vector_ret_type_p));
+ gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));
- if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
+ if (TARGET_VX_ABI && vector_ret_type_p)
+ return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
+ else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
return gen_rtx_REG (mode, 16);
else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
|| UNITS_PER_LONG == UNITS_PER_WORD)
@@ -9672,9 +11314,13 @@ s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
- /* Find the overflow area. */
+ /* Find the overflow area.
+ FIXME: This currently is too pessimistic when the vector ABI is
+ enabled. In that case we *always* set up the overflow area
+ pointer. */
if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
- || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG)
+ || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
+ || TARGET_VX_ABI)
{
t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
@@ -9716,6 +11362,9 @@ s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
ret = args.reg_save_area[args.gpr+8]
else
ret = *args.overflow_arg_area++;
+ } else if (vector value) {
+ ret = *args.overflow_arg_area;
+ args.overflow_arg_area += size / 8;
} else if (float value) {
if (args.fgpr < 2)
ret = args.reg_save_area[args.fpr+64]
@@ -9735,14 +11384,16 @@ s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
tree f_gpr, f_fpr, f_ovf, f_sav;
tree gpr, fpr, ovf, sav, reg, t, u;
int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
- tree lab_false, lab_over, addr;
+ tree lab_false, lab_over;
+ tree addr = create_tmp_var (ptr_type_node, "addr");
+ bool left_align_p; /* Whether a value < UNITS_PER_LONG is left-aligned
+ within its stack slot. */
f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
f_fpr = DECL_CHAIN (f_gpr);
f_ovf = DECL_CHAIN (f_fpr);
f_sav = DECL_CHAIN (f_ovf);
- valist = build_va_arg_indirect_ref (valist);
gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
@@ -9754,6 +11405,8 @@ s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
size = int_size_in_bytes (type);
+ s390_check_type_for_vector_abi (type, true, false);
+
if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
{
if (TARGET_DEBUG_ARG)
@@ -9774,6 +11427,23 @@ s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
sav_scale = UNITS_PER_LONG;
size = UNITS_PER_LONG;
max_reg = GP_ARG_NUM_REG - n_reg;
+ left_align_p = false;
+ }
+ else if (s390_function_arg_vector (TYPE_MODE (type), type))
+ {
+ if (TARGET_DEBUG_ARG)
+ {
+ fprintf (stderr, "va_arg: vector type");
+ debug_tree (type);
+ }
+
+ indirect_p = 0;
+ reg = NULL_TREE;
+ n_reg = 0;
+ sav_ofs = 0;
+ sav_scale = 8;
+ max_reg = 0;
+ left_align_p = true;
}
else if (s390_function_arg_float (TYPE_MODE (type), type))
{
@@ -9790,6 +11460,7 @@ s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
sav_ofs = 16 * UNITS_PER_LONG;
sav_scale = 8;
max_reg = FP_ARG_NUM_REG - n_reg;
+ left_align_p = false;
}
else
{
@@ -9814,53 +11485,74 @@ s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
sav_scale = UNITS_PER_LONG;
max_reg = GP_ARG_NUM_REG - n_reg;
+ left_align_p = false;
}
/* Pull the value out of the saved registers ... */
- lab_false = create_artificial_label (UNKNOWN_LOCATION);
- lab_over = create_artificial_label (UNKNOWN_LOCATION);
- addr = create_tmp_var (ptr_type_node, "addr");
+ if (reg != NULL_TREE)
+ {
+ /*
+ if (reg > ((typeof (reg))max_reg))
+ goto lab_false;
- t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
- t = build2 (GT_EXPR, boolean_type_node, reg, t);
- u = build1 (GOTO_EXPR, void_type_node, lab_false);
- t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
- gimplify_and_add (t, pre_p);
+ addr = sav + sav_ofs + reg * sav_scale;
- t = fold_build_pointer_plus_hwi (sav, sav_ofs);
- u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
- fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
- t = fold_build_pointer_plus (t, u);
+ goto lab_over;
- gimplify_assign (addr, t, pre_p);
+ lab_false:
+ */
+
+ lab_false = create_artificial_label (UNKNOWN_LOCATION);
+ lab_over = create_artificial_label (UNKNOWN_LOCATION);
+
+ t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
+ t = build2 (GT_EXPR, boolean_type_node, reg, t);
+ u = build1 (GOTO_EXPR, void_type_node, lab_false);
+ t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
+ gimplify_and_add (t, pre_p);
+
+ t = fold_build_pointer_plus_hwi (sav, sav_ofs);
+ u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
+ fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
+ t = fold_build_pointer_plus (t, u);
- gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
+ gimplify_assign (addr, t, pre_p);
- gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
+ gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
+ gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
+ }
/* ... Otherwise out of the overflow area. */
t = ovf;
- if (size < UNITS_PER_LONG)
+ if (size < UNITS_PER_LONG && !left_align_p)
t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
gimplify_assign (addr, t, pre_p);
- t = fold_build_pointer_plus_hwi (t, size);
+ if (size < UNITS_PER_LONG && left_align_p)
+ t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG);
+ else
+ t = fold_build_pointer_plus_hwi (t, size);
+
gimplify_assign (ovf, t, pre_p);
- gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
+ if (reg != NULL_TREE)
+ gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
/* Increment register save count. */
- u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
- fold_convert (TREE_TYPE (reg), size_int (n_reg)));
- gimplify_and_add (u, pre_p);
+ if (n_reg > 0)
+ {
+ u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
+ fold_convert (TREE_TYPE (reg), size_int (n_reg)));
+ gimplify_and_add (u, pre_p);
+ }
if (indirect_p)
{
@@ -9943,241 +11635,6 @@ s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
}
}
-/* Builtins. */
-
-enum s390_builtin
-{
- S390_BUILTIN_TBEGIN,
- S390_BUILTIN_TBEGIN_NOFLOAT,
- S390_BUILTIN_TBEGIN_RETRY,
- S390_BUILTIN_TBEGIN_RETRY_NOFLOAT,
- S390_BUILTIN_TBEGINC,
- S390_BUILTIN_TEND,
- S390_BUILTIN_TABORT,
- S390_BUILTIN_NON_TX_STORE,
- S390_BUILTIN_TX_NESTING_DEPTH,
- S390_BUILTIN_TX_ASSIST,
-
- S390_BUILTIN_S390_SFPC,
- S390_BUILTIN_S390_EFPC,
-
- S390_BUILTIN_MAX
-};
-
-tree s390_builtin_decls[S390_BUILTIN_MAX];
-
-static enum insn_code const code_for_builtin[S390_BUILTIN_MAX] = {
- CODE_FOR_tbegin,
- CODE_FOR_tbegin_nofloat,
- CODE_FOR_tbegin_retry,
- CODE_FOR_tbegin_retry_nofloat,
- CODE_FOR_tbeginc,
- CODE_FOR_tend,
- CODE_FOR_tabort,
- CODE_FOR_ntstg,
- CODE_FOR_etnd,
- CODE_FOR_tx_assist,
-
- CODE_FOR_s390_sfpc,
- CODE_FOR_s390_efpc
-};
-
-static void
-s390_init_builtins (void)
-{
- tree ftype, uint64_type;
- tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
- NULL, NULL);
- tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
-
- /* void foo (void) */
- ftype = build_function_type_list (void_type_node, NULL_TREE);
- s390_builtin_decls[S390_BUILTIN_TBEGINC] =
- add_builtin_function ("__builtin_tbeginc", ftype, S390_BUILTIN_TBEGINC,
- BUILT_IN_MD, NULL, NULL_TREE);
-
- /* void foo (int) */
- ftype = build_function_type_list (void_type_node, integer_type_node,
- NULL_TREE);
- s390_builtin_decls[S390_BUILTIN_TABORT] =
- add_builtin_function ("__builtin_tabort", ftype,
- S390_BUILTIN_TABORT, BUILT_IN_MD, NULL,
- noreturn_attr);
- s390_builtin_decls[S390_BUILTIN_TX_ASSIST] =
- add_builtin_function ("__builtin_tx_assist", ftype,
- S390_BUILTIN_TX_ASSIST, BUILT_IN_MD, NULL, NULL_TREE);
-
- /* void foo (unsigned) */
- ftype = build_function_type_list (void_type_node, unsigned_type_node,
- NULL_TREE);
- s390_builtin_decls[S390_BUILTIN_S390_SFPC] =
- add_builtin_function ("__builtin_s390_sfpc", ftype,
- S390_BUILTIN_S390_SFPC, BUILT_IN_MD, NULL, NULL_TREE);
-
- /* int foo (void *) */
- ftype = build_function_type_list (integer_type_node, ptr_type_node,
- NULL_TREE);
- s390_builtin_decls[S390_BUILTIN_TBEGIN] =
- add_builtin_function ("__builtin_tbegin", ftype, S390_BUILTIN_TBEGIN,
- BUILT_IN_MD, NULL, returns_twice_attr);
- s390_builtin_decls[S390_BUILTIN_TBEGIN_NOFLOAT] =
- add_builtin_function ("__builtin_tbegin_nofloat", ftype,
- S390_BUILTIN_TBEGIN_NOFLOAT,
- BUILT_IN_MD, NULL, returns_twice_attr);
-
- /* int foo (void *, int) */
- ftype = build_function_type_list (integer_type_node, ptr_type_node,
- integer_type_node, NULL_TREE);
- s390_builtin_decls[S390_BUILTIN_TBEGIN_RETRY] =
- add_builtin_function ("__builtin_tbegin_retry", ftype,
- S390_BUILTIN_TBEGIN_RETRY,
- BUILT_IN_MD,
- NULL, returns_twice_attr);
- s390_builtin_decls[S390_BUILTIN_TBEGIN_RETRY_NOFLOAT] =
- add_builtin_function ("__builtin_tbegin_retry_nofloat", ftype,
- S390_BUILTIN_TBEGIN_RETRY_NOFLOAT,
- BUILT_IN_MD,
- NULL, returns_twice_attr);
-
- /* int foo (void) */
- ftype = build_function_type_list (integer_type_node, NULL_TREE);
- s390_builtin_decls[S390_BUILTIN_TX_NESTING_DEPTH] =
- add_builtin_function ("__builtin_tx_nesting_depth", ftype,
- S390_BUILTIN_TX_NESTING_DEPTH,
- BUILT_IN_MD, NULL, NULL_TREE);
- s390_builtin_decls[S390_BUILTIN_TEND] =
- add_builtin_function ("__builtin_tend", ftype,
- S390_BUILTIN_TEND, BUILT_IN_MD, NULL, NULL_TREE);
-
- /* unsigned foo (void) */
- ftype = build_function_type_list (unsigned_type_node, NULL_TREE);
- s390_builtin_decls[S390_BUILTIN_S390_EFPC] =
- add_builtin_function ("__builtin_s390_efpc", ftype,
- S390_BUILTIN_S390_EFPC, BUILT_IN_MD, NULL, NULL_TREE);
-
- /* void foo (uint64_t *, uint64_t) */
- if (TARGET_64BIT)
- uint64_type = long_unsigned_type_node;
- else
- uint64_type = long_long_unsigned_type_node;
-
- ftype = build_function_type_list (void_type_node,
- build_pointer_type (uint64_type),
- uint64_type, NULL_TREE);
- s390_builtin_decls[S390_BUILTIN_NON_TX_STORE] =
- add_builtin_function ("__builtin_non_tx_store", ftype,
- S390_BUILTIN_NON_TX_STORE,
- BUILT_IN_MD, NULL, NULL_TREE);
-}
-
-/* Expand an expression EXP that calls a built-in function,
- with result going to TARGET if that's convenient
- (and in mode MODE if that's convenient).
- SUBTARGET may be used as the target for computing one of EXP's operands.
- IGNORE is nonzero if the value is to be ignored. */
-
-static rtx
-s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
- machine_mode mode ATTRIBUTE_UNUSED,
- int ignore ATTRIBUTE_UNUSED)
-{
-#define MAX_ARGS 2
-
- tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
- unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
- enum insn_code icode;
- rtx op[MAX_ARGS], pat;
- int arity;
- bool nonvoid;
- tree arg;
- call_expr_arg_iterator iter;
-
- if (fcode >= S390_BUILTIN_MAX)
- internal_error ("bad builtin fcode");
- icode = code_for_builtin[fcode];
- if (icode == 0)
- internal_error ("bad builtin fcode");
-
- if (!TARGET_HTM && fcode <= S390_BUILTIN_TX_ASSIST)
- error ("Transactional execution builtins not enabled (-mhtm)\n");
-
- /* Set a flag in the machine specific cfun part in order to support
- saving/restoring of FPRs. */
- if (fcode == S390_BUILTIN_TBEGIN || fcode == S390_BUILTIN_TBEGIN_RETRY)
- cfun->machine->tbegin_p = true;
-
- nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
-
- arity = 0;
- FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
- {
- const struct insn_operand_data *insn_op;
-
- if (arg == error_mark_node)
- return NULL_RTX;
- if (arity >= MAX_ARGS)
- return NULL_RTX;
-
- insn_op = &insn_data[icode].operand[arity + nonvoid];
-
- op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
-
- if (!(*insn_op->predicate) (op[arity], insn_op->mode))
- {
- if (insn_op->predicate == memory_operand)
- {
- /* Don't move a NULL pointer into a register. Otherwise
- we have to rely on combine being able to move it back
- in order to get an immediate 0 in the instruction. */
- if (op[arity] != const0_rtx)
- op[arity] = copy_to_mode_reg (Pmode, op[arity]);
- op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);
- }
- else
- op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]);
- }
-
- arity++;
- }
-
- if (nonvoid)
- {
- machine_mode tmode = insn_data[icode].operand[0].mode;
- if (!target
- || GET_MODE (target) != tmode
- || !(*insn_data[icode].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
- }
-
- switch (arity)
- {
- case 0:
- pat = GEN_FCN (icode) (target);
- break;
- case 1:
- if (nonvoid)
- pat = GEN_FCN (icode) (target, op[0]);
- else
- pat = GEN_FCN (icode) (op[0]);
- break;
- case 2:
- if (nonvoid)
- pat = GEN_FCN (icode) (target, op[0], op[1]);
- else
- pat = GEN_FCN (icode) (op[0], op[1]);
- break;
- default:
- gcc_unreachable ();
- }
- if (!pat)
- return NULL_RTX;
- emit_insn (pat);
-
- if (nonvoid)
- return target;
- else
- return const0_rtx;
-}
/* Return the decl for the target specific builtin with the function
code FCODE. */
@@ -10661,15 +12118,18 @@ s390_call_saved_register_used (tree call_expr)
mode = TYPE_MODE (type);
gcc_assert (mode);
+ /* We assume that in the target function all parameters are
+ named. This only has an impact on vector argument register
+ usage, and none of those registers are call-saved. */
if (pass_by_reference (&cum_v, mode, type, true))
{
mode = Pmode;
type = build_pointer_type (type);
}
- parm_rtx = s390_function_arg (cum, mode, type, 0);
+ parm_rtx = s390_function_arg (cum, mode, type, true);
- s390_function_arg_advance (cum, mode, type, 0);
+ s390_function_arg_advance (cum, mode, type, true);
if (!parm_rtx)
continue;
@@ -10813,7 +12273,7 @@ s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
if (result_reg != NULL_RTX)
- call = gen_rtx_SET (VOIDmode, result_reg, call);
+ call = gen_rtx_SET (result_reg, call);
if (retaddr_reg != NULL_RTX)
{
@@ -10876,6 +12336,13 @@ s390_conditional_register_usage (void)
for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
call_used_regs[i] = fixed_regs[i] = 1;
}
+
+ /* Disable v16 - v31 for non-vector target. */
+ if (!TARGET_VX)
+ {
+ for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
+ fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
+ }
}
/* Corresponding function to eh_return expander. */
@@ -11155,7 +12622,7 @@ s390_fix_long_loop_prediction (rtx_insn *insn)
new_label = gen_label_rtx ();
uncond_jump = emit_jump_insn_after (
- gen_rtx_SET (VOIDmode, pc_rtx,
+ gen_rtx_SET (pc_rtx,
gen_rtx_LABEL_REF (VOIDmode, code_label)),
insn);
emit_label_after (new_label, uncond_jump);
@@ -11453,7 +12920,8 @@ s390_reorg (void)
/* Walk over the insns and do some >=z10 specific changes. */
if (s390_tune == PROCESSOR_2097_Z10
|| s390_tune == PROCESSOR_2817_Z196
- || s390_tune == PROCESSOR_2827_ZEC12)
+ || s390_tune == PROCESSOR_2827_ZEC12
+ || s390_tune == PROCESSOR_2964_Z13)
{
rtx_insn *insn;
bool insn_added_p = false;
@@ -11488,31 +12956,37 @@ s390_reorg (void)
/* Insert NOPs for hotpatching. */
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
- {
- if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_FUNCTION_BEG)
- break;
- }
- gcc_assert (insn);
- /* Output a series of NOPs after the NOTE_INSN_FUNCTION_BEG. */
- while (hw_after > 0)
+ /* Emit NOPs
+ 1. inside the area covered by debug information to allow setting
+ breakpoints at the NOPs,
+ 2. before any insn which results in an asm instruction,
+ 3. before in-function labels to avoid jumping to the NOPs, for
+ example as part of a loop,
+ 4. before any barrier in case the function is completely empty
+ (__builtin_unreachable ()) and has neither internal labels nor
+ active insns.
+ */
+ if (active_insn_p (insn) || BARRIER_P (insn) || LABEL_P (insn))
+ break;
+ /* Output a series of NOPs before the first active insn. */
+ while (insn && hw_after > 0)
{
if (hw_after >= 3 && TARGET_CPU_ZARCH)
{
- insn = emit_insn_after (gen_nop_6_byte (), insn);
+ emit_insn_before (gen_nop_6_byte (), insn);
hw_after -= 3;
}
else if (hw_after >= 2)
{
- insn = emit_insn_after (gen_nop_4_byte (), insn);
+ emit_insn_before (gen_nop_4_byte (), insn);
hw_after -= 2;
}
else
{
- insn = emit_insn_after (gen_nop_2_byte (), insn);
+ emit_insn_before (gen_nop_2_byte (), insn);
hw_after -= 1;
}
}
- gcc_assert (hw_after == 0);
}
}
@@ -11702,7 +13176,8 @@ s390_sched_reorder (FILE *file, int verbose,
if (reload_completed && *nreadyp > 1)
s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
- if (s390_tune == PROCESSOR_2827_ZEC12
+ if ((s390_tune == PROCESSOR_2827_ZEC12
+ || s390_tune == PROCESSOR_2964_Z13)
&& reload_completed
&& *nreadyp > 1)
{
@@ -11785,7 +13260,8 @@ s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
{
last_scheduled_insn = insn;
- if (s390_tune == PROCESSOR_2827_ZEC12
+ if ((s390_tune == PROCESSOR_2827_ZEC12
+ || s390_tune == PROCESSOR_2964_Z13)
&& reload_completed
&& recog_memoized (insn) >= 0)
{
@@ -11865,7 +13341,8 @@ s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
if (s390_tune != PROCESSOR_2097_Z10
&& s390_tune != PROCESSOR_2817_Z196
- && s390_tune != PROCESSOR_2827_ZEC12)
+ && s390_tune != PROCESSOR_2827_ZEC12
+ && s390_tune != PROCESSOR_2964_Z13)
return nunroll;
/* Count the number of memory references within the loop body. */
@@ -11981,6 +13458,8 @@ s390_option_override (void)
}
/* Sanity checks. */
+ if (s390_arch == PROCESSOR_NATIVE || s390_tune == PROCESSOR_NATIVE)
+ gcc_unreachable ();
if (TARGET_ZARCH && !TARGET_CPU_ZARCH)
error ("z/Architecture mode not supported on %s", s390_arch_string);
if (TARGET_64BIT && !TARGET_ZARCH)
@@ -11996,6 +13475,22 @@ s390_option_override (void)
if (!(target_flags_explicit & MASK_OPT_HTM) && TARGET_CPU_HTM && TARGET_ZARCH)
target_flags |= MASK_OPT_HTM;
+ if (target_flags_explicit & MASK_OPT_VX)
+ {
+ if (TARGET_OPT_VX)
+ {
+ if (!TARGET_CPU_VX)
+ error ("hardware vector support not available on %s",
+ s390_arch_string);
+ if (TARGET_SOFT_FLOAT)
+ error ("hardware vector support not available with -msoft-float");
+ }
+ }
+ else if (TARGET_CPU_VX)
+ /* Enable vector support if available and not explicitly disabled
+ by the user, e.g. with -m31 -march=z13 -mzarch. */
+ target_flags |= MASK_OPT_VX;
+
if (TARGET_HARD_DFP && !TARGET_DFP)
{
if (target_flags_explicit & MASK_HARD_DFP)
@@ -12035,6 +13530,7 @@ s390_option_override (void)
s390_cost = &z196_cost;
break;
case PROCESSOR_2827_ZEC12:
+ case PROCESSOR_2964_Z13:
s390_cost = &zEC12_cost;
break;
default:
@@ -12062,7 +13558,8 @@ s390_option_override (void)
if (s390_tune == PROCESSOR_2097_Z10
|| s390_tune == PROCESSOR_2817_Z196
- || s390_tune == PROCESSOR_2827_ZEC12)
+ || s390_tune == PROCESSOR_2827_ZEC12
+ || s390_tune == PROCESSOR_2964_Z13)
{
maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100,
global_options.x_param_values,
@@ -12155,8 +13652,8 @@ s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
static void
s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
- tree sfpc = s390_builtin_decls[S390_BUILTIN_S390_SFPC];
- tree efpc = s390_builtin_decls[S390_BUILTIN_S390_EFPC];
+ tree sfpc = s390_builtin_decls[S390_BUILTIN_s390_sfpc];
+ tree efpc = s390_builtin_decls[S390_BUILTIN_s390_efpc];
tree call_efpc = build_call_expr (efpc, 0);
tree fenv_var = create_tmp_var (unsigned_type_node);
@@ -12224,6 +13721,140 @@ s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
#undef FPC_DXC_SHIFT
}
+/* Return the vector mode to be used for inner mode MODE when doing
+ vectorization. */
+static machine_mode
+s390_preferred_simd_mode (machine_mode mode)
+{
+ if (TARGET_VX)
+ switch (mode)
+ {
+ case DFmode:
+ return V2DFmode;
+ case DImode:
+ return V2DImode;
+ case SImode:
+ return V4SImode;
+ case HImode:
+ return V8HImode;
+ case QImode:
+ return V16QImode;
+ default:;
+ }
+ return word_mode;
+}
+
+/* Our hardware does not require vectors to be strictly aligned. */
+static bool
+s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
+ const_tree type ATTRIBUTE_UNUSED,
+ int misalignment ATTRIBUTE_UNUSED,
+ bool is_packed ATTRIBUTE_UNUSED)
+{
+ if (TARGET_VX)
+ return true;
+
+ return default_builtin_support_vector_misalignment (mode, type, misalignment,
+ is_packed);
+}
+
+/* The vector ABI requires vector types to be aligned on an 8 byte
+ boundary (our stack alignment). However, we allow this to be
+ overridden by the user, although this definitely breaks the ABI. */
+static HOST_WIDE_INT
+s390_vector_alignment (const_tree type)
+{
+ if (!TARGET_VX_ABI)
+ return default_vector_alignment (type);
+
+ if (TYPE_USER_ALIGN (type))
+ return TYPE_ALIGN (type);
+
+ return MIN (64, tree_to_shwi (TYPE_SIZE (type)));
+}
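+
+/* For instance, a 16 byte vector type (TYPE_SIZE of 128 bits) gets
+ MIN (64, 128) = 64 bit alignment under the VX ABI, while a type
+ carrying an explicit __attribute__ ((aligned (16))) keeps its
+ user-requested alignment via TYPE_USER_ALIGN even though that
+ deviates from the ABI. */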
+
+/* Implement TARGET_ASM_FILE_END. */
+static void
+s390_asm_file_end (void)
+{
+#ifdef HAVE_AS_GNU_ATTRIBUTE
+ varpool_node *vnode;
+ cgraph_node *cnode;
+
+ FOR_EACH_VARIABLE (vnode)
+ if (TREE_PUBLIC (vnode->decl))
+ s390_check_type_for_vector_abi (TREE_TYPE (vnode->decl), false, false);
+
+ FOR_EACH_FUNCTION (cnode)
+ if (TREE_PUBLIC (cnode->decl))
+ s390_check_type_for_vector_abi (TREE_TYPE (cnode->decl), false, false);
+
+ if (s390_vector_abi != 0)
+ fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
+ s390_vector_abi);
+#endif
+ file_end_indicate_exec_stack ();
+}
+
+/* Return true if TYPE is a vector bool type. */
+static inline bool
+s390_vector_bool_type_p (const_tree type)
+{
+ return TYPE_VECTOR_OPAQUE (type);
+}
+
+/* Return the diagnostic message string if the binary operation OP is
+ not permitted on TYPE1 and TYPE2, NULL otherwise. */
+static const char*
+s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
+{
+ bool bool1_p, bool2_p;
+ bool plusminus_p;
+ bool muldiv_p;
+ bool compare_p;
+ machine_mode mode1, mode2;
+
+ if (!TARGET_ZVECTOR)
+ return NULL;
+
+ if (!VECTOR_TYPE_P (type1) || !VECTOR_TYPE_P (type2))
+ return NULL;
+
+ bool1_p = s390_vector_bool_type_p (type1);
+ bool2_p = s390_vector_bool_type_p (type2);
+
+ /* Mixing signed and unsigned types is forbidden for all
+ operators. */
+ if (!bool1_p && !bool2_p
+ && TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
+ return N_("types differ in signedness");
+
+ plusminus_p = (op == PLUS_EXPR || op == MINUS_EXPR);
+ muldiv_p = (op == MULT_EXPR || op == RDIV_EXPR || op == TRUNC_DIV_EXPR
+ || op == CEIL_DIV_EXPR || op == FLOOR_DIV_EXPR
+ || op == ROUND_DIV_EXPR);
+ compare_p = (op == LT_EXPR || op == LE_EXPR || op == GT_EXPR || op == GE_EXPR
+ || op == EQ_EXPR || op == NE_EXPR);
+
+ if (bool1_p && bool2_p && (plusminus_p || muldiv_p))
+ return N_("binary operator does not support two vector bool operands");
+
+ if (bool1_p != bool2_p && (muldiv_p || compare_p))
+ return N_("binary operator does not support vector bool operand");
+
+ mode1 = TYPE_MODE (type1);
+ mode2 = TYPE_MODE (type2);
+
+ if (bool1_p != bool2_p && plusminus_p
+ && (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
+ || GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT))
+ return N_("binary operator does not support mixing vector "
+ "bool with floating point vector operands");
+
+ return NULL;
+}
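+
+/* For example, given the hypothetical types
+ typedef signed int v4si __attribute__ ((vector_size (16)));
+ typedef unsigned int uv4si __attribute__ ((vector_size (16)));
+ an addition mixing a v4si and a uv4si operand is diagnosed with the
+ "types differ in signedness" message above when TARGET_ZVECTOR is
+ enabled. */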
+
/* Initialize GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
@@ -12332,6 +13963,8 @@ s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
#define TARGET_FUNCTION_VALUE s390_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE s390_libcall_value
+#undef TARGET_STRICT_ARGUMENT_NAMING
+#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_KEEP_LEAF_WHEN_PROFILED
#define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled
@@ -12350,6 +13983,9 @@ s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
#define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
#endif
+#undef TARGET_DWARF_FRAME_REG_MODE
+#define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode
+
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE s390_mangle_type
@@ -12358,6 +13994,9 @@ s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
+#undef TARGET_VECTOR_MODE_SUPPORTED_P
+#define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p
+
#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class
@@ -12418,6 +14057,24 @@ s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv
+#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
+#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn
+
+#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
+#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode
+
+#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
+#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment
+
+#undef TARGET_VECTOR_ALIGNMENT
+#define TARGET_VECTOR_ALIGNMENT s390_vector_alignment
+
+#undef TARGET_INVALID_BINARY_OP
+#define TARGET_INVALID_BINARY_OP s390_invalid_binary_op
+
+#undef TARGET_ASM_FILE_END
+#define TARGET_ASM_FILE_END s390_asm_file_end
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-s390.h"