From 5ada7a142c1df1fad051c42f408d84df72a06b4c Mon Sep 17 00:00:00 2001 From: krebbel Date: Thu, 27 Jun 2013 07:44:11 +0000 Subject: 2013-06-27 Andreas Krebbel * config/s390/s390.c: Rename UNSPEC_CCU_TO_INT to UNSPEC_STRCMPCC_TO_INT and UNSPEC_CCZ_TO_INT to UNSPEC_CC_TO_INT. (struct machine_function): Add tbegin_p. (s390_canonicalize_comparison): Fold CC mode compares to conditional jump if possible. (s390_emit_jump): Return the emitted jump. (s390_branch_condition_mask, s390_branch_condition_mnemonic): Handle CCRAWmode compares. (s390_option_override): Default to -mhtm if available. (s390_reg_clobbered_rtx): Handle floating point regs as well. (s390_regs_ever_clobbered): Use s390_regs_ever_clobbered also for FPRs instead of df_regs_ever_live_p. (s390_optimize_nonescaping_tx): New function. (s390_init_frame_layout): Extend clobbered_regs array to cover FPRs as well. (s390_emit_prologue): Call s390_optimize_nonescaping_tx. (s390_expand_tbegin): New function. (enum s390_builtin): New enum definition. (code_for_builtin): New array definition. (s390_init_builtins): New function. (s390_expand_builtin): New function. (TARGET_INIT_BUILTINS): Define. (TARGET_EXPAND_BUILTIN): Define. * common/config/s390/s390-common.c (processor_flags_table): Add PF_TX. * config/s390/predicates.md (s390_comparison): Handle CCRAWmode. (s390_alc_comparison): Likewise. * config/s390/s390-modes.def: Add CCRAWmode. * config/s390/s390.h (processor_flags): Add PF_TX. (TARGET_CPU_HTM): Define macro. (TARGET_HTM): Define macro. (TARGET_CPU_CPP_BUILTINS): Define __HTM__ for htm. * config/s390/s390.md: Rename UNSPEC_CCU_TO_INT to UNSPEC_STRCMPCC_TO_INT and UNSPEC_CCZ_TO_INT to UNSPEC_CC_TO_INT. (UNSPECV_TBEGIN, UNSPECV_TBEGINC, UNSPECV_TEND, UNSPECV_TABORT) (UNSPECV_ETND, UNSPECV_NTSTG, UNSPECV_PPA): New unspecv enum values. (TBEGIN_MASK, TBEGINC_MASK): New constants. ("*cc_to_int"): Move up. ("*movcc", "*cjump_64", "*cjump_31"): Accept integer constants other than 0. ("*ccraw_to_int"): New insn and splitter definition. ("tbegin", "tbegin_nofloat", "tbegin_retry") ("tbegin_retry_nofloat", "tbeginc", "tend", "tabort") ("tx_assist"): New expander. ("tbegin_1", "tbegin_nofloat_1", "*tbeginc_1", "*tend_1") ("*tabort_1", "etnd", "ntstg", "*ppa"): New insn definition. * config/s390/s390.opt: Add -mhtm option. * config/s390/s390-protos.h (s390_emit_jump): Add return type. * config/s390/htmxlintrin.h: New file. * config/s390/htmintrin.h: New file. * config/s390/s390intrin.h: New file. * doc/extend.texi: Document htm builtins. * config.gcc: Add the new header files to extra_headers. 2013-06-27 Andreas Krebbel * gcc.target/s390/htm-1.c: New file. * gcc.target/s390/htm-nofloat-1.c: New file. * gcc.target/s390/htm-xl-intrin-1.c: New file. 2013-06-27 Andreas Krebbel * config/s390/target.h: Include htmintrin.h. (_HTM_ITM_RETRIES): New macro definition. (htm_available, htm_init, htm_begin, htm_begin_success) (htm_commit, htm_abort, htm_abort_should_retry): New functions. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@200454 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/config/s390/htmintrin.h | 57 +++++ gcc/config/s390/htmxlintrin.h | 182 +++++++++++++ gcc/config/s390/predicates.md | 12 +- gcc/config/s390/s390-modes.def | 9 + gcc/config/s390/s390-protos.h | 3 +- gcc/config/s390/s390.c | 564 ++++++++++++++++++++++++++++++++++++++--- gcc/config/s390/s390.h | 37 +-- gcc/config/s390/s390.md | 281 ++++++++++++++++++-- gcc/config/s390/s390.opt | 4 + gcc/config/s390/s390intrin.h | 33 +++ 10 files changed, 1106 insertions(+), 76 deletions(-) create mode 100644 gcc/config/s390/htmintrin.h create mode 100644 gcc/config/s390/htmxlintrin.h create mode 100644 gcc/config/s390/s390intrin.h (limited to 'gcc/config') diff --git a/gcc/config/s390/htmintrin.h b/gcc/config/s390/htmintrin.h new file mode 100644 index 00000000000..7aaa9f5bf7c --- /dev/null +++ b/gcc/config/s390/htmintrin.h @@ -0,0 +1,57 @@ +/* GNU compiler hardware transactional execution intrinsics + Copyright (C) 2013 Free Software Foundation, Inc. + Contributed by Andreas Krebbel (Andreas.Krebbel@de.ibm.com) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#ifndef _HTMINTRIN_H +#define _HTMINTRIN_H + + +/* Condition codes generated by tbegin */ +#define _HTM_TBEGIN_STARTED 0 +#define _HTM_TBEGIN_INDETERMINATE 1 +#define _HTM_TBEGIN_TRANSIENT 2 +#define _HTM_TBEGIN_PERSISTENT 3 + +/* The abort codes below this threshold are reserved for machine + use. */ +#define _HTM_FIRST_USER_ABORT_CODE 256 + +/* The transaction diagnostic block is it is defined in the Principles + of Operation chapter 5-91. */ + +struct __htm_tdb { + unsigned char format; /* 0 */ + unsigned char flags; + unsigned char reserved1[4]; + unsigned short nesting_depth; + unsigned long long abort_code; /* 8 */ + unsigned long long conflict_token; /* 16 */ + unsigned long long atia; /* 24 */ + unsigned char eaid; /* 32 */ + unsigned char dxc; + unsigned char reserved2[2]; + unsigned int program_int_id; + unsigned long long exception_id; /* 40 */ + unsigned long long bea; /* 48 */ + unsigned char reserved3[72]; /* 56 */ + unsigned long long gprs[16]; /* 128 */ +} __attribute__((__packed__, __aligned__ (8))); + + +#endif /* _HTMINTRIN_H */ diff --git a/gcc/config/s390/htmxlintrin.h b/gcc/config/s390/htmxlintrin.h new file mode 100644 index 00000000000..bb142195b2b --- /dev/null +++ b/gcc/config/s390/htmxlintrin.h @@ -0,0 +1,182 @@ +/* XL compiler hardware transactional execution intrinsics + Copyright (C) 2013 Free Software Foundation, Inc. + Contributed by Andreas Krebbel (Andreas.Krebbel@de.ibm.com) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#ifndef _HTMXLINTRIN_H +#define _HTMXLINTRIN_H + +#include + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* These intrinsics are being made available for compatibility with + the IBM XL compiler. For documentation please see the "z/OS XL + C/C++ Programming Guide" publically available on the web. */ + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_simple_begin () +{ + return __builtin_tbegin_nofloat (0); +} + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_begin (void* const tdb) +{ + return __builtin_tbegin_nofloat (tdb); +} + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_end () +{ + return __builtin_tend (); +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_abort () +{ + return __builtin_tabort (_HTM_FIRST_USER_ABORT_CODE); +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_named_abort (unsigned char const code) +{ + return __builtin_tabort ((int)_HTM_FIRST_USER_ABORT_CODE + code); +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_non_transactional_store (void* const addr, long long const value) +{ + __builtin_non_tx_store ((uint64_t*)addr, (uint64_t)value); +} + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_nesting_depth (void* const tdb_ptr) +{ + int depth = __builtin_tx_nesting_depth (); + struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; + + if (depth != 0) + return depth; + + if (tdb->format == 0) + return 0; + return tdb->nesting_depth; +} + +/* Transaction failure diagnostics */ + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_is_user_abort (void* const tdb_ptr) +{ + struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; + + if (tdb->format == 0) + return 0; + + return !!(tdb->abort_code >= _HTM_FIRST_USER_ABORT_CODE); +} + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_is_named_user_abort (void* const tdb_ptr, unsigned char* code) +{ + struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; + + if (tdb->format == 0) + return 0; + + if (tdb->abort_code >= _HTM_FIRST_USER_ABORT_CODE) + { + *code = tdb->abort_code - _HTM_FIRST_USER_ABORT_CODE; + return 1; + } + return 0; +} + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_is_illegal (void* const tdb_ptr) +{ + struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; + + return (tdb->format == 0 + && (tdb->abort_code == 4 /* unfiltered program interruption */ + || tdb->abort_code == 11 /* restricted instruction */)); +} + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_is_footprint_exceeded (void* const tdb_ptr) +{ + struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; + + return (tdb->format == 0 + && (tdb->abort_code == 7 /* fetch overflow */ + || tdb->abort_code == 8 /* store overflow */)); +} + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_is_nested_too_deep (void* const tdb_ptr) +{ + struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; + + return tdb->format == 0 && tdb->abort_code == 13; /* depth exceeded */ +} + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_is_conflict (void* const tdb_ptr) +{ + struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; + + return (tdb->format == 0 + && (tdb->abort_code == 9 /* fetch conflict */ + || tdb->abort_code == 10 /* store conflict */)); +} + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_is_failure_persistent (long const result) +{ + return result == _HTM_TBEGIN_PERSISTENT; +} + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_failure_address (void* const tdb_ptr) +{ + struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; +#ifdef __s390x__ + return tdb->atia; +#else + return tdb->atia & 0xffffffff; +#endif +} + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_failure_code (void* const tdb_ptr) +{ + struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; + + return tdb->abort_code; +} + +#ifdef __cplusplus +} +#endif + +#endif /* _HTMXLINTRIN_H */ diff --git a/gcc/config/s390/predicates.md b/gcc/config/s390/predicates.md index 523326e177d..069b42489a7 100644 --- a/gcc/config/s390/predicates.md +++ b/gcc/config/s390/predicates.md @@ -176,7 +176,11 @@ { if (GET_CODE (XEXP (op, 0)) != REG || REGNO (XEXP (op, 0)) != CC_REGNUM - || XEXP (op, 1) != const0_rtx) + || (XEXP (op, 1) != const0_rtx + && !(CONST_INT_P (XEXP (op, 1)) + && GET_MODE (XEXP (op, 0)) == CCRAWmode + && INTVAL (XEXP (op, 1)) >= 0 + && INTVAL (XEXP (op, 1)) <= 15))) return false; return (s390_branch_condition_mask (op) >= 0); @@ -224,7 +228,11 @@ if (GET_CODE (XEXP (op, 0)) != REG || REGNO (XEXP (op, 0)) != CC_REGNUM - || XEXP (op, 1) != const0_rtx) + || (XEXP (op, 1) != const0_rtx + && !(CONST_INT_P (XEXP (op, 1)) + && GET_MODE (XEXP (op, 0)) == CCRAWmode + && INTVAL (XEXP (op, 1)) >= 0 + && INTVAL (XEXP (op, 1)) <= 15))) return false; switch (GET_MODE (XEXP (op, 0))) diff --git a/gcc/config/s390/s390-modes.def b/gcc/config/s390/s390-modes.def index 419108fb473..5e0b50cafa1 100644 --- a/gcc/config/s390/s390-modes.def +++ b/gcc/config/s390/s390-modes.def @@ -152,6 +152,14 @@ The compare and swap instructions sets the condition code to 0/1 if the operands were equal/unequal. The CCZ1 mode ensures the result can be effectively placed into a register. +CCRAW + +The cc mode generated by a non-compare instruction. The condition +code mask for the CC consumer is determined by the comparison operator +(only EQ and NE allowed) and the immediate value given as second +operand to the operator. For the other CC modes this value used to be +0. + */ @@ -172,3 +180,4 @@ CC_MODE (CCT); CC_MODE (CCT1); CC_MODE (CCT2); CC_MODE (CCT3); +CC_MODE (CCRAW); diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h index 1a8205359e4..67283df4553 100644 --- a/gcc/config/s390/s390-protos.h +++ b/gcc/config/s390/s390-protos.h @@ -58,7 +58,7 @@ extern bool s390_match_ccmode (rtx, enum machine_mode); extern enum machine_mode s390_tm_ccmode (rtx, rtx, bool); extern enum machine_mode s390_select_ccmode (enum rtx_code, rtx, rtx); extern rtx s390_emit_compare (enum rtx_code, rtx, rtx); -extern void s390_emit_jump (rtx, rtx); +extern rtx s390_emit_jump (rtx, rtx); extern bool symbolic_reference_mentioned_p (rtx); extern bool tls_symbolic_reference_mentioned_p (rtx); extern bool legitimate_la_operand_p (rtx); @@ -87,6 +87,7 @@ extern void s390_expand_cs_hqi (enum machine_mode, rtx, rtx, rtx, rtx, rtx, bool); extern void s390_expand_atomic (enum machine_mode, enum rtx_code, rtx, rtx, rtx, bool); +extern void s390_expand_tbegin (rtx, rtx, rtx, bool); extern rtx s390_return_addr_rtx (int, rtx); extern rtx s390_back_chain_rtx (void); extern rtx s390_emit_call (rtx, rtx, rtx, rtx); diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c index 06d1228ff06..2cacf6f52ad 100644 --- a/gcc/config/s390/s390.c +++ b/gcc/config/s390/s390.c @@ -367,6 +367,10 @@ struct GTY(()) machine_function const char *some_ld_name; bool has_landing_pad_p; + + /* True if the current function may contain a tbegin clobbering + FPRs. */ + bool tbegin_p; }; /* Few accessor macros for struct cfun->machine->s390_frame_layout. */ @@ -824,9 +828,9 @@ s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1, *op1 = constm1_rtx; } - /* Remove redundant UNSPEC_CCU_TO_INT conversions if possible. */ + /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible. */ if (GET_CODE (*op0) == UNSPEC - && XINT (*op0, 1) == UNSPEC_CCU_TO_INT + && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT && XVECLEN (*op0, 0) == 1 && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode && GET_CODE (XVECEXP (*op0, 0, 0)) == REG @@ -852,25 +856,35 @@ s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1, } } - /* Remove redundant UNSPEC_CCZ_TO_INT conversions if possible. */ + /* Remove redundant UNSPEC_CC_TO_INT conversions if possible. */ if (GET_CODE (*op0) == UNSPEC - && XINT (*op0, 1) == UNSPEC_CCZ_TO_INT + && XINT (*op0, 1) == UNSPEC_CC_TO_INT && XVECLEN (*op0, 0) == 1 - && GET_MODE (XVECEXP (*op0, 0, 0)) == CCZmode && GET_CODE (XVECEXP (*op0, 0, 0)) == REG && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM - && *op1 == const0_rtx) + && CONST_INT_P (*op1)) { enum rtx_code new_code = UNKNOWN; - switch (*code) + switch (GET_MODE (XVECEXP (*op0, 0, 0))) { - case EQ: new_code = EQ; break; - case NE: new_code = NE; break; - default: break; + case CCZmode: + case CCRAWmode: + switch (*code) + { + case EQ: new_code = EQ; break; + case NE: new_code = NE; break; + default: break; + } + break; + default: break; } if (new_code != UNKNOWN) { + /* For CCRAWmode put the required cc mask into the second + operand. */ + if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode) + *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1))); *op0 = XVECEXP (*op0, 0, 0); *code = new_code; } @@ -942,10 +956,11 @@ s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem, const0_rtx); } -/* Emit a jump instruction to TARGET. If COND is NULL_RTX, emit an - unconditional jump, else a conditional jump under condition COND. */ +/* Emit a jump instruction to TARGET and return it. If COND is + NULL_RTX, emit an unconditional jump, else a conditional jump under + condition COND. */ -void +rtx s390_emit_jump (rtx target, rtx cond) { rtx insn; @@ -955,7 +970,7 @@ s390_emit_jump (rtx target, rtx cond) target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx); insn = gen_rtx_SET (VOIDmode, pc_rtx, target); - emit_jump_insn (insn); + return emit_jump_insn (insn); } /* Return branch condition mask to implement a branch @@ -971,7 +986,10 @@ s390_branch_condition_mask (rtx code) gcc_assert (GET_CODE (XEXP (code, 0)) == REG); gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM); - gcc_assert (XEXP (code, 1) == const0_rtx); + gcc_assert (XEXP (code, 1) == const0_rtx + || (GET_MODE (XEXP (code, 0)) == CCRAWmode + && CONST_INT_P (XEXP (code, 1)))); + switch (GET_MODE (XEXP (code, 0))) { @@ -1145,6 +1163,17 @@ s390_branch_condition_mask (rtx code) } break; + case CCRAWmode: + switch (GET_CODE (code)) + { + case EQ: + return INTVAL (XEXP (code, 1)); + case NE: + return (INTVAL (XEXP (code, 1))) ^ 0xf; + default: + gcc_unreachable (); + } + default: return -1; } @@ -1204,7 +1233,9 @@ s390_branch_condition_mnemonic (rtx code, int inv) if (GET_CODE (XEXP (code, 0)) == REG && REGNO (XEXP (code, 0)) == CC_REGNUM - && XEXP (code, 1) == const0_rtx) + && (XEXP (code, 1) == const0_rtx + || (GET_MODE (XEXP (code, 0)) == CCRAWmode + && CONST_INT_P (XEXP (code, 1))))) mask = s390_branch_condition_mask (code); else mask = s390_compare_and_branch_condition_mask (code); @@ -1602,6 +1633,11 @@ s390_option_override (void) if (!(target_flags_explicit & MASK_HARD_DFP) && TARGET_DFP) target_flags |= MASK_HARD_DFP; + /* Enable hardware transactions if available and not explicitly + disabled by user. E.g. with -m31 -march=zEC12 -mzarch */ + if (!(target_flags_explicit & MASK_OPT_HTM) && TARGET_CPU_HTM && TARGET_ZARCH) + target_flags |= MASK_OPT_HTM; + if (TARGET_HARD_DFP && !TARGET_DFP) { if (target_flags_explicit & MASK_HARD_DFP) @@ -7334,11 +7370,11 @@ s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *d if (GET_CODE (setreg) == SUBREG) { rtx inner = SUBREG_REG (setreg); - if (!GENERAL_REG_P (inner)) + if (!GENERAL_REG_P (inner) && !FP_REG_P (inner)) return; regno = subreg_regno (setreg); } - else if (GENERAL_REG_P (setreg)) + else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg)) regno = REGNO (setreg); else return; @@ -7361,13 +7397,13 @@ s390_regs_ever_clobbered (int *regs_ever_clobbered) rtx cur_insn; unsigned int i; - memset (regs_ever_clobbered, 0, 16 * sizeof (int)); + memset (regs_ever_clobbered, 0, 32 * sizeof (int)); /* For non-leaf functions we have to consider all call clobbered regs to be clobbered. */ if (!crtl->is_leaf) { - for (i = 0; i < 16; i++) + for (i = 0; i < 32; i++) regs_ever_clobbered[i] = call_really_used_regs[i]; } @@ -7389,7 +7425,7 @@ s390_regs_ever_clobbered (int *regs_ever_clobbered) See expand_builtin_unwind_init. For regs_ever_live this is done by reload. */ if (cfun->has_nonlocal_label) - for (i = 0; i < 16; i++) + for (i = 0; i < 32; i++) if (!call_really_used_regs[i]) regs_ever_clobbered[i] = 1; @@ -7455,17 +7491,6 @@ s390_register_info (int clobbered_regs[]) { int i, j; - /* fprs 8 - 15 are call saved for 64 Bit ABI. */ - cfun_frame_layout.fpr_bitmap = 0; - cfun_frame_layout.high_fprs = 0; - if (TARGET_64BIT) - for (i = 24; i < 32; i++) - if (df_regs_ever_live_p (i) && !global_regs[i]) - { - cfun_set_fpr_bit (i - 16); - cfun_frame_layout.high_fprs++; - } - /* Find first and last gpr to be saved. We trust regs_ever_live data, except that we don't save and restore global registers. @@ -7474,6 +7499,29 @@ s390_register_info (int clobbered_regs[]) s390_regs_ever_clobbered (clobbered_regs); + /* fprs 8 - 15 are call saved for 64 Bit ABI. */ + if (!epilogue_completed) + { + cfun_frame_layout.fpr_bitmap = 0; + cfun_frame_layout.high_fprs = 0; + if (TARGET_64BIT) + for (i = 24; i < 32; i++) + /* During reload we have to use the df_regs_ever_live infos + since reload is marking FPRs used as spill slots there as + live before actually making the code changes. Without + this we fail during elimination offset verification. */ + if ((clobbered_regs[i] + || (df_regs_ever_live_p (i) + && (lra_in_progress + || reload_in_progress + || crtl->saves_all_registers))) + && !global_regs[i]) + { + cfun_set_fpr_bit (i - 16); + cfun_frame_layout.high_fprs++; + } + } + for (i = 0; i < 16; i++) clobbered_regs[i] = clobbered_regs[i] && !global_regs[i] && !fixed_regs[i]; @@ -7724,7 +7772,7 @@ s390_init_frame_layout (void) { HOST_WIDE_INT frame_size; int base_used; - int clobbered_regs[16]; + int clobbered_regs[32]; /* On S/390 machines, we may need to perform branch splitting, which will require both base and return address register. We have no @@ -7759,6 +7807,157 @@ s390_init_frame_layout (void) while (frame_size != cfun_frame_layout.frame_size); } +/* Remove the FPR clobbers from a tbegin insn if it can be proven that + the TX is nonescaping. A transaction is considered escaping if + there is at least one path from tbegin returning CC0 to the + function exit block without an tend. + + The check so far has some limitations: + - only single tbegin/tend BBs are supported + - the first cond jump after tbegin must separate the CC0 path from ~CC0 + - when CC is copied to a GPR and the CC0 check is done with the GPR + this is not supported +*/ + +static void +s390_optimize_nonescaping_tx (void) +{ + const unsigned int CC0 = 1 << 3; + basic_block tbegin_bb = NULL; + basic_block tend_bb = NULL; + basic_block bb; + rtx insn; + bool result = true; + int bb_index; + rtx tbegin_insn = NULL_RTX; + + if (!cfun->machine->tbegin_p) + return; + + for (bb_index = 0; bb_index < n_basic_blocks; bb_index++) + { + bb = BASIC_BLOCK (bb_index); + + FOR_BB_INSNS (bb, insn) + { + rtx ite, cc, pat, target; + unsigned HOST_WIDE_INT mask; + + if (!INSN_P (insn) || INSN_CODE (insn) <= 0) + continue; + + pat = PATTERN (insn); + + if (GET_CODE (pat) == PARALLEL) + pat = XVECEXP (pat, 0, 0); + + if (GET_CODE (pat) != SET + || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE) + continue; + + if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN) + { + rtx tmp; + + tbegin_insn = insn; + + /* Just return if the tbegin doesn't have clobbers. */ + if (GET_CODE (PATTERN (insn)) != PARALLEL) + return; + + if (tbegin_bb != NULL) + return; + + /* Find the next conditional jump. */ + for (tmp = NEXT_INSN (insn); + tmp != NULL_RTX; + tmp = NEXT_INSN (tmp)) + { + if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp)) + return; + if (!JUMP_P (tmp)) + continue; + + ite = SET_SRC (PATTERN (tmp)); + if (GET_CODE (ite) != IF_THEN_ELSE) + continue; + + cc = XEXP (XEXP (ite, 0), 0); + if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)) + || GET_MODE (cc) != CCRAWmode + || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT) + return; + + if (bb->succs->length () != 2) + return; + + mask = INTVAL (XEXP (XEXP (ite, 0), 1)); + if (GET_CODE (XEXP (ite, 0)) == NE) + mask ^= 0xf; + + if (mask == CC0) + target = XEXP (ite, 1); + else if (mask == (CC0 ^ 0xf)) + target = XEXP (ite, 2); + else + return; + + { + edge_iterator ei; + edge e1, e2; + + ei = ei_start (bb->succs); + e1 = ei_safe_edge (ei); + ei_next (&ei); + e2 = ei_safe_edge (ei); + + if (e2->flags & EDGE_FALLTHRU) + { + e2 = e1; + e1 = ei_safe_edge (ei); + } + + if (!(e1->flags & EDGE_FALLTHRU)) + return; + + tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest; + } + if (tmp == BB_END (bb)) + break; + } + } + + if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND) + { + if (tend_bb != NULL) + return; + tend_bb = bb; + } + } + } + + /* Either we successfully remove the FPR clobbers here or we are not + able to do anything for this TX. Both cases don't qualify for + another look. */ + cfun->machine->tbegin_p = false; + + if (tbegin_bb == NULL || tend_bb == NULL) + return; + + calculate_dominance_info (CDI_POST_DOMINATORS); + result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb); + free_dominance_info (CDI_POST_DOMINATORS); + + if (!result) + return; + + PATTERN (tbegin_insn) = XVECEXP (PATTERN (tbegin_insn), 0, 0); + INSN_CODE (tbegin_insn) = -1; + df_insn_rescan (tbegin_insn); + + return; +} + /* Update frame layout. Recompute actual register save data based on current info and update regs_ever_live for the special registers. May be called multiple times, but may never cause *more* registers @@ -7767,7 +7966,7 @@ s390_init_frame_layout (void) static void s390_update_frame_layout (void) { - int clobbered_regs[16]; + int clobbered_regs[32]; s390_register_info (clobbered_regs); @@ -8204,8 +8403,10 @@ s390_emit_prologue (void) int offset; int next_fpr = 0; - /* Complete frame layout. */ + /* Try to get rid of the FPR clobbers. */ + s390_optimize_nonescaping_tx (); + /* Complete frame layout. */ s390_update_frame_layout (); /* Annotate all constant pool references to let the scheduler know @@ -9353,6 +9554,294 @@ s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, return build_va_arg_indirect_ref (addr); } +/* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX) + expanders. + DEST - Register location where CC will be stored. + TDB - Pointer to a 256 byte area where to store the transaction. + diagnostic block. NULL if TDB is not needed. + RETRY - Retry count value. If non-NULL a retry loop for CC2 + is emitted + CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part + of the tbegin instruction pattern. */ + +void +s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p) +{ + const int CC0 = 1 << 3; + const int CC1 = 1 << 2; + const int CC3 = 1 << 0; + rtx abort_label = gen_label_rtx (); + rtx leave_label = gen_label_rtx (); + rtx retry_reg = gen_reg_rtx (SImode); + rtx retry_label = NULL_RTX; + rtx jump; + rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1); + + if (retry != NULL_RTX) + { + emit_move_insn (retry_reg, retry); + retry_label = gen_label_rtx (); + emit_label (retry_label); + } + + if (clobber_fprs_p) + emit_insn (gen_tbegin_1 (tdb, + gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK))); + else + emit_insn (gen_tbegin_nofloat_1 (tdb, + gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK))); + + jump = s390_emit_jump (abort_label, + gen_rtx_NE (VOIDmode, + gen_rtx_REG (CCRAWmode, CC_REGNUM), + gen_rtx_CONST_INT (VOIDmode, CC0))); + + JUMP_LABEL (jump) = abort_label; + LABEL_NUSES (abort_label) = 1; + add_reg_note (jump, REG_BR_PROB, very_unlikely); + + /* Initialize CC return value. */ + emit_move_insn (dest, const0_rtx); + + s390_emit_jump (leave_label, NULL_RTX); + LABEL_NUSES (leave_label) = 1; + emit_barrier (); + + /* Abort handler code. */ + + emit_label (abort_label); + if (retry != NULL_RTX) + { + rtx count = gen_reg_rtx (SImode); + jump = s390_emit_jump (leave_label, + gen_rtx_EQ (VOIDmode, + gen_rtx_REG (CCRAWmode, CC_REGNUM), + gen_rtx_CONST_INT (VOIDmode, CC1 | CC3))); + LABEL_NUSES (leave_label) = 2; + add_reg_note (jump, REG_BR_PROB, very_unlikely); + + /* CC2 - transient failure. Perform retry with ppa. */ + emit_move_insn (count, retry); + emit_insn (gen_subsi3 (count, count, retry_reg)); + emit_insn (gen_tx_assist (count)); + jump = emit_jump_insn (gen_doloop_si64 (retry_label, + retry_reg, + retry_reg)); + JUMP_LABEL (jump) = retry_label; + LABEL_NUSES (retry_label) = 1; + } + + emit_move_insn (dest, gen_rtx_UNSPEC (SImode, + gen_rtvec (1, gen_rtx_REG (CCRAWmode, + CC_REGNUM)), + UNSPEC_CC_TO_INT)); + emit_label (leave_label); +} + +/* Builtins. */ + +enum s390_builtin +{ + S390_BUILTIN_TBEGIN, + S390_BUILTIN_TBEGIN_NOFLOAT, + S390_BUILTIN_TBEGIN_RETRY, + S390_BUILTIN_TBEGIN_RETRY_NOFLOAT, + S390_BUILTIN_TBEGINC, + S390_BUILTIN_TEND, + S390_BUILTIN_TABORT, + S390_BUILTIN_NON_TX_STORE, + S390_BUILTIN_TX_NESTING_DEPTH, + S390_BUILTIN_TX_ASSIST, + + S390_BUILTIN_max +}; + +static enum insn_code const code_for_builtin[S390_BUILTIN_max] = { + CODE_FOR_tbegin, + CODE_FOR_tbegin_nofloat, + CODE_FOR_tbegin_retry, + CODE_FOR_tbegin_retry_nofloat, + CODE_FOR_tbeginc, + CODE_FOR_tend, + CODE_FOR_tabort, + CODE_FOR_ntstg, + CODE_FOR_etnd, + CODE_FOR_tx_assist +}; + +static void +s390_init_builtins (void) +{ + tree ftype, uint64_type; + + /* void foo (void) */ + ftype = build_function_type_list (void_type_node, NULL_TREE); + add_builtin_function ("__builtin_tbeginc", ftype, S390_BUILTIN_TBEGINC, + BUILT_IN_MD, NULL, NULL_TREE); + + /* void foo (int) */ + ftype = build_function_type_list (void_type_node, integer_type_node, + NULL_TREE); + add_builtin_function ("__builtin_tabort", ftype, + S390_BUILTIN_TABORT, BUILT_IN_MD, NULL, NULL_TREE); + add_builtin_function ("__builtin_tx_assist", ftype, + S390_BUILTIN_TX_ASSIST, BUILT_IN_MD, NULL, NULL_TREE); + + /* int foo (void *) */ + ftype = build_function_type_list (integer_type_node, ptr_type_node, NULL_TREE); + add_builtin_function ("__builtin_tbegin", ftype, S390_BUILTIN_TBEGIN, + BUILT_IN_MD, NULL, NULL_TREE); + add_builtin_function ("__builtin_tbegin_nofloat", ftype, + S390_BUILTIN_TBEGIN_NOFLOAT, + BUILT_IN_MD, NULL, NULL_TREE); + + /* int foo (void *, int) */ + ftype = build_function_type_list (integer_type_node, ptr_type_node, + integer_type_node, NULL_TREE); + add_builtin_function ("__builtin_tbegin_retry", ftype, + S390_BUILTIN_TBEGIN_RETRY, + BUILT_IN_MD, + NULL, NULL_TREE); + add_builtin_function ("__builtin_tbegin_retry_nofloat", ftype, + S390_BUILTIN_TBEGIN_RETRY_NOFLOAT, + BUILT_IN_MD, + NULL, NULL_TREE); + + /* int foo (void) */ + ftype = build_function_type_list (integer_type_node, NULL_TREE); + add_builtin_function ("__builtin_tx_nesting_depth", ftype, + S390_BUILTIN_TX_NESTING_DEPTH, + BUILT_IN_MD, NULL, NULL_TREE); + add_builtin_function ("__builtin_tend", ftype, + S390_BUILTIN_TEND, BUILT_IN_MD, NULL, NULL_TREE); + + /* void foo (uint64_t *, uint64_t) */ + if (TARGET_64BIT) + uint64_type = long_unsigned_type_node; + else + uint64_type = long_long_unsigned_type_node; + + ftype = build_function_type_list (void_type_node, + build_pointer_type (uint64_type), + uint64_type, NULL_TREE); + add_builtin_function ("__builtin_non_tx_store", ftype, + S390_BUILTIN_NON_TX_STORE, + BUILT_IN_MD, NULL, NULL_TREE); +} + +/* Expand an expression EXP that calls a built-in function, + with result going to TARGET if that's convenient + (and in mode MODE if that's convenient). + SUBTARGET may be used as the target for computing one of EXP's operands. + IGNORE is nonzero if the value is to be ignored. */ + +static rtx +s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + int ignore ATTRIBUTE_UNUSED) +{ +#define MAX_ARGS 2 + + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + unsigned int fcode = DECL_FUNCTION_CODE (fndecl); + enum insn_code icode; + rtx op[MAX_ARGS], pat; + int arity; + bool nonvoid; + tree arg; + call_expr_arg_iterator iter; + + if (fcode >= S390_BUILTIN_max) + internal_error ("bad builtin fcode"); + icode = code_for_builtin[fcode]; + if (icode == 0) + internal_error ("bad builtin fcode"); + + if (!TARGET_ZEC12) + error ("Transactional execution builtins require zEC12 or later\n"); + + if (!TARGET_HTM && TARGET_ZEC12) + error ("Transactional execution builtins not enabled (-mtx)\n"); + + /* Set a flag in the machine specific cfun part in order to support + saving/restoring of FPRs. */ + if (fcode == S390_BUILTIN_TBEGIN || fcode == S390_BUILTIN_TBEGIN_RETRY) + cfun->machine->tbegin_p = true; + + nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node; + + arity = 0; + FOR_EACH_CALL_EXPR_ARG (arg, iter, exp) + { + const struct insn_operand_data *insn_op; + + if (arg == error_mark_node) + return NULL_RTX; + if (arity >= MAX_ARGS) + return NULL_RTX; + + insn_op = &insn_data[icode].operand[arity + nonvoid]; + + op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL); + + if (!(*insn_op->predicate) (op[arity], insn_op->mode)) + { + if (insn_op->predicate == memory_operand) + { + /* Don't move a NULL pointer into a register. Otherwise + we have to rely on combine being able to move it back + in order to get an immediate 0 in the instruction. */ + if (op[arity] != const0_rtx) + op[arity] = copy_to_mode_reg (Pmode, op[arity]); + op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]); + } + else + op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]); + } + + arity++; + } + + if (nonvoid) + { + enum machine_mode tmode = insn_data[icode].operand[0].mode; + if (!target + || GET_MODE (target) != tmode + || !(*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + } + + switch (arity) + { + case 0: + pat = GEN_FCN (icode) (target); + break; + case 1: + if (nonvoid) + pat = GEN_FCN (icode) (target, op[0]); + else + pat = GEN_FCN (icode) (op[0]); + break; + case 2: + if (nonvoid) + pat = GEN_FCN (icode) (target, op[0], op[1]); + else + pat = GEN_FCN (icode) (op[0], op[1]); + break; + default: + gcc_unreachable (); + } + if (!pat) + return NULL_RTX; + emit_insn (pat); + + if (nonvoid) + return target; + else + return const0_rtx; +} + + /* Output assembly code for the trampoline template to stdio stream FILE. @@ -11008,6 +11497,11 @@ s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop) #undef TARGET_RETURN_IN_MEMORY #define TARGET_RETURN_IN_MEMORY s390_return_in_memory +#undef TARGET_INIT_BUILTINS +#define TARGET_INIT_BUILTINS s390_init_builtins +#undef TARGET_EXPAND_BUILTIN +#define TARGET_EXPAND_BUILTIN s390_expand_builtin + #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h index b0e530f0ed4..d53fed7a6f2 100644 --- a/gcc/config/s390/s390.h +++ b/gcc/config/s390/s390.h @@ -34,7 +34,8 @@ enum processor_flags PF_DFP = 16, PF_Z10 = 32, PF_Z196 = 64, - PF_ZEC12 = 128 + PF_ZEC12 = 128, + PF_TX = 256 }; /* This is necessary to avoid a warning about comparing different enum @@ -61,6 +62,8 @@ enum processor_flags (s390_arch_flags & PF_Z196) #define TARGET_CPU_ZEC12 \ (s390_arch_flags & PF_ZEC12) +#define TARGET_CPU_HTM \ + (s390_arch_flags & PF_TX) /* These flags indicate that the generated code should run on a cpu providing the respective hardware facility when run in @@ -78,6 +81,8 @@ enum processor_flags (TARGET_ZARCH && TARGET_CPU_Z196) #define TARGET_ZEC12 \ (TARGET_ZARCH && TARGET_CPU_ZEC12) +#define TARGET_HTM \ + (TARGET_ZARCH && TARGET_CPU_HTM && TARGET_OPT_HTM) #define TARGET_AVOID_CMP_AND_BRANCH (s390_tune == PROCESSOR_2817_Z196) @@ -93,23 +98,25 @@ enum processor_flags #define TARGET_TPF 0 /* Target CPU builtins. */ -#define TARGET_CPU_CPP_BUILTINS() \ - do \ - { \ - builtin_assert ("cpu=s390"); \ - builtin_assert ("machine=s390"); \ - builtin_define ("__s390__"); \ - if (TARGET_ZARCH) \ - builtin_define ("__zarch__"); \ - if (TARGET_64BIT) \ - builtin_define ("__s390x__"); \ - if (TARGET_LONG_DOUBLE_128) \ - builtin_define ("__LONG_DOUBLE_128__"); \ - } \ +#define TARGET_CPU_CPP_BUILTINS() \ + do \ + { \ + builtin_assert ("cpu=s390"); \ + builtin_assert ("machine=s390"); \ + builtin_define ("__s390__"); \ + if (TARGET_ZARCH) \ + builtin_define ("__zarch__"); \ + if (TARGET_64BIT) \ + builtin_define ("__s390x__"); \ + if (TARGET_LONG_DOUBLE_128) \ + builtin_define ("__LONG_DOUBLE_128__"); \ + if (TARGET_HTM) \ + builtin_define ("__HTM__"); \ + } \ while (0) #ifdef DEFAULT_TARGET_64BIT -#define TARGET_DEFAULT (MASK_64BIT | MASK_ZARCH | MASK_HARD_DFP) +#define TARGET_DEFAULT (MASK_64BIT | MASK_ZARCH | MASK_HARD_DFP | MASK_OPT_HTM) #else #define TARGET_DEFAULT 0 #endif diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index 0141b9813ef..e12d1538a50 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -59,11 +59,17 @@ (define_c_enum "unspec" [ ; Miscellaneous UNSPEC_ROUND - UNSPEC_CCU_TO_INT - UNSPEC_CCZ_TO_INT UNSPEC_ICM UNSPEC_TIE + ; Convert CC into a str comparison result and copy it into an + ; integer register + ; cc0->0, cc1->1, cc2->-1, (cc3->-1) + UNSPEC_STRCMPCC_TO_INT + + ; Copy CC as is into the lower 2 bits of an integer register + UNSPEC_CC_TO_INT + ; GOT/PLT and lt-relative accesses UNSPEC_LTREL_OFFSET UNSPEC_LTREL_BASE @@ -138,6 +144,15 @@ ; Atomic Support UNSPECV_CAS UNSPECV_ATOMIC_OP + + ; Transactional Execution support + UNSPECV_TBEGIN + UNSPECV_TBEGINC + UNSPECV_TEND + UNSPECV_TABORT + UNSPECV_ETND + UNSPECV_NTSTG + UNSPECV_PPA ]) ;; @@ -191,6 +206,9 @@ (PFPO_OP1_TYPE_SHIFT 8) ]) +; Immediate operands for tbegin and tbeginc +(define_constants [(TBEGIN_MASK 65292)]) ; 0xff0c +(define_constants [(TBEGINC_MASK 65288)]) ; 0xff08 ;; Instruction operand type as used in the Principles of Operation. ;; Used to determine defaults for length and other attribute values. @@ -2251,7 +2269,7 @@ (define_insn "movcc" [(set (match_operand:CC 0 "nonimmediate_operand" "=d,c,d,d,d,R,T") - (match_operand:CC 1 "nonimmediate_operand" "d,d,c,R,T,d,d"))] + (match_operand:CC 1 "nonimmediate_operand" " d,d,c,R,T,d,d"))] "" "@ lr\t%0,%1 @@ -2583,7 +2601,7 @@ (use (reg:SI 0))]) (parallel [(set (match_operand:SI 0 "register_operand" "=d") - (unspec:SI [(reg:CCU CC_REGNUM)] UNSPEC_CCU_TO_INT)) + (unspec:SI [(reg:CCU CC_REGNUM)] UNSPEC_STRCMPCC_TO_INT)) (clobber (reg:CC CC_REGNUM))])] "" { @@ -2825,7 +2843,7 @@ (match_dup 2)] UNSPEC_TDC_INSN)) (set (match_operand:SI 0 "register_operand" "=d") - (unspec:SI [(reg:CCZ CC_REGNUM)] UNSPEC_CCZ_TO_INT))] + (unspec:SI [(reg:CCZ CC_REGNUM)] UNSPEC_CC_TO_INT))] "TARGET_HARD_FLOAT" { operands[2] = GEN_INT (S390_TDC_SIGNBIT_SET); @@ -2837,12 +2855,21 @@ (match_dup 2)] UNSPEC_TDC_INSN)) (set (match_operand:SI 0 "register_operand" "=d") - (unspec:SI [(reg:CCZ CC_REGNUM)] UNSPEC_CCZ_TO_INT))] + (unspec:SI [(reg:CCZ CC_REGNUM)] UNSPEC_CC_TO_INT))] "TARGET_HARD_FLOAT" { operands[2] = GEN_INT (S390_TDC_INFINITY); }) +(define_insn_and_split "*cc_to_int" + [(set (match_operand:SI 0 "register_operand" "=d") + (unspec:SI [(match_operand 1 "register_operand" "0")] + UNSPEC_CC_TO_INT))] + "operands != NULL" + "#" + "reload_completed" + [(set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 28)))]) + ; This insn is used to generate all variants of the Test Data Class ; instruction, namely tcxb, tcdb, and tceb. The insn's first operand ; is the register to be tested and the second one is the bit mask @@ -2858,14 +2885,6 @@ [(set_attr "op_type" "RXE") (set_attr "type" "fsimp")]) -(define_insn_and_split "*ccz_to_int" - [(set (match_operand:SI 0 "register_operand" "=d") - (unspec:SI [(match_operand:CCZ 1 "register_operand" "0")] - UNSPEC_CCZ_TO_INT))] - "" - "#" - "reload_completed" - [(set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 28)))]) ; @@ -3210,7 +3229,7 @@ (define_insn_and_split "cmpint" [(set (match_operand:SI 0 "register_operand" "=d") (unspec:SI [(match_operand:CCU 1 "register_operand" "0")] - UNSPEC_CCU_TO_INT)) + UNSPEC_STRCMPCC_TO_INT)) (clobber (reg:CC CC_REGNUM))] "" "#" @@ -3223,10 +3242,10 @@ (define_insn_and_split "*cmpint_cc" [(set (reg CC_REGNUM) (compare (unspec:SI [(match_operand:CCU 1 "register_operand" "0")] - UNSPEC_CCU_TO_INT) + UNSPEC_STRCMPCC_TO_INT) (const_int 0))) (set (match_operand:SI 0 "register_operand" "=d") - (unspec:SI [(match_dup 1)] UNSPEC_CCU_TO_INT))] + (unspec:SI [(match_dup 1)] UNSPEC_STRCMPCC_TO_INT))] "s390_match_ccmode (insn, CCSmode)" "#" "&& reload_completed" @@ -3243,7 +3262,7 @@ (define_insn_and_split "*cmpint_sign" [(set (match_operand:DI 0 "register_operand" "=d") (sign_extend:DI (unspec:SI [(match_operand:CCU 1 "register_operand" "0")] - UNSPEC_CCU_TO_INT))) + UNSPEC_STRCMPCC_TO_INT))) (clobber (reg:CC CC_REGNUM))] "TARGET_ZARCH" "#" @@ -3257,11 +3276,11 @@ [(set (reg CC_REGNUM) (compare (ashiftrt:DI (ashift:DI (subreg:DI (unspec:SI [(match_operand:CCU 1 "register_operand" "0")] - UNSPEC_CCU_TO_INT) 0) + UNSPEC_STRCMPCC_TO_INT) 0) (const_int 32)) (const_int 32)) (const_int 0))) (set (match_operand:DI 0 "register_operand" "=d") - (sign_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_CCU_TO_INT)))] + (sign_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_STRCMPCC_TO_INT)))] "s390_match_ccmode (insn, CCSmode) && TARGET_ZARCH" "#" "&& reload_completed" @@ -5512,7 +5531,7 @@ (if_then_else:GPR (match_operator 1 "s390_comparison" [(match_operand 2 "cc_reg_operand" " c,c, c, c, c, c, c") - (const_int 0)]) + (match_operand 5 "const_int_operand" "")]) (match_operand:GPR 3 "nonimmediate_operand" " d,0,QS, 0, d, 0,QS") (match_operand:GPR 4 "nonimmediate_operand" " 0,d, 0,QS, 0, d,QS")))] "TARGET_Z196" @@ -7912,7 +7931,8 @@ (define_insn "*cjump_64" [(set (pc) (if_then_else - (match_operator 1 "s390_comparison" [(reg CC_REGNUM) (const_int 0)]) + (match_operator 1 "s390_comparison" [(reg CC_REGNUM) + (match_operand 2 "const_int_operand" "")]) (label_ref (match_operand 0 "" "")) (pc)))] "TARGET_CPU_ZARCH" @@ -7931,7 +7951,8 @@ (define_insn "*cjump_31" [(set (pc) (if_then_else - (match_operator 1 "s390_comparison" [(reg CC_REGNUM) (const_int 0)]) + (match_operator 1 "s390_comparison" [(reg CC_REGNUM) + (match_operand 2 "const_int_operand" "")]) (label_ref (match_operand 0 "" "")) (pc)))] "!TARGET_CPU_ZARCH" @@ -9800,3 +9821,217 @@ "cpsdr\t%0,%2,%1" [(set_attr "op_type" "RRF") (set_attr "type" "fsimp")]) + + +;; +;;- Transactional execution instructions +;; + +; This splitter helps combine to make use of CC directly when +; comparing the integer result of a tbegin builtin with a constant. +; The unspec is already removed by canonicalize_comparison. So this +; splitters only job is to turn the PARALLEL into separate insns +; again. Unfortunately this only works with the very first cc/int +; compare since combine is not able to deal with data flow across +; basic block boundaries. + +; It needs to be an insn pattern as well since combine does not apply +; the splitter directly. Combine would only use it if it actually +; would reduce the number of instructions. +(define_insn_and_split "*ccraw_to_int" + [(set (pc) + (if_then_else + (match_operator 0 "s390_eqne_operator" + [(reg:CCRAW CC_REGNUM) + (match_operand 1 "const_int_operand" "")]) + (label_ref (match_operand 2 "" "")) + (pc))) + (set (match_operand:SI 3 "register_operand" "=d") + (unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))] + "" + "#" + "" + [(set (match_dup 3) + (unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT)) + (set (pc) + (if_then_else (match_op_dup 0 [(reg:CCRAW CC_REGNUM) (match_dup 1)]) + (label_ref (match_dup 2)) + (pc)))] + "") + +; Non-constrained transaction begin + +(define_expand "tbegin" + [(match_operand:SI 0 "register_operand" "=d") + (match_operand:BLK 1 "memory_operand" "=Q")] + "TARGET_HTM" +{ + s390_expand_tbegin (operands[0], operands[1], NULL_RTX, true); + DONE; +}) + +(define_expand "tbegin_nofloat" + [(match_operand:SI 0 "register_operand" "=d") + (match_operand:BLK 1 "memory_operand" "=Q")] + "TARGET_HTM" +{ + s390_expand_tbegin (operands[0], operands[1], NULL_RTX, false); + DONE; +}) + +(define_expand "tbegin_retry" + [(match_operand:SI 0 "register_operand" "=d") + (match_operand:BLK 1 "memory_operand" "=Q") + (match_operand 2 "const_int_operand")] + "TARGET_HTM" +{ + s390_expand_tbegin (operands[0], operands[1], operands[2], true); + DONE; +}) + +(define_expand "tbegin_retry_nofloat" + [(match_operand:SI 0 "register_operand" "=d") + (match_operand:BLK 1 "memory_operand" "=Q") + (match_operand 2 "const_int_operand")] + "TARGET_HTM" +{ + s390_expand_tbegin (operands[0], operands[1], operands[2], false); + DONE; +}) + +(define_insn "tbegin_1" + [(set (reg:CCRAW CC_REGNUM) + (unspec_volatile:CCRAW [(match_operand:BLK 0 "memory_operand" "=Q") + (match_operand 1 "const_int_operand" " D")] + UNSPECV_TBEGIN)) + (clobber (reg:DF 16)) + (clobber (reg:DF 17)) + (clobber (reg:DF 18)) + (clobber (reg:DF 19)) + (clobber (reg:DF 20)) + (clobber (reg:DF 21)) + (clobber (reg:DF 22)) + (clobber (reg:DF 23)) + (clobber (reg:DF 24)) + (clobber (reg:DF 25)) + (clobber (reg:DF 26)) + (clobber (reg:DF 27)) + (clobber (reg:DF 28)) + (clobber (reg:DF 29)) + (clobber (reg:DF 30)) + (clobber (reg:DF 31))] +; CONST_OK_FOR_CONSTRAINT_P does not work with D constraint since D is +; not supposed to be used for immediates (see genpreds.c). + "TARGET_HTM && INTVAL (operands[1]) >= 0 && INTVAL (operands[1]) <= 0xffff" + "tbegin\t%0,%x1" + [(set_attr "op_type" "SIL")]) + +; Same as above but without the FPR clobbers +(define_insn "tbegin_nofloat_1" + [(set (reg:CCRAW CC_REGNUM) + (unspec_volatile:CCRAW [(match_operand:BLK 0 "memory_operand" "=Q") + (match_operand 1 "const_int_operand" " D")] + UNSPECV_TBEGIN))] + "TARGET_HTM && INTVAL (operands[1]) >= 0 && INTVAL (operands[1]) <= 0xffff" + "tbegin\t%0,%x1" + [(set_attr "op_type" "SIL")]) + + +; Constrained transaction begin + +(define_expand "tbeginc" + [(set (reg:CCRAW CC_REGNUM) + (unspec_volatile:CCRAW [(const_int TBEGINC_MASK)] + UNSPECV_TBEGINC))] + "TARGET_HTM" + "") + +(define_insn "*tbeginc_1" + [(set (reg:CCRAW CC_REGNUM) + (unspec_volatile:CCRAW [(match_operand 0 "const_int_operand" " D")] + UNSPECV_TBEGINC))] + "TARGET_HTM && INTVAL (operands[0]) >= 0 && INTVAL (operands[0]) <= 0xffff" + "tbeginc\t0,%x0" + [(set_attr "op_type" "SIL")]) + +; Transaction end + +(define_expand "tend" + [(set (reg:CCRAW CC_REGNUM) + (unspec_volatile:CCRAW [(const_int 0)] UNSPECV_TEND)) + (set (match_operand:SI 0 "register_operand" "=d") + (unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))] + "TARGET_HTM" + "") + +(define_insn "*tend_1" + [(set (reg:CCRAW CC_REGNUM) + (unspec_volatile:CCRAW [(const_int 0)] UNSPECV_TEND))] + "TARGET_HTM" + "tend" + [(set_attr "op_type" "S")]) + +; Transaction abort + +(define_expand "tabort" + [(unspec_volatile [(match_operand 0 "shift_count_or_setmem_operand" "")] + UNSPECV_TABORT)] + "TARGET_HTM && operands != NULL" +{ + if (CONST_INT_P (operands[0]) + && INTVAL (operands[0]) >= 0 && INTVAL (operands[0]) <= 255) + { + error ("Invalid transaction abort code: " HOST_WIDE_INT_PRINT_DEC + ". Values in range 0 through 255 are reserved.", + INTVAL (operands[0])); + FAIL; + } +}) + +(define_insn "*tabort_1" + [(unspec_volatile [(match_operand 0 "shift_count_or_setmem_operand" "")] + UNSPECV_TABORT)] + "TARGET_HTM && operands != NULL" + "tabort\t%Y0" + [(set_attr "op_type" "S")]) + +; Transaction extract nesting depth + +(define_insn "etnd" + [(set (match_operand:SI 0 "register_operand" "=d") + (unspec_volatile:SI [(const_int 0)] UNSPECV_ETND))] + "TARGET_HTM" + "etnd\t%0" + [(set_attr "op_type" "RRE")]) + +; Non-transactional store + +(define_insn "ntstg" + [(set (match_operand:DI 0 "memory_operand" "=RT") + (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "d")] + UNSPECV_NTSTG))] + "TARGET_HTM" + "ntstg\t%1,%0" + [(set_attr "op_type" "RXY")]) + +; Transaction perform processor assist + +(define_expand "tx_assist" + [(set (match_dup 1) (const_int 0)) + (unspec_volatile [(match_operand:SI 0 "register_operand" "d") + (match_dup 1) + (const_int 1)] + UNSPECV_PPA)] + "TARGET_HTM" +{ + operands[1] = gen_reg_rtx (SImode); +}) + +(define_insn "*ppa" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "d") + (match_operand:SI 1 "register_operand" "d") + (match_operand 2 "const_int_operand" "I")] + UNSPECV_PPA)] + "TARGET_HTM && INTVAL (operands[2]) < 16" + "ppa\t%0,%1,1" + [(set_attr "op_type" "RRF")]) diff --git a/gcc/config/s390/s390.opt b/gcc/config/s390/s390.opt index ba38e6e14ed..7dedb836701 100644 --- a/gcc/config/s390/s390.opt +++ b/gcc/config/s390/s390.opt @@ -104,6 +104,10 @@ mlong-double-64 Target Report RejectNegative Negative(mlong-double-128) InverseMask(LONG_DOUBLE_128) Use 64-bit long double +mhtm +Target Report Mask(OPT_HTM) +Use hardware transactional execution instructions + mpacked-stack Target Report Mask(PACKED_STACK) Use packed stack layout diff --git a/gcc/config/s390/s390intrin.h b/gcc/config/s390/s390intrin.h new file mode 100644 index 00000000000..e1a00ce58e3 --- /dev/null +++ b/gcc/config/s390/s390intrin.h @@ -0,0 +1,33 @@ +/* S/390 System z specific intrinsics + Copyright (C) 2013 Free Software Foundation, Inc. + Contributed by Andreas Krebbel (Andreas.Krebbel@de.ibm.com) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#ifndef _S390INTRIN_H +#define _S390INTRIN_H + +#ifndef __s390__ + #error s390intrin.h included on wrong platform/compiler +#endif + +#ifdef __HTM__ +#include +#endif + + +#endif /* _S390INTRIN_H*/ -- cgit v1.2.1