summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/ChangeLog66
-rw-r--r--gcc/config/arm/aout.h2
-rw-r--r--gcc/config/arm/arm-builtins.c80
-rw-r--r--gcc/config/arm/arm-fixed.md81
-rw-r--r--gcc/config/arm/arm-protos.h3
-rw-r--r--gcc/config/arm/arm.c17
-rw-r--r--gcc/config/arm/arm.h14
-rw-r--r--gcc/config/arm/arm.md152
-rw-r--r--gcc/config/arm/arm_acle.h44
-rw-r--r--gcc/config/arm/arm_acle_builtins.def5
-rw-r--r--gcc/config/arm/iterators.md6
-rw-r--r--gcc/config/arm/unspecs.md4
-rw-r--r--gcc/doc/sourcebuild.texi6
-rw-r--r--gcc/testsuite/ChangeLog9
-rw-r--r--gcc/testsuite/gcc.target/arm/acle/sat_no_smlatb.c19
-rw-r--r--gcc/testsuite/gcc.target/arm/acle/saturation.c40
-rw-r--r--gcc/testsuite/lib/target-supports.exp38
17 files changed, 557 insertions, 29 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c5818165a4c..51a301e2770 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,69 @@
+2019-11-07 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
+
+ * config/arm/aout.h (REGISTER_NAMES): Add apsrq.
+ * config/arm/arm.md (APSRQ_REGNUM): Define.
+ (add_setq): New define_subst.
+ (add_clobber_q_name): New define_subst_attr.
+ (add_clobber_q_pred): Likewise.
+ (maddhisi4): Change to define_expand. Split into mult and add if
+ ARM_Q_BIT_READ.
+ (*arm_maddhisi4): New define_insn.
+ (*maddhisi4tb): Disable for ARM_Q_BIT_READ.
+ (*maddhisi4tt): Likewise.
+ (arm_ssat): New define_expand.
+ (arm_usat): Likewise.
+ (arm_get_apsr): New define_insn.
+ (arm_set_apsr): Likewise.
+ (arm_saturation_occurred): New define_expand.
+ (arm_set_saturation): Likewise.
+ (*satsi_<SAT:code>): Rename to...
+ (satsi_<SAT:code><add_clobber_q_name>): ... This.
+ (*satsi_<SAT:code>_shift): Disable for ARM_Q_BIT_READ.
+ * config/arm/arm.h (FIXED_REGISTERS): Mark apsrq as fixed.
+ (CALL_USED_REGISTERS): Mark apsrq.
+ (FIRST_PSEUDO_REGISTER): Update value.
+ (REG_ALLOC_ORDER): Add APSRQ_REGNUM.
+ (machine_function): Add q_bit_access.
+ (ARM_Q_BIT_READ): Define.
+ * config/arm/arm.c (TARGET_CHECK_BUILTIN_CALL): Define.
+ (arm_conditional_register_usage): Clear APSRQ_REGNUM from
+ operand_reg_set.
+ (arm_q_bit_access): Define.
+ * config/arm/arm-builtins.c: Include stringpool.h.
+ (arm_sat_binop_imm_qualifiers,
+ arm_unsigned_sat_binop_unsigned_imm_qualifiers,
+ arm_sat_occurred_qualifiers, arm_set_sat_qualifiers): Define.
+ (SAT_BINOP_UNSIGNED_IMM_QUALIFIERS,
+ UNSIGNED_SAT_BINOP_UNSIGNED_IMM_QUALIFIERS, SAT_OCCURRED_QUALIFIERS,
+ SET_SAT_QUALIFIERS): Likewise.
+ (arm_builtins): Define ARM_BUILTIN_SAT_IMM_CHECK.
+ (arm_init_acle_builtins): Initialize __builtin_sat_imm_check.
+ Handle 0 argument expander.
+ (arm_expand_acle_builtin): Handle ARM_BUILTIN_SAT_IMM_CHECK.
+ (arm_check_builtin_call): Define.
+ * config/arm/arm.md (ssmulsa3, usmulusa3, usmuluha3,
+ arm_ssatsihi_shift, arm_usatsihi): Disable when ARM_Q_BIT_READ.
+ * config/arm/arm-protos.h (arm_check_builtin_call): Declare prototype.
+ (arm_q_bit_access): Likewise.
+ * config/arm/arm_acle.h (__ssat, __usat, __ignore_saturation,
+ __saturation_occurred, __set_saturation_occurred): Define.
+ * config/arm/arm_acle_builtins.def: Define builtins for ssat, usat,
+ saturation_occurred, set_saturation_occurred.
+ * config/arm/unspecs.md (UNSPEC_Q_SET): Define.
+ (UNSPEC_APSR_READ): Likewise.
+ (VUNSPEC_APSR_WRITE): Likewise.
+ * config/arm/arm-fixed.md (ssadd<mode>3): Convert to define_expand.
+ (*arm_ssadd<mode>3): New define_insn.
+ (sssub<mode>3): Convert to define_expand.
+ (*arm_sssub<mode>3): New define_insn.
+ (ssmulsa3): Convert to define_expand.
+ (*arm_ssmulsa3): New define_insn.
+ (usmulusa3): Convert to define_expand.
+ (*arm_usmulusa3): New define_insn.
+ (ssmulha3): FAIL if ARM_Q_BIT_READ.
+ (arm_ssatsihi_shift, arm_usatsihi): Disable for ARM_Q_BIT_READ.
+ * config/arm/iterators.md (qaddsub_clob_q): New mode attribute.
+
2019-11-07 Martin Liska <mliska@suse.cz>
PR c++/92354
diff --git a/gcc/config/arm/aout.h b/gcc/config/arm/aout.h
index 91830a6cbde..a5f83cb503f 100644
--- a/gcc/config/arm/aout.h
+++ b/gcc/config/arm/aout.h
@@ -72,7 +72,7 @@
"wr8", "wr9", "wr10", "wr11", \
"wr12", "wr13", "wr14", "wr15", \
"wcgr0", "wcgr1", "wcgr2", "wcgr3", \
- "cc", "vfpcc", "sfp", "afp" \
+ "cc", "vfpcc", "sfp", "afp", "apsrq" \
}
#endif
diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
index c5cdb7b5d33..995f50785f6 100644
--- a/gcc/config/arm/arm-builtins.c
+++ b/gcc/config/arm/arm-builtins.c
@@ -41,6 +41,7 @@
#include "langhooks.h"
#include "case-cfn-macros.h"
#include "sbitmap.h"
+#include "stringpool.h"
#define SIMD_MAX_BUILTIN_ARGS 7
@@ -127,6 +128,20 @@ arm_binop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_none, qualifier_none, qualifier_immediate };
#define BINOP_IMM_QUALIFIERS (arm_binop_imm_qualifiers)
+/* T (T, unsigned immediate).  The result is signed, as for signed saturation.  */
+static enum arm_type_qualifiers
+arm_sat_binop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_none, qualifier_unsigned_immediate };
+#define SAT_BINOP_UNSIGNED_IMM_QUALIFIERS \
+  (arm_sat_binop_imm_qualifiers)
+
+/* unsigned T (T, unsigned immediate). */
+static enum arm_type_qualifiers
+arm_unsigned_sat_binop_unsigned_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+ = { qualifier_unsigned, qualifier_none, qualifier_unsigned_immediate };
+#define UNSIGNED_SAT_BINOP_UNSIGNED_IMM_QUALIFIERS \
+ (arm_unsigned_sat_binop_unsigned_imm_qualifiers)
+
/* T (T, lane index). */
static enum arm_type_qualifiers
arm_getlane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
@@ -285,6 +300,18 @@ arm_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
qualifier_none, qualifier_struct_load_store_lane_index };
#define STORE1LANE_QUALIFIERS (arm_storestruct_lane_qualifiers)
+ /* int (void). */
+static enum arm_type_qualifiers
+arm_sat_occurred_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+ = { qualifier_none, qualifier_void };
+#define SAT_OCCURRED_QUALIFIERS (arm_sat_occurred_qualifiers)
+
+ /* void (int). */
+static enum arm_type_qualifiers
+arm_set_sat_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+ = { qualifier_void, qualifier_none };
+#define SET_SAT_QUALIFIERS (arm_set_sat_qualifiers)
+
#define v8qi_UP E_V8QImode
#define v4hi_UP E_V4HImode
#define v4hf_UP E_V4HFmode
@@ -674,6 +701,7 @@ enum arm_builtins
ARM_BUILTIN_##N,
ARM_BUILTIN_ACLE_BASE,
+ ARM_BUILTIN_SAT_IMM_CHECK = ARM_BUILTIN_ACLE_BASE,
#include "arm_acle_builtins.def"
@@ -1169,6 +1197,16 @@ arm_init_acle_builtins (void)
{
unsigned int i, fcode = ARM_BUILTIN_ACLE_PATTERN_START;
+ tree sat_check_fpr = build_function_type_list (void_type_node,
+ intSI_type_node,
+ intSI_type_node,
+ intSI_type_node,
+ NULL);
+ arm_builtin_decls[ARM_BUILTIN_SAT_IMM_CHECK]
+ = add_builtin_function ("__builtin_sat_imm_check", sat_check_fpr,
+ ARM_BUILTIN_SAT_IMM_CHECK, BUILT_IN_MD,
+ NULL, NULL_TREE);
+
for (i = 0; i < ARRAY_SIZE (acle_builtin_data); i++, fcode++)
{
arm_builtin_datum *d = &acle_builtin_data[i];
@@ -2307,6 +2345,9 @@ constant_arg:
if (have_retval)
switch (argc)
{
+ case 0:
+ pat = GEN_FCN (icode) (target);
+ break;
case 1:
pat = GEN_FCN (icode) (target, op[0]);
break;
@@ -2465,7 +2506,26 @@ arm_expand_builtin_1 (int fcode, tree exp, rtx target,
static rtx
arm_expand_acle_builtin (int fcode, tree exp, rtx target)
{
-
+  if (fcode == ARM_BUILTIN_SAT_IMM_CHECK)
+    {
+      /* Check that the saturation immediate is a constant within bounds.  */
+      rtx min_sat = expand_normal (CALL_EXPR_ARG (exp, 1));
+      rtx max_sat = expand_normal (CALL_EXPR_ARG (exp, 2));
+      gcc_assert (CONST_INT_P (min_sat));
+      gcc_assert (CONST_INT_P (max_sat));
+      rtx sat_imm = expand_normal (CALL_EXPR_ARG (exp, 0));
+      if (CONST_INT_P (sat_imm))
+        {
+          /* Compare the integer values, not the rtx pointers themselves.  */
+          if (!IN_RANGE (INTVAL (sat_imm), INTVAL (min_sat), INTVAL (max_sat)))
+            error ("%Ksaturation bit range must be in the range [%wd, %wd]",
+                   exp, INTVAL (min_sat), INTVAL (max_sat));
+        }
+      else
+        error ("%Ksaturation bit range must be a constant immediate", exp);
+      /* Don't generate any RTL.  */
+      return const0_rtx;
+    }
arm_builtin_datum *d
= &acle_builtin_data[fcode - ARM_BUILTIN_ACLE_PATTERN_START];
@@ -3295,4 +3355,22 @@ arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
reload_fenv, restore_fnenv), update_call);
}
+/* Implement TARGET_CHECK_BUILTIN_CALL. Record an access to the Q bit through
+ ACLE intrinsics as an "acle qbit" attribute on the current function. */
+bool
+arm_check_builtin_call (location_t , vec<location_t> , tree fndecl,
+ tree, unsigned int, tree *)
+{
+ int fcode = DECL_MD_FUNCTION_CODE (fndecl);
+ if (fcode == ARM_BUILTIN_saturation_occurred
+ || fcode == ARM_BUILTIN_set_saturation)
+ {
+ if (cfun && cfun->decl) /* Only when there is a current function to tag. */
+ DECL_ATTRIBUTES (cfun->decl)
+ = tree_cons (get_identifier ("acle qbit"), NULL_TREE,
+ DECL_ATTRIBUTES (cfun->decl));
+ }
+ return true; /* Always true here; presumably "call is valid" - confirm. */
+}
+
#include "gt-arm-builtins.h"
diff --git a/gcc/config/arm/arm-fixed.md b/gcc/config/arm/arm-fixed.md
index fcab40d13f6..85dbc5d05c3 100644
--- a/gcc/config/arm/arm-fixed.md
+++ b/gcc/config/arm/arm-fixed.md
@@ -46,11 +46,22 @@
[(set_attr "predicable" "yes")
(set_attr "type" "alu_dsp_reg")])
-(define_insn "ssadd<mode>3"
+(define_expand "ssadd<mode>3"
+ [(set (match_operand:QADDSUB 0 "s_register_operand")
+ (ss_plus:QADDSUB (match_operand:QADDSUB 1 "s_register_operand")
+ (match_operand:QADDSUB 2 "s_register_operand")))]
+ "TARGET_INT_SIMD"
+ {
+ if (<qaddsub_clob_q>)
+ FAIL;
+ }
+)
+
+(define_insn "*arm_ssadd<mode>3"
[(set (match_operand:QADDSUB 0 "s_register_operand" "=r")
(ss_plus:QADDSUB (match_operand:QADDSUB 1 "s_register_operand" "r")
(match_operand:QADDSUB 2 "s_register_operand" "r")))]
- "TARGET_INT_SIMD"
+ "TARGET_INT_SIMD && !<qaddsub_clob_q>"
"qadd<qaddsub_suf>%?\\t%0, %1, %2"
[(set_attr "predicable" "yes")
(set_attr "type" "alu_dsp_reg")])
@@ -84,11 +95,22 @@
[(set_attr "predicable" "yes")
(set_attr "type" "alu_dsp_reg")])
-(define_insn "sssub<mode>3"
+(define_expand "sssub<mode>3"
+ [(set (match_operand:QADDSUB 0 "s_register_operand")
+ (ss_minus:QADDSUB (match_operand:QADDSUB 1 "s_register_operand")
+ (match_operand:QADDSUB 2 "s_register_operand")))]
+ "TARGET_INT_SIMD"
+ {
+ if (<qaddsub_clob_q>)
+ FAIL;
+ }
+)
+
+(define_insn "*arm_sssub<mode>3"
[(set (match_operand:QADDSUB 0 "s_register_operand" "=r")
(ss_minus:QADDSUB (match_operand:QADDSUB 1 "s_register_operand" "r")
(match_operand:QADDSUB 2 "s_register_operand" "r")))]
- "TARGET_INT_SIMD"
+ "TARGET_INT_SIMD && !<qaddsub_clob_q>"
"qsub<qaddsub_suf>%?\\t%0, %1, %2"
[(set_attr "predicable" "yes")
(set_attr "type" "alu_dsp_reg")])
@@ -193,19 +215,31 @@
DONE;
})
-;; The code sequence emitted by this insn pattern uses the Q flag, which GCC
-;; doesn't generally know about, so we don't bother expanding to individual
-;; instructions. It may be better to just use an out-of-line asm libcall for
-;; this.
+;; The code sequence emitted by this insn pattern uses the Q flag, so we need
+;; to bail out when ARM_Q_BIT_READ and resort to a library sequence instead.
+
+(define_expand "ssmulsa3"
+ [(parallel [(set (match_operand:SA 0 "s_register_operand")
+ (ss_mult:SA (match_operand:SA 1 "s_register_operand")
+ (match_operand:SA 2 "s_register_operand")))
+ (clobber (match_scratch:DI 3))
+ (clobber (match_scratch:SI 4))
+ (clobber (reg:CC CC_REGNUM))])]
+ "TARGET_32BIT && arm_arch6"
+ {
+ if (ARM_Q_BIT_READ)
+ FAIL;
+ }
+)
-(define_insn "ssmulsa3"
+(define_insn "*arm_ssmulsa3"
[(set (match_operand:SA 0 "s_register_operand" "=r")
(ss_mult:SA (match_operand:SA 1 "s_register_operand" "r")
(match_operand:SA 2 "s_register_operand" "r")))
(clobber (match_scratch:DI 3 "=r"))
(clobber (match_scratch:SI 4 "=r"))
(clobber (reg:CC CC_REGNUM))]
- "TARGET_32BIT && arm_arch6"
+ "TARGET_32BIT && arm_arch6 && !ARM_Q_BIT_READ"
{
/* s16.15 * s16.15 -> s32.30. */
output_asm_insn ("smull\\t%Q3, %R3, %1, %2", operands);
@@ -256,16 +290,28 @@
(const_int 38))
(const_int 32)))])
-;; Same goes for this.
+(define_expand "usmulusa3"
+ [(parallel [(set (match_operand:USA 0 "s_register_operand")
+ (us_mult:USA (match_operand:USA 1 "s_register_operand")
+ (match_operand:USA 2 "s_register_operand")))
+ (clobber (match_scratch:DI 3))
+ (clobber (match_scratch:SI 4))
+ (clobber (reg:CC CC_REGNUM))])]
+ "TARGET_32BIT && arm_arch6"
+ {
+ if (ARM_Q_BIT_READ)
+ FAIL;
+ }
+)
-(define_insn "usmulusa3"
+(define_insn "*arm_usmulusa3"
[(set (match_operand:USA 0 "s_register_operand" "=r")
(us_mult:USA (match_operand:USA 1 "s_register_operand" "r")
(match_operand:USA 2 "s_register_operand" "r")))
(clobber (match_scratch:DI 3 "=r"))
(clobber (match_scratch:SI 4 "=r"))
(clobber (reg:CC CC_REGNUM))]
- "TARGET_32BIT && arm_arch6"
+ "TARGET_32BIT && arm_arch6 && !ARM_Q_BIT_READ"
{
/* 16.16 * 16.16 -> 32.32. */
output_asm_insn ("umull\\t%Q3, %R3, %1, %2", operands);
@@ -358,6 +404,8 @@
(match_operand:HA 2 "s_register_operand")))]
"TARGET_32BIT && TARGET_DSP_MULTIPLY && arm_arch6"
{
+ if (ARM_Q_BIT_READ)
+ FAIL;
rtx tmp = gen_reg_rtx (SImode);
rtx rshift;
@@ -378,6 +426,9 @@
(match_operand:UHA 2 "s_register_operand")))]
"TARGET_INT_SIMD"
{
+ if (ARM_Q_BIT_READ)
+ FAIL;
+
rtx tmp1 = gen_reg_rtx (SImode);
rtx tmp2 = gen_reg_rtx (SImode);
rtx tmp3 = gen_reg_rtx (SImode);
@@ -405,7 +456,7 @@
(ss_truncate:HI (match_operator:SI 1 "sat_shift_operator"
[(match_operand:SI 2 "s_register_operand" "r")
(match_operand:SI 3 "immediate_operand" "I")])))]
- "TARGET_32BIT && arm_arch6"
+ "TARGET_32BIT && arm_arch6 && !ARM_Q_BIT_READ"
"ssat%?\\t%0, #16, %2%S1"
[(set_attr "predicable" "yes")
(set_attr "shift" "1")
@@ -414,7 +465,7 @@
(define_insn "arm_usatsihi"
[(set (match_operand:HI 0 "s_register_operand" "=r")
(us_truncate:HI (match_operand:SI 1 "s_register_operand")))]
- "TARGET_INT_SIMD"
+ "TARGET_INT_SIMD && !ARM_Q_BIT_READ"
"usat%?\\t%0, #16, %1"
[(set_attr "predicable" "yes")
(set_attr "type" "alu_imm")]
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index c685bcbf99c..963dc3e92f0 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -28,6 +28,8 @@ extern enum unwind_info_type arm_except_unwind_info (struct gcc_options *);
extern int use_return_insn (int, rtx);
extern bool use_simple_return_p (void);
extern enum reg_class arm_regno_class (int);
+extern bool arm_check_builtin_call (location_t , vec<location_t> , tree,
+ tree, unsigned int, tree *);
extern void arm_load_pic_register (unsigned long, rtx);
extern int arm_volatile_func (void);
extern void arm_expand_prologue (void);
@@ -58,6 +60,7 @@ extern bool arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
bool high);
extern void arm_emit_speculation_barrier_function (void);
extern void arm_decompose_di_binop (rtx, rtx, rtx *, rtx *, rtx *, rtx *);
+extern bool arm_q_bit_access (void);
#ifdef RTX_CODE
extern void arm_gen_unlikely_cbranch (enum rtx_code, machine_mode cc_mode,
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index eddd3ca93ed..04f02e664dd 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -385,6 +385,9 @@ static const struct attribute_spec arm_attribute_table[] =
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
+#undef TARGET_CHECK_BUILTIN_CALL
+#define TARGET_CHECK_BUILTIN_CALL arm_check_builtin_call
+
#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
@@ -29140,6 +29143,10 @@ arm_conditional_register_usage (void)
if (TARGET_CALLER_INTERWORKING)
global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
}
+
+ /* The Q bit is only accessed via special ACLE patterns. */
+ CLEAR_HARD_REG_BIT (operand_reg_set, APSRQ_REGNUM);
+
SUBTARGET_CONDITIONAL_REGISTER_USAGE
}
@@ -32375,6 +32382,16 @@ arm_emit_speculation_barrier_function ()
emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
}
+/* Have we recorded an explicit access to the Q bit of APSR? */
+bool
+arm_q_bit_access (void)
+{
+ if (cfun && cfun->decl) /* Look for the "acle qbit" tag on this function. */
+ return lookup_attribute ("acle qbit",
+ DECL_ATTRIBUTES (cfun->decl));
+ return true; /* No current function to inspect: report an access. */
+}
+
#if CHECKING_P
namespace selftest {
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 5fad1e5bcc2..4cf49ff4ceb 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -723,6 +723,8 @@ extern int arm_arch_cmse;
goto. Without it fp appears to be used and the
elimination code won't get rid of sfp. It tracks
fp exactly at all times.
+ apsrq Nor this, it is used to track operations on the Q bit
+ of APSR by ACLE saturating intrinsics.
*: See TARGET_CONDITIONAL_REGISTER_USAGE */
@@ -770,7 +772,7 @@ extern int arm_arch_cmse;
1,1,1,1,1,1,1,1, \
1,1,1,1, \
/* Specials. */ \
- 1,1,1,1 \
+ 1,1,1,1,1 \
}
/* 1 for registers not available across function calls.
@@ -800,7 +802,7 @@ extern int arm_arch_cmse;
1,1,1,1,1,1,1,1, \
1,1,1,1, \
/* Specials. */ \
- 1,1,1,1 \
+ 1,1,1,1,1 \
}
#ifndef SUBTARGET_CONDITIONAL_REGISTER_USAGE
@@ -975,10 +977,10 @@ extern int arm_arch_cmse;
((((REGNUM) - FIRST_VFP_REGNUM) & 3) == 0 \
&& (LAST_VFP_REGNUM - (REGNUM) >= 2 * (N) - 1))
-/* The number of hard registers is 16 ARM + 1 CC + 1 SFP + 1 AFP. */
+/* The number of hard registers is 16 ARM + 1 CC + 1 SFP + 1 AFP + 1 APSRQ. */
/* Intel Wireless MMX Technology registers add 16 + 4 more. */
/* VFP (VFP3) adds 32 (64) + 1 VFPCC. */
-#define FIRST_PSEUDO_REGISTER 104
+#define FIRST_PSEUDO_REGISTER 105
#define DBX_REGISTER_NUMBER(REGNO) arm_dbx_register_number (REGNO)
@@ -1062,7 +1064,7 @@ extern int arm_regs_in_sequence[];
/* Registers not for general use. */ \
CC_REGNUM, VFPCC_REGNUM, \
FRAME_POINTER_REGNUM, ARG_POINTER_REGNUM, \
- SP_REGNUM, PC_REGNUM \
+ SP_REGNUM, PC_REGNUM, APSRQ_REGNUM \
}
/* Use different register alloc ordering for Thumb. */
@@ -1402,6 +1404,8 @@ typedef struct GTY(()) machine_function
machine_function;
#endif
+#define ARM_Q_BIT_READ (arm_q_bit_access ())
+
/* As in the machine_function, a global set of call-via labels, for code
that is in text_section. */
extern GTY(()) rtx thumb_call_via_label[14];
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 4f035cbfddd..992d7b60bbc 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -39,6 +39,7 @@
(LAST_ARM_REGNUM 15) ;
(CC_REGNUM 100) ; Condition code pseudo register
(VFPCC_REGNUM 101) ; VFP Condition code pseudo register
+ (APSRQ_REGNUM 104) ; Q bit pseudo register
]
)
;; 3rd operand to select_dominance_cc_mode
@@ -423,6 +424,20 @@
(include "marvell-pj4.md")
(include "xgene1.md")
+;; define_subst and associated attributes
+
+(define_subst "add_setq"
+ [(set (match_operand:SI 0 "" "")
+ (match_operand:SI 1 "" ""))]
+ ""
+ [(set (match_dup 0)
+ (match_dup 1))
+ (set (reg:CC APSRQ_REGNUM)
+ (unspec:CC [(reg:CC APSRQ_REGNUM)] UNSPEC_Q_SET))])
+
+(define_subst_attr "add_clobber_q_name" "add_setq" "" "_setq")
+(define_subst_attr "add_clobber_q_pred" "add_setq" "!ARM_Q_BIT_READ"
+ "ARM_Q_BIT_READ")
;;---------------------------------------------------------------------------
;; Insn patterns
@@ -2515,14 +2530,36 @@
(set_attr "predicable" "yes")]
)
-(define_insn "maddhisi4"
+(define_expand "maddhisi4"
+ [(set (match_operand:SI 0 "s_register_operand")
+ (plus:SI (mult:SI (sign_extend:SI
+ (match_operand:HI 1 "s_register_operand"))
+ (sign_extend:SI
+ (match_operand:HI 2 "s_register_operand")))
+ (match_operand:SI 3 "s_register_operand")))]
+ "TARGET_DSP_MULTIPLY"
+ {
+ /* If this function reads the Q bit from ACLE intrinsics break up the
+ multiplication and accumulation as an overflow during accumulation will
+ clobber the Q flag. */
+ if (ARM_Q_BIT_READ)
+ {
+ rtx tmp = gen_reg_rtx (SImode);
+ emit_insn (gen_mulhisi3 (tmp, operands[1], operands[2]));
+ emit_insn (gen_addsi3 (operands[0], tmp, operands[3]));
+ DONE;
+ }
+ }
+)
+
+(define_insn "*arm_maddhisi4"
[(set (match_operand:SI 0 "s_register_operand" "=r")
(plus:SI (mult:SI (sign_extend:SI
(match_operand:HI 1 "s_register_operand" "r"))
(sign_extend:SI
(match_operand:HI 2 "s_register_operand" "r")))
(match_operand:SI 3 "s_register_operand" "r")))]
- "TARGET_DSP_MULTIPLY"
+ "TARGET_DSP_MULTIPLY && !ARM_Q_BIT_READ"
"smlabb%?\\t%0, %1, %2, %3"
[(set_attr "type" "smlaxy")
(set_attr "predicable" "yes")]
@@ -2537,7 +2574,7 @@
(sign_extend:SI
(match_operand:HI 2 "s_register_operand" "r")))
(match_operand:SI 3 "s_register_operand" "r")))]
- "TARGET_DSP_MULTIPLY"
+ "TARGET_DSP_MULTIPLY && !ARM_Q_BIT_READ"
"smlatb%?\\t%0, %1, %2, %3"
[(set_attr "type" "smlaxy")
(set_attr "predicable" "yes")]
@@ -2552,7 +2589,7 @@
(match_operand:SI 2 "s_register_operand" "r")
(const_int 16)))
(match_operand:SI 3 "s_register_operand" "r")))]
- "TARGET_DSP_MULTIPLY"
+ "TARGET_DSP_MULTIPLY && !ARM_Q_BIT_READ"
"smlatt%?\\t%0, %1, %2, %3"
[(set_attr "type" "smlaxy")
(set_attr "predicable" "yes")]
@@ -4044,12 +4081,113 @@
(define_code_attr SATlo [(smin "1") (smax "2")])
(define_code_attr SAThi [(smin "2") (smax "1")])
-(define_insn "*satsi_<SAT:code>"
+(define_expand "arm_ssat"
+ [(match_operand:SI 0 "s_register_operand")
+ (match_operand:SI 1 "s_register_operand")
+ (match_operand:SI 2 "const_int_operand")]
+ "TARGET_32BIT && arm_arch6"
+ {
+ HOST_WIDE_INT val = INTVAL (operands[2]);
+ /* The builtin checking code should have ensured the right
+ range for the immediate. */
+ gcc_assert (IN_RANGE (val, 1, 32));
+ HOST_WIDE_INT upper_bound = (HOST_WIDE_INT_1 << (val - 1)) - 1;
+ HOST_WIDE_INT lower_bound = -upper_bound - 1;
+ rtx up_rtx = gen_int_mode (upper_bound, SImode);
+ rtx lo_rtx = gen_int_mode (lower_bound, SImode);
+ if (ARM_Q_BIT_READ)
+ emit_insn (gen_satsi_smin_setq (operands[0], lo_rtx,
+ up_rtx, operands[1]));
+ else
+ emit_insn (gen_satsi_smin (operands[0], lo_rtx, up_rtx, operands[1]));
+ DONE;
+ }
+)
+
+(define_expand "arm_usat"
+ [(match_operand:SI 0 "s_register_operand")
+ (match_operand:SI 1 "s_register_operand")
+ (match_operand:SI 2 "const_int_operand")]
+ "TARGET_32BIT && arm_arch6"
+ {
+ HOST_WIDE_INT val = INTVAL (operands[2]);
+ /* The builtin checking code should have ensured the right
+ range for the immediate. */
+ gcc_assert (IN_RANGE (val, 0, 31));
+ HOST_WIDE_INT upper_bound = (HOST_WIDE_INT_1 << val) - 1;
+ rtx up_rtx = gen_int_mode (upper_bound, SImode);
+ rtx lo_rtx = CONST0_RTX (SImode);
+ if (ARM_Q_BIT_READ)
+ emit_insn (gen_satsi_smin_setq (operands[0], lo_rtx, up_rtx,
+ operands[1]));
+ else
+ emit_insn (gen_satsi_smin (operands[0], lo_rtx, up_rtx, operands[1]));
+ DONE;
+ }
+)
+
+(define_insn "arm_get_apsr"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (unspec:SI [(reg:CC APSRQ_REGNUM)] UNSPEC_APSR_READ))]
+ "TARGET_ARM_QBIT"
+ "mrs%?\t%0, APSR"
+ [(set_attr "predicable" "yes")
+ (set_attr "conds" "use")]
+)
+
+(define_insn "arm_set_apsr"
+ [(set (reg:CC APSRQ_REGNUM)
+ (unspec_volatile:CC
+ [(match_operand:SI 0 "s_register_operand" "r")] VUNSPEC_APSR_WRITE))]
+ "TARGET_ARM_QBIT"
+ "msr%?\tAPSR_nzcvq, %0"
+ [(set_attr "predicable" "yes")
+ (set_attr "conds" "set")]
+)
+
+;; Read the APSR and extract the Q bit (bit 27)
+(define_expand "arm_saturation_occurred"
+ [(match_operand:SI 0 "s_register_operand")]
+ "TARGET_ARM_QBIT"
+ {
+ rtx apsr = gen_reg_rtx (SImode);
+ emit_insn (gen_arm_get_apsr (apsr));
+ emit_insn (gen_extzv (operands[0], apsr, CONST1_RTX (SImode),
+ gen_int_mode (27, SImode)));
+ DONE;
+ }
+)
+
+;; Read the APSR and set the Q bit (bit position 27) according to operand 0
+(define_expand "arm_set_saturation"
+ [(match_operand:SI 0 "reg_or_int_operand")]
+ "TARGET_ARM_QBIT"
+ {
+ rtx apsr = gen_reg_rtx (SImode);
+ emit_insn (gen_arm_get_apsr (apsr));
+ rtx to_insert = gen_reg_rtx (SImode);
+ if (CONST_INT_P (operands[0]))
+ emit_move_insn (to_insert, operands[0] == CONST0_RTX (SImode)
+ ? CONST0_RTX (SImode) : CONST1_RTX (SImode));
+ else
+ {
+ rtx cmp = gen_rtx_NE (SImode, operands[0], CONST0_RTX (SImode));
+ emit_insn (gen_cstoresi4 (to_insert, cmp, operands[0],
+ CONST0_RTX (SImode)));
+ }
+ emit_insn (gen_insv (apsr, CONST1_RTX (SImode),
+ gen_int_mode (27, SImode), to_insert));
+ emit_insn (gen_arm_set_apsr (apsr));
+ DONE;
+ }
+)
+
+(define_insn "satsi_<SAT:code><add_clobber_q_name>"
[(set (match_operand:SI 0 "s_register_operand" "=r")
(SAT:SI (<SATrev>:SI (match_operand:SI 3 "s_register_operand" "r")
(match_operand:SI 1 "const_int_operand" "i"))
(match_operand:SI 2 "const_int_operand" "i")))]
- "TARGET_32BIT && arm_arch6
+ "TARGET_32BIT && arm_arch6 && <add_clobber_q_pred>
&& arm_sat_operator_match (operands[<SAT:SATlo>], operands[<SAT:SAThi>], NULL, NULL)"
{
int mask;
@@ -4075,7 +4213,7 @@
(match_operand:SI 5 "const_int_operand" "i")])
(match_operand:SI 1 "const_int_operand" "i"))
(match_operand:SI 2 "const_int_operand" "i")))]
- "TARGET_32BIT && arm_arch6
+ "TARGET_32BIT && arm_arch6 && !ARM_Q_BIT_READ
&& arm_sat_operator_match (operands[<SAT:SATlo>], operands[<SAT:SAThi>], NULL, NULL)"
{
int mask;
diff --git a/gcc/config/arm/arm_acle.h b/gcc/config/arm/arm_acle.h
index 248a355d002..2564ad84985 100644
--- a/gcc/config/arm/arm_acle.h
+++ b/gcc/config/arm/arm_acle.h
@@ -433,6 +433,50 @@ __smlsldx (int16x2_t __a, int16x2_t __b, int64_t __c)
#endif
+#ifdef __ARM_FEATURE_SAT
+
+#define __ssat(__a, __sat) \
+ __extension__ \
+ ({ \
+ int32_t __arg = (__a); \
+ __builtin_sat_imm_check (__sat, 1, 32); \
+ int32_t __res = __builtin_arm_ssat (__arg, __sat); \
+ __res; \
+ })
+
+#define __usat(__a, __sat) \
+ __extension__ \
+ ({ \
+ int32_t __arg = (__a); \
+ __builtin_sat_imm_check (__sat, 0, 31); \
+ uint32_t __res = __builtin_arm_usat (__arg, __sat); \
+ __res; \
+ })
+
+#endif
+
+#ifdef __ARM_FEATURE_QBIT
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__ignore_saturation (void)
+{
+ /* ACLE designates this intrinsic as a hint.
+ Implement as a nop for now. */
+}
+
+/* These are defined as macros because the implementation of the builtins
+ requires easy access to the current function so wrapping it in an
+ always_inline function complicates things. */
+
+#define __saturation_occurred __builtin_arm_saturation_occurred
+
+#define __set_saturation_occurred(__a) \
+ __extension__ \
+ ({ \
+ int __arg = (__a); \
+ __builtin_arm_set_saturation (__arg); \
+ })
+#endif
#pragma GCC push_options
#ifdef __ARM_FEATURE_CRC32
diff --git a/gcc/config/arm/arm_acle_builtins.def b/gcc/config/arm/arm_acle_builtins.def
index 0021c0036ad..c72480321fa 100644
--- a/gcc/config/arm/arm_acle_builtins.def
+++ b/gcc/config/arm/arm_acle_builtins.def
@@ -79,3 +79,8 @@ VAR1 (TERNOP, smlald, di)
VAR1 (TERNOP, smlaldx, di)
VAR1 (TERNOP, smlsld, di)
VAR1 (TERNOP, smlsldx, di)
+
+VAR1 (SAT_BINOP_UNSIGNED_IMM, ssat, si)
+VAR1 (UNSIGNED_SAT_BINOP_UNSIGNED_IMM, usat, si)
+VAR1 (SAT_OCCURRED, saturation_occurred, si)
+VAR1 (SET_SAT, set_saturation, void)
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 4eb203365a6..e5cef6852a2 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -763,6 +763,12 @@
(V4QQ "8") (V2HQ "16") (QQ "8") (HQ "16")
(V2HA "16") (HA "16") (SQ "") (SA "")])
+(define_mode_attr qaddsub_clob_q [(V4UQQ "0") (V2UHQ "0") (UQQ "0") (UHQ "0")
+ (V2UHA "0") (UHA "0")
+ (V4QQ "0") (V2HQ "0") (QQ "0") (HQ "0")
+ (V2HA "0") (HA "0") (SQ "ARM_Q_BIT_READ")
+ (SA "ARM_Q_BIT_READ")])
+
;; Mode attribute for vshll.
(define_mode_attr V_innermode [(V8QI "QI") (V4HI "HI") (V2SI "SI")])
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index 78f88d5fa09..a4287949e52 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -70,6 +70,9 @@
; that.
UNSPEC_UNALIGNED_STORE ; Same for str/strh.
UNSPEC_PIC_UNIFIED ; Create a common pic addressing form.
+ UNSPEC_Q_SET ; Represent setting the Q bit.
+ UNSPEC_APSR_READ ; Represent reading the APSR.
+
UNSPEC_LL ; Represent an unpaired load-register-exclusive.
UNSPEC_VRINTZ ; Represent a float to integral float rounding
; towards zero.
@@ -211,6 +214,7 @@
VUNSPEC_MRRC ; Represent the coprocessor mrrc instruction.
VUNSPEC_MRRC2 ; Represent the coprocessor mrrc2 instruction.
VUNSPEC_SPECULATION_BARRIER ; Represents an unconditional speculation barrier.
+ VUNSPEC_APSR_WRITE ; Represent writing the APSR.
])
;; Enumerators for NEON unspecs.
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index 42a10cf2243..f3bf66c44ee 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -1911,6 +1911,12 @@ ARM Target supports options suitable for accessing the SIMD32 intrinsics from
@code{arm_acle.h}.
Some multilibs may be incompatible with these options.
+@item arm_qbit_ok
+@anchor{arm_qbit_ok}
+ARM Target supports options suitable for accessing the Q-bit manipulation
+intrinsics from @code{arm_acle.h}.
+Some multilibs may be incompatible with these options.
+
@end table
@subsubsection AArch64-specific attributes
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 08f290baa2b..f7318b08b42 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,12 @@
+2019-11-07 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
+
+ * gcc.target/arm/acle/saturation.c: New test.
+ * gcc.target/arm/acle/sat_no_smlatb.c: Likewise.
+ * lib/target-supports.exp (check_effective_target_arm_qbit_ok_nocache):
+ Define.
+ (check_effective_target_arm_qbit_ok): Likewise.
+ (add_options_for_arm_qbit): Likewise.
+
2019-11-07 Martin Liska <mliska@suse.cz>
PR c++/92354
diff --git a/gcc/testsuite/gcc.target/arm/acle/sat_no_smlatb.c b/gcc/testsuite/gcc.target/arm/acle/sat_no_smlatb.c
new file mode 100644
index 00000000000..e0c53ed4dc9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/acle/sat_no_smlatb.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_dsp } */
+
+/* Ensure the smlatb doesn't get generated when reading the Q flag
+ from ACLE. */
+
+#include <arm_acle.h>
+
+int
+foo (int x, int in, int32_t c)
+{
+ short a = in & 0xffff;
+ short b = (in & 0xffff0000) >> 16;
+
+ int res = x + b * a + __ssat (c, 24);
+ return res + __saturation_occurred ();
+}
+
+/* { dg-final { scan-assembler-not "smlatb\\t" } } */
diff --git a/gcc/testsuite/gcc.target/arm/acle/saturation.c b/gcc/testsuite/gcc.target/arm/acle/saturation.c
new file mode 100644
index 00000000000..0b3fe519933
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/acle/saturation.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_qbit_ok } */
+/* { dg-add-options arm_qbit } */
+
+#include <arm_acle.h>
+
+int32_t
+test_ssat (int32_t a)
+{
+ return __ssat (a, 8);
+}
+
+/* { dg-final { scan-assembler-times "ssat\t...?, #8, ...?" 1 } } */
+
+uint32_t
+test_usat (int32_t a)
+{
+ return __usat (a, 24);
+}
+
+/* { dg-final { scan-assembler-times "usat\t...?, #24, ...?" 1 } } */
+
+/* Test that USAT doesn't get removed as we need its Q-setting behavior. */
+int
+test_sat_occur (int32_t a)
+{
+ uint32_t res = __usat (a, 3);
+ return __saturation_occurred ();
+}
+
+/* { dg-final { scan-assembler-times "usat\t...?, #3, ...?" 1 } } */
+/* { dg-final { scan-assembler "mrs\t...?, APSR" } } */
+
+void
+test_set_sat (void)
+{
+ __set_saturation_occurred (0);
+}
+
+/* { dg-final { scan-assembler-times "msr\tAPSR_nzcvq, ...?" 1 } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 6f224fa8141..751045d4744 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -3845,6 +3845,44 @@ proc add_options_for_arm_simd32 { flags } {
return "$flags $et_arm_simd32_flags"
}
+# Return 1 if this is an ARM target supporting the saturation intrinsics
+# from arm_acle.h. Some multilibs may be incompatible with these options.
+# Also set et_arm_qbit_flags to the best options to add.
+# arm_acle.h includes stdint.h which can cause trouble with incompatible
+# -mfloat-abi= options.
+
+proc check_effective_target_arm_qbit_ok_nocache { } {
+ global et_arm_qbit_flags
+ set et_arm_qbit_flags ""
+ foreach flags {"" "-march=armv5te" "-march=armv5te -mfloat-abi=softfp" "-march=armv5te -mfloat-abi=hard"} {
+ if { [check_no_compiler_messages_nocache et_arm_qbit_flags object {
+ #include <arm_acle.h>
+ int dummy;
+ #ifndef __ARM_FEATURE_QBIT
+ #error not QBIT
+ #endif
+ } "$flags"] } {
+ set et_arm_qbit_flags $flags
+ return 1
+ }
+ }
+
+ return 0
+}
+
+proc check_effective_target_arm_qbit_ok { } {
+ return [check_cached_effective_target et_arm_qbit_flags \
+ check_effective_target_arm_qbit_ok_nocache]
+}
+
+proc add_options_for_arm_qbit { flags } {
+ if { ! [check_effective_target_arm_qbit_ok] } {
+ return "$flags"
+ }
+ global et_arm_qbit_flags
+ return "$flags $et_arm_qbit_flags"
+}
+
# Return 1 if this is an ARM target supporting -mfpu=neon without any
# -mfloat-abi= option. Useful in tests where add_options is not
# supported (such as lto tests).