;; Machine description for AArch64 AdvSIMD architecture.
;; Copyright (C) 2011-2013 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
; Main data types used by the instructions.
;; Mode of the data processed by a SIMD pattern.  Patterns set this so the
;; "neon_type" mapping below can distinguish D-register (64-bit) from
;; Q-register (128-bit) forms; "unknown" is the default when unset.
(define_attr "simd_mode" "unknown,none,V8QI,V16QI,V4HI,V8HI,V2SI,V4SI,V2DI,V2SF,V4SF,V2DF,OI,CI,XI,DI,DF,SI,SF,HI,QI"
(const_string "unknown"))
; Classification of AdvSIMD instructions for scheduling purposes.
; Do not set this attribute and the "v8type" attribute together in
; any instruction pattern.
; simd_abd integer absolute difference and accumulate.
; simd_abdl integer absolute difference and accumulate (long).
; simd_adal integer add and accumulate (long).
; simd_add integer addition/subtraction.
; simd_addl integer addition/subtraction (long).
; simd_addlv across lanes integer sum (long).
; simd_addn integer addition/subtraction (narrow).
; simd_addn2 integer addition/subtraction (narrow, high).
; simd_addv across lanes integer sum.
; simd_cls count leading sign/zero bits.
; simd_cmp compare / create mask.
; simd_cnt population count.
; simd_dup duplicate element.
; simd_dupgp duplicate general purpose register.
; simd_ext bitwise extract from pair.
; simd_fabd floating point absolute difference.
; simd_fadd floating point add/sub.
; simd_fcmp floating point compare.
; simd_fcvti floating point convert to integer.
; simd_fcvtl floating-point convert upsize.
; simd_fcvtn floating-point convert downsize (narrow).
; simd_fcvtn2 floating-point convert downsize (narrow, high).
; simd_fdiv floating point division.
; simd_fminmax floating point min/max.
; simd_fminmaxv across lanes floating point min/max.
; simd_fmla floating point multiply-add.
; simd_fmla_elt floating point multiply-add (by element).
; simd_fmul floating point multiply.
; simd_fmul_elt floating point multiply (by element).
; simd_fnegabs floating point neg/abs.
; simd_frecpe floating point reciprocal estimate.
; simd_frecps floating point reciprocal step.
; simd_frecpx floating point reciprocal exponent.
; simd_frint floating point round to integer.
; simd_fsqrt floating point square root.
; simd_icvtf integer convert to floating point.
; simd_ins insert element.
; simd_insgp insert general purpose register.
; simd_load1 load multiple structures to one register (LD1).
; simd_load1r load single structure to all lanes of one register (LD1R).
; simd_load1s load single structure to one lane of one register (LD1 [index]).
; simd_load2 load multiple structures to two registers (LD1, LD2).
; simd_load2r load single structure to all lanes of two registers (LD1R, LD2R).
; simd_load2s load single structure to one lane of two registers (LD2 [index]).
; simd_load3 load multiple structures to three registers (LD1, LD3).
; simd_load3r load single structure to all lanes of three registers (LD3R).
; simd_load3s load single structure to one lane of three registers (LD3 [index]).
; simd_load4 load multiple structures to four registers (LD1, LD2, LD4).
; simd_load4r load single structure to all lanes of four registers (LD4R).
; simd_load4s load single structure to one lane of four registers (LD4 [index]).
; simd_logic logical operation.
; simd_logic_imm logical operation (immediate).
; simd_minmax integer min/max.
; simd_minmaxv across lanes integer min/max.
; simd_mla integer multiply-accumulate.
; simd_mla_elt integer multiply-accumulate (by element).
; simd_mlal integer multiply-accumulate (long).
; simd_mlal_elt integer multiply-accumulate (by element, long).
; simd_move move register.
; simd_move_imm move immediate.
; simd_movgp move element to general purpose register.
; simd_mul integer multiply.
; simd_mul_elt integer multiply (by element).
; simd_mull integer multiply (long).
; simd_mull_elt integer multiply (by element, long).
; simd_negabs integer negate/absolute.
; simd_rbit bitwise reverse.
; simd_rcpe integer reciprocal estimate.
; simd_rcps integer reciprocal square root.
; simd_rev element reverse.
; simd_sat_add integer saturating addition/subtraction.
; simd_sat_mlal integer saturating multiply-accumulate (long).
; simd_sat_mlal_elt integer saturating multiply-accumulate (by element, long).
; simd_sat_mul integer saturating multiply.
; simd_sat_mul_elt integer saturating multiply (by element).
; simd_sat_mull integer saturating multiply (long).
; simd_sat_mull_elt integer saturating multiply (by element, long).
; simd_sat_negabs integer saturating negate/absolute.
; simd_sat_shift integer saturating shift.
; simd_sat_shift_imm integer saturating shift (immediate).
; simd_sat_shiftn_imm integer saturating shift (narrow, immediate).
; simd_sat_shiftn2_imm integer saturating shift (narrow, high, immediate).
; simd_shift shift register/vector.
; simd_shift_acc shift accumulate.
; simd_shift_imm shift immediate.
; simd_shift_imm_acc shift immediate and accumulate.
; simd_shiftl shift register/vector (long).
; simd_shiftl_imm shift register/vector (long, immediate).
; simd_shiftn_imm shift register/vector (narrow, immediate).
; simd_shiftn2_imm shift register/vector (narrow, high, immediate).
; simd_store1 store multiple structures from one register (ST1).
; simd_store1s store single structure from one lane of one register (ST1 [index]).
; simd_store2 store multiple structures from two registers (ST1, ST2).
; simd_store2s store single structure from one lane of two registers (ST2 [index]).
; simd_store3 store multiple structures from three registers (ST1, ST3).
; simd_store3s store single structure from one lane of three registers (ST3 [index]).
; simd_store4 store multiple structures from four registers (ST1, ST2, ST4).
; simd_store4s store single structure from one lane of four registers (ST4 [index]).
; simd_tbl table lookup.
; simd_trn transpose.
; simd_uzp unzip.
; simd_zip zip.
;; Scheduling classification of an AdvSIMD instruction.  One value per
;; instruction class; see the per-value descriptions in the comment block
;; above.  Defaults to "none" for patterns that do not set it.
(define_attr "simd_type"
"simd_abd,\
simd_abdl,\
simd_adal,\
simd_add,\
simd_addl,\
simd_addlv,\
simd_addn,\
simd_addn2,\
simd_addv,\
simd_cls,\
simd_cmp,\
simd_cnt,\
simd_dup,\
simd_dupgp,\
simd_ext,\
simd_fabd,\
simd_fadd,\
simd_fcmp,\
simd_fcvti,\
simd_fcvtl,\
simd_fcvtn,\
simd_fcvtn2,\
simd_fdiv,\
simd_fminmax,\
simd_fminmaxv,\
simd_fmla,\
simd_fmla_elt,\
simd_fmul,\
simd_fmul_elt,\
simd_fnegabs,\
simd_frecpe,\
simd_frecps,\
simd_frecpx,\
simd_frint,\
simd_fsqrt,\
simd_icvtf,\
simd_ins,\
simd_insgp,\
simd_load1,\
simd_load1r,\
simd_load1s,\
simd_load2,\
simd_load2r,\
simd_load2s,\
simd_load3,\
simd_load3r,\
simd_load3s,\
simd_load4,\
simd_load4r,\
simd_load4s,\
simd_logic,\
simd_logic_imm,\
simd_minmax,\
simd_minmaxv,\
simd_mla,\
simd_mla_elt,\
simd_mlal,\
simd_mlal_elt,\
simd_movgp,\
simd_move,\
simd_move_imm,\
simd_mul,\
simd_mul_elt,\
simd_mull,\
simd_mull_elt,\
simd_negabs,\
simd_rbit,\
simd_rcpe,\
simd_rcps,\
simd_rev,\
simd_sat_add,\
simd_sat_mlal,\
simd_sat_mlal_elt,\
simd_sat_mul,\
simd_sat_mul_elt,\
simd_sat_mull,\
simd_sat_mull_elt,\
simd_sat_negabs,\
simd_sat_shift,\
simd_sat_shift_imm,\
simd_sat_shiftn_imm,\
simd_sat_shiftn2_imm,\
simd_shift,\
simd_shift_acc,\
simd_shift_imm,\
simd_shift_imm_acc,\
simd_shiftl,\
simd_shiftl_imm,\
simd_shiftn_imm,\
simd_shiftn2_imm,\
simd_store1,\
simd_store1s,\
simd_store2,\
simd_store2s,\
simd_store3,\
simd_store3s,\
simd_store4,\
simd_store4s,\
simd_tbl,\
simd_trn,\
simd_uzp,\
simd_zip,\
none"
(const_string "none"))
; The "neon_type" attribute is used by the AArch32 backend. Below is a mapping
; from "simd_type" to "neon_type".
;; Map "simd_type" (refined by "simd_mode" where D/Q width matters) onto the
;; AArch32 "neon_type" scheduling classes.  The cond arms are tested in
;; order; the first match wins, and unmapped combinations fall through to
;; "unknown".
(define_attr "neon_type"
"neon_int_1,neon_int_2,neon_int_3,neon_int_4,neon_int_5,neon_vqneg_vqabs,
neon_vmov,neon_vaba,neon_vsma,neon_vaba_qqq,
neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,neon_mul_qqq_8_16_32_ddd_32,
neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar,
neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,neon_mla_qqq_8_16,
neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long,
neon_mla_qqq_32_qqd_32_scalar,neon_mul_ddd_16_scalar_32_16_long_scalar,
neon_mul_qqd_32_scalar,neon_mla_ddd_16_scalar_qdd_32_16_long_scalar,
neon_shift_1,neon_shift_2,neon_shift_3,neon_vshl_ddd,
neon_vqshl_vrshl_vqrshl_qqq,neon_vsra_vrsra,neon_fp_vadd_ddd_vabs_dd,
neon_fp_vadd_qqq_vabs_qq,neon_fp_vsum,neon_fp_vmul_ddd,neon_fp_vmul_qqd,
neon_fp_vmla_ddd,neon_fp_vmla_qqq,neon_fp_vmla_ddd_scalar,
neon_fp_vmla_qqq_scalar,neon_fp_vrecps_vrsqrts_ddd,
neon_fp_vrecps_vrsqrts_qqq,neon_bp_simple,neon_bp_2cycle,neon_bp_3cycle,
neon_ldr,neon_str,neon_vld1_1_2_regs,neon_vld1_3_4_regs,
neon_vld2_2_regs_vld1_vld2_all_lanes,neon_vld2_4_regs,neon_vld3_vld4,
neon_vst1_1_2_regs_vst2_2_regs,neon_vst1_3_4_regs,
neon_vst2_4_regs_vst3_vst4,neon_vst3_vst4,neon_vld1_vld2_lane,
neon_vld3_vld4_lane,neon_vst1_vst2_lane,neon_vst3_vst4_lane,
neon_vld3_vld4_all_lanes,neon_mcr,neon_mcr_2_mcrr,neon_mrc,neon_mrrc,
neon_ldm_2,neon_stm_2,none,unknown"
(cond [
(eq_attr "simd_type" "simd_dup") (const_string "neon_bp_simple")
(eq_attr "simd_type" "simd_movgp") (const_string "neon_bp_simple")
(eq_attr "simd_type" "simd_add,simd_logic,simd_logic_imm") (const_string "neon_int_1")
(eq_attr "simd_type" "simd_negabs,simd_addlv") (const_string "neon_int_3")
(eq_attr "simd_type" "simd_addn,simd_addn2,simd_addl,simd_sat_add,simd_sat_negabs") (const_string "neon_int_4")
(eq_attr "simd_type" "simd_move") (const_string "neon_vmov")
(eq_attr "simd_type" "simd_ins") (const_string "neon_mcr")
(and (eq_attr "simd_type" "simd_mul,simd_sat_mul") (eq_attr "simd_mode" "V8QI,V4HI")) (const_string "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long")
; NOTE(review): dropped a duplicated "V2SI" from this list; the V8QI entry
; is dead (caught by the arm above) and V8HI/V4SI look like the intended
; Q-register cases -- confirm against the intended scheduling model.
(and (eq_attr "simd_type" "simd_mul,simd_sat_mul") (eq_attr "simd_mode" "V2SI,V8QI,V16QI")) (const_string "neon_mul_qqq_8_16_32_ddd_32")
(and (eq_attr "simd_type" "simd_mull,simd_sat_mull") (eq_attr "simd_mode" "V8QI,V16QI,V4HI,V8HI")) (const_string "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long")
(and (eq_attr "simd_type" "simd_mull,simd_sat_mull") (eq_attr "simd_mode" "V2SI,V4SI,V2DI")) (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")
(and (eq_attr "simd_type" "simd_mla,simd_sat_mlal") (eq_attr "simd_mode" "V8QI,V4HI")) (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")
(and (eq_attr "simd_type" "simd_mla,simd_sat_mlal") (eq_attr "simd_mode" "V2SI")) (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")
(and (eq_attr "simd_type" "simd_mla,simd_sat_mlal") (eq_attr "simd_mode" "V16QI,V8HI")) (const_string "neon_mla_qqq_8_16")
(and (eq_attr "simd_type" "simd_mla,simd_sat_mlal") (eq_attr "simd_mode" "V4SI")) (const_string "neon_mla_qqq_32_qqd_32_scalar")
(and (eq_attr "simd_type" "simd_mlal") (eq_attr "simd_mode" "V8QI,V16QI,V4HI,V8HI")) (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")
(and (eq_attr "simd_type" "simd_mlal") (eq_attr "simd_mode" "V2SI,V4SI,V2DI")) (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")
(and (eq_attr "simd_type" "simd_fmla") (eq_attr "simd_mode" "V2SF")) (const_string "neon_fp_vmla_ddd")
(and (eq_attr "simd_type" "simd_fmla") (eq_attr "simd_mode" "V4SF,V2DF")) (const_string "neon_fp_vmla_qqq")
(and (eq_attr "simd_type" "simd_fmla_elt") (eq_attr "simd_mode" "V2SF")) (const_string "neon_fp_vmla_ddd_scalar")
(and (eq_attr "simd_type" "simd_fmla_elt") (eq_attr "simd_mode" "V4SF,V2DF")) (const_string "neon_fp_vmla_qqq_scalar")
(and (eq_attr "simd_type" "simd_fmul,simd_fmul_elt,simd_fdiv,simd_fsqrt") (eq_attr "simd_mode" "V2SF")) (const_string "neon_fp_vmul_ddd")
(and (eq_attr "simd_type" "simd_fmul,simd_fmul_elt,simd_fdiv,simd_fsqrt") (eq_attr "simd_mode" "V4SF,V2DF")) (const_string "neon_fp_vmul_qqd")
(and (eq_attr "simd_type" "simd_fadd") (eq_attr "simd_mode" "V2SF")) (const_string "neon_fp_vadd_ddd_vabs_dd")
(and (eq_attr "simd_type" "simd_fadd") (eq_attr "simd_mode" "V4SF,V2DF")) (const_string "neon_fp_vadd_qqq_vabs_qq")
(and (eq_attr "simd_type" "simd_fnegabs,simd_fminmax,simd_fminmaxv") (eq_attr "simd_mode" "V2SF")) (const_string "neon_fp_vadd_ddd_vabs_dd")
(and (eq_attr "simd_type" "simd_fnegabs,simd_fminmax,simd_fminmaxv") (eq_attr "simd_mode" "V4SF,V2DF")) (const_string "neon_fp_vadd_qqq_vabs_qq")
(and (eq_attr "simd_type" "simd_shift,simd_shift_acc") (eq_attr "simd_mode" "V8QI,V4HI,V2SI")) (const_string "neon_vshl_ddd")
(and (eq_attr "simd_type" "simd_shift,simd_shift_acc") (eq_attr "simd_mode" "V16QI,V8HI,V4SI,V2DI")) (const_string "neon_shift_3")
(eq_attr "simd_type" "simd_minmax,simd_minmaxv") (const_string "neon_int_5")
; Fixed: removed a trailing comma that produced an empty alternative.
(eq_attr "simd_type" "simd_shiftn_imm,simd_shiftn2_imm,simd_shiftl_imm") (const_string "neon_shift_1")
(eq_attr "simd_type" "simd_load1,simd_load2") (const_string "neon_vld1_1_2_regs")
; Fixed: "simd_load3" was listed twice, leaving simd_load4 unmapped.
(eq_attr "simd_type" "simd_load3,simd_load4") (const_string "neon_vld1_3_4_regs")
(eq_attr "simd_type" "simd_load1r,simd_load2r,simd_load3r,simd_load4r") (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes")
(eq_attr "simd_type" "simd_load1s,simd_load2s") (const_string "neon_vld1_vld2_lane")
(eq_attr "simd_type" "simd_load3s,simd_load4s") (const_string "neon_vld3_vld4_lane")
(eq_attr "simd_type" "simd_store1,simd_store2") (const_string "neon_vst1_1_2_regs_vst2_2_regs")
(eq_attr "simd_type" "simd_store3,simd_store4") (const_string "neon_vst1_3_4_regs")
(eq_attr "simd_type" "simd_store1s,simd_store2s") (const_string "neon_vst1_vst2_lane")
(eq_attr "simd_type" "simd_store3s,simd_store4s") (const_string "neon_vst3_vst4_lane")
(and (eq_attr "simd_type" "simd_frecpe,simd_frecps") (eq_attr "simd_mode" "V2SF")) (const_string "neon_fp_vrecps_vrsqrts_ddd")
(and (eq_attr "simd_type" "simd_frecpe,simd_frecps") (eq_attr "simd_mode" "V4SF,V2DF")) (const_string "neon_fp_vrecps_vrsqrts_qqq")
(eq_attr "simd_type" "none") (const_string "none")
]
(const_string "unknown")))
;; Vector move expander.  Restored the stripped mode-iterator tokens
;; (<mode>/<MODE>mode).  If the destination is a MEM, force the source into
;; a register first so at most one operand references memory.
(define_expand "mov<mode>"
  [(set (match_operand:VALL 0 "aarch64_simd_nonimmediate_operand" "")
	(match_operand:VALL 1 "aarch64_simd_general_operand" ""))]
  "TARGET_SIMD"
  "
    if (GET_CODE (operands[0]) == MEM)
      operands[1] = force_reg (<MODE>mode, operands[1]);
  "
)
;; Misaligned vector move.  Restored the stripped <mode>/<MODE>mode tokens.
(define_expand "movmisalign<mode>"
  [(set (match_operand:VALL 0 "aarch64_simd_nonimmediate_operand" "")
	(match_operand:VALL 1 "aarch64_simd_general_operand" ""))]
  "TARGET_SIMD"
{
  /* This pattern is not permitted to fail during expansion: if both arguments
     are non-registers (e.g. memory := constant, which can be created by the
     auto-vectorizer), force operand 1 into a register.  */
  if (!register_operand (operands[0], <MODE>mode)
      && !register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})
;; Duplicate a scalar into every lane of an integer vector, either from a
;; general-purpose register or from element 0 of a SIMD register.
;; Restored the stripped <mode>/<VEL>/<Vtype>/<Vetype>/<vw>/<MODE> tokens.
(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQ 0 "register_operand" "=w, w")
	(vec_duplicate:VDQ (match_operand:<VEL> 1 "register_operand" "r, w")))]
  "TARGET_SIMD"
  "@
   dup\\t%0.<Vtype>, %<vw>1
   dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "simd_type" "simd_dupgp, simd_dup")
   (set_attr "simd_mode" "<MODE>")]
)
;; Duplicate element 0 of a SIMD register into every lane of a FP vector.
;; Restored the stripped <mode>/<VEL>/<Vtype>/<Vetype>/<MODE> tokens.
(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
	(vec_duplicate:VDQF (match_operand:<VEL> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "simd_type" "simd_dup")
   (set_attr "simd_mode" "<MODE>")]
)
;; Broadcast lane %2 of operand 1 into every lane of the destination.
;; Restored the stripped <mode>/<VEL>/<Vtype>/<Vetype>/<MODE> tokens.
(define_insn "aarch64_dup_lane<mode>"
  [(set (match_operand:VALL 0 "register_operand" "=w")
	(vec_duplicate:VALL
	  (vec_select:<VEL>
	    (match_operand:VALL 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
	  )))]
  "TARGET_SIMD"
  "dup\\t%0.<Vtype>, %1.<Vetype>[%2]"
  [(set_attr "simd_type" "simd_dup")
   (set_attr "simd_mode" "<MODE>")]
)
;; As aarch64_dup_lane<mode>, but the source vector has the opposite
;; (swapped-width) mode from the destination.
;; NOTE(review): iterator tokens were stripped from this pattern; restored
;; as <vswap_width_name>/<VSWAP_WIDTH> per the usual convention -- confirm
;; against the iterator definitions in iterators.md.
(define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL 0 "register_operand" "=w")
	(vec_duplicate:VALL
	  (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
	  )))]
  "TARGET_SIMD"
  "dup\\t%0.<Vtype>, %1.<Vetype>[%2]"
  [(set_attr "simd_type" "simd_dup")
   (set_attr "simd_mode" "<MODE>")]
)
;; Move of a 64-bit vector: load, store, register copy, moves to/from
;; general registers, and immediate materialization.
;; Restored the stripped <mode>/<Vtype>/<Vbtype>/<MODE> tokens.
(define_insn "*aarch64_simd_mov<mode>"
  [(set (match_operand:VD 0 "aarch64_simd_nonimmediate_operand"
		"=w, Utv,  w, ?r, ?w, ?r, w")
	(match_operand:VD 1 "aarch64_simd_general_operand"
		"Utv,  w,  w,  w,  r,  r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
   switch (which_alternative)
     {
     case 0: return "ld1\t{%0.<Vtype>}, %1";
     case 1: return "st1\t{%1.<Vtype>}, %0";
     case 2: return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
     case 3: return "umov\t%0, %1.d[0]";
     case 4: return "ins\t%0.d[0], %1";
     case 5: return "mov\t%0, %1";
     case 6:
	return aarch64_output_simd_mov_immediate (operands[1],
						  <MODE>mode, 64);
     default: gcc_unreachable ();
     }
}
  [(set_attr "simd_type" "simd_load1,simd_store1,simd_move,simd_movgp,simd_insgp,simd_move,simd_move_imm")
   (set_attr "simd_mode" "<MODE>")]
)
;; Move of a 128-bit vector.  GP<->GP and GP<->FP cross moves are emitted
;; as "#" and handled by the post-reload define_splits below (hence their
;; length of 8).  Restored the stripped <mode>/<Vtype>/<Vbtype>/<MODE>
;; tokens.
(define_insn "*aarch64_simd_mov<mode>"
  [(set (match_operand:VQ 0 "aarch64_simd_nonimmediate_operand"
		"=w, Utv,  w, ?r, ?w, ?r, w")
	(match_operand:VQ 1 "aarch64_simd_general_operand"
		"Utv,  w,  w,  w,  r,  r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
	return "ld1\t{%0.<Vtype>}, %1";
    case 1:
	return "st1\t{%1.<Vtype>}, %0";
    case 2:
	return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
    case 3:
    case 4:
    case 5:
	return "#";
    case 6:
	return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
    default:
	gcc_unreachable ();
    }
}
  [(set_attr "simd_type" "simd_load1,simd_store1,simd_move,simd_movgp,simd_insgp,simd_move,simd_move_imm")
   (set_attr "simd_mode" "<MODE>")
   (set_attr "length" "4,4,4,8,8,8,4")]
)
;; After reload, split a 128-bit vector copy between two general-purpose
;; register pairs into two DImode moves.  aarch64_simd_disambiguate_copy
;; fills in match_dups 0-3, ordering the two sub-moves so that a source
;; half is never clobbered before it has been read (overlapping pairs).
(define_split
[(set (match_operand:VQ 0 "register_operand" "")
(match_operand:VQ 1 "register_operand" ""))]
"TARGET_SIMD && reload_completed
&& GP_REGNUM_P (REGNO (operands[0]))
&& GP_REGNUM_P (REGNO (operands[1]))"
[(set (match_dup 0) (match_dup 1))
(set (match_dup 2) (match_dup 3))]
{
int rdest = REGNO (operands[0]);
int rsrc = REGNO (operands[1]);
rtx dest[2], src[2];
dest[0] = gen_rtx_REG (DImode, rdest);
src[0] = gen_rtx_REG (DImode, rsrc);
dest[1] = gen_rtx_REG (DImode, rdest + 1);
src[1] = gen_rtx_REG (DImode, rsrc + 1);
aarch64_simd_disambiguate_copy (operands, dest, src, 2);
})
;; After reload, split a 128-bit vector copy that crosses between the FP
;; and general-purpose register files; aarch64_split_simd_move emits the
;; required move sequence itself, so the split pattern is empty.
(define_split
[(set (match_operand:VQ 0 "register_operand" "")
(match_operand:VQ 1 "register_operand" ""))]
"TARGET_SIMD && reload_completed
&& ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
|| (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
[(const_int 0)]
{
aarch64_split_simd_move (operands[0], operands[1]);
DONE;
})
;; Split a 128-bit vector move into two 64-bit halves.  GP source: build the
;; vector with move_lo/hi_quad; GP destination: extract the halves with the
;; mov_from_<mode>low/high patterns.  Restored the stripped
;; <mode>/<VHALF>/<MODE> tokens.
(define_expand "aarch64_split_simd_mov<mode>"
  [(set (match_operand:VQ 0)
	(match_operand:VQ 1))]
  "TARGET_SIMD"
  {
    rtx dst = operands[0];
    rtx src = operands[1];

    if (GP_REGNUM_P (REGNO (src)))
      {
	rtx src_low_part = gen_lowpart (<VHALF>mode, src);
	rtx src_high_part = gen_highpart (<VHALF>mode, src);

	emit_insn
	  (gen_move_lo_quad_<mode> (dst, src_low_part));
	emit_insn
	  (gen_move_hi_quad_<mode> (dst, src_high_part));
      }
    else
      {
	rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
	rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
	rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
	rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);

	emit_insn
	  (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
	emit_insn
	  (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
      }
    DONE;
  }
)
;; Move the low 64-bit half of a Q register to a general-purpose register.
;; Restored the stripped <mode>/<VHALF>/<MODE> tokens.
(define_insn "aarch64_simd_mov_from_<mode>low"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
	(vec_select:<VHALF>
	  (match_operand:VQ 1 "register_operand" "w")
	  (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[0]"
  [(set_attr "simd_type" "simd_movgp")
   (set_attr "simd_mode" "<MODE>")
   (set_attr "length" "4")
  ])
;; Move the high 64-bit half of a Q register to a general-purpose register.
;; Restored the stripped <mode>/<VHALF>/<MODE> tokens.
(define_insn "aarch64_simd_mov_from_<mode>high"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
	(vec_select:<VHALF>
	  (match_operand:VQ 1 "register_operand" "w")
	  (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[1]"
  [(set_attr "simd_type" "simd_movgp")
   (set_attr "simd_mode" "<MODE>")
   (set_attr "length" "4")
  ])
;; Bitwise OR-NOT: operands swapped in the template because ORN negates its
;; second source.  Restored the stripped <mode>/<Vbtype>/<MODE> tokens.
(define_insn "orn<mode>3"
  [(set (match_operand:VDQ 0 "register_operand" "=w")
	(ior:VDQ (not:VDQ (match_operand:VDQ 1 "register_operand" "w"))
		 (match_operand:VDQ 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "simd_type" "simd_logic")
   (set_attr "simd_mode" "<MODE>")]
)
;; Bitwise AND-NOT (bit clear); BIC negates its second source, hence the
;; swapped operands.  Restored the stripped <mode>/<Vbtype>/<MODE> tokens.
(define_insn "bic<mode>3"
  [(set (match_operand:VDQ 0 "register_operand" "=w")
	(and:VDQ (not:VDQ (match_operand:VDQ 1 "register_operand" "w"))
		 (match_operand:VDQ 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "simd_type" "simd_logic")
   (set_attr "simd_mode" "<MODE>")]
)
;; Vector integer addition.  Restored the stripped <mode>/<Vtype>/<MODE>
;; tokens.
(define_insn "add<mode>3"
  [(set (match_operand:VDQ 0 "register_operand" "=w")
	(plus:VDQ (match_operand:VDQ 1 "register_operand" "w")
		  (match_operand:VDQ 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "simd_type" "simd_add")
   (set_attr "simd_mode" "<MODE>")]
)
;; Vector integer subtraction.  Restored the stripped <mode>/<Vtype>/<MODE>
;; tokens.
(define_insn "sub<mode>3"
  [(set (match_operand:VDQ 0 "register_operand" "=w")
	(minus:VDQ (match_operand:VDQ 1 "register_operand" "w")
		   (match_operand:VDQ 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "simd_type" "simd_add")
   (set_attr "simd_mode" "<MODE>")]
)
;; Vector integer multiplication (VDQM excludes DImode lanes, which have no
;; MUL).  Restored the stripped <mode>/<Vtype>/<MODE> tokens.
(define_insn "mul<mode>3"
  [(set (match_operand:VDQM 0 "register_operand" "=w")
	(mult:VDQM (match_operand:VDQM 1 "register_operand" "w")
		   (match_operand:VDQM 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "simd_type" "simd_mul")
   (set_attr "simd_mode" "<MODE>")]
)
;; Multiply each lane of operand 3 by lane %2 of operand 1 (MUL/FMUL by
;; element).  Restored the stripped <mode>/<VEL>/<h_con>/<f>/<Vtype>/
;; <Vetype>/<MODE> tokens.
(define_insn "*aarch64_mul3_elt<mode>"
 [(set (match_operand:VMUL 0 "register_operand" "=w")
    (mult:VMUL
      (vec_duplicate:VMUL
	  (vec_select:<VEL>
	    (match_operand:VMUL 1 "register_operand" "<h_con>")
	    (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VMUL 3 "register_operand" "w")))]
  "TARGET_SIMD"
  "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]"
  [(set_attr "simd_type" "simd_mul_elt")
   (set_attr "simd_mode" "<MODE>")]
)
;; Multiply by element where the element vector has the swapped-width mode.
;; NOTE(review): iterator tokens were stripped; restored as
;; <vswap_width_name>/<VSWAP_WIDTH> per convention -- confirm against
;; iterators.md.
(define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
  [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
     (mult:VMUL_CHANGE_NLANES
       (vec_duplicate:VMUL_CHANGE_NLANES
	  (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	    (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
  "TARGET_SIMD"
  "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]"
  [(set_attr "simd_type" "simd_mul_elt")
   (set_attr "simd_mode" "<MODE>")]
)
;; V2DF multiply with a DF scalar broadcast into both lanes; emitted as
;; FMUL by element [0].  No mode iterator, so this pattern is intact.
(define_insn "*aarch64_mul3_elt_to_128df"
[(set (match_operand:V2DF 0 "register_operand" "=w")
(mult:V2DF
(vec_duplicate:V2DF
(match_operand:DF 2 "register_operand" "w"))
(match_operand:V2DF 1 "register_operand" "w")))]
"TARGET_SIMD"
"fmul\\t%0.2d, %1.2d, %2.d[0]"
[(set_attr "simd_type" "simd_fmul_elt")
(set_attr "simd_mode" "V2DF")]
)
;; DF multiply of a scalar by one lane of a V2DF register.
;; NOTE(review): the template writes %0.2d even though the result is a
;; scalar DF -- looks suspicious but is left untouched; verify against the
;; upstream pattern before changing.
(define_insn "*aarch64_mul3_elt_to_64v2df"
[(set (match_operand:DF 0 "register_operand" "=w")
(mult:DF
(vec_select:DF
(match_operand:V2DF 1 "register_operand" "w")
(parallel [(match_operand:SI 2 "immediate_operand")]))
(match_operand:DF 3 "register_operand" "w")))]
"TARGET_SIMD"
"fmul\\t%0.2d, %3.2d, %1.d[%2]"
[(set_attr "simd_type" "simd_fmul_elt")
(set_attr "simd_mode" "V2DF")]
)
;; Vector integer negation.  Restored the stripped <mode>/<Vtype>/<MODE>
;; tokens.
(define_insn "neg<mode>2"
  [(set (match_operand:VDQ 0 "register_operand" "=w")
	(neg:VDQ (match_operand:VDQ 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "neg\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "simd_type" "simd_negabs")
   (set_attr "simd_mode" "<MODE>")]
)
;; Vector integer absolute value.  Restored the stripped <mode>/<Vtype>/
;; <MODE> tokens.
(define_insn "abs<mode>2"
  [(set (match_operand:VDQ 0 "register_operand" "=w")
	(abs:VDQ (match_operand:VDQ 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "abs\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "simd_type" "simd_negabs")
   (set_attr "simd_mode" "<MODE>")]
)
;; Signed absolute difference: abs (op1 - op2) -> SABD.  Restored the
;; stripped <mode>/<Vtype>/<MODE> tokens.
(define_insn "abd<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(abs:VDQ_BHSI (minus:VDQ_BHSI
		       (match_operand:VDQ_BHSI 1 "register_operand" "w")
		       (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "simd_type" "simd_abd")
   (set_attr "simd_mode" "<MODE>")]
)
;; Signed absolute difference and accumulate: op3 += abs (op1 - op2) -> SABA
;; (operand 3 is tied to the destination).  Restored the stripped
;; <mode>/<Vtype>/<MODE> tokens.
(define_insn "aba<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
			 (match_operand:VDQ_BHSI 1 "register_operand" "w")
			 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
		       (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "simd_type" "simd_abd")
   (set_attr "simd_mode" "<MODE>")]
)
;; Floating-point absolute difference -> FABD.  Restored the stripped
;; <mode>/<Vtype>/<MODE> tokens.
(define_insn "fabd<mode>_3"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
	(abs:VDQF (minus:VDQF
		   (match_operand:VDQF 1 "register_operand" "w")
		   (match_operand:VDQF 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "fabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "simd_type" "simd_fabd")
   (set_attr "simd_mode" "<MODE>")]
)
;; Scalar (SF/DF) floating-point absolute difference.  Restored the
;; stripped <mode>/<s>/<MODE> tokens; note this pattern sets the generic
;; "mode" attribute, not "simd_mode".
(define_insn "*fabd<mode>_scalar3"
  [(set (match_operand:GPF 0 "register_operand" "=w")
	(abs:GPF (minus:GPF
		  (match_operand:GPF 1 "register_operand" "w")
		  (match_operand:GPF 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "fabd\t%<s>0, %<s>1, %<s>2"
  [(set_attr "simd_type" "simd_fabd")
   (set_attr "mode" "<MODE>")]
)
;; Vector bitwise AND.  Restored the stripped <mode>/<Vbtype>/<MODE> tokens.
(define_insn "and<mode>3"
  [(set (match_operand:VDQ 0 "register_operand" "=w")
	(and:VDQ (match_operand:VDQ 1 "register_operand" "w")
		 (match_operand:VDQ 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "simd_type" "simd_logic")
   (set_attr "simd_mode" "<MODE>")]
)
;; Vector bitwise OR.  Restored the stripped <mode>/<Vbtype>/<MODE> tokens.
(define_insn "ior<mode>3"
  [(set (match_operand:VDQ 0 "register_operand" "=w")
	(ior:VDQ (match_operand:VDQ 1 "register_operand" "w")
		 (match_operand:VDQ 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "simd_type" "simd_logic")
   (set_attr "simd_mode" "<MODE>")]
)
;; Vector bitwise XOR.  Restored the stripped <mode>/<Vbtype>/<MODE> tokens.
(define_insn "xor<mode>3"
  [(set (match_operand:VDQ 0 "register_operand" "=w")
	(xor:VDQ (match_operand:VDQ 1 "register_operand" "w")
		 (match_operand:VDQ 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "simd_type" "simd_logic")
   (set_attr "simd_mode" "<MODE>")]
)
;; Vector bitwise NOT.  Restored the stripped <mode>/<Vbtype>/<MODE> tokens.
(define_insn "one_cmpl<mode>2"
  [(set (match_operand:VDQ 0 "register_operand" "=w")
	(not:VDQ (match_operand:VDQ 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "not\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "simd_type" "simd_logic")
   (set_attr "simd_mode" "<MODE>")]
)
;; Insert a general-purpose register into one lane of a vector; the
;; vec_merge mask (operand 2) is a one-hot lane selector and %p2 prints its
;; bit position.  Restored the stripped <mode>/<VEL>/<Vetype>/<MODE> tokens
;; and removed a stray trailing ';' after the template string.
(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VQ_S 0 "register_operand" "=w")
	(vec_merge:VQ_S
	    (vec_duplicate:VQ_S
		(match_operand:<VEL> 1 "register_operand" "r"))
	    (match_operand:VQ_S 3 "register_operand" "0")
	    (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  "ins\t%0.<Vetype>[%p2], %w1"
  [(set_attr "simd_type" "simd_insgp")
   (set_attr "simd_mode" "<MODE>")]
)
;; Vector logical right shift by an immediate vector (all lanes equal).
;; Restored the stripped <mode>/<Vtype>/<MODE> tokens.
(define_insn "aarch64_simd_lshr<mode>"
 [(set (match_operand:VDQ 0 "register_operand" "=w")
       (lshiftrt:VDQ (match_operand:VDQ 1 "register_operand" "w")
		     (match_operand:VDQ 2 "aarch64_simd_rshift_imm" "Dr")))]
 "TARGET_SIMD"
 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "simd_type" "simd_shift_imm")
   (set_attr "simd_mode" "<MODE>")]
)
;; Vector arithmetic right shift by an immediate vector.  Restored the
;; stripped <mode>/<Vtype>/<MODE> tokens.
(define_insn "aarch64_simd_ashr<mode>"
 [(set (match_operand:VDQ 0 "register_operand" "=w")
       (ashiftrt:VDQ (match_operand:VDQ 1 "register_operand" "w")
		     (match_operand:VDQ 2 "aarch64_simd_rshift_imm" "Dr")))]
 "TARGET_SIMD"
 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "simd_type" "simd_shift_imm")
   (set_attr "simd_mode" "<MODE>")]
)
;; Vector left shift by an immediate vector.  Restored the stripped
;; <mode>/<Vtype>/<MODE> tokens.
(define_insn "aarch64_simd_imm_shl<mode>"
 [(set (match_operand:VDQ 0 "register_operand" "=w")
       (ashift:VDQ (match_operand:VDQ 1 "register_operand" "w")
		   (match_operand:VDQ 2 "aarch64_simd_lshift_imm" "Dl")))]
 "TARGET_SIMD"
  "shl\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "simd_type" "simd_shift_imm")
   (set_attr "simd_mode" "<MODE>")]
)
;; Vector left shift by a per-lane register amount (SSHL).  Restored the
;; stripped <mode>/<Vtype>/<MODE> tokens.
(define_insn "aarch64_simd_reg_sshl<mode>"
 [(set (match_operand:VDQ 0 "register_operand" "=w")
       (ashift:VDQ (match_operand:VDQ 1 "register_operand" "w")
		   (match_operand:VDQ 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "simd_type" "simd_shift")
   (set_attr "simd_mode" "<MODE>")]
)
;; USHL: unsigned per-lane register shift (negative amounts shift right);
;; expressed as an unspec because RTL has no such operation.  Restored the
;; stripped <mode>/<Vtype>/<MODE> tokens.
(define_insn "aarch64_simd_reg_shl<mode>_unsigned"
 [(set (match_operand:VDQ 0 "register_operand" "=w")
       (unspec:VDQ [(match_operand:VDQ 1 "register_operand" "w")
		    (match_operand:VDQ 2 "register_operand" "w")]
		   UNSPEC_ASHIFT_UNSIGNED))]
 "TARGET_SIMD"
 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "simd_type" "simd_shift")
   (set_attr "simd_mode" "<MODE>")]
)
;; SSHL: signed per-lane register shift (negative amounts shift right).
;; Restored the stripped <mode>/<Vtype>/<MODE> tokens.
(define_insn "aarch64_simd_reg_shl<mode>_signed"
 [(set (match_operand:VDQ 0 "register_operand" "=w")
       (unspec:VDQ [(match_operand:VDQ 1 "register_operand" "w")
		    (match_operand:VDQ 2 "register_operand" "w")]
		   UNSPEC_ASHIFT_SIGNED))]
 "TARGET_SIMD"
 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "simd_type" "simd_shift")
   (set_attr "simd_mode" "<MODE>")]
)
;; Expand a vector left shift by an SImode amount.  An in-range constant
;; becomes an immediate-shift insn; otherwise the amount is broadcast into
;; a vector and the per-lane register shift (SSHL) is used.  Restored the
;; stripped <mode>/<MODE>/<VEL> tokens.
(define_expand "ashl<mode>3"
  [(match_operand:VDQ 0 "register_operand" "")
   (match_operand:VDQ 1 "register_operand" "")
   (match_operand:SI 2 "general_operand" "")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      /* Immediate left shifts are valid for [0, bit_width - 1].  */
      if (shift_amount >= 0 && shift_amount < bit_width)
	{
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
	  emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
						     operands[1],
						     tmp));
	  DONE;
	}
      else
	{
	  operands[2] = force_reg (SImode, operands[2]);
	}
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_aarch64_simd_dup<mode> (tmp,
					     convert_to_mode (<VEL>mode,
							      operands[2],
							      0)));
      emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
						  tmp));
      DONE;
    }
  else
    FAIL;
}
)
;; Expand a vector logical right shift.  An in-range constant (right shifts
;; allow [1, bit_width]) uses the immediate form; otherwise the amount is
;; negated and broadcast for USHL, whose negative amounts shift right.
;; Restored the stripped <mode>/<MODE>/<VEL> tokens.
(define_expand "lshr<mode>3"
  [(match_operand:VDQ 0 "register_operand" "")
   (match_operand:VDQ 1 "register_operand" "")
   (match_operand:SI 2 "general_operand" "")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
	{
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
	  emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
						  operands[1],
						  tmp));
	  DONE;
	}
      else
	operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
					     convert_to_mode (<VEL>mode,
							      tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
							  operands[1],
							  tmp1));
      DONE;
    }
  else
    FAIL;
}
)
;; Expand a vector arithmetic right shift; same strategy as lshr<mode>3 but
;; using SSHR / signed SSHL.  Restored the stripped <mode>/<MODE>/<VEL>
;; tokens.
(define_expand "ashr<mode>3"
  [(match_operand:VDQ 0 "register_operand" "")
   (match_operand:VDQ 1 "register_operand" "")
   (match_operand:SI 2 "general_operand" "")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
	{
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
	  emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
						  operands[1],
						  tmp));
	  DONE;
	}
      else
	operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
					     convert_to_mode (<VEL>mode,
							      tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
							operands[1],
							tmp1));
      DONE;
    }
  else
    FAIL;
}
)
;; Vector-by-vector left shift: maps directly onto the per-lane register
;; shift.  Restored the stripped <mode> tokens.
(define_expand "vashl<mode>3"
 [(match_operand:VDQ 0 "register_operand" "")
  (match_operand:VDQ 1 "register_operand" "")
  (match_operand:VDQ 2 "register_operand" "")]
 "TARGET_SIMD"
{
  emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
					      operands[2]));
  DONE;
})
;; Using mode VQ_S as there is no V2DImode neg!
;; Negating individual lanes most certainly offsets the
;; gain from vectorization.
;; Vector-by-vector arithmetic right shift: negate the amounts, then use
;; signed SSHL (negative amounts shift right).  Restored the stripped
;; <mode>/<MODE> tokens.
(define_expand "vashr<mode>3"
 [(match_operand:VQ_S 0 "register_operand" "")
  (match_operand:VQ_S 1 "register_operand" "")
  (match_operand:VQ_S 2 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
						    neg));
  DONE;
})
;; Vector-by-vector logical right shift via negated amounts and unsigned
;; USHL.  Restored the stripped <mode>/<MODE> tokens.
(define_expand "vlshr<mode>3"
 [(match_operand:VQ_S 0 "register_operand" "")
  (match_operand:VQ_S 1 "register_operand" "")
  (match_operand:VQ_S 2 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
						      neg));
  DONE;
})
;; Standard vec_set expander: convert the lane index into the one-hot
;; vec_merge mask the insn pattern expects.  Restored the stripped
;; <mode>/<VEL> tokens.
(define_expand "vec_set<mode>"
  [(match_operand:VQ_S 0 "register_operand" "+w")
   (match_operand:<VEL> 1 "register_operand" "r")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
					       GEN_INT (elem), operands[0]));
    DONE;
  }
)
;; Insert a DI general-purpose register into one lane of a V2DI vector;
;; %p2 prints the bit position of the one-hot vec_merge mask.  No mode
;; iterator, so this pattern is intact.  (The trailing ';' after the
;; template starts an empty md comment and is harmless.)
(define_insn "aarch64_simd_vec_setv2di"
[(set (match_operand:V2DI 0 "register_operand" "=w")
(vec_merge:V2DI
(vec_duplicate:V2DI
(match_operand:DI 1 "register_operand" "r"))
(match_operand:V2DI 3 "register_operand" "0")
(match_operand:SI 2 "immediate_operand" "i")))]
"TARGET_SIMD"
"ins\t%0.d[%p2], %1";
[(set_attr "simd_type" "simd_insgp")
(set_attr "simd_mode" "V2DI")]
)
;; vec_set expander for V2DI: converts the lane index into the one-hot
;; vec_merge mask expected by aarch64_simd_vec_setv2di.
(define_expand "vec_setv2di"
[(match_operand:V2DI 0 "register_operand" "+w")
(match_operand:DI 1 "register_operand" "r")
(match_operand:SI 2 "immediate_operand" "")]
"TARGET_SIMD"
{
HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
GEN_INT (elem), operands[0]));
DONE;
}
)
;; Insert element 0 of a SIMD scalar register into one lane of a FP vector.
;; Restored the stripped <mode>/<VEL>/<Vetype>/<MODE> tokens and removed a
;; stray trailing ';' after the template string.
(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
	(vec_merge:VDQF
	    (vec_duplicate:VDQF
		(match_operand:<VEL> 1 "register_operand" "w"))
	    (match_operand:VDQF 3 "register_operand" "0")
	    (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]"
  [(set_attr "simd_type" "simd_ins")
   (set_attr "simd_mode" "<MODE>")]
)
;; Standard vec_set expander for FP vectors; converts the lane index into
;; the one-hot vec_merge mask.  Restored the stripped <mode>/<VEL> tokens.
(define_expand "vec_set<mode>"
  [(match_operand:VDQF 0 "register_operand" "+w")
   (match_operand:<VEL> 1 "register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
					       GEN_INT (elem), operands[0]));
    DONE;
  }
)
;; Integer multiply-accumulate: op0 = op1 + op2 * op3 (operand 1 tied to
;; the destination).  Restored the stripped <mode>/<Vtype>/<MODE> tokens.
(define_insn "aarch64_mla<mode>"
 [(set (match_operand:VQ_S 0 "register_operand" "=w")
       (plus:VQ_S (mult:VQ_S (match_operand:VQ_S 2 "register_operand" "w")
			     (match_operand:VQ_S 3 "register_operand" "w"))
		  (match_operand:VQ_S 1 "register_operand" "0")))]
 "TARGET_SIMD"
 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "simd_type" "simd_mla")
   (set_attr "simd_mode" "<MODE>")]
)
;; Multiply-accumulate by element: op0 = op4 + op3 * op1[op2].  Restored
;; the stripped <mode>/<VEL>/<h_con>/<Vtype>/<Vetype>/<MODE> tokens.
(define_insn "*aarch64_mla_elt<mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (plus:VDQHS
	 (mult:VDQHS
	   (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:VDQHS 1 "register_operand" "<h_con>")
		  (parallel [(match_operand:SI 2 "immediate_operand")])))
	   (match_operand:VDQHS 3 "register_operand" "w"))
	 (match_operand:VDQHS 4 "register_operand" "0")))]
 "TARGET_SIMD"
 "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]"
  [(set_attr "simd_type" "simd_mla")
   (set_attr "simd_mode" "<MODE>")]
)
(define_insn "*aarch64_mla_elt_"
[(set (match_operand:VDQHS 0 "register_operand" "=w")
(plus:VDQHS
(mult:VDQHS
(vec_duplicate:VDQHS
(vec_select:
(match_operand: 1 "register_operand" "")
(parallel [(match_operand:SI 2 "immediate_operand")])))
(match_operand:VDQHS 3 "register_operand" "w"))
(match_operand:VDQHS 4 "register_operand" "0")))]
"TARGET_SIMD"
"mla\t%0.