diff options
Diffstat (limited to 'gcc/config/i386/i386.md')
-rw-r--r-- | gcc/config/i386/i386.md | 288 |
1 files changed, 243 insertions, 45 deletions
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 3307b081aaa..e009bc96fc2 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -328,6 +328,14 @@ (XMM29_REG 66) (XMM30_REG 67) (XMM31_REG 68) + (MASK0_REG 69) + (MASK1_REG 70) + (MASK2_REG 71) + (MASK3_REG 72) + (MASK4_REG 73) + (MASK5_REG 74) + (MASK6_REG 75) + (MASK7_REG 76) ]) ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls @@ -341,7 +349,7 @@ ;; Processor type. (define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,corei7, - atom,slm,generic64,amdfam10,bdver1,bdver2,bdver3,btver1,btver2" + atom,slm,generic,amdfam10,bdver1,bdver2,bdver3,btver1,btver2" (const (symbol_ref "ix86_schedule"))) ;; A basic instruction type. Refinements due to arguments to be @@ -360,7 +368,7 @@ sseishft,sseishft1,ssecmp,ssecomi, ssecvt,ssecvt1,sseicvt,sseins, sseshuf,sseshuf1,ssemuladd,sse4arg, - lwp, + lwp,mskmov,msklog, mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft" (const_string "other")) @@ -379,7 +387,7 @@ ssemul,sseimul,ssediv,sselog,sselog1, sseishft,sseishft1,ssecmp,ssecomi, ssecvt,ssecvt1,sseicvt,sseins, - sseshuf,sseshuf1,ssemuladd,sse4arg") + sseshuf,sseshuf1,ssemuladd,sse4arg,mskmov") (const_string "sse") (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft") (const_string "mmx") @@ -390,7 +398,7 @@ ;; The (bounding maximum) length of an instruction immediate. (define_attr "length_immediate" "" (cond [(eq_attr "type" "incdec,setcc,icmov,str,lea,other,multi,idiv,leave, - bitmanip,imulx") + bitmanip,imulx,msklog,mskmov") (const_int 0) (eq_attr "unit" "i387,sse,mmx") (const_int 0) @@ -451,7 +459,7 @@ ;; Set when 0f opcode prefix is used. (define_attr "prefix_0f" "" (if_then_else - (ior (eq_attr "type" "imovx,setcc,icmov,bitmanip") + (ior (eq_attr "type" "imovx,setcc,icmov,bitmanip,msklog,mskmov") (eq_attr "unit" "sse,mmx")) (const_int 1) (const_int 0))) @@ -651,7 +659,7 @@ fmov,fcmp,fsgn, sse,ssemov,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt, sselog1,sseshuf1,sseadd1,sseiadd1,sseishft1, - mmx,mmxmov,mmxcmp,mmxcvt") + mmx,mmxmov,mmxcmp,mmxcvt,mskmov,msklog") (match_operand 2 "memory_operand")) (const_string "load") (and (eq_attr "type" "icmov,ssemuladd,sse4arg") @@ -695,7 +703,7 @@ ;; Used to control the "enabled" attribute on a per-instruction basis. (define_attr "isa" "base,x64,x64_sse4,x64_sse4_noavx,x64_avx,nox64, sse2,sse2_noavx,sse3,sse4,sse4_noavx,avx,noavx, - avx2,noavx2,bmi2,fma4,fma,avx512f,noavx512f,fma_avx512f" + avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,fma_avx512f" (const_string "base")) (define_attr "enabled" "" @@ -718,6 +726,7 @@ (eq_attr "isa" "noavx") (symbol_ref "!TARGET_AVX") (eq_attr "isa" "avx2") (symbol_ref "TARGET_AVX2") (eq_attr "isa" "noavx2") (symbol_ref "!TARGET_AVX2") + (eq_attr "isa" "bmi") (symbol_ref "TARGET_BMI") (eq_attr "isa" "bmi2") (symbol_ref "TARGET_BMI2") (eq_attr "isa" "fma4") (symbol_ref "TARGET_FMA4") (eq_attr "isa" "fma") (symbol_ref "TARGET_FMA") @@ -2213,8 +2222,8 @@ (const_string "SI")))]) (define_insn "*movhi_internal" - [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m") - (match_operand:HI 1 "general_operand" "r ,rn,rm,rn"))] + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m ,Yk,Yk,rm") + (match_operand:HI 1 "general_operand" "r ,rn,rm,rn,rm,Yk,Yk"))] "!(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (get_attr_type (insn)) @@ -2223,6 +2232,16 @@ /* movzwl is faster than movw on p2 due to partial word stalls, though not as fast as an aligned movl. */ return "movz{wl|x}\t{%1, %k0|%k0, %1}"; + + case TYPE_MSKMOV: + switch (which_alternative) + { + case 4: return "kmovw\t{%k1, %0|%0, %k1}"; + case 5: return "kmovw\t{%1, %0|%0, %1}"; + case 6: return "kmovw\t{%1, %k0|%k0, %1}"; + default: gcc_unreachable (); + } + default: if (get_attr_mode (insn) == MODE_SI) return "mov{l}\t{%k1, %k0|%k0, %k1}"; @@ -2240,11 +2259,17 @@ (and (eq_attr "alternative" "1,2") (match_operand:HI 1 "aligned_operand")) (const_string "imov") + (eq_attr "alternative" "4,5,6") + (const_string "mskmov") (and (match_test "TARGET_MOVX") (eq_attr "alternative" "0,2")) (const_string "imovx") ] (const_string "imov"))) + (set (attr "prefix") + (if_then_else (eq_attr "alternative" "4,5,6") + (const_string "vex") + (const_string "orig"))) (set (attr "mode") (cond [(eq_attr "type" "imovx") (const_string "SI") @@ -2269,8 +2294,8 @@ ;; register stall machines with, where we use QImode instructions, since ;; partial register stall can be caused there. Then we use movzx. (define_insn "*movqi_internal" - [(set (match_operand:QI 0 "nonimmediate_operand" "=q,q ,q ,r,r ,?r,m") - (match_operand:QI 1 "general_operand" "q ,qn,qm,q,rn,qm,qn"))] + [(set (match_operand:QI 0 "nonimmediate_operand" "=q,q ,q ,r,r ,?r,m ,Yk,Yk,r") + (match_operand:QI 1 "general_operand" "q ,qn,qm,q,rn,qm,qn,r ,Yk,Yk"))] "!(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (get_attr_type (insn)) @@ -2278,6 +2303,16 @@ case TYPE_IMOVX: gcc_assert (ANY_QI_REG_P (operands[1]) || MEM_P (operands[1])); return "movz{bl|x}\t{%1, %k0|%k0, %1}"; + + case TYPE_MSKMOV: + switch (which_alternative) + { + case 7: return "kmovw\t{%k1, %0|%0, %k1}"; + case 8: return "kmovw\t{%1, %0|%0, %1}"; + case 9: return "kmovw\t{%1, %k0|%k0, %1}"; + default: gcc_unreachable (); + } + default: if (get_attr_mode (insn) == MODE_SI) return "mov{l}\t{%k1, %k0|%k0, %k1}"; @@ -2297,11 +2332,17 @@ (const_string "imov") (eq_attr "alternative" "3,5") (const_string "imovx") + (eq_attr "alternative" "7,8,9") + (const_string "mskmov") (and (match_test "TARGET_MOVX") (eq_attr "alternative" "2")) (const_string "imovx") ] (const_string "imov"))) + (set (attr "prefix") + (if_then_else (eq_attr "alternative" "7,8,9") + (const_string "vex") + (const_string "orig"))) (set (attr "mode") (cond [(eq_attr "alternative" "3,4,5") (const_string "SI") @@ -7494,6 +7535,26 @@ operands[3] = gen_lowpart (QImode, operands[3]); }) +(define_split + [(set (match_operand:SWI12 0 "mask_reg_operand") + (any_logic:SWI12 (match_operand:SWI12 1 "mask_reg_operand") + (match_operand:SWI12 2 "mask_reg_operand"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_AVX512F && reload_completed" + [(set (match_dup 0) + (any_logic:SWI12 (match_dup 1) + (match_dup 2)))]) + +(define_insn "*k<logic><mode>" + [(set (match_operand:SWI12 0 "mask_reg_operand" "=Yk") + (any_logic:SWI12 (match_operand:SWI12 1 "mask_reg_operand" "Yk") + (match_operand:SWI12 2 "mask_reg_operand" "Yk")))] + "TARGET_AVX512F" + "k<logic>w\t{%2, %1, %0|%0, %1, %2}"; + [(set_attr "mode" "<MODE>") + (set_attr "type" "msklog") + (set_attr "prefix" "vex")]) + ;; %%% This used to optimize known byte-wide and operations to memory, ;; and sometimes to QImode registers. If this is considered useful, ;; it should be done with splitters. @@ -7617,9 +7678,9 @@ (set_attr "mode" "SI")]) (define_insn "*andhi_1" - [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,Ya") - (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,qm") - (match_operand:HI 2 "general_operand" "rn,rm,L"))) + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,Ya,!Yk") + (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,qm,Yk") + (match_operand:HI 2 "general_operand" "rn,rm,L,Yk"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (AND, HImode, operands)" { @@ -7628,34 +7689,38 @@ case TYPE_IMOVX: return "#"; + case TYPE_MSKLOG: + return "kandw\t{%2, %1, %0|%0, %1, %2}"; + default: gcc_assert (rtx_equal_p (operands[0], operands[1])); return "and{w}\t{%2, %0|%0, %2}"; } } - [(set_attr "type" "alu,alu,imovx") - (set_attr "length_immediate" "*,*,0") + [(set_attr "type" "alu,alu,imovx,msklog") + (set_attr "length_immediate" "*,*,0,*") (set (attr "prefix_rex") (if_then_else (and (eq_attr "type" "imovx") (match_operand 1 "ext_QIreg_operand")) (const_string "1") (const_string "*"))) - (set_attr "mode" "HI,HI,SI")]) + (set_attr "mode" "HI,HI,SI,HI")]) ;; %%% Potential partial reg stall on alternative 2. What to do? (define_insn "*andqi_1" - [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r") - (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0") - (match_operand:QI 2 "general_operand" "qn,qmn,rn"))) + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,!Yk") + (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,Yk") + (match_operand:QI 2 "general_operand" "qn,qmn,rn,Yk"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (AND, QImode, operands)" "@ and{b}\t{%2, %0|%0, %2} and{b}\t{%2, %0|%0, %2} - and{l}\t{%k2, %k0|%k0, %k2}" - [(set_attr "type" "alu") - (set_attr "mode" "QI,QI,SI")]) + and{l}\t{%k2, %k0|%k0, %k2} + kandw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "alu,alu,alu,msklog") + (set_attr "mode" "QI,QI,SI,HI")]) (define_insn "*andqi_1_slp" [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) @@ -7668,6 +7733,40 @@ [(set_attr "type" "alu1") (set_attr "mode" "QI")]) +(define_insn "kandn<mode>" + [(set (match_operand:SWI12 0 "register_operand" "=r,&r,!Yk") + (and:SWI12 + (not:SWI12 + (match_operand:SWI12 1 "register_operand" "r,0,Yk")) + (match_operand:SWI12 2 "register_operand" "r,r,Yk"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_AVX512F" + "@ + andn\t{%k2, %k1, %k0|%k0, %k1, %k2} + # + kandnw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "bmi,*,avx512f") + (set_attr "type" "bitmanip,*,msklog") + (set_attr "prefix" "*,*,vex") + (set_attr "btver2_decode" "direct,*,*") + (set_attr "mode" "<MODE>")]) + +(define_split + [(set (match_operand:SWI12 0 "general_reg_operand") + (and:SWI12 + (not:SWI12 + (match_dup 0)) + (match_operand:SWI12 1 "general_reg_operand"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_AVX512F && !TARGET_BMI && reload_completed" + [(set (match_dup 0) + (not:HI (match_dup 0))) + (parallel [(set (match_dup 0) + (and:HI (match_dup 0) + (match_dup 1))) + (clobber (reg:CC FLAGS_REG))])] + "") + ;; Turn *anddi_1 into *andsi_1_zext if possible. (define_split [(set (match_operand:DI 0 "register_operand") @@ -7999,29 +8098,44 @@ "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;") (define_insn "*<code><mode>_1" - [(set (match_operand:SWI248 0 "nonimmediate_operand" "=r,rm") - (any_or:SWI248 - (match_operand:SWI248 1 "nonimmediate_operand" "%0,0") - (match_operand:SWI248 2 "<general_operand>" "<g>,r<i>"))) + [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,rm") + (any_or:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "%0,0") + (match_operand:SWI48 2 "<general_operand>" "<g>,r<i>"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") (set_attr "mode" "<MODE>")]) +(define_insn "*<code>hi_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,rm,!Yk") + (any_or:HI + (match_operand:HI 1 "nonimmediate_operand" "%0,0,Yk") + (match_operand:HI 2 "general_operand" "<g>,r<i>,Yk"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (<CODE>, HImode, operands)" + "@ + <logic>{w}\t{%2, %0|%0, %2} + <logic>{w}\t{%2, %0|%0, %2} + k<logic>w\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "alu,alu,msklog") + (set_attr "mode" "HI")]) + ;; %%% Potential partial reg stall on alternative 2. What to do? (define_insn "*<code>qi_1" - [(set (match_operand:QI 0 "nonimmediate_operand" "=q,m,r") - (any_or:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0") - (match_operand:QI 2 "general_operand" "qmn,qn,rn"))) + [(set (match_operand:QI 0 "nonimmediate_operand" "=q,m,r,!Yk") + (any_or:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,Yk") + (match_operand:QI 2 "general_operand" "qmn,qn,rn,Yk"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (<CODE>, QImode, operands)" "@ <logic>{b}\t{%2, %0|%0, %2} <logic>{b}\t{%2, %0|%0, %2} - <logic>{l}\t{%k2, %k0|%k0, %k2}" - [(set_attr "type" "alu") - (set_attr "mode" "QI,QI,SI")]) + <logic>{l}\t{%k2, %k0|%k0, %k2} + k<logic>w\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "alu,alu,alu,msklog") + (set_attr "mode" "QI,QI,SI,HI")]) ;; See comment for addsi_1_zext why we do use nonimmediate_operand (define_insn "*<code>si_1_zext" @@ -8071,6 +8185,74 @@ [(set_attr "type" "alu") (set_attr "mode" "<MODE>")]) +(define_insn "kxnor<mode>" + [(set (match_operand:SWI12 0 "register_operand" "=r,!Yk") + (not:SWI12 + (xor:SWI12 + (match_operand:SWI12 1 "register_operand" "0,Yk") + (match_operand:SWI12 2 "register_operand" "r,Yk"))))] + "TARGET_AVX512F" + "@ + # + kxnorw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "*,msklog") + (set_attr "prefix" "*,vex") + (set_attr "mode" "<MODE>")]) + +(define_split + [(set (match_operand:SWI12 0 "general_reg_operand") + (not:SWI12 + (xor:SWI12 + (match_dup 0) + (match_operand:SWI12 1 "general_reg_operand"))))] + "TARGET_AVX512F && reload_completed" + [(parallel [(set (match_dup 0) + (xor:HI (match_dup 0) + (match_dup 1))) + (clobber (reg:CC FLAGS_REG))]) + (set (match_dup 0) + (not:HI (match_dup 0)))] + "") + +(define_insn "kortestzhi" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ + (ior:HI + (match_operand:HI 0 "register_operand" "Yk") + (match_operand:HI 1 "register_operand" "Yk")) + (const_int 0)))] + "TARGET_AVX512F && ix86_match_ccmode (insn, CCZmode)" + "kortestw\t{%1, %0|%0, %1}" + [(set_attr "mode" "HI") + (set_attr "type" "msklog") + (set_attr "prefix" "vex")]) + +(define_insn "kortestchi" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (ior:HI + (match_operand:HI 0 "register_operand" "Yk") + (match_operand:HI 1 "register_operand" "Yk")) + (const_int -1)))] + "TARGET_AVX512F && ix86_match_ccmode (insn, CCCmode)" + "kortestw\t{%1, %0|%0, %1}" + [(set_attr "mode" "HI") + (set_attr "type" "msklog") + (set_attr "prefix" "vex")]) + +(define_insn "kunpckhi" + [(set (match_operand:HI 0 "register_operand" "=Yk") + (ior:HI + (ashift:HI + (match_operand:HI 1 "register_operand" "Yk") + (const_int 8)) + (zero_extend:HI (match_operand:QI 2 "register_operand" "Yk"))))] + "TARGET_AVX512F" + "kunpckbw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "mode" "HI") + (set_attr "type" "msklog") + (set_attr "prefix" "vex")]) + ;; See comment for addsi_1_zext why we do use nonimmediate_operand ;; ??? Special case for immediate operand is missing - it is tricky. (define_insn "*<code>si_2_zext" @@ -8640,23 +8822,38 @@ "ix86_expand_unary_operator (NOT, <MODE>mode, operands); DONE;") (define_insn "*one_cmpl<mode>2_1" - [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm") - (not:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "0")))] + [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm") + (not:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0")))] "ix86_unary_operator_ok (NOT, <MODE>mode, operands)" "not{<imodesuffix>}\t%0" [(set_attr "type" "negnot") (set_attr "mode" "<MODE>")]) +(define_insn "*one_cmplhi2_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,!Yk") + (not:HI (match_operand:HI 1 "nonimmediate_operand" "0,Yk")))] + "ix86_unary_operator_ok (NOT, HImode, operands)" + "@ + not{w}\t%0 + knotw\t{%1, %0|%0, %1}" + [(set_attr "isa" "*,avx512f") + (set_attr "type" "negnot,msklog") + (set_attr "prefix" "*,vex") + (set_attr "mode" "HI")]) + ;; %%% Potential partial reg stall on alternative 1. What to do? (define_insn "*one_cmplqi2_1" - [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r") - (not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")))] + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,!Yk") + (not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,Yk")))] "ix86_unary_operator_ok (NOT, QImode, operands)" "@ not{b}\t%0 - not{l}\t%k0" - [(set_attr "type" "negnot") - (set_attr "mode" "QI,SI")]) + not{l}\t%k0 + knotw\t{%1, %0|%0, %1}" + [(set_attr "isa" "*,*,avx512f") + (set_attr "type" "negnot,negnot,msklog") + (set_attr "prefix" "*,*,vex") + (set_attr "mode" "QI,SI,QI")]) ;; ??? Currently never generated - xor is used instead. (define_insn "*one_cmplsi2_1_zext" @@ -16423,11 +16620,11 @@ }) ;; Avoid redundant prefixes by splitting HImode arithmetic to SImode. - +;; Do not split instructions with mask registers. (define_split - [(set (match_operand 0 "register_operand") + [(set (match_operand 0 "general_reg_operand") (match_operator 3 "promotable_binary_operator" - [(match_operand 1 "register_operand") + [(match_operand 1 "general_reg_operand") (match_operand 2 "aligned_operand")])) (clobber (reg:CC FLAGS_REG))] "! TARGET_PARTIAL_REG_STALL && reload_completed @@ -16522,9 +16719,10 @@ operands[1] = gen_lowpart (SImode, operands[1]); }) +;; Do not split instructions with mask regs. (define_split - [(set (match_operand 0 "register_operand") - (not (match_operand 1 "register_operand")))] + [(set (match_operand 0 "general_reg_operand") + (not (match_operand 1 "general_reg_operand")))] "! TARGET_PARTIAL_REG_STALL && reload_completed && (GET_MODE (operands[0]) == HImode || (GET_MODE (operands[0]) == QImode |