summary | refs | log | tree | commit | diff
path: root/gcc/config/i386/i386.md
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config/i386/i386.md')
-rw-r--r-- gcc/config/i386/i386.md 288
1 files changed, 243 insertions, 45 deletions
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 3307b081aaa..e009bc96fc2 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -328,6 +328,14 @@
(XMM29_REG 66)
(XMM30_REG 67)
(XMM31_REG 68)
+ (MASK0_REG 69)
+ (MASK1_REG 70)
+ (MASK2_REG 71)
+ (MASK3_REG 72)
+ (MASK4_REG 73)
+ (MASK5_REG 74)
+ (MASK6_REG 75)
+ (MASK7_REG 76)
])
;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
@@ -341,7 +349,7 @@
;; Processor type.
(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,corei7,
- atom,slm,generic64,amdfam10,bdver1,bdver2,bdver3,btver1,btver2"
+ atom,slm,generic,amdfam10,bdver1,bdver2,bdver3,btver1,btver2"
(const (symbol_ref "ix86_schedule")))
;; A basic instruction type. Refinements due to arguments to be
@@ -360,7 +368,7 @@
sseishft,sseishft1,ssecmp,ssecomi,
ssecvt,ssecvt1,sseicvt,sseins,
sseshuf,sseshuf1,ssemuladd,sse4arg,
- lwp,
+ lwp,mskmov,msklog,
mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
(const_string "other"))
@@ -379,7 +387,7 @@
ssemul,sseimul,ssediv,sselog,sselog1,
sseishft,sseishft1,ssecmp,ssecomi,
ssecvt,ssecvt1,sseicvt,sseins,
- sseshuf,sseshuf1,ssemuladd,sse4arg")
+ sseshuf,sseshuf1,ssemuladd,sse4arg,mskmov")
(const_string "sse")
(eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
(const_string "mmx")
@@ -390,7 +398,7 @@
;; The (bounding maximum) length of an instruction immediate.
(define_attr "length_immediate" ""
(cond [(eq_attr "type" "incdec,setcc,icmov,str,lea,other,multi,idiv,leave,
- bitmanip,imulx")
+ bitmanip,imulx,msklog,mskmov")
(const_int 0)
(eq_attr "unit" "i387,sse,mmx")
(const_int 0)
@@ -451,7 +459,7 @@
;; Set when 0f opcode prefix is used.
(define_attr "prefix_0f" ""
(if_then_else
- (ior (eq_attr "type" "imovx,setcc,icmov,bitmanip")
+ (ior (eq_attr "type" "imovx,setcc,icmov,bitmanip,msklog,mskmov")
(eq_attr "unit" "sse,mmx"))
(const_int 1)
(const_int 0)))
@@ -651,7 +659,7 @@
fmov,fcmp,fsgn,
sse,ssemov,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,
sselog1,sseshuf1,sseadd1,sseiadd1,sseishft1,
- mmx,mmxmov,mmxcmp,mmxcvt")
+ mmx,mmxmov,mmxcmp,mmxcvt,mskmov,msklog")
(match_operand 2 "memory_operand"))
(const_string "load")
(and (eq_attr "type" "icmov,ssemuladd,sse4arg")
@@ -695,7 +703,7 @@
;; Used to control the "enabled" attribute on a per-instruction basis.
(define_attr "isa" "base,x64,x64_sse4,x64_sse4_noavx,x64_avx,nox64,
sse2,sse2_noavx,sse3,sse4,sse4_noavx,avx,noavx,
- avx2,noavx2,bmi2,fma4,fma,avx512f,noavx512f,fma_avx512f"
+ avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,fma_avx512f"
(const_string "base"))
(define_attr "enabled" ""
@@ -718,6 +726,7 @@
(eq_attr "isa" "noavx") (symbol_ref "!TARGET_AVX")
(eq_attr "isa" "avx2") (symbol_ref "TARGET_AVX2")
(eq_attr "isa" "noavx2") (symbol_ref "!TARGET_AVX2")
+ (eq_attr "isa" "bmi") (symbol_ref "TARGET_BMI")
(eq_attr "isa" "bmi2") (symbol_ref "TARGET_BMI2")
(eq_attr "isa" "fma4") (symbol_ref "TARGET_FMA4")
(eq_attr "isa" "fma") (symbol_ref "TARGET_FMA")
@@ -2213,8 +2222,8 @@
(const_string "SI")))])
(define_insn "*movhi_internal"
- [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m")
- (match_operand:HI 1 "general_operand" "r ,rn,rm,rn"))]
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m ,Yk,Yk,rm")
+ (match_operand:HI 1 "general_operand" "r ,rn,rm,rn,rm,Yk,Yk"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
switch (get_attr_type (insn))
@@ -2223,6 +2232,16 @@
/* movzwl is faster than movw on p2 due to partial word stalls,
though not as fast as an aligned movl. */
return "movz{wl|x}\t{%1, %k0|%k0, %1}";
+
+ case TYPE_MSKMOV:
+ switch (which_alternative)
+ {
+ case 4: return "kmovw\t{%k1, %0|%0, %k1}";
+ case 5: return "kmovw\t{%1, %0|%0, %1}";
+ case 6: return "kmovw\t{%1, %k0|%k0, %1}";
+ default: gcc_unreachable ();
+ }
+
default:
if (get_attr_mode (insn) == MODE_SI)
return "mov{l}\t{%k1, %k0|%k0, %k1}";
@@ -2240,11 +2259,17 @@
(and (eq_attr "alternative" "1,2")
(match_operand:HI 1 "aligned_operand"))
(const_string "imov")
+ (eq_attr "alternative" "4,5,6")
+ (const_string "mskmov")
(and (match_test "TARGET_MOVX")
(eq_attr "alternative" "0,2"))
(const_string "imovx")
]
(const_string "imov")))
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "4,5,6")
+ (const_string "vex")
+ (const_string "orig")))
(set (attr "mode")
(cond [(eq_attr "type" "imovx")
(const_string "SI")
@@ -2269,8 +2294,8 @@
;; register stall machines with, where we use QImode instructions, since
;; partial register stall can be caused there. Then we use movzx.
(define_insn "*movqi_internal"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=q,q ,q ,r,r ,?r,m")
- (match_operand:QI 1 "general_operand" "q ,qn,qm,q,rn,qm,qn"))]
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=q,q ,q ,r,r ,?r,m ,Yk,Yk,r")
+ (match_operand:QI 1 "general_operand" "q ,qn,qm,q,rn,qm,qn,r ,Yk,Yk"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
switch (get_attr_type (insn))
@@ -2278,6 +2303,16 @@
case TYPE_IMOVX:
gcc_assert (ANY_QI_REG_P (operands[1]) || MEM_P (operands[1]));
return "movz{bl|x}\t{%1, %k0|%k0, %1}";
+
+ case TYPE_MSKMOV:
+ switch (which_alternative)
+ {
+ case 7: return "kmovw\t{%k1, %0|%0, %k1}";
+ case 8: return "kmovw\t{%1, %0|%0, %1}";
+ case 9: return "kmovw\t{%1, %k0|%k0, %1}";
+ default: gcc_unreachable ();
+ }
+
default:
if (get_attr_mode (insn) == MODE_SI)
return "mov{l}\t{%k1, %k0|%k0, %k1}";
@@ -2297,11 +2332,17 @@
(const_string "imov")
(eq_attr "alternative" "3,5")
(const_string "imovx")
+ (eq_attr "alternative" "7,8,9")
+ (const_string "mskmov")
(and (match_test "TARGET_MOVX")
(eq_attr "alternative" "2"))
(const_string "imovx")
]
(const_string "imov")))
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "7,8,9")
+ (const_string "vex")
+ (const_string "orig")))
(set (attr "mode")
(cond [(eq_attr "alternative" "3,4,5")
(const_string "SI")
@@ -7494,6 +7535,26 @@
operands[3] = gen_lowpart (QImode, operands[3]);
})
+;; After reload, an any_logic operation over SWI12 whose destination and
+;; both sources satisfy mask_reg_operand is rewritten to the same
+;; operation without the FLAGS_REG clobber.  The result has the shape of
+;; the clobber-free *k<logic><mode> insn.
+(define_split
+ [(set (match_operand:SWI12 0 "mask_reg_operand")
+ (any_logic:SWI12 (match_operand:SWI12 1 "mask_reg_operand")
+ (match_operand:SWI12 2 "mask_reg_operand")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_AVX512F && reload_completed"
+ [(set (match_dup 0)
+ (any_logic:SWI12 (match_dup 1)
+ (match_dup 2)))])
+
+;; Three-operand any_logic operation whose operands are all Yk-constrained
+;; mask_reg_operands.  Unlike the general-register logic patterns it has
+;; no FLAGS_REG clobber.  The emitted mnemonic always uses the "w" suffix,
+;; for every mode in SWI12.
+(define_insn "*k<logic><mode>"
+ [(set (match_operand:SWI12 0 "mask_reg_operand" "=Yk")
+ (any_logic:SWI12 (match_operand:SWI12 1 "mask_reg_operand" "Yk")
+ (match_operand:SWI12 2 "mask_reg_operand" "Yk")))]
+ "TARGET_AVX512F"
+ "k<logic>w\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mode" "<MODE>")
+ (set_attr "type" "msklog")
+ (set_attr "prefix" "vex")])
+
;; %%% This used to optimize known byte-wide and operations to memory,
;; and sometimes to QImode registers. If this is considered useful,
;; it should be done with splitters.
@@ -7617,9 +7678,9 @@
(set_attr "mode" "SI")])
(define_insn "*andhi_1"
- [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,Ya")
- (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,qm")
- (match_operand:HI 2 "general_operand" "rn,rm,L")))
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,Ya,!Yk")
+ (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,qm,Yk")
+ (match_operand:HI 2 "general_operand" "rn,rm,L,Yk")))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (AND, HImode, operands)"
{
@@ -7628,34 +7689,38 @@
case TYPE_IMOVX:
return "#";
+ case TYPE_MSKLOG:
+ return "kandw\t{%2, %1, %0|%0, %1, %2}";
+
default:
gcc_assert (rtx_equal_p (operands[0], operands[1]));
return "and{w}\t{%2, %0|%0, %2}";
}
}
- [(set_attr "type" "alu,alu,imovx")
- (set_attr "length_immediate" "*,*,0")
+ [(set_attr "type" "alu,alu,imovx,msklog")
+ (set_attr "length_immediate" "*,*,0,*")
(set (attr "prefix_rex")
(if_then_else
(and (eq_attr "type" "imovx")
(match_operand 1 "ext_QIreg_operand"))
(const_string "1")
(const_string "*")))
- (set_attr "mode" "HI,HI,SI")])
+ (set_attr "mode" "HI,HI,SI,HI")])
;; %%% Potential partial reg stall on alternative 2. What to do?
(define_insn "*andqi_1"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r")
- (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
- (match_operand:QI 2 "general_operand" "qn,qmn,rn")))
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,!Yk")
+ (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,Yk")
+ (match_operand:QI 2 "general_operand" "qn,qmn,rn,Yk")))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (AND, QImode, operands)"
"@
and{b}\t{%2, %0|%0, %2}
and{b}\t{%2, %0|%0, %2}
- and{l}\t{%k2, %k0|%k0, %k2}"
- [(set_attr "type" "alu")
- (set_attr "mode" "QI,QI,SI")])
+ and{l}\t{%k2, %k0|%k0, %k2}
+ kandw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "alu,alu,alu,msklog")
+ (set_attr "mode" "QI,QI,SI,HI")])
(define_insn "*andqi_1_slp"
[(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
@@ -7668,6 +7733,40 @@
[(set_attr "type" "alu1")
(set_attr "mode" "QI")])
+;; operand 0 = (~operand 1) & operand 2, for SWI12 modes, with a FLAGS_REG
+;; clobber.  Three alternatives:
+;;   0: general registers, emits andn via the %k operand modifier;
+;;      enabled by isa "bmi".
+;;   1: general registers with operand 1 tied to operand 0 and an
+;;      earlyclobber output; emits "#", so it is presumably handled by the
+;;      post-reload define_split that follows this insn.
+;;   2: Yk-constrained operands, emits kandnw; isa "avx512f", prefix "vex".
+(define_insn "kandn<mode>"
+ [(set (match_operand:SWI12 0 "register_operand" "=r,&r,!Yk")
+ (and:SWI12
+ (not:SWI12
+ (match_operand:SWI12 1 "register_operand" "r,0,Yk"))
+ (match_operand:SWI12 2 "register_operand" "r,r,Yk")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_AVX512F"
+ "@
+ andn\t{%k2, %k1, %k0|%k0, %k1, %k2}
+ #
+ kandnw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "bmi,*,avx512f")
+ (set_attr "type" "bitmanip,*,msklog")
+ (set_attr "prefix" "*,*,vex")
+ (set_attr "btver2_decode" "direct,*,*")
+ (set_attr "mode" "<MODE>")])
+
+;; Post-reload split of "op0 = ~op0 & op1" on general registers when BMI
+;; is not available: first complement operand 0 in place, then AND it
+;; with operand 1 (the AND keeps the FLAGS_REG clobber).
+;; The replacement uses the SWI12 iterator, not a fixed HI mode, so the
+;; generated insns carry the same mode as the matched operands.
+(define_split
+ [(set (match_operand:SWI12 0 "general_reg_operand")
+ (and:SWI12
+ (not:SWI12
+ (match_dup 0))
+ (match_operand:SWI12 1 "general_reg_operand")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_AVX512F && !TARGET_BMI && reload_completed"
+ [(set (match_dup 0)
+ (not:SWI12 (match_dup 0)))
+ (parallel [(set (match_dup 0)
+ (and:SWI12 (match_dup 0)
+ (match_dup 1)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "")
+
;; Turn *anddi_1 into *andsi_1_zext if possible.
(define_split
[(set (match_operand:DI 0 "register_operand")
@@ -7999,29 +8098,44 @@
"ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;")
(define_insn "*<code><mode>_1"
- [(set (match_operand:SWI248 0 "nonimmediate_operand" "=r,rm")
- (any_or:SWI248
- (match_operand:SWI248 1 "nonimmediate_operand" "%0,0")
- (match_operand:SWI248 2 "<general_operand>" "<g>,r<i>")))
+ [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,rm")
+ (any_or:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "%0,0")
+ (match_operand:SWI48 2 "<general_operand>" "<g>,r<i>")))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
"<logic>{<imodesuffix>}\t{%2, %0|%0, %2}"
[(set_attr "type" "alu")
(set_attr "mode" "<MODE>")])
+;; HImode any_or operation with a FLAGS_REG clobber.  Alternatives 0 and 1
+;; emit the two-operand <logic>{w} form (operand 1 tied to operand 0);
+;; alternative 2 takes Yk-constrained operands and emits the three-operand
+;; k<logic>w form with type "msklog".
+;; NOTE(review): "<g>" and "<i>" are used in the operand 2 constraint, but
+;; this pattern names HI directly and has no mode iterator -- confirm they
+;; are substituted as intended.
+(define_insn "*<code>hi_1"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,rm,!Yk")
+ (any_or:HI
+ (match_operand:HI 1 "nonimmediate_operand" "%0,0,Yk")
+ (match_operand:HI 2 "general_operand" "<g>,r<i>,Yk")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (<CODE>, HImode, operands)"
+ "@
+ <logic>{w}\t{%2, %0|%0, %2}
+ <logic>{w}\t{%2, %0|%0, %2}
+ k<logic>w\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "alu,alu,msklog")
+ (set_attr "mode" "HI")])
+
;; %%% Potential partial reg stall on alternative 2. What to do?
(define_insn "*<code>qi_1"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=q,m,r")
- (any_or:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
- (match_operand:QI 2 "general_operand" "qmn,qn,rn")))
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=q,m,r,!Yk")
+ (any_or:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,Yk")
+ (match_operand:QI 2 "general_operand" "qmn,qn,rn,Yk")))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (<CODE>, QImode, operands)"
"@
<logic>{b}\t{%2, %0|%0, %2}
<logic>{b}\t{%2, %0|%0, %2}
- <logic>{l}\t{%k2, %k0|%k0, %k2}"
- [(set_attr "type" "alu")
- (set_attr "mode" "QI,QI,SI")])
+ <logic>{l}\t{%k2, %k0|%k0, %k2}
+ k<logic>w\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "alu,alu,alu,msklog")
+ (set_attr "mode" "QI,QI,SI,HI")])
;; See comment for addsi_1_zext why we do use nonimmediate_operand
(define_insn "*<code>si_1_zext"
@@ -8071,6 +8185,74 @@
[(set_attr "type" "alu")
(set_attr "mode" "<MODE>")])
+;; operand 0 = ~(operand 1 ^ operand 2), for SWI12 modes; no FLAGS_REG
+;; clobber in the pattern.  Two alternatives:
+;;   0: general registers with operand 1 tied to operand 0; emits "#", so
+;;      it is presumably handled by the post-reload define_split that
+;;      follows this insn.
+;;   1: Yk-constrained operands, emits kxnorw; type "msklog", prefix "vex".
+(define_insn "kxnor<mode>"
+ [(set (match_operand:SWI12 0 "register_operand" "=r,!Yk")
+ (not:SWI12
+ (xor:SWI12
+ (match_operand:SWI12 1 "register_operand" "0,Yk")
+ (match_operand:SWI12 2 "register_operand" "r,Yk"))))]
+ "TARGET_AVX512F"
+ "@
+ #
+ kxnorw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "*,msklog")
+ (set_attr "prefix" "*,vex")
+ (set_attr "mode" "<MODE>")])
+
+;; Post-reload split of "op0 = ~(op0 ^ op1)" on general registers: XOR
+;; operand 1 into operand 0 (with a FLAGS_REG clobber), then complement
+;; operand 0 in place.
+;; The replacement uses the SWI12 iterator, not a fixed HI mode, so the
+;; generated insns carry the same mode as the matched operands.
+(define_split
+ [(set (match_operand:SWI12 0 "general_reg_operand")
+ (not:SWI12
+ (xor:SWI12
+ (match_dup 0)
+ (match_operand:SWI12 1 "general_reg_operand"))))]
+ "TARGET_AVX512F && reload_completed"
+ [(parallel [(set (match_dup 0)
+ (xor:SWI12 (match_dup 0)
+ (match_dup 1)))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (match_dup 0)
+ (not:SWI12 (match_dup 0)))]
+ "")
+
+;; Sets FLAGS_REG in CCZmode from comparing (operand 0 | operand 1)
+;; against zero.  Both operands are HImode and Yk-constrained; emits
+;; kortestw.  Only matches when ix86_match_ccmode accepts CCZmode.
+(define_insn "kortestzhi"
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ
+ (ior:HI
+ (match_operand:HI 0 "register_operand" "Yk")
+ (match_operand:HI 1 "register_operand" "Yk"))
+ (const_int 0)))]
+ "TARGET_AVX512F && ix86_match_ccmode (insn, CCZmode)"
+ "kortestw\t{%1, %0|%0, %1}"
+ [(set_attr "mode" "HI")
+ (set_attr "type" "msklog")
+ (set_attr "prefix" "vex")])
+
+;; Sets FLAGS_REG in CCCmode from comparing (operand 0 | operand 1)
+;; against -1.  Both operands are HImode and Yk-constrained; emits
+;; kortestw.  Only matches when ix86_match_ccmode accepts CCCmode.
+(define_insn "kortestchi"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (ior:HI
+ (match_operand:HI 0 "register_operand" "Yk")
+ (match_operand:HI 1 "register_operand" "Yk"))
+ (const_int -1)))]
+ "TARGET_AVX512F && ix86_match_ccmode (insn, CCCmode)"
+ "kortestw\t{%1, %0|%0, %1}"
+ [(set_attr "mode" "HI")
+ (set_attr "type" "msklog")
+ (set_attr "prefix" "vex")])
+
+;; operand 0 = (operand 1 << 8) | zero_extend (operand 2), where operands
+;; 0 and 1 are HImode and operand 2 is QImode.  All operands are
+;; Yk-constrained; emits kunpckbw.
+(define_insn "kunpckhi"
+ [(set (match_operand:HI 0 "register_operand" "=Yk")
+ (ior:HI
+ (ashift:HI
+ (match_operand:HI 1 "register_operand" "Yk")
+ (const_int 8))
+ (zero_extend:HI (match_operand:QI 2 "register_operand" "Yk"))))]
+ "TARGET_AVX512F"
+ "kunpckbw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mode" "HI")
+ (set_attr "type" "msklog")
+ (set_attr "prefix" "vex")])
+
;; See comment for addsi_1_zext why we do use nonimmediate_operand
;; ??? Special case for immediate operand is missing - it is tricky.
(define_insn "*<code>si_2_zext"
@@ -8640,23 +8822,38 @@
"ix86_expand_unary_operator (NOT, <MODE>mode, operands); DONE;")
(define_insn "*one_cmpl<mode>2_1"
- [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm")
- (not:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "0")))]
+ [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm")
+ (not:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0")))]
"ix86_unary_operator_ok (NOT, <MODE>mode, operands)"
"not{<imodesuffix>}\t%0"
[(set_attr "type" "negnot")
(set_attr "mode" "<MODE>")])
+;; HImode one's complement.  Alternative 0 operates in place on a
+;; register or memory operand and emits not{w}.  Alternative 1 takes
+;; Yk-constrained operands, emits the two-operand knotw, and is enabled
+;; by isa "avx512f".
+(define_insn "*one_cmplhi2_1"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,!Yk")
+ (not:HI (match_operand:HI 1 "nonimmediate_operand" "0,Yk")))]
+ "ix86_unary_operator_ok (NOT, HImode, operands)"
+ "@
+ not{w}\t%0
+ knotw\t{%1, %0|%0, %1}"
+ [(set_attr "isa" "*,avx512f")
+ (set_attr "type" "negnot,msklog")
+ (set_attr "prefix" "*,vex")
+ (set_attr "mode" "HI")])
+
;; %%% Potential partial reg stall on alternative 1. What to do?
(define_insn "*one_cmplqi2_1"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r")
- (not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")))]
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,!Yk")
+ (not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,Yk")))]
"ix86_unary_operator_ok (NOT, QImode, operands)"
"@
not{b}\t%0
- not{l}\t%k0"
- [(set_attr "type" "negnot")
- (set_attr "mode" "QI,SI")])
+ not{l}\t%k0
+ knotw\t{%1, %0|%0, %1}"
+ [(set_attr "isa" "*,*,avx512f")
+ (set_attr "type" "negnot,negnot,msklog")
+ (set_attr "prefix" "*,*,vex")
+ (set_attr "mode" "QI,SI,QI")])
;; ??? Currently never generated - xor is used instead.
(define_insn "*one_cmplsi2_1_zext"
@@ -16423,11 +16620,11 @@
})
;; Avoid redundant prefixes by splitting HImode arithmetic to SImode.
-
+;; Do not split instructions with mask registers.
(define_split
- [(set (match_operand 0 "register_operand")
+ [(set (match_operand 0 "general_reg_operand")
(match_operator 3 "promotable_binary_operator"
- [(match_operand 1 "register_operand")
+ [(match_operand 1 "general_reg_operand")
(match_operand 2 "aligned_operand")]))
(clobber (reg:CC FLAGS_REG))]
"! TARGET_PARTIAL_REG_STALL && reload_completed
@@ -16522,9 +16719,10 @@
operands[1] = gen_lowpart (SImode, operands[1]);
})
+;; Do not split instructions with mask regs.
(define_split
- [(set (match_operand 0 "register_operand")
- (not (match_operand 1 "register_operand")))]
+ [(set (match_operand 0 "general_reg_operand")
+ (not (match_operand 1 "general_reg_operand")))]
"! TARGET_PARTIAL_REG_STALL && reload_completed
&& (GET_MODE (operands[0]) == HImode
|| (GET_MODE (operands[0]) == QImode