Diffstat (limited to 'gcc/config/i386/i386.md')
-rw-r--r--  gcc/config/i386/i386.md  304
1 file changed, 233 insertions, 71 deletions
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index c67ed31923e..3307b081aaa 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -312,6 +312,22 @@
(XMM13_REG 50)
(XMM14_REG 51)
(XMM15_REG 52)
+ (XMM16_REG 53)
+ (XMM17_REG 54)
+ (XMM18_REG 55)
+ (XMM19_REG 56)
+ (XMM20_REG 57)
+ (XMM21_REG 58)
+ (XMM22_REG 59)
+ (XMM23_REG 60)
+ (XMM24_REG 61)
+ (XMM25_REG 62)
+ (XMM26_REG 63)
+ (XMM27_REG 64)
+ (XMM28_REG 65)
+ (XMM29_REG 66)
+ (XMM30_REG 67)
+ (XMM31_REG 68)
])
;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
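[Note: the sixteen new constants number the upper SIMD registers zmm16-zmm31 that AVX-512 makes available in 64-bit mode. A minimal C sketch, assuming GCC with -mavx512f on x86-64; the function name is illustrative, and "xmm16" is the canonical name of register 53, whose 512-bit form is zmm16:

    #include <immintrin.h>

    /* Pin a 512-bit value into register 53 (xmm16/zmm16), one of the
       registers this hunk makes addressable; the "v" constraint used
       throughout this patch accepts xmm0-xmm31.  */
    __m512d
    use_zmm16 (__m512d a, __m512d b)
    {
      register __m512d t __asm__ ("xmm16") = _mm512_add_pd (a, b);
      __asm__ ("" : "+v" (t));	/* keep t live in the register */
      return t;
    }
]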
@@ -350,7 +366,8 @@
;; Main data type used by the insn
(define_attr "mode"
- "unknown,none,QI,HI,SI,DI,TI,OI,SF,DF,XF,TF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF"
+ "unknown,none,QI,HI,SI,DI,TI,OI,XI,SF,DF,XF,TF,V16SF,V8SF,V4DF,V4SF,
+ V2DF,V2SF,V1DF,V8DF"
(const_string "unknown"))
;; The CPU unit operations uses.
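[Note: XImode is the new 512-bit integer mode; V16SF and V8DF are its float and double vector counterparts. In C these widths are spelled with the vector_size attribute; a sketch, assuming -mavx512f:

    /* 64-byte vector types; with AVX-512F enabled each occupies a
       single zmm register (modes V16SI, V16SF and V8DF).  */
    typedef int    v16si __attribute__ ((vector_size (64)));
    typedef float  v16sf __attribute__ ((vector_size (64)));
    typedef double v8df  __attribute__ ((vector_size (64)));
]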
@@ -471,10 +488,13 @@
(const_int 0)))
;; Prefix used: original, VEX or maybe VEX.
-(define_attr "prefix" "orig,vex,maybe_vex"
- (if_then_else (eq_attr "mode" "OI,V8SF,V4DF")
- (const_string "vex")
- (const_string "orig")))
+(define_attr "prefix" "orig,vex,maybe_vex,evex,maybe_evex"
+ (cond [(eq_attr "mode" "OI,V8SF,V4DF")
+ (const_string "vex")
+ (eq_attr "mode" "XI,V16SF,V8DF")
+ (const_string "evex")
+ ]
+ (const_string "orig")))
;; VEX W bit is used.
(define_attr "prefix_vex_w" "" (const_int 0))
@@ -493,6 +513,9 @@
(symbol_ref "ix86_attr_length_vex_default (insn, false, true)")
(symbol_ref "ix86_attr_length_vex_default (insn, false, false)"))))
+;; 4-byte EVEX prefix and 1-byte opcode.
+(define_attr "length_evex" "" (const_int 5))
+
;; Set when modrm byte is used.
(define_attr "modrm" ""
(cond [(eq_attr "type" "str,leave")
@@ -544,8 +567,17 @@
(plus (const_int 2)
(plus (attr "prefix_data16")
(attr "length_address")))
+ (ior (eq_attr "prefix" "evex")
+ (and (ior (eq_attr "prefix" "maybe_evex")
+ (eq_attr "prefix" "maybe_vex"))
+ (match_test "TARGET_AVX512F")))
+ (plus (attr "length_evex")
+ (plus (attr "length_immediate")
+ (plus (attr "modrm")
+ (attr "length_address"))))
(ior (eq_attr "prefix" "vex")
- (and (eq_attr "prefix" "maybe_vex")
+ (and (ior (eq_attr "prefix" "maybe_vex")
+ (eq_attr "prefix" "maybe_evex"))
(match_test "TARGET_AVX")))
(plus (attr "length_vex")
(plus (attr "length_immediate")
@@ -663,7 +695,7 @@
;; Used to control the "enabled" attribute on a per-instruction basis.
(define_attr "isa" "base,x64,x64_sse4,x64_sse4_noavx,x64_avx,nox64,
sse2,sse2_noavx,sse3,sse4,sse4_noavx,avx,noavx,
- avx2,noavx2,bmi2,fma4,fma"
+ avx2,noavx2,bmi2,fma4,fma,avx512f,noavx512f,fma_avx512f"
(const_string "base"))
(define_attr "enabled" ""
@@ -689,6 +721,10 @@
(eq_attr "isa" "bmi2") (symbol_ref "TARGET_BMI2")
(eq_attr "isa" "fma4") (symbol_ref "TARGET_FMA4")
(eq_attr "isa" "fma") (symbol_ref "TARGET_FMA")
+ (eq_attr "isa" "avx512f") (symbol_ref "TARGET_AVX512F")
+ (eq_attr "isa" "noavx512f") (symbol_ref "!TARGET_AVX512F")
+ (eq_attr "isa" "fma_avx512f")
+ (symbol_ref "TARGET_FMA || TARGET_AVX512F")
]
(const_int 1)))
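[Note: the new isa values gate insn alternatives on AVX-512F; fma_avx512f in particular enables an alternative when either FMA or AVX-512F is on, since AVX-512F provides EVEX-encoded forms of the FMA instructions. A sketch, assuming GCC with only -mavx512f (no -mfma):

    /* Compiles to an EVEX-encoded vfmadd213ss under plain -mavx512f.  */
    float
    fmadd (float a, float b, float c)
    {
      return __builtin_fmaf (a, b, c);
    }
]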
@@ -924,10 +960,12 @@
;; SSE instruction suffix for various modes
(define_mode_attr ssemodesuffix
[(SF "ss") (DF "sd")
+ (V16SF "ps") (V8DF "pd")
(V8SF "ps") (V4DF "pd")
(V4SF "ps") (V2DF "pd")
(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")
- (V32QI "b") (V16HI "w") (V8SI "d") (V4DI "q")])
+ (V32QI "b") (V16HI "w") (V8SI "d") (V4DI "q")
+ (V64QI "b") (V16SI "d") (V8DI "q")])
;; SSE vector suffix for floating point modes
(define_mode_attr ssevecmodesuffix [(SF "ps") (DF "pd")])
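[Note: ssemodesuffix supplies the element-size suffix spliced into mnemonics, now covering the 512-bit float modes ("ps"/"pd") and the 512-bit integer element widths ("b"/"d"/"q"). For instance, assuming -mavx512f; function names are illustrative:

    #include <immintrin.h>

    /* V16SF -> "ps" gives vaddps; V8DI -> "q" gives vpaddq.  */
    __m512  addps (__m512 a, __m512 b)   { return _mm512_add_ps (a, b); }
    __m512i addq  (__m512i a, __m512i b) { return _mm512_add_epi64 (a, b); }
]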
@@ -1649,6 +1687,12 @@
;; Move instructions.
+(define_expand "movxi"
+ [(set (match_operand:XI 0 "nonimmediate_operand")
+ (match_operand:XI 1 "general_operand"))]
+ "TARGET_AVX512F"
+ "ix86_expand_move (XImode, operands); DONE;")
+
;; Reload patterns to support multi-word load/store
;; with non-offsetable address.
(define_expand "reload_noff_store"
@@ -1746,6 +1790,30 @@
(set_attr "mode" "<MODE>")
(set_attr "length_immediate" "1")])
+(define_insn "*movxi_internal_avx512f"
+ [(set (match_operand:XI 0 "nonimmediate_operand" "=x,x ,m")
+ (match_operand:XI 1 "vector_move_operand" "C ,xm,x"))]
+ "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return standard_sse_constant_opcode (insn, operands[1]);
+ case 1:
+ case 2:
+ if (misaligned_operand (operands[0], XImode)
+ || misaligned_operand (operands[1], XImode))
+ return "vmovdqu32\t{%1, %0|%0, %1}";
+ else
+ return "vmovdqa32\t{%1, %0|%0, %1}";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "sselog1,ssemov,ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
(define_insn "*movoi_internal_avx"
[(set (match_operand:OI 0 "nonimmediate_operand" "=x,x ,m")
(match_operand:OI 1 "vector_move_operand" "C ,xm,x"))]
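[Note: the new XImode move insn covers three alternatives: loading a standard SSE constant, plus register/memory moves that pick vmovdqu32 for under-aligned operands and vmovdqa32 otherwise. A sketch that exercises both move forms, assuming -mavx512f; the aligned(1) typedef deliberately under-aligns the type:

    typedef int v16si   __attribute__ ((vector_size (64)));
    typedef int v16si_u __attribute__ ((vector_size (64), aligned (1)));

    /* Aligned copy -> vmovdqa32; under-aligned copy -> vmovdqu32.  */
    void copy_a (v16si *d, const v16si *s)     { *d = *s; }
    void copy_u (v16si_u *d, const v16si_u *s) { *d = *s; }
]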
@@ -1857,9 +1925,9 @@
(define_insn "*movdi_internal"
[(set (match_operand:DI 0 "nonimmediate_operand"
- "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*x,*x,*x,m ,?r ,?r,?*Yi,?*Ym,?*Yi")
+ "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*v,*v,*v,m ,?r ,?r,?*Yi,?*Ym,?*Yi")
(match_operand:DI 1 "general_operand"
- "riFo,riF,Z,rem,i,re,C ,*y,m ,*y,*Yn,r ,C ,*x,m ,*x,*Yj,*x,r ,*Yj ,*Yn"))]
+ "riFo,riF,Z,rem,i,re,C ,*y,m ,*y,*Yn,r ,C ,*v,m ,*v,*Yj,*v,r ,*Yj ,*Yn"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
switch (get_attr_type (insn))
@@ -1896,6 +1964,8 @@
return "%vmovq\t{%1, %0|%0, %1}";
case MODE_TI:
return "%vmovdqa\t{%1, %0|%0, %1}";
+ case MODE_XI:
+ return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
case MODE_V2SF:
gcc_assert (!TARGET_AVX);
@@ -1989,7 +2059,10 @@
(cond [(eq_attr "alternative" "2")
(const_string "SI")
(eq_attr "alternative" "12,13")
- (cond [(ior (not (match_test "TARGET_SSE2"))
+ (cond [(ior (match_operand 0 "ext_sse_reg_operand")
+ (match_operand 1 "ext_sse_reg_operand"))
+ (const_string "XI")
+ (ior (not (match_test "TARGET_SSE2"))
(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
(const_string "V4SF")
(match_test "TARGET_AVX")
@@ -2018,9 +2091,9 @@
(define_insn "*movsi_internal"
[(set (match_operand:SI 0 "nonimmediate_operand"
- "=r,m ,*y,*y,?rm,?*y,*x,*x,*x,m ,?r ,?r,?*Yi")
+ "=r,m ,*y,*y,?rm,?*y,*v,*v,*v,m ,?r ,?r,?*Yi")
(match_operand:SI 1 "general_operand"
- "g ,re,C ,*y,*y ,rm ,C ,*x,m ,*x,*Yj,*x,r"))]
+ "g ,re,C ,*y,*y ,rm ,C ,*v,m ,*v,*Yj,*v,r"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
switch (get_attr_type (insn))
@@ -2038,6 +2111,8 @@
return "%vmovd\t{%1, %0|%0, %1}";
case MODE_TI:
return "%vmovdqa\t{%1, %0|%0, %1}";
+ case MODE_XI:
+ return "vmovdqa32\t{%g1, %g0|%g0, %g1}";
case MODE_V4SF:
return "%vmovaps\t{%1, %0|%0, %1}";
@@ -2116,7 +2191,10 @@
(cond [(eq_attr "alternative" "2,3")
(const_string "DI")
(eq_attr "alternative" "6,7")
- (cond [(ior (not (match_test "TARGET_SSE2"))
+ (cond [(ior (match_operand 0 "ext_sse_reg_operand")
+ (match_operand 1 "ext_sse_reg_operand"))
+ (const_string "XI")
+ (ior (not (match_test "TARGET_SSE2"))
(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
(const_string "V4SF")
(match_test "TARGET_AVX")
@@ -2255,7 +2333,7 @@
"TARGET_LP64 && ix86_check_movabs (insn, 0)"
"@
movabs{<imodesuffix>}\t{%1, %P0|[%P0], %1}
- mov{<imodesuffix>}\t{%1, %a0|%a0, %1}"
+ mov{<imodesuffix>}\t{%1, %a0|<iptrsize> PTR %a0, %1}"
[(set_attr "type" "imov")
(set_attr "modrm" "0,*")
(set_attr "length_address" "8,0")
@@ -2269,7 +2347,7 @@
"TARGET_LP64 && ix86_check_movabs (insn, 1)"
"@
movabs{<imodesuffix>}\t{%P1, %0|%0, [%P1]}
- mov{<imodesuffix>}\t{%a1, %0|%0, %a1}"
+ mov{<imodesuffix>}\t{%a1, %0|%0, <iptrsize> PTR %a1}"
[(set_attr "type" "imov")
(set_attr "modrm" "0,*")
(set_attr "length_address" "8,0")
@@ -2703,9 +2781,9 @@
;; Possible store forwarding (partial memory) stall in alternative 4.
(define_insn "*movdf_internal"
[(set (match_operand:DF 0 "nonimmediate_operand"
- "=Yf*f,m ,Yf*f,?Yd*r ,!o ,?r,?m,?r,?r,x,x,x,m,*x,*x,*x,m ,r ,Yi")
+ "=Yf*f,m ,Yf*f,?Yd*r ,!o ,?r,?m,?r,?r,v,v,v,m,*x,*x,*x,m ,r ,Yi")
(match_operand:DF 1 "general_operand"
- "Yf*fm,Yf*f,G ,Yd*roF,Yd*rF,rm,rC,C ,F ,C,x,m,x,C ,*x,m ,*x,Yj,r"))]
+ "Yf*fm,Yf*f,G ,Yd*roF,Yd*rF,rm,rC,C ,F ,C,v,m,v,C ,*x,m ,*x,Yj,r"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))
&& (!can_create_pseudo_p ()
|| (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
@@ -2750,6 +2828,8 @@
case MODE_V4SF:
return "%vmovaps\t{%1, %0|%0, %1}";
+ case MODE_V8DF:
+ return "vmovapd\t{%g1, %g0|%g0, %g1}";
case MODE_V2DF:
return "%vmovapd\t{%1, %0|%0, %1}";
@@ -2824,6 +2904,8 @@
(eq_attr "alternative" "9,13")
(cond [(not (match_test "TARGET_SSE2"))
(const_string "V4SF")
+ (match_test "TARGET_AVX512F")
+ (const_string "XI")
(match_test "TARGET_AVX")
(const_string "V2DF")
(match_test "optimize_function_for_size_p (cfun)")
@@ -2839,7 +2921,10 @@
/* movaps is one byte shorter for non-AVX targets. */
(eq_attr "alternative" "10,14")
- (cond [(ior (not (match_test "TARGET_SSE2"))
+ (cond [(ior (match_operand 0 "ext_sse_reg_operand")
+ (match_operand 1 "ext_sse_reg_operand"))
+ (const_string "V8DF")
+ (ior (not (match_test "TARGET_SSE2"))
(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
(const_string "V4SF")
(match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
@@ -2872,9 +2957,9 @@
(define_insn "*movsf_internal"
[(set (match_operand:SF 0 "nonimmediate_operand"
- "=Yf*f,m ,Yf*f,?r ,?m,x,x,x,m,?r,?Yi,!*y,!*y,!m,!r ,!*Ym")
+ "=Yf*f,m ,Yf*f,?r ,?m,v,v,v,m,?r,?Yi,!*y,!*y,!m,!r ,!*Ym")
(match_operand:SF 1 "general_operand"
- "Yf*fm,Yf*f,G ,rmF,rF,C,x,m,x,Yj,r ,*y ,m ,*y,*Yn,r"))]
+ "Yf*fm,Yf*f,G ,rmF,rF,C,v,m,v,Yj,r ,*y ,m ,*y,*Yn,r"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))
&& (!can_create_pseudo_p ()
|| (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
@@ -2907,6 +2992,8 @@
return "vmovss\t{%1, %0, %0|%0, %0, %1}";
return "%vmovss\t{%1, %0|%0, %1}";
+ case MODE_V16SF:
+ return "vmovaps\t{%g1, %g0|%g0, %g1}";
case MODE_V4SF:
return "%vmovaps\t{%1, %0|%0, %1}";
@@ -2960,6 +3047,8 @@
(eq_attr "alternative" "5")
(cond [(not (match_test "TARGET_SSE2"))
(const_string "V4SF")
+ (match_test "TARGET_AVX512F")
+ (const_string "V16SF")
(match_test "TARGET_AVX")
(const_string "V4SF")
(match_test "optimize_function_for_size_p (cfun)")
@@ -2979,10 +3068,15 @@
of instructions to load just part of the register. It is
better to maintain the whole registers in single format
to avoid problems on using packed logical operations. */
- (and (eq_attr "alternative" "6")
- (ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
- (match_test "TARGET_SSE_SPLIT_REGS")))
- (const_string "V4SF")
+ (eq_attr "alternative" "6")
+ (cond [(ior (match_operand 0 "ext_sse_reg_operand")
+ (match_operand 1 "ext_sse_reg_operand"))
+ (const_string "V16SF")
+ (ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+ (match_test "TARGET_SSE_SPLIT_REGS"))
+ (const_string "V4SF")
+ ]
+ (const_string "SF"))
]
(const_string "SF")))])
@@ -4596,10 +4690,7 @@
(clobber (match_operand:SWI48 2 "memory_operand"))]
"SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_MIX_SSE_I387
&& TARGET_INTER_UNIT_CONVERSIONS
- && reload_completed
- && (SSE_REG_P (operands[0])
- || (GET_CODE (operands[0]) == SUBREG
- && SSE_REG_P (SUBREG_REG (operands[0]))))"
+ && reload_completed && SSE_REG_P (operands[0])"
[(set (match_dup 0) (float:MODEF (match_dup 1)))])
(define_split
@@ -4608,10 +4699,7 @@
(clobber (match_operand:SWI48 2 "memory_operand"))]
"SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_MIX_SSE_I387
&& !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))
- && reload_completed
- && (SSE_REG_P (operands[0])
- || (GET_CODE (operands[0]) == SUBREG
- && SSE_REG_P (SUBREG_REG (operands[0]))))"
+ && reload_completed && SSE_REG_P (operands[0])"
[(set (match_dup 2) (match_dup 1))
(set (match_dup 0) (float:MODEF (match_dup 2)))])
@@ -4697,10 +4785,7 @@
(clobber (match_operand:SI 2 "memory_operand"))]
"TARGET_SSE2 && TARGET_SSE_MATH
&& TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)
- && reload_completed
- && (SSE_REG_P (operands[0])
- || (GET_CODE (operands[0]) == SUBREG
- && SSE_REG_P (SUBREG_REG (operands[0]))))"
+ && reload_completed && SSE_REG_P (operands[0])"
[(const_int 0)]
{
rtx op1 = operands[1];
@@ -4740,10 +4825,7 @@
(clobber (match_operand:SI 2 "memory_operand"))]
"TARGET_SSE2 && TARGET_SSE_MATH
&& TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)
- && reload_completed
- && (SSE_REG_P (operands[0])
- || (GET_CODE (operands[0]) == SUBREG
- && SSE_REG_P (SUBREG_REG (operands[0]))))"
+ && reload_completed && SSE_REG_P (operands[0])"
[(const_int 0)]
{
operands[3] = simplify_gen_subreg (<ssevecmode>mode, operands[0],
@@ -4764,10 +4846,7 @@
(float:MODEF (match_operand:SI 1 "register_operand")))]
"TARGET_SSE2 && TARGET_SSE_MATH
&& TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)
- && reload_completed
- && (SSE_REG_P (operands[0])
- || (GET_CODE (operands[0]) == SUBREG
- && SSE_REG_P (SUBREG_REG (operands[0]))))"
+ && reload_completed && SSE_REG_P (operands[0])"
[(const_int 0)]
{
rtx op1 = operands[1];
@@ -4810,10 +4889,7 @@
(float:MODEF (match_operand:SI 1 "memory_operand")))]
"TARGET_SSE2 && TARGET_SSE_MATH
&& TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)
- && reload_completed
- && (SSE_REG_P (operands[0])
- || (GET_CODE (operands[0]) == SUBREG
- && SSE_REG_P (SUBREG_REG (operands[0]))))"
+ && reload_completed && SSE_REG_P (operands[0])"
[(const_int 0)]
{
operands[3] = simplify_gen_subreg (<ssevecmode>mode, operands[0],
@@ -4872,10 +4948,7 @@
(clobber (match_operand:SWI48 2 "memory_operand"))]
"SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
&& (TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))
- && reload_completed
- && (SSE_REG_P (operands[0])
- || (GET_CODE (operands[0]) == SUBREG
- && SSE_REG_P (SUBREG_REG (operands[0]))))"
+ && reload_completed && SSE_REG_P (operands[0])"
[(set (match_dup 0) (float:MODEF (match_dup 1)))])
(define_insn "*float<SWI48:mode><MODEF:mode>2_sse_nointerunit"
@@ -4905,10 +4978,7 @@
(clobber (match_operand:SWI48 2 "memory_operand"))]
"SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
&& !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))
- && reload_completed
- && (SSE_REG_P (operands[0])
- || (GET_CODE (operands[0]) == SUBREG
- && SSE_REG_P (SUBREG_REG (operands[0]))))"
+ && reload_completed && SSE_REG_P (operands[0])"
[(set (match_dup 2) (match_dup 1))
(set (match_dup 0) (float:MODEF (match_dup 2)))])
@@ -4917,10 +4987,7 @@
(float:MODEF (match_operand:SWI48 1 "memory_operand")))
(clobber (match_operand:SWI48 2 "memory_operand"))]
"SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
- && reload_completed
- && (SSE_REG_P (operands[0])
- || (GET_CODE (operands[0]) == SUBREG
- && SSE_REG_P (SUBREG_REG (operands[0]))))"
+ && reload_completed && SSE_REG_P (operands[0])"
[(set (match_dup 0) (float:MODEF (match_dup 1)))])
(define_insn "*float<SWI48x:mode><X87MODEF:mode>2_i387_with_temp"
@@ -4968,6 +5035,46 @@
&& reload_completed"
[(set (match_dup 0) (float:X87MODEF (match_dup 1)))])
+;; Avoid partial SSE register dependency stalls
+
+(define_split
+ [(set (match_operand:MODEF 0 "register_operand")
+ (float:MODEF (match_operand:SI 1 "nonimmediate_operand")))]
+ "TARGET_SSE2 && TARGET_SSE_MATH
+ && TARGET_SSE_PARTIAL_REG_DEPENDENCY
+ && optimize_function_for_speed_p (cfun)
+ && reload_completed && SSE_REG_P (operands[0])"
+ [(set (match_dup 0)
+ (vec_merge:<ssevecmode>
+ (vec_duplicate:<ssevecmode>
+ (float:MODEF (match_dup 1)))
+ (match_dup 0)
+ (const_int 1)))]
+{
+ operands[0] = simplify_gen_subreg (<ssevecmode>mode, operands[0],
+ <MODE>mode, 0);
+ emit_move_insn (operands[0], CONST0_RTX (<ssevecmode>mode));
+})
+
+(define_split
+ [(set (match_operand:MODEF 0 "register_operand")
+ (float:MODEF (match_operand:DI 1 "nonimmediate_operand")))]
+ "TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH
+ && TARGET_SSE_PARTIAL_REG_DEPENDENCY
+ && optimize_function_for_speed_p (cfun)
+ && reload_completed && SSE_REG_P (operands[0])"
+ [(set (match_dup 0)
+ (vec_merge:<ssevecmode>
+ (vec_duplicate:<ssevecmode>
+ (float:MODEF (match_dup 1)))
+ (match_dup 0)
+ (const_int 1)))]
+{
+ operands[0] = simplify_gen_subreg (<ssevecmode>mode, operands[0],
+ <MODE>mode, 0);
+ emit_move_insn (operands[0], CONST0_RTX (<ssevecmode>mode));
+})
+
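[Note: these two splitters break the false dependency of cvtsi2ss/cvtsi2sd on the previous contents of the destination register: after reload the conversion is rewritten to zero the whole vector register and vec_merge the converted value into element 0. A sketch, assuming a tuning with TARGET_SSE_PARTIAL_REG_DEPENDENCY set:

    /* With the splitter active GCC emits
	   pxor     %xmm0, %xmm0
	   cvtsi2ss %edi, %xmm0
       so the convert no longer depends on stale bits of xmm0.  */
    float
    itof (int i)
    {
      return (float) i;
    }
]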
;; Avoid store forwarding (partial memory) stall penalty
;; by passing DImode value through XMM registers.
@@ -5024,6 +5131,18 @@
&& reload_completed"
[(set (match_dup 0) (float:X87MODEF (match_dup 1)))])
+(define_expand "floatuns<SWI12:mode><MODEF:mode>2"
+ [(set (match_operand:MODEF 0 "register_operand")
+ (unsigned_float:MODEF
+ (match_operand:SWI12 1 "nonimmediate_operand")))]
+ "!TARGET_64BIT
+ && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH"
+{
+ operands[1] = convert_to_mode (SImode, operands[1], 1);
+ emit_insn (gen_floatsi<MODEF:mode>2 (operands[0], operands[1]));
+ DONE;
+})
+
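[Note: there is no direct unsigned-int-to-float instruction here, but QImode and HImode unsigned values always fit in a signed int, so the expander zero-extends to SImode and reuses the signed conversion, which is exact. A sketch for 32-bit targets with -msse2 -mfpmath=sse:

    /* unsigned short -> float: zero-extend to int, then the signed
       int->float path; exact because [0, 65535] fits in an int.  */
    float
    ustof (unsigned short u)
    {
      return (float) u;
    }
]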
;; Avoid store forwarding (partial memory) stall penalty by extending
;; SImode value to DImode through XMM register instead of pushing two
;; SImode values to stack. Note that even !TARGET_INTER_UNIT_MOVES_TO_VEC
@@ -12278,11 +12397,33 @@
(set (attr "length")
(symbol_ref "TARGET_X32 ? 15 : 16"))])
+(define_insn "*tls_global_dynamic_64_largepic"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (call:DI
+ (mem:QI (plus:DI (match_operand:DI 2 "register_operand" "b")
+ (match_operand:DI 3 "immediate_operand" "i")))
+ (match_operand 4)))
+ (unspec:DI [(match_operand 1 "tls_symbolic_operand")]
+ UNSPEC_TLS_GD)]
+ "TARGET_64BIT && ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF
+ && GET_CODE (operands[3]) == CONST
+ && GET_CODE (XEXP (operands[3], 0)) == UNSPEC
+ && XINT (XEXP (operands[3], 0), 1) == UNSPEC_PLTOFF"
+{
+ output_asm_insn
+ ("lea{q}\t{%E1@tlsgd(%%rip), %%rdi|rdi, %E1@tlsgd[rip]}", operands);
+ output_asm_insn ("movabs{q}\t{%3, %%rax|rax, %3}", operands);
+ output_asm_insn ("add{q}\t{%2, %%rax|rax, %2}", operands);
+ return "call\t{*%%rax|rax}";
+}
+ [(set_attr "type" "multi")
+ (set_attr "length" "22")])
+
(define_expand "tls_global_dynamic_64_<mode>"
[(parallel
[(set (match_operand:P 0 "register_operand")
(call:P
- (mem:QI (match_operand 2 "constant_call_address_operand"))
+ (mem:QI (match_operand 2))
(const_int 0)))
(unspec:P [(match_operand 1 "tls_symbolic_operand")]
UNSPEC_TLS_GD)])]
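[Note: in the large PIC code model __tls_get_addr is not reachable through a 32-bit displacement, so the general-dynamic sequence (and the local-dynamic one below) materializes the @PLTOFF offset with movabs, adds the GOT base register (constraint "b", i.e. %rbx), and calls through %rax; the 22-byte length is lea (7) + movabs (10) + add (3) + indirect call (2). The expander's predicate is relaxed because the call address is no longer a constant. A sketch of source producing the sequence, assuming -fpic -mcmodel=large:

    /* Expands to (sketch):
	   lea    rdi, x@tlsgd[rip]
	   movabs rax, __tls_get_addr@pltoff
	   add    rax, rbx
	   call   rax                        */
    __thread int x;

    int
    get_x (void)
    {
      return x;
    }
]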
@@ -12340,11 +12481,32 @@
[(set_attr "type" "multi")
(set_attr "length" "12")])
+(define_insn "*tls_local_dynamic_base_64_largepic"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (call:DI
+ (mem:QI (plus:DI (match_operand:DI 1 "register_operand" "b")
+ (match_operand:DI 2 "immediate_operand" "i")))
+ (match_operand 3)))
+ (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)]
+ "TARGET_64BIT && ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF
+ && GET_CODE (operands[2]) == CONST
+ && GET_CODE (XEXP (operands[2], 0)) == UNSPEC
+ && XINT (XEXP (operands[2], 0), 1) == UNSPEC_PLTOFF"
+{
+ output_asm_insn
+ ("lea{q}\t{%&@tlsld(%%rip), %%rdi|rdi, %&@tlsld[rip]}", operands);
+ output_asm_insn ("movabs{q}\t{%2, %%rax|rax, %2}", operands);
+ output_asm_insn ("add{q}\t{%1, %%rax|rax, %1}", operands);
+ return "call\t{*%%rax|rax}";
+}
+ [(set_attr "type" "multi")
+ (set_attr "length" "22")])
+
(define_expand "tls_local_dynamic_base_64_<mode>"
[(parallel
[(set (match_operand:P 0 "register_operand")
(call:P
- (mem:QI (match_operand 1 "constant_call_address_operand"))
+ (mem:QI (match_operand 1))
(const_int 0)))
(unspec:P [(const_int 0)] UNSPEC_TLS_LD_BASE)])]
"TARGET_64BIT")
@@ -12629,10 +12791,10 @@
(set_attr "mode" "<MODE>")])
(define_insn "*fop_<mode>_comm_sse"
- [(set (match_operand:MODEF 0 "register_operand" "=x,x")
+ [(set (match_operand:MODEF 0 "register_operand" "=x,v")
(match_operator:MODEF 3 "binary_fp_operator"
- [(match_operand:MODEF 1 "nonimmediate_operand" "%0,x")
- (match_operand:MODEF 2 "nonimmediate_operand" "xm,xm")]))]
+ [(match_operand:MODEF 1 "nonimmediate_operand" "%0,v")
+ (match_operand:MODEF 2 "nonimmediate_operand" "xm,vm")]))]
"SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
&& COMMUTATIVE_ARITH_P (operands[3])
&& !(MEM_P (operands[1]) && MEM_P (operands[2]))"
@@ -14884,7 +15046,7 @@
[(use (match_operand:SI 0 "register_operand"))
(use (match_operand:XF 1 "register_operand"))]
"TARGET_USE_FANCY_MATH_387
- && TARGET_C99_FUNCTIONS"
+ && ix86_libc_has_function (function_c99_misc)"
{
rtx mask = GEN_INT (0x45);
rtx val = GEN_INT (0x05);
@@ -14910,7 +15072,7 @@
[(use (match_operand:SI 0 "register_operand"))
(use (match_operand:MODEF 1 "nonimmediate_operand"))]
"TARGET_USE_FANCY_MATH_387
- && TARGET_C99_FUNCTIONS
+ && ix86_libc_has_function (function_c99_misc)
&& !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
{
rtx mask = GEN_INT (0x45);
@@ -15977,10 +16139,10 @@
;; are undefined in this condition, we're certain this is correct.
(define_insn "<code><mode>3"
- [(set (match_operand:MODEF 0 "register_operand" "=x,x")
+ [(set (match_operand:MODEF 0 "register_operand" "=x,v")
(smaxmin:MODEF
- (match_operand:MODEF 1 "nonimmediate_operand" "%0,x")
- (match_operand:MODEF 2 "nonimmediate_operand" "xm,xm")))]
+ (match_operand:MODEF 1 "nonimmediate_operand" "%0,v")
+ (match_operand:MODEF 2 "nonimmediate_operand" "xm,vm")))]
"SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
"@
<maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
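[Note: maxss/maxsd return their second source operand when the comparison is unordered or both operands are zero, which matches the operand order of this pattern, so the C selection below can compile to maxss even without -ffast-math. A sketch:

    /* a > b ? a : b has the maxss semantics for NaN and signed-zero
       inputs, so GCC can emit maxss directly.  */
    float
    fmax_sse (float a, float b)
    {
      return a > b ? a : b;
    }
]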