summaryrefslogtreecommitdiff
path: root/gcc/config/i386
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config/i386')
-rw-r--r--gcc/config/i386/athlon.md803
-rw-r--r--gcc/config/i386/att.h7
-rw-r--r--gcc/config/i386/bsd.h6
-rw-r--r--gcc/config/i386/freebsd-aout.h3
-rw-r--r--gcc/config/i386/i386-coff.h7
-rw-r--r--gcc/config/i386/i386-interix.h29
-rw-r--r--gcc/config/i386/i386-protos.h6
-rw-r--r--gcc/config/i386/i386.c709
-rw-r--r--gcc/config/i386/i386.h87
-rw-r--r--gcc/config/i386/i386.md1422
-rw-r--r--gcc/config/i386/k6.md2
-rw-r--r--gcc/config/i386/lynx-ng.h7
-rw-r--r--gcc/config/i386/lynx.h7
-rw-r--r--gcc/config/i386/pentium.md2
-rw-r--r--gcc/config/i386/ppro.md4
-rw-r--r--gcc/config/i386/sco5.h6
-rw-r--r--gcc/config/i386/t-cygwin4
-rw-r--r--gcc/config/i386/t-interix5
-rw-r--r--gcc/config/i386/vxi386.h66
-rw-r--r--gcc/config/i386/winnt.c2
-rw-r--r--gcc/config/i386/xm-i386-interix.h32
21 files changed, 2287 insertions, 929 deletions
diff --git a/gcc/config/i386/athlon.md b/gcc/config/i386/athlon.md
index 548f2adf422..719046fcd61 100644
--- a/gcc/config/i386/athlon.md
+++ b/gcc/config/i386/athlon.md
@@ -1,34 +1,5 @@
;; AMD Athlon Scheduling
-;; Copyright (C) 2002 Free Software Foundation, Inc.
;;
-;; This file is part of GNU CC.
-;;
-;; GNU CC is free software; you can redistribute it and/or modify
-;; it under the terms of the GNU General Public License as published by
-;; the Free Software Foundation; either version 2, or (at your option)
-;; any later version.
-;;
-;; GNU CC is distributed in the hope that it will be useful,
-;; but WITHOUT ANY WARRANTY; without even the implied warranty of
-;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-;; GNU General Public License for more details.
-;;
-;; You should have received a copy of the GNU General Public License
-;; along with GNU CC; see the file COPYING. If not, write to
-;; the Free Software Foundation, 59 Temple Place - Suite 330,
-;; Boston, MA 02111-1307, USA. */
-(define_attr "athlon_decode" "direct,vector"
- (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,cld,fcmov")
- (const_string "vector")
- (and (eq_attr "type" "push")
- (match_operand 1 "memory_operand" ""))
- (const_string "vector")
- (and (eq_attr "type" "fmov")
- (and (eq_attr "memory" "load,store")
- (eq_attr "mode" "XF")))
- (const_string "vector")]
- (const_string "direct")))
-
;; The Athlon does contain three pipelined FP units, three integer units and
;; three address generation units.
;;
@@ -46,161 +17,649 @@
;; The load/store queue unit is not attached to the schedulers but
;; communicates with all the execution units separately instead.
-(define_function_unit "athlon_vectordec" 1 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "athlon_decode" "vector"))
- 1 1)
-
-(define_function_unit "athlon_directdec" 3 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "athlon_decode" "direct"))
- 1 1)
-
-(define_function_unit "athlon_vectordec" 1 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "athlon_decode" "direct"))
- 1 1 [(eq_attr "athlon_decode" "vector")])
-
-(define_function_unit "athlon_ieu" 3 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "type" "alu1,negnot,alu,icmp,test,imov,imovx,lea,incdec,ishift,ishift1,rotate,rotate1,ibr,call,callv,icmov,cld,pop,setcc,push,pop"))
- 1 1)
-
-(define_function_unit "athlon_ieu" 3 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "type" "str"))
- 15 15)
-
-(define_function_unit "athlon_ieu" 3 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "type" "imul"))
- 5 0)
-
-(define_function_unit "athlon_ieu" 3 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "type" "idiv"))
- 42 0)
-
-(define_function_unit "athlon_muldiv" 1 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "type" "imul"))
- 5 0)
-
-(define_function_unit "athlon_muldiv" 1 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "type" "idiv"))
- 42 42)
-
-(define_attr "athlon_fpunits" "none,store,mul,add,muladd,any"
- (cond [(eq_attr "type" "fop,fcmp,fistp")
- (const_string "add")
- (eq_attr "type" "fmul,fdiv,fpspc,fsgn,fcmov")
- (const_string "mul")
- (and (eq_attr "type" "fmov") (eq_attr "memory" "store,both"))
- (const_string "store")
- (and (eq_attr "type" "fmov") (eq_attr "memory" "load"))
- (const_string "any")
+(define_attr "athlon_decode" "direct,vector"
+ (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,cld,leave")
+ (const_string "vector")
+ (and (eq_attr "type" "push")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "vector")
(and (eq_attr "type" "fmov")
- (ior (match_operand:SI 1 "register_operand" "")
- (match_operand 1 "immediate_operand" "")))
- (const_string "store")
- (eq_attr "type" "fmov")
- (const_string "muladd")]
- (const_string "none")))
-
-;; We use latencies 1 for definitions. This is OK to model colisions
-;; in execution units. The real latencies are modeled in the "fp" pipeline.
-
-;; fsin, fcos: 96-192
-;; fsincos: 107-211
-;; fsqrt: 19 for SFmode, 27 for DFmode, 35 for XFmode.
-(define_function_unit "athlon_fp" 3 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "type" "fpspc"))
- 100 1)
-
-;; 16 cycles for SFmode, 20 for DFmode and 24 for XFmode.
-(define_function_unit "athlon_fp" 3 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "type" "fdiv"))
- 24 1)
-
-(define_function_unit "athlon_fp" 3 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "type" "fop,fmul,fistp"))
- 4 1)
-
-;; XFmode loads are slow.
-;; XFmode store is slow too (8 cycles), but we don't need to model it, because
-;; there are no dependent instructions.
+ (and (eq_attr "memory" "load,store")
+ (eq_attr "mode" "XF")))
+ (const_string "vector")]
+ (const_string "direct")))
-(define_function_unit "athlon_fp" 3 0
- (and (eq_attr "cpu" "athlon")
- (and (eq_attr "type" "fmov")
- (and (eq_attr "memory" "load")
- (eq_attr "mode" "XF"))))
- 10 1)
+;;
+;; decode0 decode1 decode2
+;; \ | /
+;; instruction control unit (72 entry scheduler)
+;; | |
+;; integer scheduler (18) stack map
+;; / | | | | \ stack rename
+;; ieu0 agu0 ieu1 agu1 ieu2 agu2 scheduler
+;; | agu0 | agu1 agu2 register file
+;; | \ | | / | | |
+;; \ /\ | / fadd fmul fstore
+;; \ / \ | / fadd fmul fstore
+;; imul load/store (2x) fadd fmul fstore
-(define_function_unit "athlon_fp" 3 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "type" "fmov,fsgn"))
- 2 1)
+(define_automaton "athlon,athlon_load,athlon_mult,athlon_fp")
+(define_cpu_unit "athlon-decode0" "athlon")
+(define_cpu_unit "athlon-decode1" "athlon")
+(define_cpu_unit "athlon-decode2" "athlon")
+(define_cpu_unit "athlon-decodev" "athlon")
+;; Model the fact that double decoded instruction may take 2 cycles
+;; to decode when decoder2 and decoder0 in next cycle
+;; is used (this is needed to allow troughput of 1.5 double decoded
+;; instructions per cycle).
+;;
+;; In order to avoid dependnece between reservation of decoder
+;; and other units, we model decoder as two stage fully pipelined unit
+;; and only double decoded instruction may occupy unit in the first cycle.
+;; With this scheme however two double instructions can be issued cycle0.
+;;
+;; Avoid this by using presence set requiring decoder0 to be allocated
+;; too. Vector decoded instructions then can't be issued when
+;; modeled as consuming decoder0+decoder1+decoder2.
+;; We solve that by specialized vector decoder unit and exclusion set.
+(presence_set "athlon-decode2" "athlon-decode0")
+(exclusion_set "athlon-decodev" "athlon-decode0,athlon-decode1,athlon-decode2")
+(define_reservation "athlon-vector" "nothing,athlon-decodev")
+(define_reservation "athlon-direct0" "nothing,athlon-decode0")
+(define_reservation "athlon-direct" "nothing,
+ (athlon-decode0 | athlon-decode1
+ | athlon-decode2)")
+;; Double instructions behaves like two direct instructions.
+(define_reservation "athlon-double" "((athlon-decode2, athlon-decode0)
+ | (nothing,(athlon-decode0 + athlon-decode1))
+ | (nothing,(athlon-decode1 + athlon-decode2)))")
-;; fcmp and ftst instructions
-(define_function_unit "athlon_fp" 3 0
- (and (eq_attr "cpu" "athlon")
- (and (eq_attr "type" "fcmp")
- (eq_attr "athlon_decode" "direct")))
- 3 1)
+;; Agu and ieu unit results in extremly large automatons and
+;; in our approximation they are hardly filled in. Only ieu
+;; unit can, as issue rate is 3 and agu unit is always used
+;; first in the insn reservations. Skip the models.
-;; fcmpi instructions.
-(define_function_unit "athlon_fp" 3 0
- (and (eq_attr "cpu" "athlon")
- (and (eq_attr "type" "fcmp")
- (eq_attr "athlon_decode" "vector")))
- 3 1)
+;(define_cpu_unit "athlon-ieu0" "athlon_ieu")
+;(define_cpu_unit "athlon-ieu1" "athlon_ieu")
+;(define_cpu_unit "athlon-ieu2" "athlon_ieu")
+;(define_reservation "athlon-ieu" "(athlon-ieu0 | athlon-ieu1 | athlon-ieu2)")
+(define_reservation "athlon-ieu" "nothing")
+(define_cpu_unit "athlon-ieu0" "athlon")
+;(define_cpu_unit "athlon-agu0" "athlon_agu")
+;(define_cpu_unit "athlon-agu1" "athlon_agu")
+;(define_cpu_unit "athlon-agu2" "athlon_agu")
+;(define_reservation "athlon-agu" "(athlon-agu0 | athlon-agu1 | athlon-agu2)")
+(define_reservation "athlon-agu" "nothing,nothing")
-(define_function_unit "athlon_fp" 3 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "type" "fcmov"))
- 7 1)
+(define_cpu_unit "athlon-mult" "athlon_mult")
-(define_function_unit "athlon_fp_mul" 1 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "athlon_fpunits" "mul"))
- 1 1)
+(define_cpu_unit "athlon-load0" "athlon_load")
+(define_cpu_unit "athlon-load1" "athlon_load")
+(define_reservation "athlon-load" "athlon-agu,
+ (athlon-load0 | athlon-load1)")
+(define_reservation "athlon-store" "nothing")
-(define_function_unit "athlon_fp_add" 1 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "athlon_fpunits" "add"))
- 1 1)
+;; The three fp units are fully pipelined with latency of 3
+(define_cpu_unit "athlon-fadd" "athlon_fp")
+(define_cpu_unit "athlon-fmul" "athlon_fp")
+(define_cpu_unit "athlon-fstore" "athlon_fp")
+(define_reservation "athlon-fany" "(athlon-fadd | athlon-fmul | athlon-fstore)")
+(define_reservation "athlon-faddmul" "(athlon-fadd | athlon-fmul)")
-(define_function_unit "athlon_fp_muladd" 2 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "athlon_fpunits" "muladd,mul,add"))
- 1 1)
-(define_function_unit "athlon_fp_store" 1 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "athlon_fpunits" "store"))
- 1 1)
+;; Jump instructions are executed in the branch unit compltetely transparent to us
+(define_insn_reservation "athlon_branch" 0
+ (and (eq_attr "cpu" "athlon,k8")
+ (eq_attr "type" "ibr"))
+ "athlon-direct")
+(define_insn_reservation "athlon_call" 0
+ (and (eq_attr "cpu" "athlon,k8")
+ (eq_attr "type" "call,callv"))
+ "athlon-vector")
-;; We don't need to model the Address Generation Unit, since we don't model
-;; the re-order buffer yet and thus we never schedule more than three operations
-;; at time. Later we may want to experiment with MD_SCHED macros modeling the
-;; decoders independently on the functional units.
+;; Latency of push operation is 3 cycles, but ESP value is available
+;; earlier
+(define_insn_reservation "athlon_push" 2
+ (and (eq_attr "cpu" "athlon,k8")
+ (eq_attr "type" "push"))
+ "athlon-direct,nothing,athlon-store")
+(define_insn_reservation "athlon_pop" 4
+ (and (eq_attr "cpu" "athlon,k8")
+ (eq_attr "type" "pop"))
+ "athlon-vector,athlon-ieu,athlon-load")
+(define_insn_reservation "athlon_pop_k8" 3
+ (and (eq_attr "cpu" "k8")
+ (eq_attr "type" "pop"))
+ "athlon-double,athlon-ieu,athlon-load")
+(define_insn_reservation "athlon_leave" 3
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "leave"))
+ "athlon-vector,athlon-load")
+(define_insn_reservation "athlon_leave_k8" 3
+ (and (eq_attr "cpu" "k8")
+ (eq_attr "type" "leave"))
+ "athlon-double,athlon-load")
-;(define_function_unit "athlon_agu" 3 0
-; (and (eq_attr "cpu" "athlon")
-; (and (eq_attr "memory" "!none")
-; (eq_attr "athlon_fpunits" "none")))
-; 1 1)
+;; Lea executes in AGU unit with 2 cycles latency.
+(define_insn_reservation "athlon_lea" 2
+ (and (eq_attr "cpu" "athlon,k8")
+ (eq_attr "type" "lea"))
+ "athlon-direct,athlon-agu")
-;; Model load unit to avoid too long sequences of loads. We don't need to
-;; model store queue, since it is hardly going to be bottleneck.
+;; Mul executes in special multiplier unit attached to IEU0
+(define_insn_reservation "athlon_imul" 5
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "imul")
+ (eq_attr "memory" "none,unknown")))
+ "athlon-vector,athlon-ieu0,athlon-mult,nothing,nothing,athlon-ieu0")
+;; ??? Widening multiply is vector or double.
+(define_insn_reservation "athlon_imul_k8_DI" 4
+ (and (eq_attr "cpu" "k8")
+ (and (eq_attr "type" "imul")
+ (and (eq_attr "mode" "DI")
+ (eq_attr "memory" "none,unknown"))))
+ "athlon-direct0,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")
+(define_insn_reservation "athlon_imul_k8" 3
+ (and (eq_attr "cpu" "k8")
+ (and (eq_attr "type" "imul")
+ (eq_attr "memory" "none,unknown")))
+ "athlon-direct0,athlon-ieu0,athlon-mult,athlon-ieu0")
+(define_insn_reservation "athlon_imul_mem" 8
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "imul")
+ (eq_attr "memory" "load,both")))
+ "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,nothing,athlon-ieu")
+(define_insn_reservation "athlon_imul_mem_k8_DI" 7
+ (and (eq_attr "cpu" "k8")
+ (and (eq_attr "type" "imul")
+ (and (eq_attr "mode" "DI")
+ (eq_attr "memory" "load,both"))))
+ "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,athlon-ieu")
+(define_insn_reservation "athlon_imul_mem_k8" 6
+ (and (eq_attr "cpu" "k8")
+ (and (eq_attr "type" "imul")
+ (eq_attr "memory" "load,both")))
+ "athlon-vector,athlon-load,athlon-ieu,athlon-mult,athlon-ieu")
+(define_insn_reservation "athlon_idiv" 42
+ (and (eq_attr "cpu" "athlon,k8")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "memory" "none,unknown")))
+ "athlon-vector,athlon-ieu*42")
+(define_insn_reservation "athlon_idiv_mem" 45
+ (and (eq_attr "cpu" "athlon,k8")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "memory" "load,both")))
+ "athlon-vector,athlon-load,athlon-ieu*42")
+(define_insn_reservation "athlon_str" 15
+ (and (eq_attr "cpu" "athlon,k8")
+ (and (eq_attr "type" "str")
+ (eq_attr "memory" "load,both,store")))
+ "athlon-vector,athlon-load,athlon-ieu*10")
-(define_function_unit "athlon_load" 2 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "memory" "load,both"))
- 1 1)
+(define_insn_reservation "athlon_idirect" 1
+ (and (eq_attr "cpu" "athlon,k8")
+ (and (eq_attr "athlon_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "none,unknown"))))
+ "athlon-direct,athlon-ieu")
+(define_insn_reservation "athlon_ivector" 2
+ (and (eq_attr "cpu" "athlon,k8")
+ (and (eq_attr "athlon_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "none,unknown"))))
+ "athlon-vector,athlon-ieu,athlon-ieu")
+(define_insn_reservation "athlon_idirect_loadmov" 3
+ (and (eq_attr "cpu" "athlon,k8")
+ (and (eq_attr "type" "imov")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-load")
+(define_insn_reservation "athlon_idirect_load" 4
+ (and (eq_attr "cpu" "athlon,k8")
+ (and (eq_attr "athlon_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-load,athlon-ieu")
+(define_insn_reservation "athlon_ivector_load" 6
+ (and (eq_attr "cpu" "athlon,k8")
+ (and (eq_attr "athlon_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "load"))))
+ "athlon-vector,athlon-load,athlon-ieu,athlon-ieu")
+(define_insn_reservation "athlon_idirect_movstore" 1
+ (and (eq_attr "cpu" "athlon,k8")
+ (and (eq_attr "type" "imov")
+ (eq_attr "memory" "store")))
+ "athlon-direct,athlon-agu,athlon-store")
+(define_insn_reservation "athlon_idirect_both" 4
+ (and (eq_attr "cpu" "athlon,k8")
+ (and (eq_attr "athlon_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "both"))))
+ "athlon-direct,athlon-load,athlon-ieu,
+ athlon-store")
+(define_insn_reservation "athlon_ivector_both" 6
+ (and (eq_attr "cpu" "athlon,k8")
+ (and (eq_attr "athlon_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "both"))))
+ "athlon-vector,athlon-load,athlon-ieu,athlon-ieu,
+ athlon-store")
+(define_insn_reservation "athlon_idirect_store" 1
+ (and (eq_attr "cpu" "athlon,k8")
+ (and (eq_attr "athlon_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "store"))))
+ "athlon-direct,athlon-ieu,
+ athlon-store")
+(define_insn_reservation "athlon_ivector_store" 2
+ (and (eq_attr "cpu" "athlon,k8")
+ (and (eq_attr "athlon_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "store"))))
+ "athlon-vector,athlon-ieu,athlon-ieu,
+ athlon-store")
+;; Athlon floatin point unit
+(define_insn_reservation "athlon_fldxf" 12
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fmov")
+ (and (eq_attr "memory" "load")
+ (eq_attr "mode" "XF"))))
+ "athlon-vector,athlon-fany")
+(define_insn_reservation "athlon_fldxf_k8" 13
+ (and (eq_attr "cpu" "k8")
+ (and (eq_attr "type" "fmov")
+ (and (eq_attr "memory" "load")
+ (eq_attr "mode" "XF"))))
+ "athlon-vector,athlon-fany")
+(define_insn_reservation "athlon_fld" 6
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fmov")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fany,nothing,athlon-load")
+(define_insn_reservation "athlon_fld_k8" 4
+ (and (eq_attr "cpu" "k8")
+ (and (eq_attr "type" "fmov")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fany,athlon-load")
+(define_insn_reservation "athlon_fstxf" 10
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fmov")
+ (and (eq_attr "memory" "store,both")
+ (eq_attr "mode" "XF"))))
+ "athlon-vector,athlon-fstore")
+(define_insn_reservation "athlon_fstxf_k8" 8
+ (and (eq_attr "cpu" "k8")
+ (and (eq_attr "type" "fmov")
+ (and (eq_attr "memory" "store,both")
+ (eq_attr "mode" "XF"))))
+ "athlon-vector,athlon-fstore")
+(define_insn_reservation "athlon_fst" 4
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fmov")
+ (eq_attr "memory" "store,both")))
+ "athlon-direct,athlon-fstore,nothing,athlon-store")
+(define_insn_reservation "athlon_fst_k8" 2
+ (and (eq_attr "cpu" "k8")
+ (and (eq_attr "type" "fmov")
+ (eq_attr "memory" "store,both")))
+ "athlon-direct,athlon-fstore,athlon-store")
+(define_insn_reservation "athlon_fist" 4
+ (and (eq_attr "cpu" "athlon,k8")
+ (eq_attr "type" "fistp"))
+ "athlon-direct,athlon-fstore,nothing")
+(define_insn_reservation "athlon_fmov" 2
+ (and (eq_attr "cpu" "athlon,k8")
+ (eq_attr "type" "fmov"))
+ "athlon-direct,athlon-faddmul")
+(define_insn_reservation "athlon_fadd_load" 7
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fop")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-load,athlon-fadd")
+(define_insn_reservation "athlon_fadd_load_k8" 6
+ (and (eq_attr "cpu" "k8")
+ (and (eq_attr "type" "fop")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-load,athlon-fadd")
+(define_insn_reservation "athlon_fadd" 4
+ (and (eq_attr "cpu" "athlon,k8")
+ (eq_attr "type" "fop"))
+ "athlon-direct,athlon-fadd")
+(define_insn_reservation "athlon_fmul_load" 7
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fmul")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-load,athlon-fmul")
+(define_insn_reservation "athlon_fmul_load_k8" 6
+ (and (eq_attr "cpu" "k8")
+ (and (eq_attr "type" "fmul")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-load,athlon-fmul")
+(define_insn_reservation "athlon_fmul" 4
+ (and (eq_attr "cpu" "athlon,k8")
+ (eq_attr "type" "fmul"))
+ "athlon-direct,athlon-fmul")
+(define_insn_reservation "athlon_fsgn" 2
+ (and (eq_attr "cpu" "athlon,k8")
+ (eq_attr "type" "fsgn"))
+ "athlon-direct,athlon-fmul")
+(define_insn_reservation "athlon_fdiv_load" 24
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fdiv")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-load,athlon-fmul")
+(define_insn_reservation "athlon_fdiv_load_k8" 13
+ (and (eq_attr "cpu" "k8")
+ (and (eq_attr "type" "fdiv")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-load,athlon-fmul")
+(define_insn_reservation "athlon_fdiv" 24
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "fdiv"))
+ "athlon-direct,athlon-fmul")
+(define_insn_reservation "athlon_fdiv_k8" 11
+ (and (eq_attr "cpu" "k8")
+ (eq_attr "type" "fdiv"))
+ "athlon-direct,athlon-fmul")
+(define_insn_reservation "athlon_fpspc_load" 103
+ (and (eq_attr "cpu" "athlon,k8")
+ (and (eq_attr "type" "fpspc")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-load,athlon-fmul")
+(define_insn_reservation "athlon_fpspc" 100
+ (and (eq_attr "cpu" "athlon,k8")
+ (eq_attr "type" "fpspc"))
+ "athlon-vector,athlon-fmul")
+(define_insn_reservation "athlon_fcmov_load" 10
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fcmov")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-load,athlon-fmul")
+(define_insn_reservation "athlon_fcmov" 7
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "fcmov"))
+ "athlon-vector,athlon-fmul")
+(define_insn_reservation "athlon_fcmov_load_k8" 17
+ (and (eq_attr "cpu" "k8")
+ (and (eq_attr "type" "fcmov")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-load,athlon-fmul")
+(define_insn_reservation "athlon_fcmov_k8" 15
+ (and (eq_attr "cpu" "k8")
+ (eq_attr "type" "fcmov"))
+ "athlon-vector,athlon-fmul")
+(define_insn_reservation "athlon_fcomi_load" 6
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fcmp")
+ (and (eq_attr "athlon_decode" "vector")
+ (eq_attr "memory" "load"))))
+ "athlon-vector,athlon-load,athlon-fadd")
+(define_insn_reservation "athlon_fcomi" 3
+ (and (eq_attr "cpu" "athlon,k8")
+ (and (eq_attr "athlon_decode" "vector")
+ (eq_attr "type" "fcmp")))
+ "athlon-vector,athlon-fadd")
+(define_insn_reservation "athlon_fcom_load" 5
+ (and (eq_attr "cpu" "athlon,k8")
+ (and (eq_attr "type" "fcmp")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-load,athlon-fadd")
+(define_insn_reservation "athlon_fcom" 2
+ (and (eq_attr "cpu" "athlon,k8")
+ (eq_attr "type" "fcmp"))
+ "athlon-direct,athlon-fadd")
+(define_insn_reservation "athlon_fxch" 2
+ (and (eq_attr "cpu" "athlon,k8")
+ (eq_attr "type" "fxch"))
+ "athlon-direct,athlon-fany")
+;; Athlon handle MMX operations in the FPU unit with shorter latencies
+(define_insn_reservation "athlon_movlpd_load" 4
+ (and (eq_attr "cpu" "athlon,k8")
+ (and (eq_attr "type" "ssemov")
+ (match_operand:DF 1 "memory_operand" "")))
+ "athlon-direct,athlon-load")
+(define_insn_reservation "athlon_movaps_load" 4
+ (and (eq_attr "cpu" "athlon,k8")
+ (and (eq_attr "type" "ssemov")
+ (and (eq_attr "mode" "V4SF,V2DF,TI")
+ (eq_attr "memory" "load"))))
+ "athlon-double,athlon-load")
+(define_insn_reservation "athlon_movss_load" 3
+ (and (eq_attr "cpu" "athlon,k8")
+ (and (eq_attr "type" "ssemov")
+ (and (eq_attr "mode" "SF,DI")
+ (eq_attr "memory" "load"))))
+ "athlon-double,athlon-load")
+(define_insn_reservation "athlon_mmxsseld" 4
+ (and (eq_attr "cpu" "athlon,k8")
+ (and (eq_attr "type" "mmxmov,ssemov")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fany,athlon-load")
+(define_insn_reservation "athlon_mmxssest" 3
+ (and (eq_attr "cpu" "k8")
+ (and (eq_attr "type" "mmxmov,ssemov")
+ (and (eq_attr "mode" "V4SF,V2DF,TI")
+ (eq_attr "memory" "store,both"))))
+ "athlon-double,athlon-store")
+(define_insn_reservation "athlon_mmxssest_k8" 2
+ (and (eq_attr "cpu" "athlon,k8")
+ (and (eq_attr "type" "mmxmov,ssemov")
+ (eq_attr "memory" "store,both")))
+ "athlon-direct,athlon-store")
+(define_insn_reservation "athlon_movaps" 2
+ (and (eq_attr "cpu" "k8")
+ (and (eq_attr "type" "ssemov")
+ (eq_attr "mode" "V4SF,V2DF")))
+ "athlon-double,athlon-faddmul,athlon-faddmul")
+(define_insn_reservation "athlon_mmxssemov" 2
+ (and (eq_attr "cpu" "athlon,k8")
+ (eq_attr "type" "mmxmov,ssemov"))
+ "athlon-direct,athlon-faddmul")
+(define_insn_reservation "athlon_mmxmul_load" 6
+ (and (eq_attr "cpu" "athlon,k8")
+ (and (eq_attr "type" "mmxmul")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-load,athlon-fmul")
+(define_insn_reservation "athlon_mmxmul" 3
+ (and (eq_attr "cpu" "athlon,k8")
+ (eq_attr "type" "mmxmul"))
+ "athlon-direct,athlon-fmul")
+(define_insn_reservation "athlon_mmx_load" 5
+ (and (eq_attr "cpu" "athlon,k8")
+ (and (eq_attr "unit" "mmx")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-load,athlon-faddmul")
+(define_insn_reservation "athlon_mmx" 2
+ (and (eq_attr "cpu" "athlon,k8")
+ (eq_attr "unit" "mmx"))
+ "athlon-direct,athlon-faddmul")
+;; SSE operations are handled by the i387 unit as well. The latnecy
+;; is same as for i387 operations for scalar operations
+(define_insn_reservation "athlon_sselog_load" 6
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "sselog")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-load,athlon-fmul")
+(define_insn_reservation "athlon_sselog_load_k8" 5
+ (and (eq_attr "cpu" "k8")
+ (and (eq_attr "type" "sselog")
+ (eq_attr "memory" "load")))
+ "athlon-double,athlon-load,athlon-fmul")
+(define_insn_reservation "athlon_sselog" 3
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "sselog"))
+ "athlon-vector,athlon-fmul")
+(define_insn_reservation "athlon_sselog_k8" 3
+ (and (eq_attr "cpu" "k8")
+ (eq_attr "type" "sselog"))
+ "athlon-double,athlon-fmul")
+(define_insn_reservation "athlon_ssecmp_load" 5
+ (and (eq_attr "cpu" "athlon,k8")
+ (and (eq_attr "type" "ssecmp")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load"))))
+ "athlon-vector,athlon-load,athlon-fadd")
+(define_insn_reservation "athlon_ssecmp" 2
+ (and (eq_attr "cpu" "athlon,k8")
+ (and (eq_attr "type" "ssecmp")
+ (eq_attr "mode" "SF,DF")))
+ "athlon-direct,athlon-fadd")
+(define_insn_reservation "athlon_ssecmpvector_load" 6
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssecmp")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-fadd")
+(define_insn_reservation "athlon_ssecmpvector_load_k8" 5
+ (and (eq_attr "cpu" "k8")
+ (and (eq_attr "type" "ssecmp")
+ (eq_attr "memory" "load")))
+ "athlon-double,athlon-fadd")
+(define_insn_reservation "athlon_ssecmpvector" 3
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "ssecmp"))
+ "athlon-vector,athlon-fadd")
+(define_insn_reservation "athlon_ssecmpvector_k8" 3
+ (and (eq_attr "cpu" "k8")
+ (eq_attr "type" "ssecmp"))
+ "athlon-double,athlon-fadd")
+(define_insn_reservation "athlon_sseadd_load" 7
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "sseadd")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-load,athlon-fadd")
+(define_insn_reservation "athlon_sseadd_load_k8" 6
+ (and (eq_attr "cpu" "k8")
+ (and (eq_attr "type" "sseadd")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-load,athlon-fadd")
+(define_insn_reservation "athlon_sseadd" 4
+ (and (eq_attr "cpu" "athlon,k8")
+ (and (eq_attr "type" "sseadd")
+ (eq_attr "mode" "SF,DF")))
+ "athlon-direct,athlon-fadd")
+(define_insn_reservation "athlon_sseaddvector_load" 8
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "sseadd")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-load,athlon-fadd")
+(define_insn_reservation "athlon_sseaddvector_load_k8" 7
+ (and (eq_attr "cpu" "k8")
+ (and (eq_attr "type" "sseadd")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-load,athlon-fadd")
+(define_insn_reservation "athlon_sseaddvector" 5
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "sseadd"))
+ "athlon-vector,athlon-fadd")
+(define_insn_reservation "athlon_sseaddvector_k8" 4
+ (and (eq_attr "cpu" "k8")
+ (eq_attr "type" "sseadd"))
+ "athlon-vector,athlon-fadd")
+(define_insn_reservation "athlon_ssecvt_load" 5
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-load,athlon-fadd")
+(define_insn_reservation "athlon_ssecvt_load_k8" 4
+ (and (eq_attr "cpu" "k8")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-load,athlon-fadd")
+(define_insn_reservation "athlon_ssecvt" 2
+ (and (eq_attr "cpu" "athlon,k8")
+ (and (eq_attr "type" "ssecvt")
+ (eq_attr "mode" "SF,DF")))
+ "athlon-direct,athlon-fadd")
+(define_insn_reservation "athlon_ssecvtvector_load" 6
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssecvt")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-load,athlon-fadd")
+(define_insn_reservation "athlon_ssecvtvector_load_k8" 5
+ (and (eq_attr "cpu" "k8")
+ (and (eq_attr "type" "ssecvt")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-load,athlon-fadd")
+(define_insn_reservation "athlon_ssecvtvector" 5
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "ssecvt"))
+ "athlon-vector,athlon-fadd")
+(define_insn_reservation "athlon_ssecvtvector_k8" 3
+ (and (eq_attr "cpu" "k8")
+ (eq_attr "type" "ssecvt"))
+ "athlon-vector,athlon-fadd")
+(define_insn_reservation "athlon_ssemul_load" 7
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssemul")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-load,athlon-fmul")
+(define_insn_reservation "athlon_ssemul_load_k8" 6
+ (and (eq_attr "cpu" "k8")
+ (and (eq_attr "type" "ssemul")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-load,athlon-fmul")
+(define_insn_reservation "athlon_ssemul" 4
+ (and (eq_attr "cpu" "athlon,k8")
+ (and (eq_attr "type" "ssemul")
+ (eq_attr "mode" "SF,DF")))
+ "athlon-direct,athlon-fmul")
+(define_insn_reservation "athlon_ssemulvector_load" 8
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssemul")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-load,athlon-fmul")
+(define_insn_reservation "athlon_ssemulvector_load_k8" 7
+ (and (eq_attr "cpu" "k8")
+ (and (eq_attr "type" "ssemul")
+ (eq_attr "memory" "load")))
+ "athlon-double,athlon-load,athlon-fmul")
+(define_insn_reservation "athlon_ssemulvector" 5
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "ssemul"))
+ "athlon-vector,athlon-fmul")
+(define_insn_reservation "athlon_ssemulvector_k8" 5
+ (and (eq_attr "cpu" "k8")
+ (eq_attr "type" "ssemul"))
+ "athlon-double,athlon-fmul")
+(define_insn_reservation "athlon_ssediv_load" 19
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssediv")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-load,athlon-fmul")
+(define_insn_reservation "athlon_ssediv_load_k8" 18
+ (and (eq_attr "cpu" "k8")
+ (and (eq_attr "type" "ssediv")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-load,athlon-fmul")
+(define_insn_reservation "athlon_ssediv" 16
+ (and (eq_attr "cpu" "athlon,k8")
+ (and (eq_attr "type" "ssediv")
+ (eq_attr "mode" "SF,DF")))
+ "athlon-direct,athlon-fmul")
+(define_insn_reservation "athlon_ssedivvector_load" 32
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssediv")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-load,athlon-fmul")
+(define_insn_reservation "athlon_ssedivvector_load_k8" 35
+ (and (eq_attr "cpu" "k8")
+ (and (eq_attr "type" "ssediv")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-load,athlon-fmul")
+(define_insn_reservation "athlon_ssedivvector" 29
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "ssediv"))
+ "athlon-vector,athlon-fmul")
+(define_insn_reservation "athlon_ssedivvector_k8" 33
+ (and (eq_attr "cpu" "k8")
+ (eq_attr "type" "ssediv"))
+ "athlon-vector,athlon-fmul")
diff --git a/gcc/config/i386/att.h b/gcc/config/i386/att.h
index 70ae1641365..8d9930852f7 100644
--- a/gcc/config/i386/att.h
+++ b/gcc/config/i386/att.h
@@ -90,13 +90,6 @@ do \
#define ASM_GENERATE_INTERNAL_LABEL(BUF,PREFIX,NUMBER) \
sprintf ((BUF), "%s%s%ld", LOCAL_LABEL_PREFIX, (PREFIX), (long)(NUMBER))
-/* This is how to output an internal numbered label where
- PREFIX is the class of label and NUM is the number within the class. */
-
-#undef ASM_OUTPUT_INTERNAL_LABEL
-#define ASM_OUTPUT_INTERNAL_LABEL(FILE,PREFIX,NUM) \
- fprintf (FILE, "%s%s%d:\n", LOCAL_LABEL_PREFIX, PREFIX, NUM)
-
/* The prefix to add to user-visible assembler symbols. */
#undef USER_LABEL_PREFIX
diff --git a/gcc/config/i386/bsd.h b/gcc/config/i386/bsd.h
index 69ad1688bfb..9f396ec46ae 100644
--- a/gcc/config/i386/bsd.h
+++ b/gcc/config/i386/bsd.h
@@ -88,12 +88,6 @@ Boston, MA 02111-1307, USA. */
#define ASM_GENERATE_INTERNAL_LABEL(BUF,PREFIX,NUMBER) \
sprintf ((BUF), "*%s%ld", (PREFIX), (long)(NUMBER))
-/* This is how to output an internal numbered label where
- PREFIX is the class of label and NUM is the number within the class. */
-
-#define ASM_OUTPUT_INTERNAL_LABEL(FILE,PREFIX,NUM) \
- fprintf (FILE, "%s%d:\n", PREFIX, NUM)
-
/* The prefix to add to user-visible assembler symbols. */
#define USER_LABEL_PREFIX "_"
diff --git a/gcc/config/i386/freebsd-aout.h b/gcc/config/i386/freebsd-aout.h
index a2b616e700b..646cf13b5d8 100644
--- a/gcc/config/i386/freebsd-aout.h
+++ b/gcc/config/i386/freebsd-aout.h
@@ -198,7 +198,8 @@ do { \
ASM_OUTPUT_MEASURED_SIZE (FILE, FNAME); \
} while (0)
-#define ASM_SPEC " %| %{fpic:-k} %{fPIC:-k}"
+#define AS_NEEDS_DASH_FOR_PIPED_INPUT
+#define ASM_SPEC "%{fpic:-k} %{fPIC:-k}"
#define LINK_SPEC \
"%{p:%e`-p' not supported; use `-pg' and gprof(1)} \
%{shared:-Bshareable} \
diff --git a/gcc/config/i386/i386-coff.h b/gcc/config/i386/i386-coff.h
index e8c5de9c65c..a4bb04ad729 100644
--- a/gcc/config/i386/i386-coff.h
+++ b/gcc/config/i386/i386-coff.h
@@ -60,11 +60,4 @@ Boston, MA 02111-1307, USA. */
#define ASM_GENERATE_INTERNAL_LABEL(BUF,PREFIX,NUMBER) \
sprintf ((BUF), ".%s%ld", (PREFIX), (long)(NUMBER))
-/* This is how to output an internal numbered label where
- PREFIX is the class of label and NUM is the number within the class. */
-
-#undef ASM_OUTPUT_INTERNAL_LABEL
-#define ASM_OUTPUT_INTERNAL_LABEL(FILE,PREFIX,NUM) \
- fprintf (FILE, ".%s%d:\n", PREFIX, NUM)
-
/* end of i386-coff.h */
diff --git a/gcc/config/i386/i386-interix.h b/gcc/config/i386/i386-interix.h
index d309087217d..7e2290f2d3a 100644
--- a/gcc/config/i386/i386-interix.h
+++ b/gcc/config/i386/i386-interix.h
@@ -35,11 +35,12 @@ Boston, MA 02111-1307, USA. */
/* By default, target has a 80387, uses IEEE compatible arithmetic,
and returns float values in the 387 and needs stack probes
- We also align doubles to 64-bits for MSVC default compatibility */
+ We also align doubles to 64-bits for MSVC default compatibility
+ We do bitfields MSVC-compatably by default, too. */
#undef TARGET_SUBTARGET_DEFAULT
#define TARGET_SUBTARGET_DEFAULT \
(MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS | MASK_STACK_PROBE | \
- MASK_ALIGN_DOUBLE)
+ MASK_ALIGN_DOUBLE | MASK_MS_BITFIELD_LAYOUT)
#undef TARGET_CPU_DEFAULT
#define TARGET_CPU_DEFAULT 2 /* 486 */
@@ -243,6 +244,28 @@ Boston, MA 02111-1307, USA. */
#define TARGET_NOP_FUN_DLLIMPORT 1
#define drectve_section() /* nothing */
+/* Objective C has its own packing rules...
+ Objc tries to parallel the code in stor-layout.c at runtime
+ (see libobjc/encoding.c). This (compile-time) packing info isn't
+ available at runtime, so it's hopeless to try.
+
+ And if the user tries to set the flag for objc, give an error
+ so he has some clue. */
+
+#undef SUBTARGET_OVERRIDE_OPTIONS
+#define SUBTARGET_OVERRIDE_OPTIONS \
+do { \
+ if (strcmp (lang_hooks.name, "GNU Objective-C") == 0) \
+ { \
+ if ((target_flags & MASK_MS_BITFIELD_LAYOUT) != 0 \
+ && (target_flags_explicit & MASK_MS_BITFIELD_LAYOUT) != 0) \
+ { \
+ error ("ms-bitfields not supported for objc"); \
+ } \
+ target_flags &= ~MASK_MS_BITFIELD_LAYOUT; \
+ } \
+} while (0)
+
#define EH_FRAME_IN_DATA_SECTION
#define READONLY_DATA_SECTION_ASM_OP "\t.section\t.rdata,\"r\""
@@ -273,8 +296,6 @@ while (0)
#define HOST_PTR_AS_INT unsigned long
#define PCC_BITFIELD_TYPE_MATTERS 1
-#define PCC_BITFIELD_TYPE_TEST TYPE_NATIVE(rec)
-#define GROUP_BITFIELDS_BY_ALIGN TYPE_NATIVE(rec)
/* The following two flags are usually "off" for i386, because some non-gnu
tools (for the i386) don't handle them. However, we don't have that
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 4afdf668bd8..e403950ab32 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -41,6 +41,9 @@ extern int ix86_aligned_p PARAMS ((rtx));
extern int standard_80387_constant_p PARAMS ((rtx));
extern int standard_sse_constant_p PARAMS ((rtx));
extern int symbolic_reference_mentioned_p PARAMS ((rtx));
+extern bool extended_reg_mentioned_p PARAMS ((rtx));
+extern bool x86_extended_QIreg_mentioned_p PARAMS ((rtx));
+extern bool x86_extended_reg_mentioned_p PARAMS ((rtx));
extern int any_fp_register_operand PARAMS ((rtx, enum machine_mode));
extern int register_and_not_any_fp_reg_operand PARAMS ((rtx, enum machine_mode));
@@ -63,6 +66,7 @@ extern int initial_exec_symbolic_operand PARAMS ((rtx, enum machine_mode));
extern int local_exec_symbolic_operand PARAMS ((rtx, enum machine_mode));
extern int pic_symbolic_operand PARAMS ((rtx, enum machine_mode));
extern int call_insn_operand PARAMS ((rtx, enum machine_mode));
+extern int sibcall_insn_operand PARAMS ((rtx, enum machine_mode));
extern int constant_call_address_operand PARAMS ((rtx, enum machine_mode));
extern int const0_operand PARAMS ((rtx, enum machine_mode));
extern int const1_operand PARAMS ((rtx, enum machine_mode));
@@ -137,7 +141,7 @@ extern void ix86_expand_branch PARAMS ((enum rtx_code, rtx));
extern int ix86_expand_setcc PARAMS ((enum rtx_code, rtx));
extern int ix86_expand_int_movcc PARAMS ((rtx[]));
extern int ix86_expand_fp_movcc PARAMS ((rtx[]));
-extern void ix86_expand_call PARAMS ((rtx, rtx, rtx, rtx, rtx));
+extern void ix86_expand_call PARAMS ((rtx, rtx, rtx, rtx, rtx, int));
extern void x86_initialize_trampoline PARAMS ((rtx, rtx, rtx));
extern rtx ix86_zero_extend_to_Pmode PARAMS ((rtx));
extern void ix86_split_long_move PARAMS ((rtx[]));
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 42f6d93d3c3..2eaa1c54875 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -21,6 +21,8 @@ Boston, MA 02111-1307, USA. */
#include "config.h"
#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
@@ -55,9 +57,9 @@ struct processor_costs size_cost = { /* costs for tunning for size */
3, /* cost of a lea instruction */
2, /* variable shift costs */
3, /* constant shift costs */
- 3, /* cost of starting a multiply */
+ {3, 3, 3, 3, 5}, /* cost of starting a multiply */
0, /* cost of multiply per each bit set */
- 3, /* cost of a divide/mod */
+ {3, 3, 3, 3, 5}, /* cost of a divide/mod */
3, /* cost of movsx */
3, /* cost of movzx */
0, /* "large" insn */
@@ -84,6 +86,7 @@ struct processor_costs size_cost = { /* costs for tunning for size */
3, /* MMX or SSE register to integer */
0, /* size of prefetch block */
0, /* number of parallel prefetches */
+ 1, /* Branch cost */
2, /* cost of FADD and FSUB insns. */
2, /* cost of FMUL instruction. */
2, /* cost of FDIV instruction. */
@@ -99,9 +102,9 @@ struct processor_costs i386_cost = { /* 386 specific costs */
1, /* cost of a lea instruction */
3, /* variable shift costs */
2, /* constant shift costs */
- 6, /* cost of starting a multiply */
+ {6, 6, 6, 6, 6}, /* cost of starting a multiply */
1, /* cost of multiply per each bit set */
- 23, /* cost of a divide/mod */
+ {23, 23, 23, 23, 23}, /* cost of a divide/mod */
3, /* cost of movsx */
2, /* cost of movzx */
15, /* "large" insn */
@@ -128,6 +131,7 @@ struct processor_costs i386_cost = { /* 386 specific costs */
3, /* MMX or SSE register to integer */
0, /* size of prefetch block */
0, /* number of parallel prefetches */
+ 1, /* Branch cost */
23, /* cost of FADD and FSUB insns. */
27, /* cost of FMUL instruction. */
88, /* cost of FDIV instruction. */
@@ -142,9 +146,9 @@ struct processor_costs i486_cost = { /* 486 specific costs */
1, /* cost of a lea instruction */
3, /* variable shift costs */
2, /* constant shift costs */
- 12, /* cost of starting a multiply */
+ {12, 12, 12, 12, 12}, /* cost of starting a multiply */
1, /* cost of multiply per each bit set */
- 40, /* cost of a divide/mod */
+ {40, 40, 40, 40, 40}, /* cost of a divide/mod */
3, /* cost of movsx */
2, /* cost of movzx */
15, /* "large" insn */
@@ -171,6 +175,7 @@ struct processor_costs i486_cost = { /* 486 specific costs */
3, /* MMX or SSE register to integer */
0, /* size of prefetch block */
0, /* number of parallel prefetches */
+ 1, /* Branch cost */
8, /* cost of FADD and FSUB insns. */
16, /* cost of FMUL instruction. */
73, /* cost of FDIV instruction. */
@@ -185,9 +190,9 @@ struct processor_costs pentium_cost = {
1, /* cost of a lea instruction */
4, /* variable shift costs */
1, /* constant shift costs */
- 11, /* cost of starting a multiply */
+ {11, 11, 11, 11, 11}, /* cost of starting a multiply */
0, /* cost of multiply per each bit set */
- 25, /* cost of a divide/mod */
+ {25, 25, 25, 25, 25}, /* cost of a divide/mod */
3, /* cost of movsx */
2, /* cost of movzx */
8, /* "large" insn */
@@ -214,6 +219,7 @@ struct processor_costs pentium_cost = {
3, /* MMX or SSE register to integer */
0, /* size of prefetch block */
0, /* number of parallel prefetches */
+ 2, /* Branch cost */
3, /* cost of FADD and FSUB insns. */
3, /* cost of FMUL instruction. */
39, /* cost of FDIV instruction. */
@@ -228,9 +234,9 @@ struct processor_costs pentiumpro_cost = {
1, /* cost of a lea instruction */
1, /* variable shift costs */
1, /* constant shift costs */
- 4, /* cost of starting a multiply */
+ {4, 4, 4, 4, 4}, /* cost of starting a multiply */
0, /* cost of multiply per each bit set */
- 17, /* cost of a divide/mod */
+ {17, 17, 17, 17, 17}, /* cost of a divide/mod */
1, /* cost of movsx */
1, /* cost of movzx */
8, /* "large" insn */
@@ -257,6 +263,7 @@ struct processor_costs pentiumpro_cost = {
3, /* MMX or SSE register to integer */
32, /* size of prefetch block */
6, /* number of parallel prefetches */
+ 2, /* Branch cost */
3, /* cost of FADD and FSUB insns. */
5, /* cost of FMUL instruction. */
56, /* cost of FDIV instruction. */
@@ -271,9 +278,9 @@ struct processor_costs k6_cost = {
2, /* cost of a lea instruction */
1, /* variable shift costs */
1, /* constant shift costs */
- 3, /* cost of starting a multiply */
+ {3, 3, 3, 3, 3}, /* cost of starting a multiply */
0, /* cost of multiply per each bit set */
- 18, /* cost of a divide/mod */
+ {18, 18, 18, 18, 18}, /* cost of a divide/mod */
2, /* cost of movsx */
2, /* cost of movzx */
8, /* "large" insn */
@@ -300,6 +307,7 @@ struct processor_costs k6_cost = {
6, /* MMX or SSE register to integer */
32, /* size of prefetch block */
1, /* number of parallel prefetches */
+ 1, /* Branch cost */
2, /* cost of FADD and FSUB insns. */
2, /* cost of FMUL instruction. */
56, /* cost of FDIV instruction. */
@@ -314,9 +322,9 @@ struct processor_costs athlon_cost = {
2, /* cost of a lea instruction */
1, /* variable shift costs */
1, /* constant shift costs */
- 5, /* cost of starting a multiply */
+ {5, 5, 5, 5, 5}, /* cost of starting a multiply */
0, /* cost of multiply per each bit set */
- 42, /* cost of a divide/mod */
+ {18, 26, 42, 74, 74}, /* cost of a divide/mod */
1, /* cost of movsx */
1, /* cost of movzx */
8, /* "large" insn */
@@ -343,6 +351,7 @@ struct processor_costs athlon_cost = {
5, /* MMX or SSE register to integer */
64, /* size of prefetch block */
6, /* number of parallel prefetches */
+ 2, /* Branch cost */
4, /* cost of FADD and FSUB insns. */
4, /* cost of FMUL instruction. */
24, /* cost of FDIV instruction. */
@@ -352,14 +361,58 @@ struct processor_costs athlon_cost = {
};
static const
+struct processor_costs k8_cost = {
+ 1, /* cost of an add instruction */
+ 2, /* cost of a lea instruction */
+ 1, /* variable shift costs */
+ 1, /* constant shift costs */
+ {3, 4, 3, 4, 5}, /* cost of starting a multiply */
+ 0, /* cost of multiply per each bit set */
+ {18, 26, 42, 74, 74}, /* cost of a divide/mod */
+ 1, /* cost of movsx */
+ 1, /* cost of movzx */
+ 8, /* "large" insn */
+ 9, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {3, 4, 3}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {3, 4, 3}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {4, 4, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of loading integer registers */
+ 2, /* cost of moving MMX register */
+ {3, 3}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 4}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 3, 6}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 4, 5}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 5, /* MMX or SSE register to integer */
+ 64, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ 2, /* Branch cost */
+ 4, /* cost of FADD and FSUB insns. */
+ 4, /* cost of FMUL instruction. */
+ 19, /* cost of FDIV instruction. */
+ 2, /* cost of FABS instruction. */
+ 2, /* cost of FCHS instruction. */
+ 35, /* cost of FSQRT instruction. */
+};
+
+static const
struct processor_costs pentium4_cost = {
1, /* cost of an add instruction */
1, /* cost of a lea instruction */
- 8, /* variable shift costs */
- 8, /* constant shift costs */
- 30, /* cost of starting a multiply */
+ 4, /* variable shift costs */
+ 4, /* constant shift costs */
+ {15, 15, 15, 15, 15}, /* cost of starting a multiply */
0, /* cost of multiply per each bit set */
- 112, /* cost of a divide/mod */
+ {56, 56, 56, 56, 56}, /* cost of a divide/mod */
1, /* cost of movsx */
1, /* cost of movzx */
16, /* "large" insn */
@@ -386,6 +439,7 @@ struct processor_costs pentium4_cost = {
10, /* MMX or SSE register to integer */
64, /* size of prefetch block */
6, /* number of parallel prefetches */
+ 2, /* Branch cost */
5, /* cost of FADD and FSUB insns. */
7, /* cost of FMUL instruction. */
43, /* cost of FDIV instruction. */
@@ -404,52 +458,66 @@ const struct processor_costs *ix86_cost = &pentium_cost;
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
+#define m_K8 (1<<PROCESSOR_K8)
+#define m_ATHLON_K8 (m_K8 | m_ATHLON)
-const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
-const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
+const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
+const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
-const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
+const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
-const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
-const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
-const int x86_3dnow_a = m_ATHLON;
-const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
+const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
+const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
+const int x86_3dnow_a = m_ATHLON_K8;
+const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
-const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
+const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
-const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON;
+const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
-const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
-const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
-const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
-const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
-const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4 | m_PPRO);
-const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
-const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
-const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
-const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
-const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
+const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
+const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
+const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
+const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
+const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
+const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
+const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
+const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
+const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
+const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_decompose_lea = m_PENT4;
const int x86_shift1 = ~m_486;
-const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
+const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
+const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
+/* Set for machines where the type and dependencies are resolved on SSE register
+ parts insetad of whole registers, so we may maintain just lower part of
+ scalar values in proper format leaving the upper part undefined. */
+const int x86_sse_partial_regs = m_ATHLON_K8;
+/* Athlon optimizes partial-register FPS special case, thus avoiding the
+ need for extra instructions beforehand */
+const int x86_sse_partial_regs_for_cvtsd2ss = 0;
+const int x86_sse_typeless_stores = m_ATHLON_K8;
+const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
+const int x86_use_ffreep = m_ATHLON_K8;
+const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
/* In case the avreage insn count for single function invocation is
lower than this constant, emit fast (but longer) prologue and
epilogue code. */
-#define FAST_PROLOGUE_INSN_COUNT 30
+#define FAST_PROLOGUE_INSN_COUNT 20
/* Set by prologue expander and used by epilogue expander to determine
the style used. */
@@ -755,6 +823,7 @@ static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
HOST_WIDE_INT, tree));
static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
HOST_WIDE_INT, tree));
+bool ix86_expand_carry_flag_compare PARAMS ((enum rtx_code, rtx, rtx, rtx*));
struct ix86_address
{
@@ -796,9 +865,12 @@ static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
static int ix86_comp_type_attributes PARAMS ((tree, tree));
static int ix86_fntype_regparm PARAMS ((tree));
const struct attribute_spec ix86_attribute_table[];
+static bool ix86_function_ok_for_sibcall PARAMS ((tree, tree));
static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
static int ix86_value_regno PARAMS ((enum machine_mode));
+static bool ix86_ms_bitfield_layout_p PARAMS ((tree));
+static int extended_reg_mentioned_1 PARAMS ((rtx *, void *));
#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
@@ -897,6 +969,9 @@ static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
ia32_multipass_dfa_lookahead
+#undef TARGET_FUNCTION_OK_FOR_SIBCALL
+#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
+
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
@@ -904,6 +979,9 @@ static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
+#undef TARGET_MS_BITFIELD_LAYOUT_P
+#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
+
#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
@@ -937,17 +1015,17 @@ override_options ()
const int align_jump;
const int align_jump_max_skip;
const int align_func;
- const int branch_cost;
}
const processor_target_table[PROCESSOR_max] =
{
- {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
- {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
- {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
- {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
- {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
- {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
- {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
+ {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
+ {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
+ {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
+ {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
+ {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
+ {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
+ {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
+ {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
};
static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
@@ -962,7 +1040,8 @@ override_options ()
PTA_MMX = 4,
PTA_PREFETCH_SSE = 8,
PTA_3DNOW = 16,
- PTA_3DNOW_A = 64
+ PTA_3DNOW_A = 64,
+ PTA_64BIT = 128
} flags;
}
const processor_alias_table[] =
@@ -994,6 +1073,8 @@ override_options ()
| PTA_3DNOW_A | PTA_SSE},
{"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
| PTA_3DNOW_A | PTA_SSE},
+ {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
+ | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
};
int const pta_size = ARRAY_SIZE (processor_alias_table);
@@ -1033,7 +1114,7 @@ override_options ()
if (!ix86_cpu_string)
ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
if (!ix86_arch_string)
- ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
+ ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
if (ix86_cmodel_string != 0)
{
@@ -1099,6 +1180,8 @@ override_options ()
target_flags |= MASK_SSE2;
if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
x86_prefetch_sse = true;
+ if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
+ error ("CPU you selected does not support x86-64 instruction set");
break;
}
@@ -1109,6 +1192,8 @@ override_options ()
if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
{
ix86_cpu = processor_alias_table[i].processor;
+ if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
+ error ("CPU you selected does not support x86-64 instruction set");
break;
}
if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
@@ -1215,7 +1300,7 @@ override_options ()
}
/* Validate -mbranch-cost= value, or provide default. */
- ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
+ ix86_branch_cost = processor_target_table[ix86_cpu].cost->branch_cost;
if (ix86_branch_cost_string)
{
i = atoi (ix86_branch_cost_string);
@@ -1371,6 +1456,60 @@ const struct attribute_spec ix86_attribute_table[] =
{ NULL, 0, 0, false, false, false, NULL }
};
+/* If PIC, we cannot make sibling calls to global functions
+ because the PLT requires %ebx live.
+ If we are returning floats on the register stack, we cannot make
+ sibling calls to functions that return floats. (The stack adjust
+ instruction will wind up after the sibcall jump, and not be executed.) */
+
+static bool
+ix86_function_ok_for_sibcall (decl, exp)
+ tree decl;
+ tree exp;
+{
+ /* If we are generating position-independent code, we cannot sibcall
+ optimize any indirect call, or a direct call to a global function,
+ as the PLT requires %ebx be live. */
+ if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
+ return false;
+
+ /* If we are returning floats on the 80387 register stack, we cannot
+ make a sibcall from a function that doesn't return a float to a
+ function that does; the necessary stack adjustment will not be
+ executed. */
+ if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
+ && ! STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
+ return false;
+
+ /* If this call is indirect, we'll need to be able to use a call-clobbered
+ register for the address of the target function. Make sure that all
+ such registers are not used for passing parameters. */
+ if (!decl && !TARGET_64BIT)
+ {
+ int regparm = ix86_regparm;
+ tree attr, type;
+
+ /* We're looking at the CALL_EXPR, we need the type of the function. */
+ type = TREE_OPERAND (exp, 0); /* pointer expression */
+ type = TREE_TYPE (type); /* pointer type */
+ type = TREE_TYPE (type); /* function type */
+
+ attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
+ if (attr)
+ regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
+
+ if (regparm >= 3)
+ {
+ /* ??? Need to count the actual number of registers to be used,
+ not the possible number of registers. Fix later. */
+ return false;
+ }
+ }
+
+ /* Otherwise okay. That also includes certain types of indirect calls. */
+ return true;
+}
+
/* Handle a "cdecl" or "stdcall" attribute;
arguments as in struct attribute_spec.handler. */
static tree
@@ -3209,6 +3348,32 @@ call_insn_operand (op, mode)
return general_operand (op, Pmode);
}
+/* Test for a valid operand for a call instruction. Don't allow the
+ arg pointer register or virtual regs since they may decay into
+ reg + const, which the patterns can't handle. */
+
+int
+sibcall_insn_operand (op, mode)
+ rtx op;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+ /* Disallow indirect through a virtual register. This leads to
+ compiler aborts when trying to eliminate them. */
+ if (GET_CODE (op) == REG
+ && (op == arg_pointer_rtx
+ || op == frame_pointer_rtx
+ || (REGNO (op) >= FIRST_PSEUDO_REGISTER
+ && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
+ return 0;
+
+ /* Explicitly allow SYMBOL_REF even if pic. */
+ if (GET_CODE (op) == SYMBOL_REF)
+ return 1;
+
+ /* Otherwise we can only allow register operands. */
+ return register_operand (op, Pmode);
+}
+
int
constant_call_address_operand (op, mode)
rtx op;
@@ -3387,6 +3552,18 @@ q_regs_operand (op, mode)
return ANY_QI_REG_P (op);
}
+/* Return true if op is an flags register. */
+
+int
+flags_reg_operand (op, mode)
+ register rtx op;
+ enum machine_mode mode;
+{
+ if (mode != VOIDmode && GET_MODE (op) != mode)
+ return 0;
+ return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
+}
+
/* Return true if op is a NON_Q_REGS class register. */
int
@@ -3401,6 +3578,31 @@ non_q_regs_operand (op, mode)
return NON_QI_REG_P (op);
}
+int
+zero_extended_scalar_load_operand (op, mode)
+ rtx op;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+ unsigned n_elts;
+ if (GET_CODE (op) != MEM)
+ return 0;
+ op = maybe_get_pool_constant (op);
+ if (!op)
+ return 0;
+ if (GET_CODE (op) != CONST_VECTOR)
+ return 0;
+ n_elts =
+ (GET_MODE_SIZE (GET_MODE (op)) /
+ GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
+ for (n_elts--; n_elts > 0; n_elts--)
+ {
+ rtx elt = CONST_VECTOR_ELT (op, n_elts);
+ if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
+ return 0;
+ }
+ return 1;
+}
+
/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
insns. */
int
@@ -4160,7 +4362,7 @@ output_set_got (dest)
is what will be referred to by the Mach-O PIC subsystem. */
ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
#endif
- ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
+ (*targetm.asm_out.internal_label) (asm_out_file, "L",
CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
if (flag_pic)
@@ -4456,14 +4658,32 @@ ix86_expand_prologue ()
int use_mov = 0;
HOST_WIDE_INT allocate;
+ ix86_compute_frame_layout (&frame);
if (!optimize_size)
{
- use_fast_prologue_epilogue
- = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
+ int count = frame.nregs;
+
+ /* The fast prologue uses move instead of push to save registers. This
+ is significantly longer, but also executes faster as modern hardware
+ can execute the moves in parallel, but can't do that for push/pop.
+
+ Be curefull about choosing what prologue to emit: When function takes
+ many instructions to execute we may use slow version as well as in
+ case function is known to be outside hot spot (this is known with
+ feedback only). Weight the size of function by number of registers
+ to save as it is cheap to use one or two push instructions but very
+ slow to use many of them. */
+ if (count)
+ count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
+ if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
+ || (flag_branch_probabilities
+ && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
+ use_fast_prologue_epilogue = 0;
+ else
+ use_fast_prologue_epilogue = !expensive_function_p (count);
if (TARGET_PROLOGUE_USING_MOVE)
use_mov = use_fast_prologue_epilogue;
}
- ix86_compute_frame_layout (&frame);
/* Note: AT&T enter does NOT have reversed args. Enter is probably
slower on all targets. Also sdb doesn't like it. */
@@ -8960,6 +9180,84 @@ ix86_expand_setcc (code, dest)
return 1; /* DONE */
}
+/* Expand comparison setting or clearing carry flag. Return true when sucesfull
+ and set pop for the operation. */
+bool
+ix86_expand_carry_flag_compare (code, op0, op1, pop)
+ rtx op0, op1, *pop;
+ enum rtx_code code;
+{
+ enum machine_mode mode =
+ GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
+
+ /* Do not handle DImode compares that go trought special path. Also we can't
+ deal with FP compares yet. This is possible to add. */
+ if ((mode == DImode && !TARGET_64BIT) || !INTEGRAL_MODE_P (mode))
+ return false;
+ switch (code)
+ {
+ case LTU:
+ case GEU:
+ break;
+
+ /* Convert a==0 into (unsigned)a<1. */
+ case EQ:
+ case NE:
+ if (op1 != const0_rtx)
+ return false;
+ op1 = const1_rtx;
+ code = (code == EQ ? LTU : GEU);
+ break;
+
+ /* Convert a>b into b<a or a>=b-1. */
+ case GTU:
+ case LEU:
+ if (GET_CODE (op1) == CONST_INT)
+ {
+ op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
+ /* Bail out on overflow. We still can swap operands but that
+ would force loading of the constant into register. */
+ if (op1 == const0_rtx
+ || !x86_64_immediate_operand (op1, GET_MODE (op1)))
+ return false;
+ code = (code == GTU ? GEU : LTU);
+ }
+ else
+ {
+ rtx tmp = op1;
+ op1 = op0;
+ op0 = tmp;
+ code = (code == GTU ? LTU : GEU);
+ }
+ break;
+
+ /* Convert a>0 into (unsigned)a<0x7fffffff. */
+ case LT:
+ case GE:
+ if (mode == DImode || op1 != const0_rtx)
+ return false;
+ op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
+ code = (code == LT ? GEU : LTU);
+ break;
+ case LE:
+ case GT:
+ if (mode == DImode || op1 != constm1_rtx)
+ return false;
+ op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
+ code = (code == LE ? GEU : LTU);
+ break;
+
+ default:
+ return false;
+ }
+ ix86_compare_op0 = op0;
+ ix86_compare_op1 = op1;
+ *pop = ix86_expand_compare (code, NULL, NULL);
+ if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
+ abort ();
+ return true;
+}
+
int
ix86_expand_int_movcc (operands)
rtx operands[];
@@ -8968,30 +9266,7 @@ ix86_expand_int_movcc (operands)
rtx compare_seq, compare_op;
rtx second_test, bypass_test;
enum machine_mode mode = GET_MODE (operands[0]);
-
- /* When the compare code is not LTU or GEU, we can not use sbbl case.
- In case comparsion is done with immediate, we can convert it to LTU or
- GEU by altering the integer. */
-
- if ((code == LEU || code == GTU)
- && GET_CODE (ix86_compare_op1) == CONST_INT
- && mode != HImode
- && INTVAL (ix86_compare_op1) != -1
- /* For x86-64, the immediate field in the instruction is 32-bit
- signed, so we can't increment a DImode value above 0x7fffffff. */
- && (!TARGET_64BIT
- || GET_MODE (ix86_compare_op0) != DImode
- || INTVAL (ix86_compare_op1) != 0x7fffffff)
- && GET_CODE (operands[2]) == CONST_INT
- && GET_CODE (operands[3]) == CONST_INT)
- {
- if (code == LEU)
- code = LTU;
- else
- code = GEU;
- ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
- GET_MODE (ix86_compare_op0));
- }
+ bool sign_bit_compare_p = false;;
start_sequence ();
compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
@@ -9000,10 +9275,14 @@ ix86_expand_int_movcc (operands)
compare_code = GET_CODE (compare_op);
+ if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
+ || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
+ sign_bit_compare_p = true;
+
/* Don't attempt mode expansion here -- if we had to expand 5 or 6
HImode insns, we'd be swallowed in word prefix ops. */
- if (mode != HImode
+ if ((mode != HImode || TARGET_FAST_PREFIX)
&& (mode != DImode || TARGET_64BIT)
&& GET_CODE (operands[2]) == CONST_INT
&& GET_CODE (operands[3]) == CONST_INT)
@@ -9013,32 +9292,53 @@ ix86_expand_int_movcc (operands)
HOST_WIDE_INT cf = INTVAL (operands[3]);
HOST_WIDE_INT diff;
- if ((compare_code == LTU || compare_code == GEU)
- && !second_test && !bypass_test)
+ diff = ct - cf;
+ /* Sign bit compares are better done using shifts than we do by using
+ sbb. */
+ if (sign_bit_compare_p
+ || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
+ ix86_compare_op1, &compare_op))
{
/* Detect overlap between destination and compare sources. */
rtx tmp = out;
- /* To simplify rest of code, restrict to the GEU case. */
- if (compare_code == LTU)
+ if (!sign_bit_compare_p)
{
- HOST_WIDE_INT tmp = ct;
- ct = cf;
- cf = tmp;
- compare_code = reverse_condition (compare_code);
- code = reverse_condition (code);
- }
- diff = ct - cf;
+ compare_code = GET_CODE (compare_op);
+
+ /* To simplify rest of code, restrict to the GEU case. */
+ if (compare_code == LTU)
+ {
+ HOST_WIDE_INT tmp = ct;
+ ct = cf;
+ cf = tmp;
+ compare_code = reverse_condition (compare_code);
+ code = reverse_condition (code);
+ }
+ diff = ct - cf;
- if (reg_overlap_mentioned_p (out, ix86_compare_op0)
- || reg_overlap_mentioned_p (out, ix86_compare_op1))
- tmp = gen_reg_rtx (mode);
+ if (reg_overlap_mentioned_p (out, ix86_compare_op0)
+ || reg_overlap_mentioned_p (out, ix86_compare_op1))
+ tmp = gen_reg_rtx (mode);
- emit_insn (compare_seq);
- if (mode == DImode)
- emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
+ if (mode == DImode)
+ emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
+ else
+ emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp)));
+ }
else
- emit_insn (gen_x86_movsicc_0_m1 (tmp));
+ {
+ if (code == GT || code == GE)
+ code = reverse_condition (code);
+ else
+ {
+ HOST_WIDE_INT tmp = ct;
+ ct = cf;
+ cf = tmp;
+ }
+ tmp = emit_store_flag (tmp, code, ix86_compare_op0,
+ ix86_compare_op1, VOIDmode, 0, -1);
+ }
if (diff == 1)
{
@@ -9052,7 +9352,7 @@ ix86_expand_int_movcc (operands)
if (ct)
tmp = expand_simple_binop (mode, PLUS,
tmp, GEN_INT (ct),
- tmp, 1, OPTAB_DIRECT);
+ copy_rtx (tmp), 1, OPTAB_DIRECT);
}
else if (cf == -1)
{
@@ -9065,7 +9365,7 @@ ix86_expand_int_movcc (operands)
*/
tmp = expand_simple_binop (mode, IOR,
tmp, GEN_INT (ct),
- tmp, 1, OPTAB_DIRECT);
+ copy_rtx (tmp), 1, OPTAB_DIRECT);
}
else if (diff == -1 && ct)
{
@@ -9077,11 +9377,11 @@ ix86_expand_int_movcc (operands)
*
* Size 8 - 11.
*/
- tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
+ tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
if (cf)
tmp = expand_simple_binop (mode, PLUS,
- tmp, GEN_INT (cf),
- tmp, 1, OPTAB_DIRECT);
+ copy_rtx (tmp), GEN_INT (cf),
+ copy_rtx (tmp), 1, OPTAB_DIRECT);
}
else
{
@@ -9099,26 +9399,25 @@ ix86_expand_int_movcc (operands)
{
cf = ct;
ct = 0;
- tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
+ tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
}
tmp = expand_simple_binop (mode, AND,
- tmp,
+ copy_rtx (tmp),
gen_int_mode (cf - ct, mode),
- tmp, 1, OPTAB_DIRECT);
+ copy_rtx (tmp), 1, OPTAB_DIRECT);
if (ct)
tmp = expand_simple_binop (mode, PLUS,
- tmp, GEN_INT (ct),
- tmp, 1, OPTAB_DIRECT);
+ copy_rtx (tmp), GEN_INT (ct),
+ copy_rtx (tmp), 1, OPTAB_DIRECT);
}
- if (tmp != out)
- emit_move_insn (out, tmp);
+ if (!rtx_equal_p (tmp, out))
+ emit_move_insn (copy_rtx (out), copy_rtx (tmp));
return 1; /* DONE */
}
- diff = ct - cf;
if (diff < 0)
{
HOST_WIDE_INT tmp;
@@ -9194,8 +9493,10 @@ ix86_expand_int_movcc (operands)
}
}
+
if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
|| diff == 3 || diff == 5 || diff == 9)
+ && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
&& (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
{
/*
@@ -9237,15 +9538,14 @@ ix86_expand_int_movcc (operands)
tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
nops++;
}
- if (tmp != out
- && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
+ if (!rtx_equal_p (tmp, out))
{
if (nops == 1)
out = force_operand (tmp, out);
else
- emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
+ emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
}
- if (out != operands[0])
+ if (!rtx_equal_p (out, operands[0]))
emit_move_insn (operands[0], copy_rtx (out));
return 1; /* DONE */
@@ -9265,12 +9565,10 @@ ix86_expand_int_movcc (operands)
* This is reasonably steep, but branch mispredict costs are
* high on modern cpus, so consider failing only if optimizing
* for space.
- *
- * %%% Parameterize branch_cost on the tuning architecture, then
- * use that. The 80386 couldn't care less about mispredicts.
*/
- if (!optimize_size && !TARGET_CMOVE)
+ if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
+ && BRANCH_COST >= 2)
{
if (cf == 0)
{
@@ -9324,31 +9622,31 @@ ix86_expand_int_movcc (operands)
out = emit_store_flag (out, code, ix86_compare_op0,
ix86_compare_op1, VOIDmode, 0, 1);
- out = expand_simple_binop (mode, PLUS, out, constm1_rtx,
- out, 1, OPTAB_DIRECT);
+ out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
+ copy_rtx (out), 1, OPTAB_DIRECT);
}
- out = expand_simple_binop (mode, AND, out,
+ out = expand_simple_binop (mode, AND, copy_rtx (out),
gen_int_mode (cf - ct, mode),
- out, 1, OPTAB_DIRECT);
+ copy_rtx (out), 1, OPTAB_DIRECT);
if (ct)
- out = expand_simple_binop (mode, PLUS, out, GEN_INT (ct),
- out, 1, OPTAB_DIRECT);
- if (out != operands[0])
- emit_move_insn (operands[0], out);
+ out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
+ copy_rtx (out), 1, OPTAB_DIRECT);
+ if (!rtx_equal_p (out, operands[0]))
+ emit_move_insn (operands[0], copy_rtx (out));
return 1; /* DONE */
}
}
- if (!TARGET_CMOVE)
+ if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
{
/* Try a few things more with specific constants and a variable. */
optab op;
rtx var, orig_out, out, tmp;
- if (optimize_size)
+ if (BRANCH_COST <= 2)
return 0; /* FAIL */
/* If one of the two operands is an interesting constant, load a
@@ -9357,9 +9655,9 @@ ix86_expand_int_movcc (operands)
if (GET_CODE (operands[2]) == CONST_INT)
{
var = operands[3];
- if (INTVAL (operands[2]) == 0)
+ if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
operands[3] = constm1_rtx, op = and_optab;
- else if (INTVAL (operands[2]) == -1)
+ else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
operands[3] = const0_rtx, op = ior_optab;
else
return 0; /* FAIL */
@@ -9367,9 +9665,9 @@ ix86_expand_int_movcc (operands)
else if (GET_CODE (operands[3]) == CONST_INT)
{
var = operands[2];
- if (INTVAL (operands[3]) == 0)
+ if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
operands[2] = constm1_rtx, op = and_optab;
- else if (INTVAL (operands[3]) == -1)
+ else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
operands[2] = const0_rtx, op = ior_optab;
else
return 0; /* FAIL */
@@ -9388,8 +9686,8 @@ ix86_expand_int_movcc (operands)
/* Mask in the interesting variable. */
out = expand_binop (mode, op, var, tmp, orig_out, 0,
OPTAB_WIDEN);
- if (out != orig_out)
- emit_move_insn (orig_out, out);
+ if (!rtx_equal_p (out, orig_out))
+ emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
return 1; /* DONE */
}
@@ -9422,27 +9720,33 @@ ix86_expand_int_movcc (operands)
emit_move_insn (tmp, operands[2]);
operands[2] = tmp;
}
+
if (! register_operand (operands[2], VOIDmode)
- && ! register_operand (operands[3], VOIDmode))
+ && (mode == QImode
+ || ! register_operand (operands[3], VOIDmode)))
operands[2] = force_reg (mode, operands[2]);
+ if (mode == QImode
+ && ! register_operand (operands[3], VOIDmode))
+ operands[3] = force_reg (mode, operands[3]);
+
emit_insn (compare_seq);
emit_insn (gen_rtx_SET (VOIDmode, operands[0],
gen_rtx_IF_THEN_ELSE (mode,
compare_op, operands[2],
operands[3])));
if (bypass_test)
- emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+ emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
gen_rtx_IF_THEN_ELSE (mode,
bypass_test,
- operands[3],
- operands[0])));
+ copy_rtx (operands[3]),
+ copy_rtx (operands[0]))));
if (second_test)
- emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+ emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
gen_rtx_IF_THEN_ELSE (mode,
second_test,
- operands[2],
- operands[0])));
+ copy_rtx (operands[2]),
+ copy_rtx (operands[0]))));
return 1; /* DONE */
}
@@ -9483,8 +9787,14 @@ ix86_expand_fp_movcc (operands)
if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
{
/* Check for min operation. */
- if (code == LT)
+ if (code == LT || code == UNLE)
{
+ if (code == UNLE)
+ {
+ rtx tmp = op0;
+ op0 = op1;
+ op1 = tmp;
+ }
operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
if (memory_operand (op0, VOIDmode))
op0 = force_reg (GET_MODE (operands[0]), op0);
@@ -9495,8 +9805,14 @@ ix86_expand_fp_movcc (operands)
return 1;
}
/* Check for max operation. */
- if (code == GT)
+ if (code == GT || code == UNGE)
{
+ if (code == UNGE)
+ {
+ rtx tmp = op0;
+ op0 = op1;
+ op1 = tmp;
+ }
operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
if (memory_operand (op0, VOIDmode))
op0 = force_reg (GET_MODE (operands[0]), op0);
@@ -10242,8 +10558,12 @@ ix86_expand_movstr (dst, src, count_exp, align_exp)
/* In case we don't know anything about the alignment, default to
library version, since it is usually equally fast and result in
- shorter code. */
- if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
+ shorter code.
+
+ Also emit call when we know that the count is large and call overhead
+ will not be important. */
+ if (!TARGET_INLINE_ALL_STRINGOPS
+ && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
{
end_sequence ();
return 0;
@@ -10457,8 +10777,12 @@ ix86_expand_clrstr (src, count_exp, align_exp)
/* In case we don't know anything about the alignment, default to
library version, since it is usually equally fast and result in
- shorter code. */
- if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
+ shorter code.
+
+ Also emit call when we know that the count is large and call overhead
+ will not be important. */
+ if (!TARGET_INLINE_ALL_STRINGOPS
+ && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
return 0;
if (TARGET_SINGLE_STRINGOP)
@@ -10828,8 +11152,9 @@ ix86_expand_strlensi_unroll_1 (out, align_rtx)
}
void
-ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
+ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop, sibcall)
rtx retval, fnaddr, callarg1, callarg2, pop;
+ int sibcall;
{
rtx use = NULL, call;
@@ -10861,6 +11186,15 @@ ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
fnaddr = gen_rtx_MEM (QImode, fnaddr);
}
+ if (sibcall && TARGET_64BIT
+ && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
+ {
+ rtx addr;
+ addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
+ fnaddr = gen_rtx_REG (Pmode, 40);
+ emit_move_insn (fnaddr, addr);
+ fnaddr = gen_rtx_MEM (QImode, fnaddr);
+ }
call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
if (retval)
@@ -11060,6 +11394,7 @@ ix86_issue_rate ()
case PROCESSOR_PENTIUMPRO:
case PROCESSOR_PENTIUM4:
case PROCESSOR_ATHLON:
+ case PROCESSOR_K8:
return 3;
default:
@@ -11271,16 +11606,10 @@ ix86_adjust_cost (insn, link, dep_insn, cost)
break;
case PROCESSOR_ATHLON:
+ case PROCESSOR_K8:
memory = get_attr_memory (insn);
dep_memory = get_attr_memory (dep_insn);
- if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
- {
- if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
- cost += 2;
- else
- cost += 3;
- }
/* Show ability of reorder buffer to hide latency of load by executing
in parallel with previous instruction in case
previous instruction is not needed to compute the address. */
@@ -11554,7 +11883,7 @@ ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
static int
ia32_use_dfa_pipeline_interface ()
{
- if (ix86_cpu == PROCESSOR_PENTIUM)
+ if (TARGET_PENTIUM || TARGET_ATHLON_K8)
return 1;
return 0;
}
@@ -12764,7 +13093,8 @@ safe_vector_operand (x, mode)
: gen_rtx_SUBREG (DImode, x, 0)));
else
emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
- : gen_rtx_SUBREG (V4SFmode, x, 0)));
+ : gen_rtx_SUBREG (V4SFmode, x, 0),
+ CONST0_RTX (V4SFmode)));
return x;
}
@@ -13434,7 +13764,7 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore)
case IX86_BUILTIN_SSE_ZERO:
target = gen_reg_rtx (V4SFmode);
- emit_insn (gen_sse_clrv4sf (target));
+ emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
return target;
case IX86_BUILTIN_MMX_ZERO:
@@ -14058,6 +14388,17 @@ x86_order_regs_for_local_alloc ()
reg_alloc_order [pos++] = 0;
}
+#ifndef TARGET_USE_MS_BITFIELD_LAYOUT
+#define TARGET_USE_MS_BITFIELD_LAYOUT 0
+#endif
+
+static bool
+ix86_ms_bitfield_layout_p (record_type)
+ tree record_type ATTRIBUTE_UNUSED;
+{
+ return TARGET_USE_MS_BITFIELD_LAYOUT;
+}
+
/* Returns an expression indicating where the this parameter is
located on entry to the FUNCTION. */
@@ -14317,7 +14658,7 @@ x86_machine_dependent_reorg (first)
{
edge e;
- if (!TARGET_ATHLON || !optimize || optimize_size)
+ if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
return;
for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
{
@@ -14328,25 +14669,69 @@ x86_machine_dependent_reorg (first)
if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
continue;
- prev = prev_nonnote_insn (ret);
+ for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
+ if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
+ break;
if (prev && GET_CODE (prev) == CODE_LABEL)
{
edge e;
for (e = bb->pred; e; e = e->pred_next)
- if (EDGE_FREQUENCY (e) && e->src->index > 0
+ if (EDGE_FREQUENCY (e) && e->src->index >= 0
&& !(e->flags & EDGE_FALLTHRU))
insert = 1;
}
if (!insert)
{
- prev = prev_real_insn (ret);
+ prev = prev_active_insn (ret);
if (prev && GET_CODE (prev) == JUMP_INSN
&& any_condjump_p (prev))
insert = 1;
+ /* Empty functions get branch misspredict even when the jump destination
+ is not visible to us. */
+ if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
+ insert = 1;
}
if (insert)
emit_insn_before (gen_nop (), ret);
}
}
+/* Return nonzero when QImode register that must be represented via REX prefix
+ is used. */
+bool
+x86_extended_QIreg_mentioned_p (insn)
+ rtx insn;
+{
+ int i;
+ extract_insn_cached (insn);
+ for (i = 0; i < recog_data.n_operands; i++)
+ if (REG_P (recog_data.operand[i])
+ && REGNO (recog_data.operand[i]) >= 4)
+ return true;
+ return false;
+}
+
+/* Return nonzero when P points to register encoded via REX prefix.
+ Called via for_each_rtx. */
+static int
+extended_reg_mentioned_1 (p, data)
+ rtx *p;
+ void *data ATTRIBUTE_UNUSED;
+{
+ unsigned int regno;
+ if (!REG_P (*p))
+ return 0;
+ regno = REGNO (*p);
+ return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
+}
+
+/* Return true when INSN mentions register that must be encoded using REX
+ prefix. */
+bool
+x86_extended_reg_mentioned_p (insn)
+ rtx insn;
+{
+ return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
+}
+
#include "gt-i386.h"
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 493a2b5bf9c..c1f40dc209c 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -41,9 +41,11 @@ struct processor_costs {
const int lea; /* cost of a lea instruction */
const int shift_var; /* variable shift costs */
const int shift_const; /* constant shift costs */
- const int mult_init; /* cost of starting a multiply */
+ const int mult_init[5]; /* cost of starting a multiply
+ in QImode, HImode, SImode, DImode, TImode*/
const int mult_bit; /* cost of multiply per each bit set */
- const int divide; /* cost of a divide/mod */
+ const int divide[5]; /* cost of a divide/mod
+ in QImode, HImode, SImode, DImode, TImode*/
int movsx; /* The cost of movsx operation. */
int movzx; /* The cost of movzx operation. */
const int large_insn; /* insns larger than this cost more */
@@ -75,6 +77,7 @@ struct processor_costs {
const int prefetch_block; /* bytes moved to cache for prefetch. */
const int simultaneous_prefetches; /* number of parallel prefetch
operations. */
+ const int branch_cost; /* Default value for BRANCH_COST. */
const int fadd; /* cost of FADD and FSUB instructions. */
const int fmul; /* cost of FMUL instruction. */
const int fdiv; /* cost of FDIV instruction. */
@@ -118,8 +121,9 @@ extern int target_flags;
#define MASK_3DNOW_A 0x00020000 /* Support Athlon 3Dnow builtins */
#define MASK_128BIT_LONG_DOUBLE 0x00040000 /* long double size is 128bit */
#define MASK_64BIT 0x00080000 /* Produce 64bit code */
+#define MASK_MS_BITFIELD_LAYOUT 0x00100000 /* Use native (MS) bitfield layout */
-/* Unused: 0x03f0000 */
+/* Unused: 0x03e0000 */
/* ... overlap with subtarget options starts by 0x04000000. */
#define MASK_NO_RED_ZONE 0x04000000 /* Do not use red zone */
@@ -204,6 +208,8 @@ extern int target_flags;
#define TARGET_K6 (ix86_cpu == PROCESSOR_K6)
#define TARGET_ATHLON (ix86_cpu == PROCESSOR_ATHLON)
#define TARGET_PENTIUM4 (ix86_cpu == PROCESSOR_PENTIUM4)
+#define TARGET_K8 (ix86_cpu == PROCESSOR_K8)
+#define TARGET_ATHLON_K8 (TARGET_K8 || TARGET_ATHLON)
#define CPUMASK (1 << ix86_cpu)
extern const int x86_use_leave, x86_push_memory, x86_zero_extend_with_and;
@@ -221,6 +227,9 @@ extern const int x86_partial_reg_dependency, x86_memory_mismatch_stall;
extern const int x86_accumulate_outgoing_args, x86_prologue_using_move;
extern const int x86_epilogue_using_move, x86_decompose_lea;
extern const int x86_arch_always_fancy_math_387, x86_shift1;
+extern const int x86_sse_partial_reg_dependency, x86_sse_partial_regs;
+extern const int x86_sse_typeless_stores, x86_sse_load0_by_pxor;
+extern const int x86_use_ffreep, x86_sse_partial_regs_for_cvtsd2ss;
extern int x86_prefetch_sse;
#define TARGET_USE_LEAVE (x86_use_leave & CPUMASK)
@@ -257,12 +266,22 @@ extern int x86_prefetch_sse;
#define TARGET_SUB_ESP_8 (x86_sub_esp_8 & CPUMASK)
#define TARGET_INTEGER_DFMODE_MOVES (x86_integer_DFmode_moves & CPUMASK)
#define TARGET_PARTIAL_REG_DEPENDENCY (x86_partial_reg_dependency & CPUMASK)
+#define TARGET_SSE_PARTIAL_REG_DEPENDENCY \
+ (x86_sse_partial_reg_dependency & CPUMASK)
+#define TARGET_SSE_PARTIAL_REGS (x86_sse_partial_regs & CPUMASK)
+#define TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS \
+ (x86_sse_partial_regs_for_cvtsd2ss & CPUMASK)
+#define TARGET_SSE_TYPELESS_STORES (x86_sse_typeless_stores & CPUMASK)
+#define TARGET_SSE_TYPELESS_LOAD0 (x86_sse_typeless_load0 & CPUMASK)
+#define TARGET_SSE_LOAD0_BY_PXOR (x86_sse_load0_by_pxor & CPUMASK)
#define TARGET_MEMORY_MISMATCH_STALL (x86_memory_mismatch_stall & CPUMASK)
#define TARGET_PROLOGUE_USING_MOVE (x86_prologue_using_move & CPUMASK)
#define TARGET_EPILOGUE_USING_MOVE (x86_epilogue_using_move & CPUMASK)
#define TARGET_DECOMPOSE_LEA (x86_decompose_lea & CPUMASK)
#define TARGET_PREFETCH_SSE (x86_prefetch_sse)
#define TARGET_SHIFT1 (x86_shift1 & CPUMASK)
+#define TARGET_USE_FFREEP (x86_use_ffreep & CPUMASK)
+#define TARGET_REP_MOVL_OPTIMAL (x86_rep_movl_optimal & CPUMASK)
#define TARGET_STACK_PROBE (target_flags & MASK_STACK_PROBE)
@@ -282,6 +301,8 @@ extern int x86_prefetch_sse;
#define TARGET_RED_ZONE (!(target_flags & MASK_NO_RED_ZONE))
+#define TARGET_USE_MS_BITFIELD_LAYOUT (target_flags & MASK_MS_BITFIELD_LAYOUT)
+
#define TARGET_GNU_TLS (ix86_tls_dialect == TLS_DIALECT_GNU)
#define TARGET_SUN_TLS (ix86_tls_dialect == TLS_DIALECT_SUN)
@@ -374,6 +395,10 @@ extern int x86_prefetch_sse;
N_("Generate 64bit x86-64 code") }, \
{ "32", -MASK_64BIT, \
N_("Generate 32bit i386 code") }, \
+ { "ms-bitfields", MASK_MS_BITFIELD_LAYOUT, \
+ N_("Use native (MS) bitfield layout") }, \
+ { "no-ms-bitfields", -MASK_MS_BITFIELD_LAYOUT, \
+ N_("Use gcc default bitfield layout") }, \
{ "red-zone", -MASK_NO_RED_ZONE, \
N_("Use red-zone in the x86-64 code") }, \
{ "no-red-zone", MASK_NO_RED_ZONE, \
@@ -540,6 +565,8 @@ extern int x86_prefetch_sse;
if (last_cpu_char != 'n') \
builtin_define ("__tune_athlon_sse__"); \
} \
+ else if (TARGET_K8) \
+ builtin_define ("__tune_k8__"); \
else if (TARGET_PENTIUM4) \
builtin_define ("__tune_pentium4__"); \
\
@@ -598,6 +625,11 @@ extern int x86_prefetch_sse;
if (last_arch_char != 'n') \
builtin_define ("__athlon_sse__"); \
} \
+ else if (ix86_arch == PROCESSOR_K8) \
+ { \
+ builtin_define ("__k8"); \
+ builtin_define ("__k8__"); \
+ } \
else if (ix86_arch == PROCESSOR_PENTIUM4) \
{ \
builtin_define ("__pentium4"); \
@@ -619,11 +651,12 @@ extern int x86_prefetch_sse;
#define TARGET_CPU_DEFAULT_k6_3 10
#define TARGET_CPU_DEFAULT_athlon 11
#define TARGET_CPU_DEFAULT_athlon_sse 12
+#define TARGET_CPU_DEFAULT_k8 13
#define TARGET_CPU_DEFAULT_NAMES {"i386", "i486", "pentium", "pentium-mmx",\
"pentiumpro", "pentium2", "pentium3", \
"pentium4", "k6", "k6-2", "k6-3",\
- "athlon", "athlon-4"}
+ "athlon", "athlon-4", "k8"}
#ifndef CC1_SPEC
#define CC1_SPEC "%(cc1_cpu) "
@@ -1335,6 +1368,9 @@ enum reg_class
(((N) >= FIRST_SSE_REG && (N) <= LAST_SSE_REG) \
|| ((N) >= FIRST_REX_SSE_REG && (N) <= LAST_REX_SSE_REG))
+#define REX_SSE_REGNO_P(N) \
+ ((N) >= FIRST_REX_SSE_REG && (N) <= LAST_REX_SSE_REG)
+
#define SSE_REGNO(N) \
((N) < 8 ? FIRST_SSE_REG + (N) : FIRST_REX_SSE_REG + (N) - 8)
#define SSE_REG_P(N) (REG_P (N) && SSE_REGNO_P (REGNO (N)))
@@ -1716,18 +1752,6 @@ typedef struct ix86_args {
#define FUNCTION_ARG_PARTIAL_NREGS(CUM, MODE, TYPE, NAMED) 0
-/* If PIC, we cannot make sibling calls to global functions
- because the PLT requires %ebx live.
- If we are returning floats on the register stack, we cannot make
- sibling calls to functions that return floats. (The stack adjust
- instruction will wind up after the sibcall jump, and not be executed.) */
-#define FUNCTION_OK_FOR_SIBCALL(DECL) \
- ((DECL) \
- && (! flag_pic || ! TREE_PUBLIC (DECL)) \
- && (! TARGET_FLOAT_RETURNS_IN_80387 \
- || ! FLOAT_MODE_P (TYPE_MODE (TREE_TYPE (TREE_TYPE (DECL)))) \
- || FLOAT_MODE_P (TYPE_MODE (TREE_TYPE (TREE_TYPE (cfun->decl))))))
-
/* Perform any needed actions needed for a function that is receiving a
variable number of arguments.
@@ -1839,12 +1863,6 @@ typedef struct ix86_args {
/* Addressing modes, and classification of registers for them. */
-/* #define HAVE_POST_INCREMENT 0 */
-/* #define HAVE_POST_DECREMENT 0 */
-
-/* #define HAVE_PRE_DECREMENT 0 */
-/* #define HAVE_PRE_INCREMENT 0 */
-
/* Macros to check register numbers against specific register classes. */
/* These assume that REGNO is a hard or pseudo reg number.
@@ -2602,6 +2620,14 @@ do { \
#define TOPLEVEL_COSTS_N_INSNS(N) \
do { total = COSTS_N_INSNS (N); goto egress_rtx_costs; } while (0)
+/* Return index of given mode in mult and division cost tables. */
+#define MODE_INDEX(mode) \
+ ((mode) == QImode ? 0 \
+ : (mode) == HImode ? 1 \
+ : (mode) == SImode ? 2 \
+ : (mode) == DImode ? 3 \
+ : 4)
+
/* Like `CONST_COSTS' but applies to nonconstant RTL expressions.
This can be used, for example, to indicate how costly a multiply
instruction is. In writing this macro, you can use the construct
@@ -2687,10 +2713,12 @@ do { \
} \
\
TOPLEVEL_COSTS_N_INSNS (ix86_cost->mult_init \
+ [MODE_INDEX (GET_MODE (X))] \
+ nbits * ix86_cost->mult_bit); \
} \
else /* This is arbitrary */ \
TOPLEVEL_COSTS_N_INSNS (ix86_cost->mult_init \
+ [MODE_INDEX (GET_MODE (X))] \
+ 7 * ix86_cost->mult_bit); \
\
case DIV: \
@@ -2700,7 +2728,8 @@ do { \
if (FLOAT_MODE_P (GET_MODE (X))) \
TOPLEVEL_COSTS_N_INSNS (ix86_cost->fdiv); \
else \
- TOPLEVEL_COSTS_N_INSNS (ix86_cost->divide); \
+ TOPLEVEL_COSTS_N_INSNS (ix86_cost->divide \
+ [MODE_INDEX (GET_MODE (X))]); \
break; \
\
case PLUS: \
@@ -3040,14 +3069,6 @@ extern int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER];
? ((GLOBAL) ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | DW_EH_PE_sdata4\
: DW_EH_PE_absptr)
-/* Store in OUTPUT a string (made with alloca) containing
- an assembler-name for a local static variable named NAME.
- LABELNO is an integer which is different for each call. */
-
-#define ASM_FORMAT_PRIVATE_NAME(OUTPUT, NAME, LABELNO) \
-( (OUTPUT) = (char *) alloca (strlen ((NAME)) + 10), \
- sprintf ((OUTPUT), "%s.%d", (NAME), (LABELNO)))
-
/* This is how to output an insn to push a register on the stack.
It need not be very fast code. */
@@ -3241,6 +3262,7 @@ do { \
LABEL_REF, SUBREG, REG, MEM}}, \
{"pic_symbolic_operand", {CONST}}, \
{"call_insn_operand", {REG, SUBREG, MEM, SYMBOL_REF}}, \
+ {"sibcall_insn_operand", {REG, SUBREG, SYMBOL_REF}}, \
{"constant_call_address_operand", {SYMBOL_REF, CONST}}, \
{"const0_operand", {CONST_INT, CONST_DOUBLE}}, \
{"const1_operand", {CONST_INT}}, \
@@ -3252,6 +3274,7 @@ do { \
SYMBOL_REF, LABEL_REF, SUBREG, REG, MEM}}, \
{"nonmemory_no_elim_operand", {CONST_INT, REG, SUBREG}}, \
{"index_register_operand", {SUBREG, REG}}, \
+ {"flags_reg_operand", {REG}}, \
{"q_regs_operand", {SUBREG, REG}}, \
{"non_q_regs_operand", {SUBREG, REG}}, \
{"fcmov_comparison_operator", {EQ, NE, LTU, GTU, LEU, GEU, UNORDERED, \
@@ -3286,6 +3309,7 @@ do { \
{"register_and_not_any_fp_reg_operand", {REG}}, \
{"fp_register_operand", {REG}}, \
{"register_and_not_fp_reg_operand", {REG}}, \
+ {"zero_extended_scalar_load_operand", {MEM}}, \
/* A list of predicates that do special things with modes, and so
should not elicit warnings for VOIDmode match_operand. */
@@ -3305,6 +3329,7 @@ enum processor_type
PROCESSOR_K6,
PROCESSOR_ATHLON,
PROCESSOR_PENTIUM4,
+ PROCESSOR_K8,
PROCESSOR_max
};
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index befbfe49569..d625f586d46 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -132,7 +132,7 @@
;; Processor type. This attribute must exactly match the processor_type
;; enumeration in i386.h.
-(define_attr "cpu" "i386,i486,pentium,pentiumpro,k6,athlon,pentium4"
+(define_attr "cpu" "i386,i486,pentium,pentiumpro,k6,athlon,pentium4,k8"
(const (symbol_ref "ix86_cpu")))
;; A basic instruction type. Refinements due to arguments to be
@@ -142,7 +142,7 @@
alu,alu1,negnot,imov,imovx,lea,
incdec,ishift,ishift1,rotate,rotate1,imul,idiv,
icmp,test,ibr,setcc,icmov,
- push,pop,call,callv,
+ push,pop,call,callv,leave,
str,cld,
fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,
sselog,sseiadd,sseishft,sseimul,
@@ -152,7 +152,7 @@
;; Main data type used by the insn
(define_attr "mode"
- "unknown,none,QI,HI,SI,DI,unknownfp,SF,DF,XF,TI,V4SF,V2DF,V2SF"
+ "unknown,none,QI,HI,SI,DI,SF,DF,XF,TI,V4SF,V2DF,V2SF"
(const_string "unknown"))
;; The CPU unit operations uses.
@@ -170,7 +170,7 @@
;; The (bounding maximum) length of an instruction immediate.
(define_attr "length_immediate" ""
- (cond [(eq_attr "type" "incdec,setcc,icmov,str,cld,lea,other,multi,idiv")
+ (cond [(eq_attr "type" "incdec,setcc,icmov,str,cld,lea,other,multi,idiv,leave")
(const_int 0)
(eq_attr "unit" "i387,sse,mmx")
(const_int 0)
@@ -232,9 +232,24 @@
(const_int 1)
(const_int 0)))
+;; Set when 0f opcode prefix is used.
+(define_attr "prefix_rex" ""
+ (cond [(and (eq_attr "mode" "DI")
+ (eq_attr "type" "!push,pop,call,callv,leave,ibr"))
+ (const_int 1)
+ (and (eq_attr "mode" "QI")
+ (ne (symbol_ref "x86_extended_QIreg_mentioned_p (insn)")
+ (const_int 0)))
+ (const_int 1)
+ (ne (symbol_ref "x86_extended_reg_mentioned_p (insn)")
+ (const_int 0))
+ (const_int 1)
+ ]
+ (const_int 0)))
+
;; Set when modrm byte is used.
(define_attr "modrm" ""
- (cond [(eq_attr "type" "str,cld")
+ (cond [(eq_attr "type" "str,cld,leave")
(const_int 0)
(eq_attr "unit" "i387")
(const_int 0)
@@ -273,7 +288,8 @@
(attr "length_address")))]
(plus (plus (attr "modrm")
(plus (attr "prefix_0f")
- (const_int 1)))
+ (plus (attr "prefix_rex")
+ (const_int 1))))
(plus (attr "prefix_rep")
(plus (attr "prefix_data16")
(plus (attr "length_immediate")
@@ -288,7 +304,7 @@
(const_string "unknown")
(eq_attr "type" "lea,fcmov,fpspc,cld")
(const_string "none")
- (eq_attr "type" "fistp")
+ (eq_attr "type" "fistp,leave")
(const_string "both")
(eq_attr "type" "push")
(if_then_else (match_operand 1 "memory_operand" "")
@@ -754,7 +770,13 @@
return "ftst\;fnstsw\t%0";
}
[(set_attr "type" "multi")
- (set_attr "mode" "unknownfp")])
+ (set (attr "mode")
+ (cond [(match_operand:SF 1 "" "")
+ (const_string "SF")
+ (match_operand:DF 1 "" "")
+ (const_string "DF")
+ ]
+ (const_string "XF")))])
;; We may not use "#" to split and emit these, since the REG_DEAD notes
;; used to manage the reg stack popping would not be preserved.
@@ -857,7 +879,13 @@
&& GET_MODE (operands[0]) == GET_MODE (operands[1])"
"* return output_fp_compare (insn, operands, 0, 1);"
[(set_attr "type" "fcmp")
- (set_attr "mode" "unknownfp")])
+ (set (attr "mode")
+ (cond [(match_operand:SF 1 "" "")
+ (const_string "SF")
+ (match_operand:DF 1 "" "")
+ (const_string "DF")
+ ]
+ (const_string "XF")))])
(define_insn "*cmpfp_2u_1"
[(set (match_operand:HI 0 "register_operand" "=a")
@@ -871,7 +899,13 @@
&& GET_MODE (operands[1]) == GET_MODE (operands[2])"
"* return output_fp_compare (insn, operands, 2, 1);"
[(set_attr "type" "multi")
- (set_attr "mode" "unknownfp")])
+ (set (attr "mode")
+ (cond [(match_operand:SF 1 "" "")
+ (const_string "SF")
+ (match_operand:DF 1 "" "")
+ (const_string "DF")
+ ]
+ (const_string "XF")))])
;; Patterns to match the SImode-in-memory ficom instructions.
;;
@@ -911,7 +945,7 @@
;; FP compares, step 2
;; Move the fpsw to ax.
-(define_insn "x86_fnstsw_1"
+(define_insn "*x86_fnstsw_1"
[(set (match_operand:HI 0 "register_operand" "=a")
(unspec:HI [(reg 18)] UNSPEC_FNSTSW))]
"TARGET_80387"
@@ -946,7 +980,13 @@
&& GET_MODE (operands[0]) == GET_MODE (operands[0])"
"* return output_fp_compare (insn, operands, 1, 0);"
[(set_attr "type" "fcmp")
- (set_attr "mode" "unknownfp")
+ (set (attr "mode")
+ (cond [(match_operand:SF 1 "" "")
+ (const_string "SF")
+ (match_operand:DF 1 "" "")
+ (const_string "DF")
+ ]
+ (const_string "XF")))
(set_attr "athlon_decode" "vector")])
(define_insn "*cmpfp_i_sse"
@@ -958,7 +998,10 @@
&& GET_MODE (operands[0]) == GET_MODE (operands[0])"
"* return output_fp_compare (insn, operands, 1, 0);"
[(set_attr "type" "fcmp,ssecmp")
- (set_attr "mode" "unknownfp")
+ (set (attr "mode")
+ (if_then_else (match_operand:SF 1 "" "")
+ (const_string "SF")
+ (const_string "DF")))
(set_attr "athlon_decode" "vector")])
(define_insn "*cmpfp_i_sse_only"
@@ -969,7 +1012,10 @@
&& GET_MODE (operands[0]) == GET_MODE (operands[0])"
"* return output_fp_compare (insn, operands, 1, 0);"
[(set_attr "type" "ssecmp")
- (set_attr "mode" "unknownfp")
+ (set (attr "mode")
+ (if_then_else (match_operand:SF 1 "" "")
+ (const_string "SF")
+ (const_string "DF")))
(set_attr "athlon_decode" "vector")])
(define_insn "*cmpfp_iu"
@@ -982,7 +1028,13 @@
&& GET_MODE (operands[0]) == GET_MODE (operands[1])"
"* return output_fp_compare (insn, operands, 1, 1);"
[(set_attr "type" "fcmp")
- (set_attr "mode" "unknownfp")
+ (set (attr "mode")
+ (cond [(match_operand:SF 1 "" "")
+ (const_string "SF")
+ (match_operand:DF 1 "" "")
+ (const_string "DF")
+ ]
+ (const_string "XF")))
(set_attr "athlon_decode" "vector")])
(define_insn "*cmpfp_iu_sse"
@@ -994,7 +1046,10 @@
&& GET_MODE (operands[0]) == GET_MODE (operands[1])"
"* return output_fp_compare (insn, operands, 1, 1);"
[(set_attr "type" "fcmp,ssecmp")
- (set_attr "mode" "unknownfp")
+ (set (attr "mode")
+ (if_then_else (match_operand:SF 1 "" "")
+ (const_string "SF")
+ (const_string "DF")))
(set_attr "athlon_decode" "vector")])
(define_insn "*cmpfp_iu_sse_only"
@@ -1005,7 +1060,10 @@
&& GET_MODE (operands[0]) == GET_MODE (operands[1])"
"* return output_fp_compare (insn, operands, 1, 1);"
[(set_attr "type" "ssecmp")
- (set_attr "mode" "unknownfp")
+ (set (attr "mode")
+ (if_then_else (match_operand:SF 1 "" "")
+ (const_string "SF")
+ (const_string "DF")))
(set_attr "athlon_decode" "vector")])
;; Move instructions.
@@ -2011,22 +2069,11 @@
{
switch (which_alternative)
{
- case 0:
- /* %%% We loose REG_DEAD notes for controling pops if we split late. */
- operands[0] = gen_rtx_MEM (SFmode, stack_pointer_rtx);
- operands[2] = stack_pointer_rtx;
- operands[3] = GEN_INT (4);
- if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
- return "sub{l}\t{%3, %2|%2, %3}\;fstp%z0\t%y0";
- else
- return "sub{l}\t{%3, %2|%2, %3}\;fst%z0\t%y0";
-
case 1:
return "push{l}\t%1";
- case 2:
- return "#";
default:
+ /* This insn should be already splitted before reg-stack. */
abort ();
}
}
@@ -2040,23 +2087,11 @@
{
switch (which_alternative)
{
- case 0:
- /* %%% We loose REG_DEAD notes for controling pops if we split late. */
- operands[0] = gen_rtx_MEM (SFmode, stack_pointer_rtx);
- operands[2] = stack_pointer_rtx;
- operands[3] = GEN_INT (8);
- if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
- return "sub{q}\t{%3, %2|%2, %3}\;fstp%z0\t%y0";
- else
- return "sub{q}\t{%3, %2|%2, %3}\;fst%z0\t%y0";
-
case 1:
return "push{q}\t%q1";
- case 2:
- return "#";
-
default:
+ /* This insn should be already splitted before reg-stack. */
abort ();
}
}
@@ -2104,7 +2139,12 @@
case 0:
if (REG_P (operands[1])
&& find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
- return "fstp\t%y0";
+ {
+ if (REGNO (operands[0]) == FIRST_STACK_REG
+ && TARGET_USE_FFREEP)
+ return "ffreep\t%y0";
+ return "fstp\t%y0";
+ }
else if (STACK_TOP_P (operands[0]))
return "fld%z1\t%y1";
else
@@ -2130,12 +2170,12 @@
case 4:
return "mov{l}\t{%1, %0|%0, %1}";
case 5:
- if (TARGET_SSE2 && !TARGET_ATHLON)
+ if (get_attr_mode (insn) == MODE_TI)
return "pxor\t%0, %0";
else
return "xorps\t%0, %0";
case 6:
- if (TARGET_PARTIAL_REG_DEPENDENCY)
+ if (get_attr_mode (insn) == MODE_V4SF)
return "movaps\t{%1, %0|%0, %1}";
else
return "movss\t{%1, %0|%0, %1}";
@@ -2155,7 +2195,40 @@
}
}
[(set_attr "type" "fmov,fmov,fmov,imov,imov,ssemov,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov")
- (set_attr "mode" "SF,SF,SF,SI,SI,TI,SF,SF,SF,SI,SI,DI")])
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "3,4,9,10")
+ (const_string "SI")
+ (eq_attr "alternative" "5")
+ (if_then_else
+ (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
+ (const_int 0))
+ (ne (symbol_ref "TARGET_SSE2")
+ (const_int 0)))
+ (eq (symbol_ref "optimize_size")
+ (const_int 0)))
+ (const_string "TI")
+ (const_string "V4SF"))
+ /* For architectures resolving dependencies on
+ whole SSE registers use APS move to break dependency
+ chains, otherwise use short move to avoid extra work.
+
+ Do the same for architectures resolving dependencies on
+ the parts. While in DF mode it is better to always handle
+ just register parts, the SF mode is different due to lack
+ of instructions to load just part of the register. It is
+ better to maintain the whole registers in single format
+ to avoid problems on using packed logical operations. */
+ (eq_attr "alternative" "6")
+ (if_then_else
+ (ior (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+ (const_int 0))
+ (ne (symbol_ref "TARGET_SSE_PARTIAL_REGS")
+ (const_int 0)))
+ (const_string "V4SF")
+ (const_string "SF"))
+ (eq_attr "alternative" "11")
+ (const_string "DI")]
+ (const_string "SF")))])
(define_insn "*swapsf"
[(set (match_operand:SF 0 "register_operand" "+f")
@@ -2188,26 +2261,8 @@
(match_operand:DF 1 "general_no_elim_operand" "f#Y,Fo#fY,*r#fY,Y#f"))]
"!TARGET_64BIT && !TARGET_INTEGER_DFMODE_MOVES"
{
- switch (which_alternative)
- {
- case 0:
- /* %%% We loose REG_DEAD notes for controling pops if we split late. */
- operands[0] = gen_rtx_MEM (DFmode, stack_pointer_rtx);
- operands[2] = stack_pointer_rtx;
- operands[3] = GEN_INT (8);
- if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
- return "sub{l}\t{%3, %2|%2, %3}\;fstp%z0\t%y0";
- else
- return "sub{l}\t{%3, %2|%2, %3}\;fst%z0\t%y0";
-
- case 1:
- case 2:
- case 3:
- return "#";
-
- default:
- abort ();
- }
+ /* This insn should be already splitted before reg-stack. */
+ abort ();
}
[(set_attr "type" "multi")
(set_attr "mode" "DF,SI,SI,DF")])
@@ -2217,32 +2272,8 @@
(match_operand:DF 1 "general_no_elim_operand" "f#rY,rFo#fY,Y#rf"))]
"TARGET_64BIT || TARGET_INTEGER_DFMODE_MOVES"
{
- switch (which_alternative)
- {
- case 0:
- /* %%% We loose REG_DEAD notes for controling pops if we split late. */
- operands[0] = gen_rtx_MEM (DFmode, stack_pointer_rtx);
- operands[2] = stack_pointer_rtx;
- operands[3] = GEN_INT (8);
- if (TARGET_64BIT)
- if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
- return "sub{q}\t{%3, %2|%2, %3}\;fstp%z0\t%y0";
- else
- return "sub{q}\t{%3, %2|%2, %3}\;fst%z0\t%y0";
- else
- if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
- return "sub{l}\t{%3, %2|%2, %3}\;fstp%z0\t%y0";
- else
- return "sub{l}\t{%3, %2|%2, %3}\;fst%z0\t%y0";
-
-
- case 1:
- case 2:
- return "#";
-
- default:
- abort ();
- }
+ /* This insn should be already splitted before reg-stack. */
+ abort ();
}
[(set_attr "type" "multi")
(set_attr "mode" "DF,SI,DF")])
@@ -2279,7 +2310,7 @@
[(set (match_operand:DF 0 "nonimmediate_operand" "=f#Y,m,f#Y,*r,o,Y#f,Y#f,Y#f,m")
(match_operand:DF 1 "general_operand" "fm#Y,f#Y,G,*roF,F*r,C,Y#f,YHm#f,Y#f"))]
"(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
- && (optimize_size || !TARGET_INTEGER_DFMODE_MOVES)
+ && ((optimize_size || !TARGET_INTEGER_DFMODE_MOVES) && !TARGET_64BIT)
&& (reload_in_progress || reload_completed
|| (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
|| GET_CODE (operands[1]) != CONST_DOUBLE
@@ -2290,7 +2321,12 @@
case 0:
if (REG_P (operands[1])
&& find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
- return "fstp\t%y0";
+ {
+ if (REGNO (operands[0]) == FIRST_STACK_REG
+ && TARGET_USE_FFREEP)
+ return "ffreep\t%y0";
+ return "fstp\t%y0";
+ }
else if (STACK_TOP_P (operands[0]))
return "fld%z1\t%y1";
else
@@ -2316,31 +2352,84 @@
case 4:
return "#";
case 5:
- if (TARGET_ATHLON)
- return "xorpd\t%0, %0";
- else
- return "pxor\t%0, %0";
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ return "xorps\t%0, %0";
+ case MODE_V2DF:
+ return "xorpd\t%0, %0";
+ case MODE_TI:
+ return "pxor\t%0, %0";
+ default:
+ abort ();
+ }
case 6:
- if (TARGET_PARTIAL_REG_DEPENDENCY)
- return "movapd\t{%1, %0|%0, %1}";
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ return "movaps\t{%1, %0|%0, %1}";
+ case MODE_V2DF:
+ return "movapd\t{%1, %0|%0, %1}";
+ case MODE_DF:
+ return "movsd\t{%1, %0|%0, %1}";
+ default:
+ abort ();
+ }
+ case 7:
+ if (get_attr_mode (insn) == MODE_V2DF)
+ return "movlpd\t{%1, %0|%0, %1}";
else
return "movsd\t{%1, %0|%0, %1}";
- case 7:
case 8:
- return "movsd\t{%1, %0|%0, %1}";
+ return "movsd\t{%1, %0|%0, %1}";
default:
abort();
}
}
[(set_attr "type" "fmov,fmov,fmov,multi,multi,ssemov,ssemov,ssemov,ssemov")
- (set_attr "mode" "DF,DF,DF,SI,SI,TI,DF,DF,DF")])
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "3,4")
+ (const_string "SI")
+ /* xorps is one byte shorter. */
+ (eq_attr "alternative" "5")
+ (cond [(ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
+ (const_int 0))
+ (const_string "TI")]
+ (const_string "V2DF"))
+ /* For architectures resolving dependencies on
+ whole SSE registers use APD move to break dependency
+ chains, otherwise use short move to avoid extra work.
+
+ movaps encodes one byte shorter. */
+ (eq_attr "alternative" "6")
+ (cond
+ [(ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+ (const_int 0))
+ (const_string "V2DF")]
+ (const_string "DF"))
+ /* For achitectures resolving dependencies on register
+ parts we may avoid extra work to zero out upper part
+ of register. */
+ (eq_attr "alternative" "7")
+ (if_then_else
+ (ne (symbol_ref "TARGET_SSE_PARTIAL_REGS")
+ (const_int 0))
+ (const_string "V2DF")
+ (const_string "DF"))]
+ (const_string "DF")))])
(define_insn "*movdf_integer"
[(set (match_operand:DF 0 "nonimmediate_operand" "=f#Yr,m,f#Yr,r#Yf,o,Y#rf,Y#rf,Y#rf,m")
(match_operand:DF 1 "general_operand" "fm#Yr,f#Yr,G,roF#Yf,Fr#Yf,C,Y#rf,Ym#rf,Y#rf"))]
"(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
- && !optimize_size && TARGET_INTEGER_DFMODE_MOVES
+ && ((!optimize_size && TARGET_INTEGER_DFMODE_MOVES) || TARGET_64BIT)
&& (reload_in_progress || reload_completed
|| (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
|| GET_CODE (operands[1]) != CONST_DOUBLE
@@ -2351,7 +2440,12 @@
case 0:
if (REG_P (operands[1])
&& find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
- return "fstp\t%y0";
+ {
+ if (REGNO (operands[0]) == FIRST_STACK_REG
+ && TARGET_USE_FFREEP)
+ return "ffreep\t%y0";
+ return "fstp\t%y0";
+ }
else if (STACK_TOP_P (operands[0]))
return "fld%z1\t%y1";
else
@@ -2378,16 +2472,34 @@
return "#";
case 5:
- if (TARGET_ATHLON)
- return "xorpd\t%0, %0";
- else
- return "pxor\t%0, %0";
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ return "xorps\t%0, %0";
+ case MODE_V2DF:
+ return "xorpd\t%0, %0";
+ case MODE_TI:
+ return "pxor\t%0, %0";
+ default:
+ abort ();
+ }
case 6:
- if (TARGET_PARTIAL_REG_DEPENDENCY)
- return "movapd\t{%1, %0|%0, %1}";
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ return "movaps\t{%1, %0|%0, %1}";
+ case MODE_V2DF:
+ return "movapd\t{%1, %0|%0, %1}";
+ case MODE_DF:
+ return "movsd\t{%1, %0|%0, %1}";
+ default:
+ abort ();
+ }
+ case 7:
+ if (get_attr_mode (insn) == MODE_V2DF)
+ return "movlpd\t{%1, %0|%0, %1}";
else
return "movsd\t{%1, %0|%0, %1}";
- case 7:
case 8:
return "movsd\t{%1, %0|%0, %1}";
@@ -2396,7 +2508,42 @@
}
}
[(set_attr "type" "fmov,fmov,fmov,multi,multi,ssemov,ssemov,ssemov,ssemov")
- (set_attr "mode" "DF,DF,DF,SI,SI,TI,DF,DF,DF")])
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "3,4")
+ (const_string "SI")
+ /* xorps is one byte shorter. */
+ (eq_attr "alternative" "5")
+ (cond [(ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
+ (const_int 0))
+ (const_string "TI")]
+ (const_string "V2DF"))
+ /* For architectures resolving dependencies on
+ whole SSE registers use APD move to break dependency
+ chains, otherwise use short move to avoid extra work.
+
+ movaps encodes one byte shorter. */
+ (eq_attr "alternative" "6")
+ (cond
+ [(ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+ (const_int 0))
+ (const_string "V2DF")]
+ (const_string "DF"))
+ /* For achitectures resolving dependencies on register
+ parts we may avoid extra work to zero out upper part
+ of register. */
+ (eq_attr "alternative" "7")
+ (if_then_else
+ (ne (symbol_ref "TARGET_SSE_PARTIAL_REGS")
+ (const_int 0))
+ (const_string "V2DF")
+ (const_string "DF"))]
+ (const_string "DF")))])
(define_split
[(set (match_operand:DF 0 "nonimmediate_operand" "")
@@ -2451,25 +2598,8 @@
(match_operand:XF 1 "general_no_elim_operand" "f,Fo,*r"))]
"!TARGET_64BIT && optimize_size"
{
- switch (which_alternative)
- {
- case 0:
- /* %%% We loose REG_DEAD notes for controling pops if we split late. */
- operands[0] = gen_rtx_MEM (XFmode, stack_pointer_rtx);
- operands[2] = stack_pointer_rtx;
- operands[3] = GEN_INT (12);
- if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
- return "sub{l}\t{%3, %2|%2, %3}\;fstp%z0\t%y0";
- else
- return "sub{l}\t{%3, %2|%2, %3}\;fst%z0\t%y0";
-
- case 1:
- case 2:
- return "#";
-
- default:
- abort ();
- }
+ /* This insn should be already splitted before reg-stack. */
+ abort ();
}
[(set_attr "type" "multi")
(set_attr "mode" "XF,SI,SI")])
@@ -2479,25 +2609,8 @@
(match_operand:TF 1 "general_no_elim_operand" "f,Fo,*r"))]
"optimize_size"
{
- switch (which_alternative)
- {
- case 0:
- /* %%% We loose REG_DEAD notes for controling pops if we split late. */
- operands[0] = gen_rtx_MEM (XFmode, stack_pointer_rtx);
- operands[2] = stack_pointer_rtx;
- operands[3] = GEN_INT (16);
- if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
- return "sub{l}\t{%3, %2|%2, %3}\;fstp%z0\t%y0";
- else
- return "sub{l}\t{%3, %2|%2, %3}\;fst%z0\t%y0";
-
- case 1:
- case 2:
- return "#";
-
- default:
- abort ();
- }
+ /* This insn should be already splitted before reg-stack. */
+ abort ();
}
[(set_attr "type" "multi")
(set_attr "mode" "XF,SI,SI")])
@@ -2507,24 +2620,8 @@
(match_operand:XF 1 "general_no_elim_operand" "f#r,ro#f"))]
"!TARGET_64BIT && !optimize_size"
{
- switch (which_alternative)
- {
- case 0:
- /* %%% We loose REG_DEAD notes for controling pops if we split late. */
- operands[0] = gen_rtx_MEM (XFmode, stack_pointer_rtx);
- operands[2] = stack_pointer_rtx;
- operands[3] = GEN_INT (12);
- if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
- return "sub{l}\t{%3, %2|%2, %3}\;fstp%z0\t%y0";
- else
- return "sub{l}\t{%3, %2|%2, %3}\;fst%z0\t%y0";
-
- case 1:
- return "#";
-
- default:
- abort ();
- }
+ /* This insn should be already splitted before reg-stack. */
+ abort ();
}
[(set_attr "type" "multi")
(set_attr "mode" "XF,SI")])
@@ -2534,30 +2631,8 @@
(match_operand:TF 1 "general_no_elim_operand" "f#r,rFo#f"))]
"!optimize_size"
{
- switch (which_alternative)
- {
- case 0:
- /* %%% We loose REG_DEAD notes for controling pops if we split late. */
- operands[0] = gen_rtx_MEM (XFmode, stack_pointer_rtx);
- operands[2] = stack_pointer_rtx;
- operands[3] = GEN_INT (16);
- if (TARGET_64BIT)
- if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
- return "sub{q}\t{%3, %2|%2, %3}\;fstp%z0\t%y0";
- else
- return "sub{q}\t{%3, %2|%2, %3}\;fst%z0\t%y0";
- else
- if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
- return "sub{l}\t{%3, %2|%2, %3}\;fstp%z0\t%y0";
- else
- return "sub{l}\t{%3, %2|%2, %3}\;fst%z0\t%y0";
-
- case 1:
- return "#";
-
- default:
- abort ();
- }
+ /* This insn should be already splitted before reg-stack. */
+ abort ();
}
[(set_attr "type" "multi")
(set_attr "mode" "XF,SI")])
@@ -2610,7 +2685,12 @@
case 0:
if (REG_P (operands[1])
&& find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
- return "fstp\t%y0";
+ {
+ if (REGNO (operands[0]) == FIRST_STACK_REG
+ && TARGET_USE_FFREEP)
+ return "ffreep\t%y0";
+ return "fstp\t%y0";
+ }
else if (STACK_TOP_P (operands[0]))
return "fld%z1\t%y1";
else
@@ -2657,7 +2737,12 @@
case 0:
if (REG_P (operands[1])
&& find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
- return "fstp\t%y0";
+ {
+ if (REGNO (operands[0]) == FIRST_STACK_REG
+ && TARGET_USE_FFREEP)
+ return "ffreep\t%y0";
+ return "fstp\t%y0";
+ }
else if (STACK_TOP_P (operands[0]))
return "fld%z1\t%y1";
else
@@ -2704,7 +2789,12 @@
case 0:
if (REG_P (operands[1])
&& find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
- return "fstp\t%y0";
+ {
+ if (REGNO (operands[0]) == FIRST_STACK_REG
+ && TARGET_USE_FFREEP)
+ return "ffreep\t%y0";
+ return "fstp\t%y0";
+ }
else if (STACK_TOP_P (operands[0]))
return "fld%z1\t%y1";
else
@@ -2751,7 +2841,12 @@
case 0:
if (REG_P (operands[1])
&& find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
- return "fstp\t%y0";
+ {
+ if (REGNO (operands[0]) == FIRST_STACK_REG
+ && TARGET_USE_FFREEP)
+ return "ffreep\t%y0";
+ return "fstp\t%y0";
+ }
else if (STACK_TOP_P (operands[0]))
return "fld%z1\t%y1";
else
@@ -3699,11 +3794,11 @@
(set_attr "mode" "SF,SF,SF,SF")])
(define_insn "*truncdfsf2_1_sse"
- [(set (match_operand:SF 0 "nonimmediate_operand" "=*!m,?f#rx,?r#fx,?x#rf,Y")
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=*!m#fxr,?f#xr,?r#fx,?x#fr,Y#fr")
(float_truncate:SF
- (match_operand:DF 1 "nonimmediate_operand" "f,f,f,f,mY")))
+ (match_operand:DF 1 "nonimmediate_operand" "f#Y,f#Y,f#Y,f#Y,mY#f")))
(clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m,X"))]
- "TARGET_80387 && TARGET_SSE2"
+ "TARGET_80387 && TARGET_SSE2 && !TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS"
{
switch (which_alternative)
{
@@ -3713,7 +3808,30 @@
else
return "fst%z0\t%y0";
case 4:
- return "cvtsd2ss\t{%1, %0|%0, %1}";
+ return "#";
+ default:
+ abort ();
+ }
+}
+ [(set_attr "type" "fmov,multi,multi,multi,ssecvt")
+ (set_attr "mode" "SF,SF,SF,SF,DF")])
+
+(define_insn "*truncdfsf2_1_sse_nooverlap"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=*!m,?f#rx,?r#fx,?x#rf,&Y")
+ (float_truncate:SF
+ (match_operand:DF 1 "nonimmediate_operand" "f#Y,f#Y,f#Y,f#Y,mY#f")))
+ (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m,X"))]
+ "TARGET_80387 && TARGET_SSE2 && TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+ return "fstp%z0\t%y0";
+ else
+ return "fst%z0\t%y0";
+ case 4:
+ return "#";
default:
abort ();
}
@@ -3724,8 +3842,8 @@
(define_insn "*truncdfsf2_2"
[(set (match_operand:SF 0 "nonimmediate_operand" "=Y,!m")
(float_truncate:SF
- (match_operand:DF 1 "nonimmediate_operand" "mY,f")))]
- "TARGET_80387 && TARGET_SSE2
+ (match_operand:DF 1 "nonimmediate_operand" "mY,f#Y")))]
+ "TARGET_80387 && TARGET_SSE2 && !TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS
&& (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
{
switch (which_alternative)
@@ -3744,7 +3862,30 @@
[(set_attr "type" "ssecvt,fmov")
(set_attr "mode" "DF,SF")])
-(define_insn "truncdfsf2_3"
+(define_insn "*truncdfsf2_2_nooverlap"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=&Y,!m")
+ (float_truncate:SF
+ (match_operand:DF 1 "nonimmediate_operand" "mY,f")))]
+ "TARGET_80387 && TARGET_SSE2 && TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return "#";
+ case 1:
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+ return "fstp%z0\t%y0";
+ else
+ return "fst%z0\t%y0";
+ default:
+ abort ();
+ }
+}
+ [(set_attr "type" "ssecvt,fmov")
+ (set_attr "mode" "DF,SF")])
+
+(define_insn "*truncdfsf2_3"
[(set (match_operand:SF 0 "memory_operand" "=m")
(float_truncate:SF
(match_operand:DF 1 "register_operand" "f")))]
@@ -3762,11 +3903,20 @@
[(set (match_operand:SF 0 "register_operand" "=Y")
(float_truncate:SF
(match_operand:DF 1 "nonimmediate_operand" "mY")))]
- "!TARGET_80387 && TARGET_SSE2"
+ "!TARGET_80387 && TARGET_SSE2 && !TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS"
"cvtsd2ss\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
(set_attr "mode" "DF")])
+(define_insn "*truncdfsf2_sse_only_nooverlap"
+ [(set (match_operand:SF 0 "register_operand" "=&Y")
+ (float_truncate:SF
+ (match_operand:DF 1 "nonimmediate_operand" "mY")))]
+ "!TARGET_80387 && TARGET_SSE2 && TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS"
+ "#"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "DF")])
+
(define_split
[(set (match_operand:SF 0 "memory_operand" "")
(float_truncate:SF
@@ -3776,15 +3926,56 @@
[(set (match_dup 0) (float_truncate:SF (match_dup 1)))]
"")
+; Avoid possible reformating penalty on the destination by first
+; zeroing it out
(define_split
- [(set (match_operand:SF 0 "nonimmediate_operand" "")
+ [(set (match_operand:SF 0 "register_operand" "")
(float_truncate:SF
(match_operand:DF 1 "nonimmediate_operand" "")))
(clobber (match_operand 2 "" ""))]
"TARGET_80387 && reload_completed
- && !FP_REG_P (operands[0]) && !FP_REG_P (operands[1])"
- [(set (match_dup 0) (float_truncate:SF (match_dup 1)))]
- "")
+ && SSE_REG_P (operands[0])
+ && !STACK_REG_P (operands[1])"
+ [(const_int 0)]
+{
+ rtx src, dest;
+ if (!TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS)
+ emit_insn (gen_truncdfsf2_sse_only (operands[0], operands[1]));
+ else
+ {
+ dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
+ src = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0);
+ /* simplify_gen_subreg refuses to widen memory references. */
+ if (GET_CODE (src) == SUBREG)
+ alter_subreg (&src);
+ if (reg_overlap_mentioned_p (operands[0], operands[1]))
+ abort ();
+ emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode)));
+ emit_insn (gen_cvtsd2ss (dest, dest, src));
+ }
+ DONE;
+})
+
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (float_truncate:SF
+ (match_operand:DF 1 "nonimmediate_operand" "")))]
+ "TARGET_80387 && reload_completed
+ && SSE_REG_P (operands[0]) && TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS"
+ [(const_int 0)]
+{
+ rtx src, dest;
+ dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
+ src = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0);
+ /* simplify_gen_subreg refuses to widen memory references. */
+ if (GET_CODE (src) == SUBREG)
+ alter_subreg (&src);
+ if (reg_overlap_mentioned_p (operands[0], operands[1]))
+ abort ();
+ emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode)));
+ emit_insn (gen_cvtsd2ss (dest, dest, src));
+ DONE;
+})
(define_split
[(set (match_operand:SF 0 "register_operand" "")
@@ -4468,7 +4659,7 @@
"")
(define_insn "*floatsisf2_i387"
- [(set (match_operand:SF 0 "register_operand" "=f,?f,x")
+ [(set (match_operand:SF 0 "register_operand" "=f#x,?f#x,x#f")
(float:SF (match_operand:SI 1 "nonimmediate_operand" "m,r,mr")))]
"TARGET_80387 && (!TARGET_SSE || TARGET_MIX_SSE_I387)"
"@
@@ -4488,6 +4679,22 @@
(set_attr "mode" "SF")
(set_attr "fp_int_src" "true")])
+; Avoid possible reformating penalty on the destination by first
+; zeroing it out
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (float:SF (match_operand:SI 1 "nonimmediate_operand" "")))]
+ "TARGET_80387 && reload_completed && TARGET_SSE_PARTIAL_REGS
+ && SSE_REG_P (operands[0])"
+ [(const_int 0)]
+{
+ rtx dest;
+ dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
+ emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode)));
+ emit_insn (gen_cvtsi2ss (dest, dest, operands[1]));
+ DONE;
+})
+
(define_expand "floatdisf2"
[(set (match_operand:SF 0 "register_operand" "")
(float:SF (match_operand:DI 1 "nonimmediate_operand" "")))]
@@ -4506,7 +4713,7 @@
(set_attr "fp_int_src" "true")])
(define_insn "*floatdisf2_i387"
- [(set (match_operand:SF 0 "register_operand" "=f,?f,x")
+ [(set (match_operand:SF 0 "register_operand" "=f#x,?f#x,x#f")
(float:SF (match_operand:DI 1 "nonimmediate_operand" "m,r,mr")))]
"TARGET_64BIT && TARGET_80387 && (!TARGET_SSE || TARGET_MIX_SSE_I387)"
"@
@@ -4526,6 +4733,22 @@
(set_attr "mode" "SF")
(set_attr "fp_int_src" "true")])
+; Avoid possible reformating penalty on the destination by first
+; zeroing it out
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (float:SF (match_operand:DI 1 "nonimmediate_operand" "")))]
+ "TARGET_80387 && reload_completed && TARGET_SSE_PARTIAL_REGS
+ && SSE_REG_P (operands[0])"
+ [(const_int 0)]
+{
+ rtx dest;
+ dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
+ emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode)));
+ emit_insn (gen_cvtsi2ssq (dest, dest, operands[1]));
+ DONE;
+})
+
(define_insn "floathidf2"
[(set (match_operand:DF 0 "register_operand" "=f,f")
(float:DF (match_operand:HI 1 "nonimmediate_operand" "m,r")))]
@@ -4544,7 +4767,7 @@
"")
(define_insn "*floatsidf2_i387"
- [(set (match_operand:DF 0 "register_operand" "=f,?f,Y")
+ [(set (match_operand:DF 0 "register_operand" "=f#Y,?f#Y,Y#f")
(float:DF (match_operand:SI 1 "nonimmediate_operand" "m,r,mr")))]
"TARGET_80387 && (!TARGET_SSE2 || TARGET_MIX_SSE_I387)"
"@
@@ -4582,7 +4805,7 @@
(set_attr "fp_int_src" "true")])
(define_insn "*floatdidf2_i387"
- [(set (match_operand:DF 0 "register_operand" "=f,?f,Y")
+ [(set (match_operand:DF 0 "register_operand" "=f#Y,?f#Y,Y#f")
(float:DF (match_operand:DI 1 "nonimmediate_operand" "m,r,mr")))]
"TARGET_64BIT && TARGET_80387 && (!TARGET_SSE2 || TARGET_MIX_SSE_I387)"
"@
@@ -9269,12 +9492,15 @@
in register. */
rtx reg = gen_reg_rtx (SFmode);
rtx dest = operands[0];
+ rtx imm = gen_lowpart (SFmode, gen_int_mode (0x80000000, SImode));
operands[1] = force_reg (SFmode, operands[1]);
operands[0] = force_reg (SFmode, operands[0]);
- emit_move_insn (reg,
- gen_lowpart (SFmode,
- gen_int_mode (0x80000000, SImode)));
+ reg = force_reg (V4SFmode,
+ gen_rtx_CONST_VECTOR (V4SFmode,
+ gen_rtvec (4, imm, CONST0_RTX (SFmode),
+ CONST0_RTX (SFmode),
+ CONST0_RTX (SFmode))));
emit_insn (gen_negsf2_ifs (operands[0], operands[1], reg));
if (dest != operands[0])
emit_move_insn (dest, operands[0]);
@@ -9293,7 +9519,7 @@
(define_insn "negsf2_ifs"
[(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,x#fr,f#xr,rm#xf")
(neg:SF (match_operand:SF 1 "nonimmediate_operand" "0,x#fr,0,0")))
- (use (match_operand:SF 2 "nonmemory_operand" "x,0#x,*g#x,*g#x"))
+ (use (match_operand:V4SF 2 "nonimmediate_operand" "xm,0,xm*r,xm*r"))
(clobber (reg:CC 17))]
"TARGET_SSE
&& (reload_in_progress || reload_completed
@@ -9314,7 +9540,7 @@
(define_split
[(set (match_operand:SF 0 "register_operand" "")
(neg:SF (match_operand:SF 1 "register_operand" "")))
- (use (match_operand:SF 2 "" ""))
+ (use (match_operand:V4SF 2 "" ""))
(clobber (reg:CC 17))]
"reload_completed && !SSE_REG_P (operands[0])"
[(parallel [(set (match_dup 0)
@@ -9324,13 +9550,15 @@
(define_split
[(set (match_operand:SF 0 "register_operand" "")
(neg:SF (match_operand:SF 1 "register_operand" "")))
- (use (match_operand:SF 2 "register_operand" ""))
+ (use (match_operand:V4SF 2 "nonimmediate_operand" ""))
(clobber (reg:CC 17))]
"reload_completed && SSE_REG_P (operands[0])"
[(set (subreg:TI (match_dup 0) 0)
- (xor:TI (subreg:TI (match_dup 1) 0)
- (subreg:TI (match_dup 2) 0)))]
+ (xor:TI (match_dup 1)
+ (match_dup 2)))]
{
+ operands[1] = simplify_gen_subreg (TImode, operands[1], SFmode, 0);
+ operands[2] = simplify_gen_subreg (TImode, operands[2], V4SFmode, 0);
if (operands_match_p (operands[0], operands[2]))
{
rtx tmp;
@@ -9403,7 +9631,7 @@
{
/* Using SSE is tricky, since we need bitwise negation of -0
in register. */
- rtx reg = gen_reg_rtx (DFmode);
+ rtx reg;
#if HOST_BITS_PER_WIDE_INT >= 64
rtx imm = gen_int_mode (((HOST_WIDE_INT)1) << 63, DImode);
#else
@@ -9413,7 +9641,10 @@
operands[1] = force_reg (DFmode, operands[1]);
operands[0] = force_reg (DFmode, operands[0]);
- emit_move_insn (reg, gen_lowpart (DFmode, imm));
+ imm = gen_lowpart (DFmode, imm);
+ reg = force_reg (V2DFmode,
+ gen_rtx_CONST_VECTOR (V2DFmode,
+ gen_rtvec (2, imm, CONST0_RTX (DFmode))));
emit_insn (gen_negdf2_ifs (operands[0], operands[1], reg));
if (dest != operands[0])
emit_move_insn (dest, operands[0]);
@@ -9432,7 +9663,7 @@
(define_insn "negdf2_ifs"
[(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,f#Yr,rm#Yf")
(neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0,0")))
- (use (match_operand:DF 2 "nonmemory_operand" "Y,0,*g#Y,*g#Y"))
+ (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,Ym*r,Ym*r"))
(clobber (reg:CC 17))]
"!TARGET_64BIT && TARGET_SSE2
&& (reload_in_progress || reload_completed
@@ -9442,8 +9673,8 @@
(define_insn "*negdf2_ifs_rex64"
[(set (match_operand:DF 0 "nonimmediate_operand" "=Y#f,Y#f,fm#Y")
- (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#f,0")))
- (use (match_operand:DF 2 "general_operand" "Y,0,*g#Y*r"))
+ (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0")))
+ (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,Ym*r"))
(clobber (reg:CC 17))]
"TARGET_64BIT && TARGET_SSE2
&& (reload_in_progress || reload_completed
@@ -9454,7 +9685,7 @@
(define_split
[(set (match_operand:DF 0 "memory_operand" "")
(neg:DF (match_operand:DF 1 "memory_operand" "")))
- (use (match_operand:DF 2 "" ""))
+ (use (match_operand:V2DF 2 "" ""))
(clobber (reg:CC 17))]
""
[(parallel [(set (match_dup 0)
@@ -9464,7 +9695,7 @@
(define_split
[(set (match_operand:DF 0 "register_operand" "")
(neg:DF (match_operand:DF 1 "register_operand" "")))
- (use (match_operand:DF 2 "" ""))
+ (use (match_operand:V2DF 2 "" ""))
(clobber (reg:CC 17))]
"reload_completed && !SSE_REG_P (operands[0])
&& (!TARGET_64BIT || FP_REG_P (operands[0]))"
@@ -9475,7 +9706,7 @@
(define_split
[(set (match_operand:DF 0 "register_operand" "")
(neg:DF (match_operand:DF 1 "register_operand" "")))
- (use (match_operand:DF 2 "" ""))
+ (use (match_operand:V2DF 2 "" ""))
(clobber (reg:CC 17))]
"TARGET_64BIT && reload_completed && GENERAL_REG_P (operands[0])"
[(parallel [(set (match_dup 0)
@@ -9488,13 +9719,19 @@
(define_split
[(set (match_operand:DF 0 "register_operand" "")
(neg:DF (match_operand:DF 1 "register_operand" "")))
- (use (match_operand:DF 2 "register_operand" ""))
+ (use (match_operand:V2DF 2 "nonimmediate_operand" ""))
(clobber (reg:CC 17))]
"reload_completed && SSE_REG_P (operands[0])"
[(set (subreg:TI (match_dup 0) 0)
- (xor:TI (subreg:TI (match_dup 1) 0)
- (subreg:TI (match_dup 2) 0)))]
+ (xor:TI (match_dup 1)
+ (match_dup 2)))]
{
+ operands[0] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0);
+ operands[1] = simplify_gen_subreg (TImode, operands[1], DFmode, 0);
+ operands[2] = simplify_gen_subreg (TImode, operands[2], V2DFmode, 0);
+ /* Avoid possible reformating on the operands. */
+ if (TARGET_SSE_PARTIAL_REGS && !optimize_size)
+ emit_insn (gen_sse2_unpcklpd (operands[0], operands[0], operands[0]));
if (operands_match_p (operands[0], operands[2]))
{
rtx tmp;
@@ -9727,14 +9964,18 @@
{
/* Using SSE is tricky, since we need bitwise negation of -0
in register. */
- rtx reg = gen_reg_rtx (SFmode);
+ rtx reg = gen_reg_rtx (V4SFmode);
rtx dest = operands[0];
+ rtx imm;
operands[1] = force_reg (SFmode, operands[1]);
operands[0] = force_reg (SFmode, operands[0]);
- emit_move_insn (reg,
- gen_lowpart (SFmode,
- gen_int_mode (0x80000000, SImode)));
+ imm = gen_lowpart (SFmode, gen_int_mode(~0x80000000, SImode));
+ reg = force_reg (V4SFmode,
+ gen_rtx_CONST_VECTOR (V4SFmode,
+ gen_rtvec (4, imm, CONST0_RTX (SFmode),
+ CONST0_RTX (SFmode),
+ CONST0_RTX (SFmode))));
emit_insn (gen_abssf2_ifs (operands[0], operands[1], reg));
if (dest != operands[0])
emit_move_insn (dest, operands[0]);
@@ -9751,20 +9992,20 @@
"#")
(define_insn "abssf2_ifs"
- [(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,f#xr,rm#xf")
- (abs:SF (match_operand:SF 1 "nonimmediate_operand" "x,0,0")))
- (use (match_operand:SF 2 "nonmemory_operand" "*0#x,*g#x,*g#x"))
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,x#fr,f#xr,rm#xf")
+ (abs:SF (match_operand:SF 1 "nonimmediate_operand" "0,x#fr,0,0")))
+ (use (match_operand:V4SF 2 "nonimmediate_operand" "xm,0,xm*r,xm*r"))
(clobber (reg:CC 17))]
"TARGET_SSE
&& (reload_in_progress || reload_completed
|| (register_operand (operands[0], VOIDmode)
- && register_operand (operands[1], VOIDmode)))"
+ && register_operand (operands[1], VOIDmode)))"
"#")
(define_split
[(set (match_operand:SF 0 "memory_operand" "")
(abs:SF (match_operand:SF 1 "memory_operand" "")))
- (use (match_operand:SF 2 "" ""))
+ (use (match_operand:V4SF 2 "" ""))
(clobber (reg:CC 17))]
""
[(parallel [(set (match_dup 0)
@@ -9774,7 +10015,7 @@
(define_split
[(set (match_operand:SF 0 "register_operand" "")
(abs:SF (match_operand:SF 1 "register_operand" "")))
- (use (match_operand:SF 2 "" ""))
+ (use (match_operand:V4SF 2 "" ""))
(clobber (reg:CC 17))]
"reload_completed && !SSE_REG_P (operands[0])"
[(parallel [(set (match_dup 0)
@@ -9784,12 +10025,23 @@
(define_split
[(set (match_operand:SF 0 "register_operand" "")
(abs:SF (match_operand:SF 1 "register_operand" "")))
- (use (match_operand:SF 2 "register_operand" ""))
+ (use (match_operand:V4SF 2 "nonimmediate_operand" ""))
(clobber (reg:CC 17))]
"reload_completed && SSE_REG_P (operands[0])"
[(set (subreg:TI (match_dup 0) 0)
- (and:TI (not:TI (subreg:TI (match_dup 2) 0))
- (subreg:TI (match_dup 1) 0)))])
+ (and:TI (match_dup 1)
+ (match_dup 2)))]
+{
+ operands[1] = simplify_gen_subreg (TImode, operands[1], SFmode, 0);
+ operands[2] = simplify_gen_subreg (TImode, operands[2], V4SFmode, 0);
+ if (operands_match_p (operands[0], operands[2]))
+ {
+ rtx tmp;
+ tmp = operands[1];
+ operands[1] = operands[2];
+ operands[2] = tmp;
+ }
+})
;; Keep 'f' and 'r' in separate alternatives to avoid reload problems
;; because of secondary memory needed to reload from class FLOAT_INT_REGS
@@ -9852,17 +10104,22 @@
{
/* Using SSE is tricky, since we need bitwise negation of -0
in register. */
- rtx reg = gen_reg_rtx (DFmode);
+ rtx reg = gen_reg_rtx (V2DFmode);
#if HOST_BITS_PER_WIDE_INT >= 64
- rtx imm = gen_int_mode (((HOST_WIDE_INT)1) << 63, DImode);
+ rtx imm = gen_int_mode (~(((HOST_WIDE_INT)1) << 63), DImode);
#else
- rtx imm = immed_double_const (0, 0x80000000, DImode);
+ rtx imm = immed_double_const (~0, ~0x80000000, DImode);
#endif
rtx dest = operands[0];
operands[1] = force_reg (DFmode, operands[1]);
operands[0] = force_reg (DFmode, operands[0]);
- emit_move_insn (reg, gen_lowpart (DFmode, imm));
+
+ /* Produce LONG_DOUBLE with the proper immediate argument. */
+ imm = gen_lowpart (DFmode, imm);
+ reg = force_reg (V2DFmode,
+ gen_rtx_CONST_VECTOR (V2DFmode,
+ gen_rtvec (2, imm, CONST0_RTX (DFmode))));
emit_insn (gen_absdf2_ifs (operands[0], operands[1], reg));
if (dest != operands[0])
emit_move_insn (dest, operands[0]);
@@ -9879,9 +10136,9 @@
"#")
(define_insn "absdf2_ifs"
- [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,mf#Yr,mr#Yf")
- (abs:DF (match_operand:DF 1 "nonimmediate_operand" "Y,0,0")))
- (use (match_operand:DF 2 "nonmemory_operand" "*0#Y,*g#Y,*g#Y"))
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,mf#Yr,mr#Yf")
+ (abs:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0,0")))
+ (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,Ym*r,Ym*r"))
(clobber (reg:CC 17))]
"!TARGET_64BIT && TARGET_SSE2
&& (reload_in_progress || reload_completed
@@ -9890,9 +10147,9 @@
"#")
(define_insn "*absdf2_ifs_rex64"
- [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,mf#Yr")
- (abs:DF (match_operand:DF 1 "nonimmediate_operand" "Y,0")))
- (use (match_operand:DF 2 "nonmemory_operand" "*0#Y,*g#Y"))
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,mf#Yr")
+ (abs:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0")))
+ (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,Ym*r"))
(clobber (reg:CC 17))]
"TARGET_64BIT && TARGET_SSE2
&& (reload_in_progress || reload_completed
@@ -9903,7 +10160,7 @@
(define_split
[(set (match_operand:DF 0 "memory_operand" "")
(abs:DF (match_operand:DF 1 "memory_operand" "")))
- (use (match_operand:DF 2 "" ""))
+ (use (match_operand:V2DF 2 "" ""))
(clobber (reg:CC 17))]
""
[(parallel [(set (match_dup 0)
@@ -9913,7 +10170,7 @@
(define_split
[(set (match_operand:DF 0 "register_operand" "")
(abs:DF (match_operand:DF 1 "register_operand" "")))
- (use (match_operand:DF 2 "" ""))
+ (use (match_operand:V2DF 2 "" ""))
(clobber (reg:CC 17))]
"reload_completed && !SSE_REG_P (operands[0])"
[(parallel [(set (match_dup 0)
@@ -9923,12 +10180,27 @@
(define_split
[(set (match_operand:DF 0 "register_operand" "")
(abs:DF (match_operand:DF 1 "register_operand" "")))
- (use (match_operand:DF 2 "register_operand" ""))
+ (use (match_operand:V2DF 2 "nonimmediate_operand" ""))
(clobber (reg:CC 17))]
"reload_completed && SSE_REG_P (operands[0])"
[(set (subreg:TI (match_dup 0) 0)
- (and:TI (not:TI (subreg:TI (match_dup 2) 0))
- (subreg:TI (match_dup 1) 0)))])
+ (and:TI (match_dup 1)
+ (match_dup 2)))]
+{
+ operands[0] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0);
+ operands[1] = simplify_gen_subreg (TImode, operands[1], DFmode, 0);
+ operands[2] = simplify_gen_subreg (TImode, operands[2], V2DFmode, 0);
+ /* Avoid possible reformating on the operands. */
+ if (TARGET_SSE_PARTIAL_REGS && !optimize_size)
+ emit_insn (gen_sse2_unpcklpd (operands[0], operands[0], operands[0]));
+ if (operands_match_p (operands[0], operands[2]))
+ {
+ rtx tmp;
+ tmp = operands[1];
+ operands[1] = operands[2];
+ operands[2] = tmp;
+ }
+})
;; Keep 'f' and 'r' in separate alternatives to avoid reload problems
@@ -13367,7 +13639,7 @@
(match_operand:SI 3 "" "")))])]
"!TARGET_64BIT"
{
- ix86_expand_call (NULL, operands[0], operands[1], operands[2], operands[3]);
+ ix86_expand_call (NULL, operands[0], operands[1], operands[2], operands[3], 0);
DONE;
})
@@ -13412,7 +13684,17 @@
(use (match_operand 2 "" ""))]
""
{
- ix86_expand_call (NULL, operands[0], operands[1], operands[2], NULL);
+ ix86_expand_call (NULL, operands[0], operands[1], operands[2], NULL, 0);
+ DONE;
+})
+
+(define_expand "sibcall"
+ [(call (match_operand:QI 0 "" "")
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))]
+ ""
+{
+ ix86_expand_call (NULL, operands[0], operands[1], operands[2], NULL, 1);
DONE;
})
@@ -13431,41 +13713,51 @@
(define_insn "*call_1"
[(call (mem:QI (match_operand:SI 0 "call_insn_operand" "rsm"))
(match_operand 1 "" ""))]
- "!TARGET_64BIT"
+ "!SIBLING_CALL_P (insn) && !TARGET_64BIT"
{
if (constant_call_address_operand (operands[0], QImode))
- {
- if (SIBLING_CALL_P (insn))
- return "jmp\t%P0";
- else
- return "call\t%P0";
- }
- if (SIBLING_CALL_P (insn))
- return "jmp\t%A0";
- else
- return "call\t%A0";
+ return "call\t%P0";
+ return "call\t%A0";
+}
+ [(set_attr "type" "call")])
+
+(define_insn "*sibcall_1"
+ [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,c,d,a"))
+ (match_operand 1 "" ""))]
+ "SIBLING_CALL_P (insn) && !TARGET_64BIT"
+{
+ if (constant_call_address_operand (operands[0], QImode))
+ return "jmp\t%P0";
+ return "jmp\t%A0";
}
[(set_attr "type" "call")])
(define_insn "*call_1_rex64"
[(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm"))
(match_operand 1 "" ""))]
- "TARGET_64BIT"
+ "!SIBLING_CALL_P (insn) && TARGET_64BIT"
{
if (constant_call_address_operand (operands[0], QImode))
- {
- if (SIBLING_CALL_P (insn))
- return "jmp\t%P0";
- else
- return "call\t%P0";
- }
- if (SIBLING_CALL_P (insn))
- return "jmp\t%A0";
- else
- return "call\t%A0";
+ return "call\t%P0";
+ return "call\t%A0";
}
[(set_attr "type" "call")])
+(define_insn "*sibcall_1_rex64"
+ [(call (mem:QI (match_operand:DI 0 "constant_call_address_operand" ""))
+ (match_operand 1 "" ""))]
+ "SIBLING_CALL_P (insn) && TARGET_64BIT"
+ "jmp\t%P0"
+ [(set_attr "type" "call")])
+
+(define_insn "*sibcall_1_rex64_v"
+ [(call (mem:QI (reg:DI 40))
+ (match_operand 0 "" ""))]
+ "SIBLING_CALL_P (insn) && TARGET_64BIT"
+ "jmp\t*%%r11"
+ [(set_attr "type" "call")])
+
+
;; Call subroutine, returning value in operand 0
(define_expand "call_value_pop"
@@ -13478,7 +13770,7 @@
"!TARGET_64BIT"
{
ix86_expand_call (operands[0], operands[1], operands[2],
- operands[3], operands[4]);
+ operands[3], operands[4], 0);
DONE;
})
@@ -13490,7 +13782,19 @@
;; Operand 2 not used on the i386.
""
{
- ix86_expand_call (operands[0], operands[1], operands[2], operands[3], NULL);
+ ix86_expand_call (operands[0], operands[1], operands[2], operands[3], NULL, 0);
+ DONE;
+})
+
+(define_expand "sibcall_value"
+ [(set (match_operand 0 "" "")
+ (call (match_operand:QI 1 "" "")
+ (match_operand:SI 2 "" "")))
+ (use (match_operand:SI 3 "" ""))]
+ ;; Operand 2 not used on the i386.
+ ""
+{
+ ix86_expand_call (operands[0], operands[1], operands[2], operands[3], NULL, 1);
DONE;
})
@@ -13513,7 +13817,7 @@
ix86_expand_call ((TARGET_FLOAT_RETURNS_IN_80387
? gen_rtx_REG (XCmode, FIRST_FLOAT_REG) : NULL),
operands[0], const0_rtx, GEN_INT (SSE_REGPARM_MAX - 1),
- NULL);
+ NULL, 0);
for (i = 0; i < XVECLEN (operands[2], 0); i++)
{
@@ -13662,11 +13966,7 @@
(clobber (mem:BLK (scratch)))]
"!TARGET_64BIT"
"leave"
- [(set_attr "length_immediate" "0")
- (set_attr "length" "1")
- (set_attr "modrm" "0")
- (set_attr "athlon_decode" "vector")
- (set_attr "ppro_uops" "few")])
+ [(set_attr "type" "leave")])
(define_insn "leave_rex64"
[(set (reg:DI 7) (plus:DI (reg:DI 6) (const_int 8)))
@@ -13674,11 +13974,7 @@
(clobber (mem:BLK (scratch)))]
"TARGET_64BIT"
"leave"
- [(set_attr "length_immediate" "0")
- (set_attr "length" "1")
- (set_attr "modrm" "0")
- (set_attr "athlon_decode" "vector")
- (set_attr "ppro_uops" "few")])
+ [(set_attr "type" "leave")])
(define_expand "ffssi2"
[(set (match_operand:SI 0 "nonimmediate_operand" "")
@@ -14312,6 +14608,24 @@
(const_string "fop")))
(set_attr "mode" "SF")])
+(define_insn "*fop_df_6"
+ [(set (match_operand:DF 0 "register_operand" "=f,f")
+ (match_operator:DF 3 "binary_fp_operator"
+ [(float_extend:DF
+ (match_operand:SF 1 "register_operand" "0,f"))
+ (float_extend:DF
+ (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))]
+ "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH)"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:DF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:DF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "mode" "SF")])
+
(define_insn "*fop_xf_1"
[(set (match_operand:XF 0 "register_operand" "=f,f")
(match_operator:XF 3 "binary_fp_operator"
@@ -14421,7 +14735,7 @@
(define_insn "*fop_xf_4"
[(set (match_operand:XF 0 "register_operand" "=f,f")
(match_operator:XF 3 "binary_fp_operator"
- [(float_extend:XF (match_operand:SF 1 "nonimmediate_operand" "fm,0"))
+ [(float_extend:XF (match_operand 1 "nonimmediate_operand" "fm,0"))
(match_operand:XF 2 "register_operand" "0,f")]))]
"!TARGET_64BIT && TARGET_80387"
"* return output_387_binary_op (insn, operands);"
@@ -14437,7 +14751,7 @@
(define_insn "*fop_tf_4"
[(set (match_operand:TF 0 "register_operand" "=f,f")
(match_operator:TF 3 "binary_fp_operator"
- [(float_extend:TF (match_operand:SF 1 "nonimmediate_operand" "fm,0"))
+ [(float_extend:TF (match_operand 1 "nonimmediate_operand" "fm,0"))
(match_operand:TF 2 "register_operand" "0,f")]))]
"TARGET_80387"
"* return output_387_binary_op (insn, operands);"
@@ -14455,7 +14769,7 @@
(match_operator:XF 3 "binary_fp_operator"
[(match_operand:XF 1 "register_operand" "0,f")
(float_extend:XF
- (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))]
+ (match_operand 2 "nonimmediate_operand" "fm,0"))]))]
"!TARGET_64BIT && TARGET_80387"
"* return output_387_binary_op (insn, operands);"
[(set (attr "type")
@@ -14472,7 +14786,7 @@
(match_operator:TF 3 "binary_fp_operator"
[(match_operand:TF 1 "register_operand" "0,f")
(float_extend:TF
- (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))]
+ (match_operand 2 "nonimmediate_operand" "fm,0"))]))]
"TARGET_80387"
"* return output_387_binary_op (insn, operands);"
[(set (attr "type")
@@ -14487,41 +14801,10 @@
(define_insn "*fop_xf_6"
[(set (match_operand:XF 0 "register_operand" "=f,f")
(match_operator:XF 3 "binary_fp_operator"
- [(float_extend:XF (match_operand:DF 1 "nonimmediate_operand" "fm,0"))
- (match_operand:XF 2 "register_operand" "0,f")]))]
- "!TARGET_64BIT && TARGET_80387"
- "* return output_387_binary_op (insn, operands);"
- [(set (attr "type")
- (cond [(match_operand:XF 3 "mult_operator" "")
- (const_string "fmul")
- (match_operand:XF 3 "div_operator" "")
- (const_string "fdiv")
- ]
- (const_string "fop")))
- (set_attr "mode" "DF")])
-
-(define_insn "*fop_tf_6"
- [(set (match_operand:TF 0 "register_operand" "=f,f")
- (match_operator:TF 3 "binary_fp_operator"
- [(float_extend:TF (match_operand:DF 1 "nonimmediate_operand" "fm,0"))
- (match_operand:TF 2 "register_operand" "0,f")]))]
- "TARGET_80387"
- "* return output_387_binary_op (insn, operands);"
- [(set (attr "type")
- (cond [(match_operand:TF 3 "mult_operator" "")
- (const_string "fmul")
- (match_operand:TF 3 "div_operator" "")
- (const_string "fdiv")
- ]
- (const_string "fop")))
- (set_attr "mode" "DF")])
-
-(define_insn "*fop_xf_7"
- [(set (match_operand:XF 0 "register_operand" "=f,f")
- (match_operator:XF 3 "binary_fp_operator"
- [(match_operand:XF 1 "register_operand" "0,f")
+ [(float_extend:XF
+ (match_operand 1 "register_operand" "0,f"))
(float_extend:XF
- (match_operand:DF 2 "nonimmediate_operand" "fm,0"))]))]
+ (match_operand 2 "nonimmediate_operand" "fm,0"))]))]
"!TARGET_64BIT && TARGET_80387"
"* return output_387_binary_op (insn, operands);"
[(set (attr "type")
@@ -14531,14 +14814,15 @@
(const_string "fdiv")
]
(const_string "fop")))
- (set_attr "mode" "DF")])
+ (set_attr "mode" "SF")])
-(define_insn "*fop_tf_7"
+(define_insn "*fop_tf_6"
[(set (match_operand:TF 0 "register_operand" "=f,f")
(match_operator:TF 3 "binary_fp_operator"
- [(match_operand:TF 1 "register_operand" "0,f")
+ [(float_extend:TF
+ (match_operand 1 "register_operand" "0,f"))
(float_extend:TF
- (match_operand:DF 2 "nonimmediate_operand" "fm,0"))]))]
+ (match_operand 2 "nonimmediate_operand" "fm,0"))]))]
"TARGET_80387"
"* return output_387_binary_op (insn, operands);"
[(set (attr "type")
@@ -14548,7 +14832,7 @@
(const_string "fdiv")
]
(const_string "fop")))
- (set_attr "mode" "DF")])
+ (set_attr "mode" "SF")])
(define_split
[(set (match_operand 0 "register_operand" "")
@@ -15949,9 +16233,9 @@
(define_expand "movhicc"
[(set (match_operand:HI 0 "register_operand" "")
(if_then_else:HI (match_operand 1 "comparison_operator" "")
- (match_operand:HI 2 "nonimmediate_operand" "")
- (match_operand:HI 3 "nonimmediate_operand" "")))]
- "TARGET_CMOVE && TARGET_HIMODE_MATH"
+ (match_operand:HI 2 "general_operand" "")
+ (match_operand:HI 3 "general_operand" "")))]
+ "TARGET_HIMODE_MATH"
"if (!ix86_expand_int_movcc (operands)) FAIL; DONE;")
(define_insn "*movhicc_noc"
@@ -15968,6 +16252,33 @@
[(set_attr "type" "icmov")
(set_attr "mode" "HI")])
+(define_expand "movqicc"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (if_then_else:QI (match_operand 1 "comparison_operator" "")
+ (match_operand:QI 2 "general_operand" "")
+ (match_operand:QI 3 "general_operand" "")))]
+ "TARGET_QIMODE_MATH"
+ "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;")
+
+(define_insn_and_split "*movqicc_noc"
+ [(set (match_operand:QI 0 "register_operand" "=r,r")
+ (if_then_else:QI (match_operator 1 "ix86_comparison_operator"
+ [(match_operand 4 "flags_reg_operand" "") (const_int 0)])
+ (match_operand:QI 2 "register_operand" "r,0")
+ (match_operand:QI 3 "register_operand" "0,r")))]
+ "TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (if_then_else:SI (match_op_dup 1 [(match_dup 4) (const_int 0)])
+ (match_dup 2)
+ (match_dup 3)))]
+ "operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[2] = gen_lowpart (SImode, operands[2]);
+ operands[3] = gen_lowpart (SImode, operands[3]);"
+ [(set_attr "type" "icmov")
+ (set_attr "mode" "SI")])
+
(define_expand "movsfcc"
[(set (match_operand:SF 0 "register_operand" "")
(if_then_else:SF (match_operand 1 "comparison_operator" "")
@@ -16035,7 +16346,7 @@
(define_split
[(set (match_operand:DF 0 "register_and_not_any_fp_reg_operand" "")
(if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
- [(match_operand 4 "" "") (const_int 0)])
+ [(match_operand 4 "flags_reg_operand" "") (const_int 0)])
(match_operand:DF 2 "nonimmediate_operand" "")
(match_operand:DF 3 "nonimmediate_operand" "")))]
"!TARGET_64BIT && reload_completed"
@@ -16549,6 +16860,12 @@
(clobber (reg:CC 17))]
"TARGET_SSE
&& (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)
+ /* Avoid combine from being smart and converting min/max
+ instruction patterns into conditional moves. */
+ && ((GET_CODE (operands[1]) != LT && GET_CODE (operands[1]) != GT
+ && GET_CODE (operands[1]) != UNLE && GET_CODE (operands[1]) != UNGE)
+ || !rtx_equal_p (operands[4], operands[2])
+ || !rtx_equal_p (operands[5], operands[3]))
&& (!TARGET_IEEE_FP
|| (GET_CODE (operands[1]) != EQ && GET_CODE (operands[1]) != NE))"
"#")
@@ -16576,6 +16893,12 @@
(clobber (reg:CC 17))]
"TARGET_SSE2
&& (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)
+ /* Avoid combine from being smart and converting min/max
+ instruction patterns into conditional moves. */
+ && ((GET_CODE (operands[1]) != LT && GET_CODE (operands[1]) != GT
+ && GET_CODE (operands[1]) != UNLE && GET_CODE (operands[1]) != UNGE)
+ || !rtx_equal_p (operands[4], operands[2])
+ || !rtx_equal_p (operands[5], operands[3]))
&& (!TARGET_IEEE_FP
|| (GET_CODE (operands[1]) != EQ && GET_CODE (operands[1]) != NE))"
"#")
@@ -16637,6 +16960,14 @@
(set (subreg:TI (match_dup 0) 0) (ior:TI (subreg:TI (match_dup 6) 0)
(subreg:TI (match_dup 7) 0)))]
{
+ if (GET_MODE (operands[2]) == DFmode
+ && TARGET_SSE_PARTIAL_REGS && !optimize_size)
+ {
+ rtx op = simplify_gen_subreg (V2DFmode, operands[2], DFmode, 0);
+ emit_insn (gen_sse2_unpcklpd (op, op, op));
+ op = simplify_gen_subreg (V2DFmode, operands[3], DFmode, 0);
+ emit_insn (gen_sse2_unpcklpd (op, op, op));
+ }
/* If op2 == op3, op3 will be clobbered before it is used.
This should be optimized out though. */
if (operands_match_p (operands[2], operands[3]))
@@ -16743,8 +17074,22 @@
|| const0_operand (operands[3], GET_MODE (operands[0])))"
[(set (match_dup 0) (match_op_dup 1 [(match_dup 0) (match_dup 5)]))
(set (subreg:TI (match_dup 0) 0) (and:TI (match_dup 6)
- (subreg:TI (match_dup 7) 0)))]
+ (match_dup 7)))]
{
+ if (TARGET_SSE_PARTIAL_REGS && !optimize_size
+ && GET_MODE (operands[2]) == DFmode)
+ {
+ if (REG_P (operands[2]))
+ {
+ rtx op = simplify_gen_subreg (V2DFmode, operands[2], DFmode, 0);
+ emit_insn (gen_sse2_unpcklpd (op, op, op));
+ }
+ if (REG_P (operands[3]))
+ {
+ rtx op = simplify_gen_subreg (V2DFmode, operands[3], DFmode, 0);
+ emit_insn (gen_sse2_unpcklpd (op, op, op));
+ }
+ }
PUT_MODE (operands[1], GET_MODE (operands[0]));
if (!sse_comparison_operator (operands[1], VOIDmode))
{
@@ -16764,6 +17109,8 @@
operands[7] = operands[2];
operands[6] = gen_rtx_SUBREG (TImode, operands[0], 0);
}
+ operands[7] = simplify_gen_subreg (TImode, operands[7],
+ GET_MODE (operands[7]), 0);
})
(define_expand "allocate_stack_worker"
@@ -17759,19 +18106,23 @@
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:SI 1 "call_insn_operand" "rsm"))
(match_operand:SI 2 "" "")))]
- "!TARGET_64BIT"
+ "!SIBLING_CALL_P (insn) && !TARGET_64BIT"
{
if (constant_call_address_operand (operands[1], QImode))
- {
- if (SIBLING_CALL_P (insn))
- return "jmp\t%P1";
- else
- return "call\t%P1";
- }
- if (SIBLING_CALL_P (insn))
- return "jmp\t%*%1";
- else
- return "call\t%*%1";
+ return "call\t%P1";
+ return "call\t%*%1";
+}
+ [(set_attr "type" "callv")])
+
+(define_insn "*sibcall_value_1"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,c,d,a"))
+ (match_operand:SI 2 "" "")))]
+ "SIBLING_CALL_P (insn) && !TARGET_64BIT"
+{
+ if (constant_call_address_operand (operands[1], QImode))
+ return "jmp\t%P1";
+ return "jmp\t%*%1";
}
[(set_attr "type" "callv")])
@@ -17779,21 +18130,29 @@
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm"))
(match_operand:DI 2 "" "")))]
- "TARGET_64BIT"
+ "!SIBLING_CALL_P (insn) && TARGET_64BIT"
{
if (constant_call_address_operand (operands[1], QImode))
- {
- if (SIBLING_CALL_P (insn))
- return "jmp\t%P1";
- else
- return "call\t%P1";
- }
- if (SIBLING_CALL_P (insn))
- return "jmp\t%A1";
- else
- return "call\t%A1";
+ return "call\t%P1";
+ return "call\t%A1";
}
[(set_attr "type" "callv")])
+
+(define_insn "*sibcall_value_1_rex64"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" ""))
+ (match_operand:DI 2 "" "")))]
+ "SIBLING_CALL_P (insn) && TARGET_64BIT"
+ "jmp\t%P1"
+ [(set_attr "type" "callv")])
+
+(define_insn "*sibcall_value_1_rex64_v"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (reg:DI 40))
+ (match_operand:DI 1 "" "")))]
+ "SIBLING_CALL_P (insn) && TARGET_64BIT"
+ "jmp\t*%%r11"
+ [(set_attr "type" "callv")])
(define_insn "trap"
[(trap_if (const_int 1) (const_int 5))]
@@ -17838,7 +18197,7 @@
{
operands[2] = gen_label_rtx ();
output_asm_insn ("j%c0\t%l2\; int\t%1", operands);
- ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
+ (*targetm.asm_out.internal_label) (asm_out_file, "L",
CODE_LABEL_NUMBER (operands[2]));
RET;
})
@@ -17851,28 +18210,93 @@
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
(match_operand:V4SF 1 "nonimmediate_operand" "xm,x"))]
"TARGET_SSE"
- ;; @@@ let's try to use movaps here.
"movaps\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "mode" "V4SF")])
+(define_split
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
+ "TARGET_SSE"
+ [(set (match_dup 0)
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF (match_dup 1))
+ (match_dup 2)
+ (const_int 1)))]
+{
+ operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
+ operands[2] = CONST0_RTX (V4SFmode);
+})
+
(define_insn "movv4si_internal"
[(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m")
(match_operand:V4SI 1 "nonimmediate_operand" "xm,x"))]
"TARGET_SSE"
- ;; @@@ let's try to use movaps here.
- "movaps\t{%1, %0|%0, %1}"
+{
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "movaps\t{%1, %0|%0, %1}";
+ else
+ return "movdqa\t{%1, %0|%0, %1}";
+}
[(set_attr "type" "ssemov")
- (set_attr "mode" "V4SF")])
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "0")
+ (if_then_else
+ (ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (const_string "TI"))
+ (eq_attr "alternative" "1")
+ (if_then_else
+ (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+ (const_int 0))
+ (ne (symbol_ref "optimize_size")
+ (const_int 0)))
+ (const_string "V4SF")
+ (const_string "TI"))]
+ (const_string "TI")))])
(define_insn "movv2di_internal"
[(set (match_operand:V2DI 0 "nonimmediate_operand" "=x,m")
(match_operand:V2DI 1 "nonimmediate_operand" "xm,x"))]
- "TARGET_SSE"
- ;; @@@ let's try to use movaps here.
- "movdqa\t{%1, %0|%0, %1}"
+ "TARGET_SSE2"
+{
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "movaps\t{%1, %0|%0, %1}";
+ else
+ return "movdqa\t{%1, %0|%0, %1}";
+}
[(set_attr "type" "ssemov")
- (set_attr "mode" "V4SF")])
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "0")
+ (if_then_else
+ (ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (const_string "TI"))
+ (eq_attr "alternative" "1")
+ (if_then_else
+ (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+ (const_int 0))
+ (ne (symbol_ref "optimize_size")
+ (const_int 0)))
+ (const_string "V4SF")
+ (const_string "TI"))]
+ (const_string "TI")))])
+
+(define_split
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
+ "TARGET_SSE2"
+ [(set (match_dup 0)
+ (vec_merge:V2DF
+ (vec_duplicate:V2DF (match_dup 1))
+ (match_dup 2)
+ (const_int 1)))]
+{
+ operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
+ operands[2] = CONST0_RTX (V2DFmode);
+})
(define_insn "movv8qi_internal"
[(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m")
@@ -17922,28 +18346,85 @@
[(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
(match_operand:V2DF 1 "nonimmediate_operand" "xm,x"))]
"TARGET_SSE2"
- ;; @@@ let's try to use movaps here.
- "movapd\t{%1, %0|%0, %1}"
+{
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "movaps\t{%1, %0|%0, %1}";
+ else
+ return "movapd\t{%1, %0|%0, %1}";
+}
[(set_attr "type" "ssemov")
- (set_attr "mode" "V2DF")])
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "0")
+ (if_then_else
+ (ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (const_string "V2DF"))
+ (eq_attr "alternative" "1")
+ (if_then_else
+ (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+ (const_int 0))
+ (ne (symbol_ref "optimize_size")
+ (const_int 0)))
+ (const_string "V4SF")
+ (const_string "V2DF"))]
+ (const_string "V2DF")))])
(define_insn "movv8hi_internal"
[(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
(match_operand:V8HI 1 "nonimmediate_operand" "xm,x"))]
"TARGET_SSE2"
- ;; @@@ let's try to use movaps here.
- "movaps\t{%1, %0|%0, %1}"
+{
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "movaps\t{%1, %0|%0, %1}";
+ else
+ return "movdqa\t{%1, %0|%0, %1}";
+}
[(set_attr "type" "ssemov")
- (set_attr "mode" "V4SF")])
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "0")
+ (if_then_else
+ (ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (const_string "TI"))
+ (eq_attr "alternative" "1")
+ (if_then_else
+ (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+ (const_int 0))
+ (ne (symbol_ref "optimize_size")
+ (const_int 0)))
+ (const_string "V4SF")
+ (const_string "TI"))]
+ (const_string "TI")))])
(define_insn "movv16qi_internal"
[(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
(match_operand:V16QI 1 "nonimmediate_operand" "xm,x"))]
"TARGET_SSE2"
- ;; @@@ let's try to use movaps here.
- "movaps\t{%1, %0|%0, %1}"
+{
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "movaps\t{%1, %0|%0, %1}";
+ else
+ return "movdqa\t{%1, %0|%0, %1}";
+}
[(set_attr "type" "ssemov")
- (set_attr "mode" "V4SF")])
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "0")
+ (if_then_else
+ (ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (const_string "TI"))
+ (eq_attr "alternative" "1")
+ (if_then_else
+ (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+ (const_int 0))
+ (ne (symbol_ref "optimize_size")
+ (const_int 0)))
+ (const_string "V4SF")
+ (const_string "TI"))]
+ (const_string "TI")))])
(define_expand "movv2df"
[(set (match_operand:V2DF 0 "general_operand" "")
@@ -18160,26 +18641,83 @@
[(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m")
(match_operand:TI 1 "general_operand" "C,xm,x"))]
"TARGET_SSE && !TARGET_64BIT"
- "@
- xorps\t%0, %0
- movaps\t{%1, %0|%0, %1}
- movaps\t{%1, %0|%0, %1}"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "xorps\t%0, %0";
+ else
+ return "pxor\t%0, %0";
+ case 1:
+ case 2:
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "movaps\t{%1, %0|%0, %1}";
+ else
+ return "movdqa\t{%1, %0|%0, %1}";
+ default:
+ abort ();
+ }
+}
[(set_attr "type" "ssemov,ssemov,ssemov")
- (set_attr "mode" "V4SF")])
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "0,1")
+ (if_then_else
+ (ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (const_string "TI"))
+ (eq_attr "alternative" "2")
+ (if_then_else
+ (ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (const_string "TI"))]
+ (const_string "TI")))])
(define_insn "*movti_rex64"
- [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,mx,x")
- (match_operand:TI 1 "general_operand" "riFo,riF,O,x,m"))]
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,x,xm")
+ (match_operand:TI 1 "general_operand" "riFo,riF,O,xm,x"))]
"TARGET_64BIT
&& (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
- "@
- #
- #
- xorps\t%0, %0
- movaps\\t{%1, %0|%0, %1}
- movaps\\t{%1, %0|%0, %1}"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ case 1:
+ return "#";
+ case 2:
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "xorps\t%0, %0";
+ else
+ return "pxor\t%0, %0";
+ case 3:
+ case 4:
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "movaps\t{%1, %0|%0, %1}";
+ else
+ return "movdqa\t{%1, %0|%0, %1}";
+ default:
+ abort ();
+ }
+}
[(set_attr "type" "*,*,ssemov,ssemov,ssemov")
- (set_attr "mode" "V4SF")])
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "2,3")
+ (if_then_else
+ (ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (const_string "TI"))
+ (eq_attr "alternative" "4")
+ (if_then_else
+ (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+ (const_int 0))
+ (ne (symbol_ref "optimize_size")
+ (const_int 0)))
+ (const_string "V4SF")
+ (const_string "TI"))]
+ (const_string "DI")))])
(define_split
[(set (match_operand:TI 0 "nonimmediate_operand" "")
@@ -18329,11 +18867,21 @@
[(set_attr "type" "ssecvt")
(set_attr "mode" "V4SF")])
-(define_insn "sse_loadss"
+(define_expand "sse_loadss"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:SF 1 "memory_operand" "")]
+ "TARGET_SSE"
+{
+ emit_insn (gen_sse_loadss_1 (operands[0], operands[1],
+ CONST0_RTX (V4SFmode)));
+ DONE;
+})
+
+(define_insn "sse_loadss_1"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF
- (match_operand:V4SF 1 "memory_operand" "m")
- (vec_duplicate:V4SF (float:SF (const_int 0)))
+ (vec_duplicate:V4SF (match_operand:SF 1 "memory_operand" "m"))
+ (match_operand:V4SF 2 "const0_operand" "X")
(const_int 1)))]
"TARGET_SSE"
"movss\t{%1, %0|%0, %1}"
@@ -18856,12 +19404,26 @@
;; this insn.
(define_insn "sse_clrv4sf"
[(set (match_operand:V4SF 0 "register_operand" "=x")
- (unspec:V4SF [(const_int 0)] UNSPEC_NOP))]
+ (match_operand:V4SF 1 "const0_operand" "X"))]
"TARGET_SSE"
- "xorps\t{%0, %0|%0, %0}"
+{
+ if (get_attr_mode (insn) == MODE_TI)
+ return "pxor\t{%0, %0|%0, %0}";
+ else
+ return "xorps\t{%0, %0|%0, %0}";
+}
[(set_attr "type" "sselog")
(set_attr "memory" "none")
- (set_attr "mode" "V4SF")])
+ (set (attr "mode")
+ (if_then_else
+ (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
+ (const_int 0))
+ (ne (symbol_ref "TARGET_SSE2")
+ (const_int 0)))
+ (eq (symbol_ref "optimize_size")
+ (const_int 0)))
+ (const_string "TI")
+ (const_string "V4SF")))])
;; Use xor, but don't show input operands so they aren't live before
;; this insn.
@@ -19093,6 +19655,18 @@
[(set_attr "type" "ssecvt")
(set_attr "mode" "SF")])
+(define_insn "cvtsi2ssq"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (match_operand:V4SF 1 "register_operand" "0")
+ (vec_duplicate:V4SF
+ (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
+ (const_int 14)))]
+ "TARGET_SSE && TARGET_64BIT"
+ "cvtsi2ssq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "SF")])
+
(define_insn "cvtss2si"
[(set (match_operand:SI 0 "register_operand" "=r")
(vec_select:SI
@@ -19883,7 +20457,7 @@
output_asm_insn (\"rex\", operands);
output_asm_insn (\"movaps\\t{%5, %4|%4, %5}\", operands);
}
- ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, \"L\",
+ (*targetm.asm_out.internal_label) (asm_out_file, \"L\",
CODE_LABEL_NUMBER (operands[3]));
RET;
}
@@ -20720,7 +21294,7 @@
(vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0")
(vec_duplicate:V4SF
(float_truncate:V2SF
- (match_operand:V2DF 2 "register_operand" "xm")))
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
(const_int 14)))]
"TARGET_SSE2"
"cvtsd2ss\t{%2, %0|%0, %2}"
@@ -20732,7 +21306,7 @@
(vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0")
(float_extend:V2DF
(vec_select:V2SF
- (match_operand:V4SF 2 "register_operand" "xm")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")
(parallel [(const_int 0)
(const_int 1)])))
(const_int 2)))]
@@ -21008,10 +21582,20 @@
(define_insn "sse2_clrti"
[(set (match_operand:TI 0 "register_operand" "=x") (const_int 0))]
"TARGET_SSE2"
- "pxor\t{%0, %0|%0, %0}"
- [(set_attr "type" "sseiadd")
+{
+ if (get_attr_mode (insn) == MODE_TI)
+ return "pxor\t%0, %0";
+ else
+ return "xorps\t%0, %0";
+}
+ [(set_attr "type" "ssemov")
(set_attr "memory" "none")
- (set_attr "mode" "TI")])
+ (set (attr "mode")
+ (if_then_else
+ (ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (const_string "TI")))])
;; MMX unsigned averages/sum of absolute differences
@@ -21716,11 +22300,21 @@
[(set_attr "type" "ssecvt")
(set_attr "mode" "V2DF")])
-(define_insn "sse2_loadsd"
+(define_expand "sse2_loadsd"
+ [(match_operand:V2DF 0 "register_operand" "")
+ (match_operand:DF 1 "memory_operand" "")]
+ "TARGET_SSE2"
+{
+ emit_insn (gen_sse2_loadsd_1 (operands[0], operands[1],
+ CONST0_RTX (V2DFmode)));
+ DONE;
+})
+
+(define_insn "sse2_loadsd_1"
[(set (match_operand:V2DF 0 "register_operand" "=x")
(vec_merge:V2DF
- (match_operand:DF 1 "memory_operand" "m")
- (vec_duplicate:DF (float:DF (const_int 0)))
+ (vec_duplicate:V2DF (match_operand:DF 1 "memory_operand" "m"))
+ (match_operand:V2DF 2 "const0_operand" "X")
(const_int 1)))]
"TARGET_SSE2"
"movsd\t{%1, %0|%0, %1}"
diff --git a/gcc/config/i386/k6.md b/gcc/config/i386/k6.md
index af128bfe037..d9f6d84b022 100644
--- a/gcc/config/i386/k6.md
+++ b/gcc/config/i386/k6.md
@@ -71,7 +71,7 @@
;; Load unit have two cycle latency, but we take care for it in adjust_cost
(define_function_unit "k6_load" 1 0
(and (eq_attr "cpu" "k6")
- (ior (eq_attr "type" "pop")
+ (ior (eq_attr "type" "pop,leave")
(eq_attr "memory" "load,both")))
1 1)
diff --git a/gcc/config/i386/lynx-ng.h b/gcc/config/i386/lynx-ng.h
index 08fa60f430c..8d41add6cee 100644
--- a/gcc/config/i386/lynx-ng.h
+++ b/gcc/config/i386/lynx-ng.h
@@ -68,10 +68,3 @@ Boston, MA 02111-1307, USA. */
#undef ASM_GENERATE_INTERNAL_LABEL
#define ASM_GENERATE_INTERNAL_LABEL(BUF,PREFIX,NUMBER) \
sprintf ((BUF), ".%s%ld", (PREFIX), (long)(NUMBER))
-
-/* This is how to output an internal numbered label where
- PREFIX is the class of label and NUM is the number within the class. */
-
-#undef ASM_OUTPUT_INTERNAL_LABEL
-#define ASM_OUTPUT_INTERNAL_LABEL(FILE,PREFIX,NUM) \
- fprintf (FILE, ".%s%d:\n", PREFIX, NUM)
diff --git a/gcc/config/i386/lynx.h b/gcc/config/i386/lynx.h
index 7835f2713f2..bdbfbe65f0f 100644
--- a/gcc/config/i386/lynx.h
+++ b/gcc/config/i386/lynx.h
@@ -69,10 +69,3 @@ Boston, MA 02111-1307, USA. */
#undef ASM_GENERATE_INTERNAL_LABEL
#define ASM_GENERATE_INTERNAL_LABEL(BUF,PREFIX,NUMBER) \
sprintf ((BUF), ".%s%ld", (PREFIX), (long)(NUMBER))
-
-/* This is how to output an internal numbered label where
- PREFIX is the class of label and NUM is the number within the class. */
-
-#undef ASM_OUTPUT_INTERNAL_LABEL
-#define ASM_OUTPUT_INTERNAL_LABEL(FILE,PREFIX,NUM) \
- fprintf (FILE, ".%s%d:\n", PREFIX, NUM)
diff --git a/gcc/config/i386/pentium.md b/gcc/config/i386/pentium.md
index b4c5ece3678..24f8becb02a 100644
--- a/gcc/config/i386/pentium.md
+++ b/gcc/config/i386/pentium.md
@@ -194,7 +194,7 @@
(define_insn_reservation "pent_pop" 1
(and (eq_attr "cpu" "pentium")
- (eq_attr "type" "pop"))
+ (eq_attr "type" "pop,leave"))
"pentium-firstuv")
;; Call and branch instruction can execute in either pipe, but
diff --git a/gcc/config/i386/ppro.md b/gcc/config/i386/ppro.md
index 86906856469..67cc1f1e293 100644
--- a/gcc/config/i386/ppro.md
+++ b/gcc/config/i386/ppro.md
@@ -29,7 +29,7 @@
(define_attr "ppro_uops" "one,few,many"
(cond [(eq_attr "type" "other,multi,call,callv,fpspc,str")
(const_string "many")
- (eq_attr "type" "icmov,fcmov,str,cld")
+ (eq_attr "type" "icmov,fcmov,str,cld,leave")
(const_string "few")
(eq_attr "type" "imov")
(if_then_else (eq_attr "memory" "store,both")
@@ -118,7 +118,7 @@
(define_function_unit "ppro_p2" 1 0
(and (eq_attr "cpu" "pentiumpro")
- (ior (eq_attr "type" "pop")
+ (ior (eq_attr "type" "pop,leave")
(eq_attr "memory" "load,both")))
3 1)
diff --git a/gcc/config/i386/sco5.h b/gcc/config/i386/sco5.h
index 815e45771d6..3b25b65d826 100644
--- a/gcc/config/i386/sco5.h
+++ b/gcc/config/i386/sco5.h
@@ -343,7 +343,7 @@ do { \
do { \
if (TARGET_ELF) \
ASM_OUTPUT_ALIGN ((FILE), 2); \
- ASM_OUTPUT_INTERNAL_LABEL((FILE),(PREFIX),(NUM)); \
+ (*targetm.asm_out.internal_label)((FILE),(PREFIX),(NUM)); \
} while (0)
#undef ASM_OUTPUT_IDENT
@@ -354,10 +354,6 @@ do { \
#define ASM_OUTPUT_EXTERNAL_LIBCALL(FILE, FUN) \
if (TARGET_ELF) (*targetm.asm_out.globalize_label) (FILE, XSTR (FUN, 0))
-#undef ASM_OUTPUT_INTERNAL_LABEL
-#define ASM_OUTPUT_INTERNAL_LABEL(FILE,PREFIX,NUM) \
- fprintf (FILE, ".%s%d:\n", PREFIX, NUM)
-
/* The prefix to add to user-visible assembler symbols. */
#undef USER_LABEL_PREFIX
diff --git a/gcc/config/i386/t-cygwin b/gcc/config/i386/t-cygwin
index 6fcb8340ddc..b4ea698cbc2 100644
--- a/gcc/config/i386/t-cygwin
+++ b/gcc/config/i386/t-cygwin
@@ -14,7 +14,9 @@ LIBGCC2_INCLUDES = -I$(srcdir)/../winsup/include \
-I$(srcdir)/../winsup/cygwin/include \
-I$(srcdir)/../winsup/w32api/include
-winnt.o: $(srcdir)/config/i386/winnt.c $(RTL_H) $(TREE_H) $(CONFIG_H) $(TM_P_H)
+winnt.o: $(srcdir)/config/i386/winnt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
+ $(TM_H) $(RTL_H) $(REGS_H) hard-reg-set.h output.h $(TREE_H) flags.h \
+ $(TM_P_H) toplev.h $(HASHTAB_H) $(GGC_H)
$(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $(srcdir)/config/i386/winnt.c
# Don't run fixproto
diff --git a/gcc/config/i386/t-interix b/gcc/config/i386/t-interix
index 710de8b0881..d5fff6167b7 100644
--- a/gcc/config/i386/t-interix
+++ b/gcc/config/i386/t-interix
@@ -1,6 +1,7 @@
LIB1ASMSRC = i386/cygwin.asm
LIB1ASMFUNCS = _chkstk
-winnt.o: $(srcdir)/config/i386/winnt.c $(TM_P_H)
+winnt.o: $(srcdir)/config/i386/winnt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
+ $(TM_H) $(RTL_H) $(REGS_H) hard-reg-set.h output.h $(TREE_H) flags.h \
+ $(TM_P_H) toplev.h $(HASHTAB_H) $(GGC_H)
$(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $(srcdir)/config/i386/winnt.c
-
diff --git a/gcc/config/i386/vxi386.h b/gcc/config/i386/vxi386.h
deleted file mode 100644
index ee4a74093f8..00000000000
--- a/gcc/config/i386/vxi386.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/* Definitions of target machine for GNU compiler. VxWorks i386 version.
- Copyright (C) 1998, 2002 Free Software Foundation, Inc.
-
-This file is part of GNU CC.
-
-GNU CC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-GNU CC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING. If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA. */
-
-#undef TARGET_VERSION
-#define TARGET_VERSION fprintf (stderr, " (80386, VxWorks BSD syntax)");
-
-#define TARGET_OS_CPP_BUILTINS() \
- do \
- { \
- builtin_define ("__vxworks"); \
- builtin_assert ("system=unix"); \
- \
- if (TARGET_386) \
- builtin_define ("CPU=I80386"); \
- else if (TARGET_486) \
- builtin_define ("CPU=I80486"); \
- else if (TARGET_PENTIUM) \
- { \
- builtin_define ("CPU=PENTIUM"); \
- builtin_define ("CPU_VARIANT=PENTIUM"); \
- } \
- else if (TARGET_PENTIUMPRO) \
- { \
- builtin_define ("CPU=PENTIUM"); \
- builtin_define ("CPU_VARIANT=PENTIUMPRO"); \
- } \
- } \
- while (0)
-
-#define HANDLE_SYSV_PRAGMA 1
-
-/* VxWorks does all the library stuff itself. */
-
-#undef LIB_SPEC
-#define LIB_SPEC ""
-
-/* VxWorks uses object files, not loadable images. make linker just
- combine objects. */
-
-#undef LINK_SPEC
-#define LINK_SPEC "-r"
-
-/* VxWorks provides the functionality of crt0.o and friends itself. */
-
-#undef STARTFILE_SPEC
-#define STARTFILE_SPEC ""
-
-#undef ENDFILE_SPEC
-#define ENDFILE_SPEC ""
diff --git a/gcc/config/i386/winnt.c b/gcc/config/i386/winnt.c
index 00b3dfd0442..bc2527aaca5 100644
--- a/gcc/config/i386/winnt.c
+++ b/gcc/config/i386/winnt.c
@@ -22,6 +22,8 @@ Boston, MA 02111-1307, USA. */
#include "config.h"
#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
diff --git a/gcc/config/i386/xm-i386-interix.h b/gcc/config/i386/xm-i386-interix.h
deleted file mode 100644
index bd010e47bc5..00000000000
--- a/gcc/config/i386/xm-i386-interix.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/* Configuration for GNU compiler
- for an Intel i386 or later processor running Interix.
- Copyright (C) 1999 Free Software Foundation, Inc.
- Contributed by Donn Terry (donn@interix.com)
- Derived from code by Douglas B. Rupp (drupp@cs.washington.edu)
-
-This file is part of GNU CC.
-
-GNU CC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-GNU CC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING. If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA. */
-
-#define HOST_BITS_PER_WIDEST_INT HOST_BITS_PER_LONGLONG
-#ifdef __GNUC__
-# define HOST_WIDEST_INT long long
-#else
-# define HOST_WIDEST_INT __int64
-#endif
-#define HOST_WIDEST_INT_PRINT_DEC "%lld"
-#define HOST_WIDEST_INT_PRINT_UNSIGNED "%llu"
-#define HOST_WIDEST_INT_PRINT_HEX "0x%llx"