diff options
Diffstat (limited to 'closures/compiler/x86/nx86add.pas')
-rw-r--r-- | closures/compiler/x86/nx86add.pas | 1080 |
1 files changed, 1080 insertions, 0 deletions
diff --git a/closures/compiler/x86/nx86add.pas b/closures/compiler/x86/nx86add.pas new file mode 100644 index 0000000000..b1285d29b8 --- /dev/null +++ b/closures/compiler/x86/nx86add.pas @@ -0,0 +1,1080 @@ +{ + Copyright (c) 2000-2002 by Florian Klaempfl + + Common code generation for add nodes on the i386 and x86 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + **************************************************************************** +} +unit nx86add; + +{$i fpcdefs.inc} + + interface + + uses + cgbase, + cpubase, + node,nadd,ncgadd; + + type + tx86addnode = class(tcgaddnode) + protected + function getresflags(unsigned : boolean) : tresflags; + procedure left_must_be_reg(opsize:TCGSize;noswap:boolean); + procedure check_left_and_right_fpureg(force_fpureg: boolean); + procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize); + procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean); + + procedure second_cmpfloatsse; + procedure second_addfloatsse; + public + procedure second_addfloat;override; + procedure second_addsmallset;override; + procedure second_add64bit;override; + procedure second_cmpfloat;override; + procedure second_cmpsmallset;override; + procedure second_cmp64bit;override; + procedure second_cmpordinal;override; +{$ifdef SUPPORT_MMX} + procedure second_opmmx;override; +{$endif SUPPORT_MMX} + procedure second_opvector;override; + end; + + + implementation + + uses + globtype,globals, + verbose,cutils, + cpuinfo, + aasmbase,aasmtai,aasmdata,aasmcpu, + symconst,symdef, + cgobj,cgx86,cga,cgutils, + paramgr,tgobj,ncgutil, + ncon,nset,ninl, + defutil; + + +{***************************************************************************** + Helpers +*****************************************************************************} + + procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean); + var + power : longint; + hl4 : tasmlabel; + r : Tregister; + begin + { at this point, left.location.loc should be LOC_REGISTER } + if right.location.loc=LOC_REGISTER then + begin + { right.location is a LOC_REGISTER } + { when swapped another result register } + if (nodetype=subn) and (nf_swapped in flags) then + begin + if extra_not then + emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register); + emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register); + { newly swapped also set swapped flag } + location_swap(left.location,right.location); + toggleflag(nf_swapped); + end + else + begin + if extra_not then + emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register); + if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then + location_swap(left.location,right.location); + emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register); + end; + end + else + begin + { right.location is not a LOC_REGISTER } + if (nodetype=subn) and (nf_swapped in flags) then + begin + if extra_not then + cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register); + r:=cg.getintregister(current_asmdata.CurrAsmList,opsize); + cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,r); + emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,r); + cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,r,left.location.register); + end + else + begin + { Optimizations when right.location is a constant value } + if (op=A_CMP) and + (nodetype in [equaln,unequaln]) and + (right.location.loc=LOC_CONSTANT) and + (right.location.value=0) then + begin + emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register); + end + else + if (op=A_ADD) and + (right.location.loc=LOC_CONSTANT) and + (right.location.value=1) and + not(cs_check_overflow in current_settings.localswitches) then + begin + emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register); + end + else + if (op=A_SUB) and + (right.location.loc=LOC_CONSTANT) and + (right.location.value=1) and + not(cs_check_overflow in current_settings.localswitches) then + begin + emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register); + end + else + if (op=A_IMUL) and + (right.location.loc=LOC_CONSTANT) and + (ispowerof2(int64(right.location.value),power)) and + not(cs_check_overflow in current_settings.localswitches) then + begin + emit_const_reg(A_SHL,TCGSize2Opsize[opsize],power,left.location.register); + end + else + begin + if extra_not then + begin + r:=cg.getintregister(current_asmdata.CurrAsmList,opsize); + cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,r); + emit_reg(A_NOT,TCGSize2Opsize[opsize],r); + emit_reg_reg(A_AND,TCGSize2Opsize[opsize],r,left.location.register); + end + else + begin + emit_op_right_left(op,opsize); + end; + end; + end; + end; + + { only in case of overflow operations } + { produce overflow code } + { we must put it here directly, because sign of operation } + { is in unsigned VAR!! } + if mboverflow then + begin + if cs_check_overflow in current_settings.localswitches then + begin + current_asmdata.getjumplabel(hl4); + if unsigned then + cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4) + else + cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl4); + cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false); + cg.a_label(current_asmdata.CurrAsmList,hl4); + end; + end; + end; + + + procedure tx86addnode.left_must_be_reg(opsize:TCGSize;noswap:boolean); + begin + { left location is not a register? } + if (left.location.loc<>LOC_REGISTER) then + begin + { if right is register then we can swap the locations } + if (not noswap) and + (right.location.loc=LOC_REGISTER) then + begin + location_swap(left.location,right.location); + toggleflag(nf_swapped); + end + else + begin + { maybe we can reuse a constant register when the + operation is a comparison that doesn't change the + value of the register } + location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,(nodetype in [ltn,lten,gtn,gten,equaln,unequaln])); + end; + end; + if (right.location.loc<>LOC_CONSTANT) and + (tcgsize2unsigned[right.location.size]<>tcgsize2unsigned[opsize]) then + location_force_reg(current_asmdata.CurrAsmList,right.location,opsize,true); + if (left.location.loc<>LOC_CONSTANT) and + (tcgsize2unsigned[left.location.size]<>tcgsize2unsigned[opsize]) then + location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,false); + end; + + + procedure tx86addnode.check_left_and_right_fpureg(force_fpureg: boolean); + begin + if (right.location.loc<>LOC_FPUREGISTER) then + begin + if (force_fpureg) then + begin + location_force_fpureg(current_asmdata.CurrAsmList,right.location,false); + if (left.location.loc<>LOC_FPUREGISTER) then + location_force_fpureg(current_asmdata.CurrAsmList,left.location,false) + else + { left was on the stack => swap } + toggleflag(nf_swapped); + end + end + { the nominator in st0 } + else if (left.location.loc<>LOC_FPUREGISTER) then + begin + if (force_fpureg) then + location_force_fpureg(current_asmdata.CurrAsmList,left.location,false) + end + else + begin + { fpu operands are always in the wrong order on the stack } + toggleflag(nf_swapped); + end; + end; + + + procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize); +{$ifdef x86_64} + var + tmpreg : tregister; +{$endif x86_64} + begin + if (right.location.loc in [LOC_CSUBSETREG,LOC_SUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF]) then + location_force_reg(current_asmdata.CurrAsmList,right.location,def_cgsize(right.resultdef),true); + { left must be a register } + case right.location.loc of + LOC_REGISTER, + LOC_CREGISTER : + current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register)); + LOC_REFERENCE, + LOC_CREFERENCE : + begin + tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference); + current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,TCGSize2Opsize[opsize],right.location.reference,left.location.register)); + end; + LOC_CONSTANT : + begin +{$ifdef x86_64} + { x86_64 only supports signed 32 bits constants directly } + if (opsize in [OS_S64,OS_64]) and + ((right.location.value<low(longint)) or (right.location.value>high(longint))) then + begin + tmpreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize); + cg.a_load_const_reg(current_asmdata.CurrAsmList,opsize,right.location.value,tmpreg); + current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],tmpreg,left.location.register)); + end + else +{$endif x86_64} + current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(op,TCGSize2Opsize[opsize],right.location.value,left.location.register)); + end; + else + internalerror(200203232); + end; + end; + + + function tx86addnode.getresflags(unsigned : boolean) : tresflags; + begin + case nodetype of + equaln : getresflags:=F_E; + unequaln : getresflags:=F_NE; + else + if not(unsigned) then + begin + if nf_swapped in flags then + case nodetype of + ltn : getresflags:=F_G; + lten : getresflags:=F_GE; + gtn : getresflags:=F_L; + gten : getresflags:=F_LE; + end + else + case nodetype of + ltn : getresflags:=F_L; + lten : getresflags:=F_LE; + gtn : getresflags:=F_G; + gten : getresflags:=F_GE; + end; + end + else + begin + if nf_swapped in flags then + case nodetype of + ltn : getresflags:=F_A; + lten : getresflags:=F_AE; + gtn : getresflags:=F_B; + gten : getresflags:=F_BE; + end + else + case nodetype of + ltn : getresflags:=F_B; + lten : getresflags:=F_BE; + gtn : getresflags:=F_A; + gten : getresflags:=F_AE; + end; + end; + end; + end; + + +{***************************************************************************** + AddSmallSet +*****************************************************************************} + + procedure tx86addnode.second_addsmallset; + var + setbase : aint; + opsize : TCGSize; + op : TAsmOp; + extra_not, + noswap : boolean; + all_member_optimization:boolean; + + begin + pass_left_right; + + noswap:=false; + extra_not:=false; + all_member_optimization:=false; + opsize:=int_cgsize(resultdef.size); + if (left.resultdef.typ=setdef) then + setbase:=tsetdef(left.resultdef).setbase + else + setbase:=tsetdef(right.resultdef).setbase; + case nodetype of + addn : + begin + { adding elements is not commutative } + if (nf_swapped in flags) and (left.nodetype=setelementn) then + swapleftright; + { are we adding set elements ? } + if right.nodetype=setelementn then + begin + { no range support for smallsets! } + if assigned(tsetelementnode(right).right) then + internalerror(43244); + { btsb isn't supported } + if opsize=OS_8 then + opsize:=OS_32; + { bts requires both elements to be registers } + location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,false); + location_force_reg(current_asmdata.CurrAsmList,right.location,opsize,true); + register_maybe_adjust_setbase(current_asmdata.CurrAsmList,right.location,setbase); + op:=A_BTS; + noswap:=true; + end + else + op:=A_OR; + end; + symdifn : + op:=A_XOR; + muln : + op:=A_AND; + subn : + begin + op:=A_AND; + if (not(nf_swapped in flags) and (left.location.loc=LOC_CONSTANT) and (left.location.value=-1)) or + ((nf_swapped in flags) and (right.location.loc=LOC_CONSTANT) and (right.location.value=-1)) then + all_member_optimization:=true; + + if (not(nf_swapped in flags)) and + (right.location.loc=LOC_CONSTANT) then + right.location.value := not(right.location.value) + else if (nf_swapped in flags) and + (left.location.loc=LOC_CONSTANT) then + left.location.value := not(left.location.value) + else + extra_not:=true; + end; + xorn : + op:=A_XOR; + orn : + op:=A_OR; + andn : + op:=A_AND; + else + internalerror(2003042215); + end; + if all_member_optimization then + begin + {A set expression [0..31]-x can be implemented with a simple NOT.} + if nf_swapped in flags then + begin + { newly swapped also set swapped flag } + location_swap(left.location,right.location); + toggleflag(nf_swapped); + end; + location_force_reg(current_asmdata.currAsmList,right.location,opsize,false); + emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register); + location:=right.location; + end + else + begin + { left must be a register } + left_must_be_reg(opsize,noswap); + emit_generic_code(op,opsize,true,extra_not,false); + location_freetemp(current_asmdata.CurrAsmList,right.location); + + { left is always a register and contains the result } + location:=left.location; + end; + + { fix the changed opsize we did above because of the missing btsb } + if opsize<>int_cgsize(resultdef.size) then + location_force_reg(current_asmdata.CurrAsmList,location,int_cgsize(resultdef.size),false); + end; + + + procedure tx86addnode.second_cmpsmallset; + var + opsize : TCGSize; + op : TAsmOp; + begin + pass_left_right; + opsize:=int_cgsize(left.resultdef.size); + case nodetype of + equaln, + unequaln : + op:=A_CMP; + lten,gten: + begin + if (not(nf_swapped in flags) and (nodetype = lten)) or + ((nf_swapped in flags) and (nodetype = gten)) then + swapleftright; + location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,false); + emit_op_right_left(A_AND,opsize); + op:=A_CMP; + { warning: ugly hack, we need a JE so change the node to equaln } + nodetype:=equaln; + end; + else + internalerror(2003042215); + end; + { left must be a register } + left_must_be_reg(opsize,false); + emit_generic_code(op,opsize,true,false,false); + location_freetemp(current_asmdata.CurrAsmList,right.location); + location_freetemp(current_asmdata.CurrAsmList,left.location); + + location_reset(location,LOC_FLAGS,OS_NO); + location.resflags:=getresflags(true); + end; + + +{***************************************************************************** + AddMMX +*****************************************************************************} + +{$ifdef SUPPORT_MMX} + procedure tx86addnode.second_opmmx; + var + op : TAsmOp; + cmpop : boolean; + mmxbase : tmmxtype; + hreg, + hregister : tregister; + begin + pass_left_right; + + cmpop:=false; + mmxbase:=mmx_type(left.resultdef); + location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef)); + case nodetype of + addn : + begin + if (cs_mmx_saturation in current_settings.localswitches) then + begin + case mmxbase of + mmxs8bit: + op:=A_PADDSB; + mmxu8bit: + op:=A_PADDUSB; + mmxs16bit,mmxfixed16: + op:=A_PADDSW; + mmxu16bit: + op:=A_PADDUSW; + end; + end + else + begin + case mmxbase of + mmxs8bit,mmxu8bit: + op:=A_PADDB; + mmxs16bit,mmxu16bit,mmxfixed16: + op:=A_PADDW; + mmxs32bit,mmxu32bit: + op:=A_PADDD; + end; + end; + end; + muln : + begin + case mmxbase of + mmxs16bit,mmxu16bit: + op:=A_PMULLW; + mmxfixed16: + op:=A_PMULHW; + end; + end; + subn : + begin + if (cs_mmx_saturation in current_settings.localswitches) then + begin + case mmxbase of + mmxs8bit: + op:=A_PSUBSB; + mmxu8bit: + op:=A_PSUBUSB; + mmxs16bit,mmxfixed16: + op:=A_PSUBSB; + mmxu16bit: + op:=A_PSUBUSW; + end; + end + else + begin + case mmxbase of + mmxs8bit,mmxu8bit: + op:=A_PSUBB; + mmxs16bit,mmxu16bit,mmxfixed16: + op:=A_PSUBW; + mmxs32bit,mmxu32bit: + op:=A_PSUBD; + end; + end; + end; + xorn: + op:=A_PXOR; + orn: + op:=A_POR; + andn: + op:=A_PAND; + else + internalerror(2003042214); + end; + + { left and right no register? } + { then one must be demanded } + if (left.location.loc<>LOC_MMXREGISTER) then + begin + if (right.location.loc=LOC_MMXREGISTER) then + begin + location_swap(left.location,right.location); + toggleflag(nf_swapped); + end + else + begin + { register variable ? } + if (left.location.loc=LOC_CMMXREGISTER) then + begin + hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList); + emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister); + end + else + begin + if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then + internalerror(200203245); + + hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList); + tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference); + emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister); + end; + + location_reset(left.location,LOC_MMXREGISTER,OS_NO); + left.location.register:=hregister; + end; + end; + + { at this point, left.location.loc should be LOC_MMXREGISTER } + if right.location.loc<>LOC_MMXREGISTER then + begin + if (nodetype=subn) and (nf_swapped in flags) then + begin + hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList); + if right.location.loc=LOC_CMMXREGISTER then + begin + emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg); + emit_reg_reg(op,S_NO,left.location.register,hreg); + end + else + begin + if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then + internalerror(200203247); + tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference); + emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg); + emit_reg_reg(op,S_NO,left.location.register,hreg); + end; + location.register:=hreg; + end + else + begin + if (right.location.loc=LOC_CMMXREGISTER) then + emit_reg_reg(op,S_NO,right.location.register,left.location.register) + else + begin + if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then + internalerror(200203246); + tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference); + emit_ref_reg(op,S_NO,right.location.reference,left.location.register); + end; + location.register:=left.location.register; + end; + end + else + begin + { right.location=LOC_MMXREGISTER } + if (nodetype=subn) and (nf_swapped in flags) then + begin + emit_reg_reg(op,S_NO,left.location.register,right.location.register); + location_swap(left.location,right.location); + toggleflag(nf_swapped); + end + else + begin + emit_reg_reg(op,S_NO,right.location.register,left.location.register); + end; + location.register:=left.location.register; + end; + + location_freetemp(current_asmdata.CurrAsmList,right.location); + if cmpop then + location_freetemp(current_asmdata.CurrAsmList,left.location); + end; +{$endif SUPPORT_MMX} + + +{***************************************************************************** + AddFloat +*****************************************************************************} + + procedure tx86addnode.second_addfloatsse; + var + op : topcg; + sqr_sum : boolean; + tmp : tnode; + begin + sqr_sum:=false; + if (current_settings.fputype>=fpu_sse3) and + use_vectorfpu(resultdef) and + (nodetype in [addn,subn]) and + (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and + (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then + begin + sqr_sum:=true; + tmp:=tinlinenode(left).left; + tinlinenode(left).left:=nil; + left.free; + left:=tmp; + + tmp:=tinlinenode(right).left; + tinlinenode(right).left:=nil; + right.free; + right:=tmp; + end; + + pass_left_right; + check_left_and_right_fpureg(false); + + if (nf_swapped in flags) then + { can't use swapleftright if both are on the fpu stack, since then } + { both are "R_ST" -> nothing would change -> manually switch } + if (left.location.loc = LOC_FPUREGISTER) and + (right.location.loc = LOC_FPUREGISTER) then + emit_none(A_FXCH,S_NO) + else + swapleftright; + + case nodetype of + addn : + op:=OP_ADD; + muln : + op:=OP_MUL; + subn : + op:=OP_SUB; + slashn : + op:=OP_DIV; + else + internalerror(200312231); + end; + + location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef)); + + if sqr_sum then + begin + if nf_swapped in flags then + swapleftright; + + location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false); + location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,true); + location:=left.location; + if is_double(resultdef) then + begin + current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register)); + current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register)); + case nodetype of + addn: + current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register)); + subn: + current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register)); + else + internalerror(201108162); + end; + end + else + begin + current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register)); + { ensure that bits 64..127 contain valid values } + current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register)); + { the data is now in bits 0..32 and 64..95 } + current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register)); + case nodetype of + addn: + begin + current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register)); + end; + subn: + begin + current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register)); + end; + else + internalerror(201108163); + end; + end + end + { we can use only right as left operand if the operation is commutative } + else if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then + begin + location.register:=right.location.register; + { force floating point reg. location to be written to memory, + we don't force it to mm register because writing to memory + allows probably shorter code because there is no direct fpu->mm register + copy instruction + } + if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then + location_force_mem(current_asmdata.CurrAsmList,left.location); + cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,left.location,location.register,mms_movescalar); + end + else + begin + if (nf_swapped in flags) then + swapleftright; + + location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false); + location.register:=left.location.register; + { force floating point reg. location to be written to memory, + we don't force it to mm register because writing to memory + allows probably shorter code because there is no direct fpu->mm register + copy instruction + } + if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then + location_force_mem(current_asmdata.CurrAsmList,right.location); + cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,right.location,location.register,mms_movescalar); + end; + end; + + + procedure tx86addnode.second_cmpfloatsse; + var + op : tasmop; + begin + if is_single(left.resultdef) then + op:=A_COMISS + else if is_double(left.resultdef) then + op:=A_COMISD + else + internalerror(200402222); + pass_left_right; + + location_reset(location,LOC_FLAGS,def_cgsize(resultdef)); + { we can use only right as left operand if the operation is commutative } + if (right.location.loc=LOC_MMREGISTER) then + begin + { force floating point reg. location to be written to memory, + we don't force it to mm register because writing to memory + allows probably shorter code because there is no direct fpu->mm register + copy instruction + } + if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then + location_force_mem(current_asmdata.CurrAsmList,left.location); + case left.location.loc of + LOC_REFERENCE,LOC_CREFERENCE: + begin + tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference); + current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,S_NO,left.location.reference,right.location.register)); + end; + LOC_MMREGISTER,LOC_CMMREGISTER: + current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_NO,left.location.register,right.location.register)); + else + internalerror(200402221); + end; + if nf_swapped in flags then + exclude(flags,nf_swapped) + else + include(flags,nf_swapped) + end + else + begin + location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false); + { force floating point reg. location to be written to memory, + we don't force it to mm register because writing to memory + allows probably shorter code because there is no direct fpu->mm register + copy instruction + } + if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then + location_force_mem(current_asmdata.CurrAsmList,right.location); + case right.location.loc of + LOC_REFERENCE,LOC_CREFERENCE: + begin + tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference); + current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,S_NO,right.location.reference,left.location.register)); + end; + LOC_MMREGISTER,LOC_CMMREGISTER: + current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_NO,right.location.register,left.location.register)); + else + internalerror(200402223); + end; + end; + location.resflags:=getresflags(true); + end; + + + procedure tx86addnode.second_opvector; + var + op : topcg; + begin + pass_left_right; + if (nf_swapped in flags) then + swapleftright; + + case nodetype of + addn : + op:=OP_ADD; + muln : + op:=OP_MUL; + subn : + op:=OP_SUB; + slashn : + op:=OP_DIV; + else + internalerror(200610071); + end; + + if fits_in_mm_register(left.resultdef) then + begin + location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef)); + { we can use only right as left operand if the operation is commutative } + if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then + begin + location.register:=right.location.register; + cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,tfloat2tcgsize[tfloatdef(left.resultdef).floattype],left.location,location.register,nil); + end + else + begin + location_force_mmreg(current_asmdata.CurrAsmList,left.location,false); + location.register:=left.location.register; + cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op, + tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype],right.location,location.register,nil); + end; + end + else + begin + { not yet supported } + internalerror(200610072); + end + end; + + + procedure tx86addnode.second_addfloat; + var + op : TAsmOp; + begin + if use_vectorfpu(resultdef) then + begin + second_addfloatsse; + exit; + end; + + pass_left_right; + + case nodetype of + addn : + op:=A_FADDP; + muln : + op:=A_FMULP; + subn : + op:=A_FSUBP; + slashn : + op:=A_FDIVP; + else + internalerror(2003042214); + end; + + check_left_and_right_fpureg(true); + + { if we swaped the tree nodes, then use the reverse operator } + if nf_swapped in flags then + begin + if (nodetype=slashn) then + op:=A_FDIVRP + else if (nodetype=subn) then + op:=A_FSUBRP; + end; + + emit_reg_reg(op,S_NO,NR_ST,NR_ST1); + tcgx86(cg).dec_fpu_stack; + + location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef)); + location.register:=NR_ST; + end; + + + procedure tx86addnode.second_cmpfloat; + var + resflags : tresflags; + begin + if use_vectorfpu(left.resultdef) or use_vectorfpu(right.resultdef) then + begin + second_cmpfloatsse; + exit; + end; + + pass_left_right; + check_left_and_right_fpureg(true); + +{$ifndef x86_64} + if current_settings.cputype<cpu_Pentium2 then + begin + emit_none(A_FCOMPP,S_NO); + tcgx86(cg).dec_fpu_stack; + tcgx86(cg).dec_fpu_stack; + + { load fpu flags } + cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX); + emit_reg(A_FSTSW,S_NO,NR_AX); + emit_none(A_SAHF,S_NO); + cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX); + if nf_swapped in flags then + begin + case nodetype of + equaln : resflags:=F_E; + unequaln : resflags:=F_NE; + ltn : resflags:=F_A; + lten : resflags:=F_AE; + gtn : resflags:=F_B; + gten : resflags:=F_BE; + end; + end + else + begin + case nodetype of + equaln : resflags:=F_E; + unequaln : resflags:=F_NE; + ltn : resflags:=F_B; + lten : resflags:=F_BE; + gtn : resflags:=F_A; + gten : resflags:=F_AE; + end; + end; + end + else +{$endif x86_64} + begin + current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0)); + { fcomip pops only one fpu register } + current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0)); + tcgx86(cg).dec_fpu_stack; + tcgx86(cg).dec_fpu_stack; + + { load fpu flags } + if nf_swapped in flags then + begin + case nodetype of + equaln : resflags:=F_E; + unequaln : resflags:=F_NE; + ltn : resflags:=F_A; + lten : resflags:=F_AE; + gtn : resflags:=F_B; + gten : resflags:=F_BE; + end; + end + else + begin + case nodetype of + equaln : resflags:=F_E; + unequaln : resflags:=F_NE; + ltn : resflags:=F_B; + lten : resflags:=F_BE; + gtn : resflags:=F_A; + gten : resflags:=F_AE; + end; + end; + end; + + location_reset(location,LOC_FLAGS,OS_NO); + location.resflags:=resflags; + end; + + +{***************************************************************************** + Add64bit +*****************************************************************************} + + procedure tx86addnode.second_add64bit; + begin +{$ifdef cpu64bitalu} + second_addordinal; +{$else cpu64bitalu} + { must be implemented separate } + internalerror(200402042); +{$endif cpu64bitalu} + end; + + + procedure tx86addnode.second_cmp64bit; + begin +{$ifdef cpu64bitalu} + second_cmpordinal; +{$else cpu64bitalu} + { must be implemented separate } + internalerror(200402043); +{$endif cpu64bitalu} + end; + + +{***************************************************************************** + AddOrdinal +*****************************************************************************} + + procedure tx86addnode.second_cmpordinal; + var + opsize : tcgsize; + unsigned : boolean; + begin + unsigned:=not(is_signed(left.resultdef)) or + not(is_signed(right.resultdef)); + opsize:=def_cgsize(left.resultdef); + + pass_left_right; + + left_must_be_reg(opsize,false); + emit_generic_code(A_CMP,opsize,unsigned,false,false); + location_freetemp(current_asmdata.CurrAsmList,right.location); + location_freetemp(current_asmdata.CurrAsmList,left.location); + + location_reset(location,LOC_FLAGS,OS_NO); + location.resflags:=getresflags(unsigned); + end; + +begin + caddnode:=tx86addnode; +end. |