diff options
Diffstat (limited to 'compiler/mips')
-rw-r--r-- | compiler/mips/aasmcpu.pas | 21 | ||||
-rw-r--r-- | compiler/mips/aoptcpu.pas | 316 | ||||
-rw-r--r-- | compiler/mips/cgcpu.pas | 131 | ||||
-rw-r--r-- | compiler/mips/cpuelf.pas | 12 | ||||
-rw-r--r-- | compiler/mips/cpugas.pas | 2 | ||||
-rw-r--r-- | compiler/mips/cpuinfo.pas | 53 | ||||
-rw-r--r-- | compiler/mips/ncpuadd.pas | 120 | ||||
-rw-r--r-- | compiler/mips/ncpucnv.pas | 5 | ||||
-rw-r--r-- | compiler/mips/rgcpu.pas | 42 |
9 files changed, 558 insertions, 144 deletions
diff --git a/compiler/mips/aasmcpu.pas b/compiler/mips/aasmcpu.pas index 6a6ac39edf..08a63e7bc9 100644 --- a/compiler/mips/aasmcpu.pas +++ b/compiler/mips/aasmcpu.pas @@ -33,9 +33,9 @@ uses const { "mov reg,reg" source operand number } - O_MOV_SOURCE = 0; + O_MOV_SOURCE = 1; { "mov reg,reg" source operand number } - O_MOV_DEST = 1; + O_MOV_DEST = 0; type { taicpu } @@ -401,13 +401,22 @@ end; A_SNE, A_EXT, A_INS, - A_MFC0]; + A_MFC0, + A_SEB, + A_SEH]; begin result := operand_read; - if opcode in op_write_set then - if opnr = 0 then - result := operand_write; + case opcode of + A_DIV, { these have 3 operands if used as macros } + A_DIVU: + if (ops=3) and (opnr=0) then + result:=operand_write; + else + if opcode in op_write_set then + if opnr = 0 then + result := operand_write; + end; end; diff --git a/compiler/mips/aoptcpu.pas b/compiler/mips/aoptcpu.pas index 8ae44180c2..451d0e040d 100644 --- a/compiler/mips/aoptcpu.pas +++ b/compiler/mips/aoptcpu.pas @@ -28,15 +28,18 @@ unit aoptcpu; Interface uses - cgbase, cpubase, aoptobj, aoptcpub, aopt, aasmtai; + cgbase, cpubase, aoptobj, aoptcpub, aopt, aasmtai, aasmcpu; Type + TAsmOpSet = set of TAsmOp; + TCpuAsmOptimizer = class(TAsmOptimizer) + function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; override; function GetNextInstructionUsingReg(Current: tai; var Next: tai; reg: TRegister): Boolean; - function RegUsedAfterInstruction(reg: Tregister; p: tai; - var AllUsedRegs: TAllUsedRegs): Boolean; function TryRemoveMov(var p: tai; opcode: TAsmOp): boolean; + function TryRemoveMovToRefIndex(var p: tai; next: taicpu): boolean; + function TryRemoveMovBeforeStore(var p: tai; next: taicpu; const storeops: TAsmOpSet): boolean; function PeepHoleOptPass1Cpu(var p: tai): boolean; override; procedure PeepHoleOptPass2; override; End; @@ -44,7 +47,7 @@ unit aoptcpu; Implementation uses - globals,aasmbase,aasmcpu,cpuinfo,verbose; + cutils,globtype,globals,aasmbase,cpuinfo,verbose; function MatchInstruction(const instr: tai; const op: TAsmOp): boolean; @@ -93,10 +96,15 @@ unit aoptcpu; end; - function CanBeCMOV(p: tai): boolean; + function CanBeCMOV(p: tai; condreg: tregister): boolean; begin result:=assigned(p) and (p.typ=ait_instruction) and - (taicpu(p).opcode in [A_MOV_D,A_MOV_S,A_MOVE]); + ((taicpu(p).opcode in [A_MOV_D,A_MOV_S]) or + ( + { register with condition must not be overwritten } + (taicpu(p).opcode=A_MOVE) and + (taicpu(p).oper[0]^.reg<>condreg) + )); end; @@ -170,6 +178,20 @@ unit aoptcpu; end; + function TCpuAsmOptimizer.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; + var + i : Longint; + begin + result:=false; + for i:=0 to taicpu(p1).ops-1 do + if (taicpu(p1).oper[i]^.typ=top_reg) and (taicpu(p1).oper[i]^.reg=Reg) and (taicpu(p1).spilling_get_operation_type(i) in [operand_write,operand_readwrite]) then + begin + result:=true; + exit; + end; + end; + + function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai; var Next: tai; reg: TRegister): Boolean; begin @@ -178,7 +200,7 @@ unit aoptcpu; Result:=GetNextInstruction(Next,Next); until {not(cs_opt_level3 in current_settings.optimizerswitches) or} not(Result) or (Next.typ<>ait_instruction) or (RegInInstruction(reg,Next)) or (is_calljmp(taicpu(Next).opcode)); - if Result and is_calljmp(taicpu(next).opcode) then + if Result and (next.typ=ait_instruction) and is_calljmp(taicpu(next).opcode) then begin result:=false; next:=nil; @@ -186,21 +208,6 @@ unit aoptcpu; end; - function TCpuAsmOptimizer.RegUsedAfterInstruction(reg: Tregister; p: tai; - var AllUsedRegs: TAllUsedRegs): Boolean; - begin - AllUsedRegs[getregtype(reg)].Update(tai(p.Next),true); - RegUsedAfterInstruction := - AllUsedRegs[getregtype(reg)].IsUsed(reg) and - not(regLoadedWithNewValue(reg,p)) and - ( - not(GetNextInstruction(p,p)) or - instructionLoadsFromReg(reg,p) or - not(regLoadedWithNewValue(reg,p)) - ); - end; - - function TCpuAsmOptimizer.TryRemoveMov(var p: tai; opcode: TAsmOp): boolean; var next,hp1: tai; @@ -215,7 +222,7 @@ unit aoptcpu; opcode may be A_MOVE, A_MOV_s, A_MOV_d, etc. } result:=false; - if (taicpu(p).ops>1) and + if (taicpu(p).ops>0) and GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and MatchInstruction(next,opcode) and MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) and @@ -256,21 +263,96 @@ unit aoptcpu; taicpu(p).loadreg(0,taicpu(next).oper[0]^.reg); asml.remove(next); next.free; + result:=true; + end + else // no dealloc found + begin + { try to optimize the typical call sequence + lw $reg, (whatever) + <alloc volatile registers> + move $t9,$reg + jalr $t9 + Do not do so if the used register might contain a + register variable. } + if (opcode=A_MOVE) and + not(cs_opt_regvar in current_settings.optimizerswitches) and + (taicpu(next).oper[0]^.reg=NR_R25) and + GetNextInstruction(next,hp1) and + MatchInstruction(hp1,A_JALR) and + MatchOperand(taicpu(hp1).oper[0]^,NR_R25) then + begin + taicpu(p).loadreg(0,taicpu(next).oper[0]^.reg); + asml.remove(next); + next.free; + result:=true; + end; end; end; end; + function TCpuAsmOptimizer.TryRemoveMovBeforeStore(var p: tai; next: taicpu; const storeops: TAsmOpSet): boolean; + begin + result:=(next.opcode in storeops) and + MatchOperand(next.oper[0]^,taicpu(p).oper[0]^.reg) and + { Ry cannot be modified between move and store } + (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next)) and + Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))); + if result then + begin + next.loadreg(0,taicpu(p).oper[1]^.reg); + asml.remove(p); + p.free; + p:=next; + end; + end; + + + function TCpuAsmOptimizer.TryRemoveMovToRefIndex(var p: tai; next: taicpu): boolean; + begin + result:=(next.oper[1]^.typ=top_ref) and + (next.oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and + (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next)) and + Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))); + if result then + begin + next.oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg; + asml.remove(p); + p.free; + p:=next; + end; + end; + + function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean; var next,next2: tai; - TmpUsedRegs: TAllUsedRegs; begin result:=false; case p.typ of ait_instruction: begin case taicpu(p).opcode of + A_SEH: + begin + if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and + MatchInstruction(next,A_SH) and + MatchOperand(taicpu(next).oper[0]^,taicpu(p).oper[0]^.reg) and + (not RegUsedBetween(taicpu(p).oper[1]^.reg,p,next)) and + Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then + begin + taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg); + asml.remove(p); + p.free; + p:=next; + end + else + TryRemoveMov(p,A_MOVE); + end; + A_SEB: + { TODO: can be handled similar to A_SEH, but it's almost never encountered } + TryRemoveMov(p,A_MOVE); + A_SLL: begin { if this is a sign extension... } @@ -289,10 +371,7 @@ unit aoptcpu; { the initial register may not be reused } (not RegUsedBetween(taicpu(p).oper[1]^.reg,next,next2)) then begin - CopyUsedRegs(TmpUsedRegs); - UpdateUsedRegs(TmpUsedRegs, tai(p.next)); - UpdateUsedRegs(TmpUsedRegs, tai(next.next)); - if not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,next2,TmpUsedRegs) then + if Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next2.next))) then begin taicpu(next2).loadreg(0,taicpu(p).oper[1]^.reg); asml.remove(p); @@ -301,7 +380,6 @@ unit aoptcpu; next.free; p:=next2; end; - ReleaseUsedRegs(TmpUsedRegs); end else TryRemoveMov(p,A_MOVE); @@ -309,6 +387,7 @@ unit aoptcpu; A_SRL: begin + { TODO: also kill sign-extensions that follow, both SLL+SRA and SEB/SEH versions } { Remove 'andi' in sequences srl Rx,Ry,16 andi Rx,Rx,65535 @@ -368,37 +447,184 @@ unit aoptcpu; ((taicpu(p).oper[2]^.val=255) and MatchInstruction(next,A_SB)) or ((taicpu(p).oper[2]^.val=65535) and MatchInstruction(next,A_SH)) and (taicpu(next).oper[0]^.typ=top_reg) and - (taicpu(next).oper[0]^.reg=taicpu(p).oper[0]^.reg) then + (taicpu(next).oper[0]^.reg=taicpu(p).oper[0]^.reg) and + assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then begin - CopyUsedRegs(TmpUsedRegs); - UpdateUsedRegs(TmpUsedRegs, tai(p.next)); - if not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,next,TmpUsedRegs) then + taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg); + asml.remove(p); + p.free; + p:=next; + end + else + TryRemoveMov(p,A_MOVE); + end; + + A_MOV_S: + begin + if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and + (next.typ=ait_instruction) then + begin + if TryRemoveMovBeforeStore(p,taicpu(next),[A_SWC1]) then + { optimization successful }; + end; + end; + + A_MOV_D: + begin + if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and + (next.typ=ait_instruction) then + begin + if TryRemoveMovBeforeStore(p,taicpu(next),[A_SDC1]) then + { optimization successful }; + end; + end; + + A_MOVE: + begin + if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and + (next.typ=ait_instruction) and + (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next)) then + begin + { MOVE Rx,Ry; store Rx,(ref); dealloc Rx ==> store Ry,(ref) } + if TryRemoveMovBeforeStore(p,taicpu(next),[A_SB,A_SH,A_SW]) then + { optimization successful } + else if TryRemoveMovToRefIndex(p,taicpu(next)) then + { successful as well } + { MOVE Rx,Ry; opcode Rx,Rx,any ==> opcode Rx,Ry,any + MOVE Rx,Ry; opcode Rx,Rz,Rx ==> opcode Rx,Rz,Ry } + else if (taicpu(next).opcode in [A_ADD,A_ADDU,A_ADDI,A_ADDIU,A_SUB,A_SUBU]) and + MatchOperand(taicpu(next).oper[0]^,taicpu(p).oper[0]^.reg) then begin - taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg); - asml.remove(p); - p.free; - p:=next; + if MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) and + Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then + begin + taicpu(next).loadreg(1,taicpu(p).oper[1]^.reg); + asml.remove(p); + p.free; + p:=next; + end + { TODO: if Ry=NR_R0, this effectively changes instruction into MOVE, + providing further optimization possibilities } + else if MatchOperand(taicpu(next).oper[2]^,taicpu(p).oper[0]^.reg) and + Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then + begin + taicpu(next).loadreg(2,taicpu(p).oper[1]^.reg); + asml.remove(p); + p.free; + p:=next; + end; + end + { MOVE Rx,Ry; opcode Rz,Rx,any; dealloc Rx ==> opcode Rz,Ry,any } + else if (taicpu(next).opcode in [A_ADD,A_ADDU,A_ADDI,A_ADDIU,A_SUB,A_SUBU,A_SLT,A_SLTU,A_DIV,A_DIVU]) and + Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then + begin + if MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) then + begin + taicpu(next).loadreg(1,taicpu(p).oper[1]^.reg); + asml.remove(p); + p.free; + p:=next; + end + else if MatchOperand(taicpu(next).oper[2]^,taicpu(p).oper[0]^.reg) then + begin + taicpu(next).loadreg(2,taicpu(p).oper[1]^.reg); + asml.remove(p); + p.free; + p:=next; + end; + end + { MULT[U] must be handled separately due to different operand numbers } + else if (taicpu(next).opcode in [A_MULT,A_MULTU]) and + Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then + begin + if MatchOperand(taicpu(next).oper[0]^,taicpu(p).oper[0]^.reg) then + begin + taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg); + asml.remove(p); + p.free; + p:=next; + end + else if MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) then + begin + taicpu(next).loadreg(1,taicpu(p).oper[1]^.reg); + asml.remove(p); + p.free; + p:=next; + end; + end + else if TryRemoveMov(p,A_MOVE) then + begin + { Ended up with move between same register? Suicide then. } + if (taicpu(p).oper[0]^.reg=taicpu(p).oper[1]^.reg) then + begin + GetNextInstruction(p,next); + asml.remove(p); + p.free; + p:=next; + end; end; - ReleaseUsedRegs(TmpUsedRegs); + { TODO: MOVE Rx,Ry; Bcc Rx,Rz,label; dealloc Rx ==> Bcc Ry,Rz,label } + end; + end; + + A_ADDIU: + begin + { ADDIU Rx,Ry,const; load/store Rz,(Rx); dealloc Rx ==> load/store Rz,const(Ry) + ADDIU Rx,Ry,%lo(sym); load/store Rz,(Rx); dealloc Rx ==> load/store Rz,%lo(sym)(Ry) + ADDIU Rx,Ry,const; load Rx,(Rx) ==> load Rx,const(Ry) + ADDIU Rx,Ry,%lo(sym); load Rx,(Rx) ==> load Rx,%lo(sym)(Ry) } + if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and + (next.typ=ait_instruction) and + (taicpu(next).opcode in [A_LB,A_LBU,A_LH,A_LHU,A_LW,A_SB,A_SH,A_SW]) and + (taicpu(p).oper[0]^.reg=taicpu(next).oper[1]^.ref^.base) and + (taicpu(next).oper[1]^.ref^.offset=0) and + (taicpu(next).oper[1]^.ref^.symbol=nil) and + ( + Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) or + ( + (taicpu(p).oper[0]^.reg=taicpu(next).oper[0]^.reg) and + (taicpu(next).opcode in [A_LB,A_LBU,A_LH,A_LHU,A_LW]) + ) + ) and + (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next)) then + begin + case taicpu(p).oper[2]^.typ of + top_const: + taicpu(next).oper[1]^.ref^.offset:=taicpu(p).oper[2]^.val; + + top_ref: + taicpu(next).oper[1]^.ref^:=taicpu(p).oper[2]^.ref^; + else + InternalError(2014100401); + end; + taicpu(next).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg; + asml.remove(p); + p.free; + p:=next; + result:=true; end else - TryRemoveMov(p,A_MOVE); + result:=TryRemoveMov(p,A_MOVE); end; + A_LB,A_LBU,A_LH,A_LHU,A_LW, A_ADD,A_ADDU, - A_ADDI,A_ADDIU, + A_ADDI, A_SUB,A_SUBU, A_SRA,A_SRAV, A_SRLV, A_SLLV, + A_MFLO,A_MFHI, A_AND,A_OR,A_XOR,A_ORI,A_XORI: TryRemoveMov(p,A_MOVE); + A_LWC1, A_ADD_s, A_SUB_s, A_MUL_s, A_DIV_s, A_ABS_s, A_NEG_s, A_SQRT_s, A_CVT_s_w, A_CVT_s_l, A_CVT_s_d: TryRemoveMov(p,A_MOV_s); + A_LDC1, A_ADD_d, A_SUB_d, A_MUL_d, A_DIV_d, A_ABS_d, A_NEG_d, A_SQRT_d, A_CVT_d_w, A_CVT_d_l, A_CVT_d_s: @@ -452,7 +678,7 @@ unit aoptcpu; } l:=0; GetNextInstruction(p, hp1); - while CanBeCMOV(hp1) do // CanBeCMOV returns False for nil or labels + while CanBeCMOV(hp1,condreg) do // CanBeCMOV returns False for nil or labels begin inc(l); GetNextInstruction(hp1,hp1); @@ -470,7 +696,7 @@ unit aoptcpu; repeat ChangeToCMOV(taicpu(hp1),condition,condreg); GetNextInstruction(hp1,hp1); - until not CanBeCMOV(hp1); + until not CanBeCMOV(hp1,condreg); { wait with removing else GetNextInstruction could ignore the label if it was the only usage in the jump moved away } @@ -508,7 +734,7 @@ unit aoptcpu; l:=0; { skip hp1 to <several moves 2> } GetNextInstruction(hp1, hp1); - while CanBeCMOV(hp1) do + while CanBeCMOV(hp1,condreg) do begin inc(l); GetNextInstruction(hp1, hp1); @@ -524,7 +750,7 @@ unit aoptcpu; repeat ChangeToCMOV(taicpu(hp1),condition,condreg); GetNextInstruction(hp1,hp1); - until not CanBeCMOV(hp1); + until not CanBeCMOV(hp1,condreg); { hp2 is still at b yyy } GetNextInstruction(hp2,hp1); { hp2 is now at xxx: } @@ -534,7 +760,7 @@ unit aoptcpu; repeat ChangeToCMOV(taicpu(hp1),condition,condreg); GetNextInstruction(hp1,hp1); - until not CanBeCMOV(hp1); + until not CanBeCMOV(hp1,condreg); { remove bCC } tasmlabel(taicpu(hp3).oper[taicpu(hp3).ops-1]^.ref^.symbol).decrefs; RemoveDelaySlot(hp3); diff --git a/compiler/mips/cgcpu.pas b/compiler/mips/cgcpu.pas index cdc8d213b2..79e4a68975 100644 --- a/compiler/mips/cgcpu.pas +++ b/compiler/mips/cgcpu.pas @@ -197,16 +197,9 @@ begin { PIC global symbol } ref.symbol:=nil; - if (ref.offset=0) then - exit; - if (ref.offset>=simm16lo) and (ref.offset<=simm16hi-sizeof(pint)) then - begin - list.concat(taicpu.op_reg_reg_const(A_ADDIU,ref.base,ref.base,ref.offset)); - ref.offset:=0; - exit; - end; + exit; { fallthrough to the case of large offset } end; @@ -295,10 +288,6 @@ begin [RS_F0,RS_F2,RS_F4,RS_F6, RS_F8,RS_F10,RS_F12,RS_F14, RS_F16,RS_F18,RS_F20,RS_F22, RS_F24,RS_F26,RS_F28,RS_F30], first_fpu_imreg, []); - - { needs at least one element for rgobj not to crash } - rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBNONE, - [RS_R0],first_mm_imreg,[]); end; @@ -307,7 +296,6 @@ procedure TCGMIPS.done_register_allocators; begin rg[R_INTREGISTER].Free; rg[R_FPUREGISTER].Free; - rg[R_MMREGISTER].Free; inherited done_register_allocators; end; @@ -441,7 +429,7 @@ end; procedure TCGMIPS.a_load_const_reg(list: tasmlist; size: TCGSize; a: tcgint; reg: TRegister); begin if (a = 0) then - list.concat(taicpu.op_reg_reg(A_MOVE, reg, NR_R0)) + a_load_reg_reg(list, OS_INT, OS_INT, NR_R0, reg) else if (a >= simm16lo) and (a <= simm16hi) then list.concat(taicpu.op_reg_reg_const(A_ADDIU, reg, NR_R0, a)) else if (a>=0) and (a <= 65535) then @@ -546,13 +534,23 @@ begin done:=false; OS_S8: begin - list.concat(taicpu.op_reg_reg_const(A_SLL, reg2, reg1, 24)); - list.concat(taicpu.op_reg_reg_const(A_SRA, reg2, reg2, 24)); + if (CPUMIPS_HAS_ISA32R2 in cpu_capabilities[current_settings.cputype]) then + list.concat(taicpu.op_reg_reg(A_SEB,reg2,reg1)) + else + begin + list.concat(taicpu.op_reg_reg_const(A_SLL, reg2, reg1, 24)); + list.concat(taicpu.op_reg_reg_const(A_SRA, reg2, reg2, 24)); + end; end; OS_S16: begin - list.concat(taicpu.op_reg_reg_const(A_SLL, reg2, reg1, 16)); - list.concat(taicpu.op_reg_reg_const(A_SRA, reg2, reg2, 16)); + if (CPUMIPS_HAS_ISA32R2 in cpu_capabilities[current_settings.cputype]) then + list.concat(taicpu.op_reg_reg(A_SEH,reg2,reg1)) + else + begin + list.concat(taicpu.op_reg_reg_const(A_SLL, reg2, reg1, 16)); + list.concat(taicpu.op_reg_reg_const(A_SRA, reg2, reg2, 16)); + end; end; else internalerror(2002090901); @@ -820,6 +818,7 @@ var hreg: TRegister; asmop: TAsmOp; begin + a:=aint(a); ovloc.loc := LOC_VOID; optimize_op_const(size,op,a); signed:=(size in [OS_S8,OS_S16,OS_S32]); @@ -935,23 +934,30 @@ begin end; OP_MUL,OP_IMUL: begin - list.concat(taicpu.op_reg_reg(TOpCg2AsmOp[op], src2, src1)); - list.concat(taicpu.op_reg(A_MFLO, dst)); - if setflags then + if (CPUMIPS_HAS_ISA32R2 in cpu_capabilities[current_settings.cputype]) and + (not setflags) then + { NOTE: MUL is actually mips32r1 instruction; on older cores it is handled as macro } + list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1)) + else begin - current_asmdata.getjumplabel(hl); - hreg:=GetIntRegister(list,OS_INT); - list.concat(taicpu.op_reg(A_MFHI,hreg)); - if (op=OP_IMUL) then + list.concat(taicpu.op_reg_reg(TOpCg2AsmOp[op], src2, src1)); + list.concat(taicpu.op_reg(A_MFLO, dst)); + if setflags then begin - hreg2:=GetIntRegister(list,OS_INT); - list.concat(taicpu.op_reg_reg_const(A_SRA,hreg2,dst,31)); - a_cmp_reg_reg_label(list,OS_INT,OC_EQ,hreg2,hreg,hl); - end - else - a_cmp_reg_reg_label(list,OS_INT,OC_EQ,hreg,NR_R0,hl); - list.concat(taicpu.op_const(A_BREAK,6)); - a_label(list,hl); + current_asmdata.getjumplabel(hl); + hreg:=GetIntRegister(list,OS_INT); + list.concat(taicpu.op_reg(A_MFHI,hreg)); + if (op=OP_IMUL) then + begin + hreg2:=GetIntRegister(list,OS_INT); + list.concat(taicpu.op_reg_reg_const(A_SRA,hreg2,dst,31)); + a_cmp_reg_reg_label(list,OS_INT,OC_EQ,hreg2,hreg,hl); + end + else + a_cmp_reg_reg_label(list,OS_INT,OC_EQ,hreg,NR_R0,hl); + list.concat(taicpu.op_const(A_BREAK,6)); + a_label(list,hl); + end; end; end; OP_AND,OP_OR,OP_XOR: @@ -1071,7 +1077,28 @@ end; procedure TCGMIPS.a_jmp_flags(list: tasmlist; const f: TResFlags; l: tasmlabel); + var + ai: taicpu; begin + case f.reg1 of + NR_FCC0..NR_FCC7: + begin + if (f.reg1=NR_FCC0) then + ai:=taicpu.op_sym(A_BC,l) + else + ai:=taicpu.op_reg_sym(A_BC,f.reg1,l); + list.concat(ai); + { delay slot } + list.concat(taicpu.op_none(A_NOP)); + case f.cond of + OC_NE: ai.SetCondition(C_COP1TRUE); + OC_EQ: ai.SetCondition(C_COP1FALSE); + else + InternalError(2014082901); + end; + exit; + end; + end; if f.use_const then a_cmp_const_reg_label(list,OS_INT,f.cond,f.value,f.reg1,l) else @@ -1083,7 +1110,33 @@ procedure TCGMIPS.g_flags2reg(list: tasmlist; size: tcgsize; const f: tresflags; var left,right: tregister; unsigned: boolean; + hl: tasmlabel; begin + case f.reg1 of + NR_FCC0..NR_FCC7: + begin + if (current_settings.cputype>=cpu_mips4) then + begin + a_load_const_reg(list,size,1,reg); + case f.cond of + OC_NE: list.concat(taicpu.op_reg_reg_reg(A_MOVF,reg,NR_R0,f.reg1)); + OC_EQ: list.concat(taicpu.op_reg_reg_reg(A_MOVT,reg,NR_R0,f.reg1)); + else + InternalError(2014082902); + end; + end + else + begin + { TODO: still possible to do branchless by extracting appropriate bit from FCSR? } + current_asmdata.getjumplabel(hl); + a_load_const_reg(list,size,1,reg); + a_jmp_flags(list,f,hl); + a_load_const_reg(list,size,0,reg); + a_label(list,hl); + end; + exit; + end; + end; if (f.cond in [OC_EQ,OC_NE]) then begin left:=reg; @@ -1212,7 +1265,6 @@ var largeoffs : boolean; begin list.concat(tai_directive.create(asd_ent,current_procinfo.procdef.mangledname)); - a_reg_alloc(list,NR_STACK_POINTER_REG); if nostackframe then begin @@ -1221,9 +1273,6 @@ begin exit; end; - if (pi_needs_stackframe in current_procinfo.flags) then - a_reg_alloc(list,NR_FRAME_POINTER_REG); - helplist:=TAsmList.Create; reference_reset(href,0); @@ -1236,7 +1285,7 @@ begin begin if reg in (rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall)) then begin - fmask:=fmask or (1 shl ord(reg)); + fmask:=fmask or (longword(1) shl ord(reg)); href.offset:=nextoffset; lastfpuoffset:=nextoffset; helplist.concat(taicpu.op_reg_ref(A_SWC1,newreg(R_FPUREGISTER,reg,R_SUBFS),href)); @@ -1266,7 +1315,7 @@ begin begin if reg in saveregs then begin - mask:=mask or (1 shl ord(reg)); + mask:=mask or (longword(1) shl ord(reg)); href.offset:=nextoffset; lastintoffset:=nextoffset; if (reg=RS_FRAME_POINTER_REG) then @@ -1282,8 +1331,8 @@ begin //list.concat(Taicpu.Op_reg_reg_const(A_ADDIU,NR_FRAME_POINTER_REG,NR_STACK_POINTER_REG,current_procinfo.para_stack_size)); list.concat(Taicpu.op_none(A_P_SET_NOMIPS16)); list.concat(Taicpu.op_reg_const_reg(A_P_FRAME,current_procinfo.framepointer,LocalSize,NR_R31)); - list.concat(Taicpu.op_const_const(A_P_MASK,mask,-(LocalSize-lastintoffset))); - list.concat(Taicpu.op_const_const(A_P_FMASK,Fmask,-(LocalSize-lastfpuoffset))); + list.concat(Taicpu.op_const_const(A_P_MASK,aint(mask),-(LocalSize-lastintoffset))); + list.concat(Taicpu.op_const_const(A_P_FMASK,aint(Fmask),-(LocalSize-lastfpuoffset))); list.concat(Taicpu.op_none(A_P_SET_NOREORDER)); if (cs_create_pic in current_settings.moduleswitches) and (pi_needs_got in current_procinfo.flags) then diff --git a/compiler/mips/cpuelf.pas b/compiler/mips/cpuelf.pas index b3d0dcf7ad..1c2de17e0c 100644 --- a/compiler/mips/cpuelf.pas +++ b/compiler/mips/cpuelf.pas @@ -207,6 +207,8 @@ implementation result:=R_MIPS_NONE; RELOC_ABSOLUTE: result:=R_MIPS_32; + RELOC_GOTOFF: {For case jumptables only } + result:=R_MIPS_GPREL32; else result:=0; InternalError(2012110602); @@ -739,15 +741,15 @@ implementation if (lowreloc.ftype=R_MIPS_LO16) then begin; found:=true; + objsec.Data.Seek(objreloc.DataOffset); + objsec.Data.Read(hipart,sizeof(hipart)); + objsec.Data.Seek(lowreloc.DataOffset); + objsec.Data.Read(lopart,sizeof(lopart)); break; end; end; if not found then InternalError(2013030102); - objsec.Data.Seek(objreloc.DataOffset); - objsec.Data.Read(hipart,sizeof(hipart)); - objsec.Data.Seek(lowreloc.DataOffset); - objsec.Data.Read(lopart,sizeof(lopart)); if (source_info.endian<>target_info.endian) then begin hipart:=swapendian(hipart); @@ -809,7 +811,7 @@ implementation else reltyp:=objreloc.ftype; - if ElfTarget.relocs_use_addend then + if (oso_rela_relocs in objsec.SecOptions) then address:=objreloc.orgsize else begin diff --git a/compiler/mips/cpugas.pas b/compiler/mips/cpugas.pas index f6397270a5..65a884a52b 100644 --- a/compiler/mips/cpugas.pas +++ b/compiler/mips/cpugas.pas @@ -225,7 +225,7 @@ unit cpugas; not (ai.condition in [C_EQ,C_NE,C_GTZ,C_GEZ,C_LTZ,C_LEZ,C_COP1TRUE,C_COP1FALSE])) {or (op=A_JAL)} or (op=A_REM) or (op=A_REMU) { DIV and DIVU are normally macros, but use $zero as first arg to generate a CPU instruction. } - or ((op=A_DIV) or (op=A_DIVU) and + or (((op=A_DIV) or (op=A_DIVU)) and ((ai.ops<>3) or (ai.oper[0]^.typ<>top_reg) or (ai.oper[0]^.reg<>NR_R0))) or (op=A_MULO) or (op=A_MULOU) { A_LI is only a macro if the immediate is not in thez 16-bit range } diff --git a/compiler/mips/cpuinfo.pas b/compiler/mips/cpuinfo.pas index 01accd5f85..91355b99b1 100644 --- a/compiler/mips/cpuinfo.pas +++ b/compiler/mips/cpuinfo.pas @@ -21,6 +21,9 @@ Interface Type bestreal = double; +{$if FPC_FULLVERSION>20700} + bestrealrec = TDoubleRec; +{$endif FPC_FULLVERSION>20700} ts32real = single; ts64real = double; ts80real = type double; @@ -87,7 +90,7 @@ Const fputypestr : array[tfputype] of string[9] = ('', 'SOFT', - 'FPU_MIPS2','FPU_MIPS3' + 'MIPS2','MIPS3' ); { abi strings as accepted by @@ -104,23 +107,49 @@ Const mips_abi : tabitype = abi_default; -{$ifdef MIPSEL} type - tcpuflags=(CPUMIPS_HAS_XXXX); //Todo: Does this need to be filled? + tcpuflags=( + CPUMIPS_HAS_CMOV, { conditional move instructions (mips4+) } + CPUMIPS_HAS_ISA32R2 { mips32r2 instructions (also on PIC32) } + ); const cpu_capabilities : array[tcputype] of set of tcpuflags = - ( { cpu_none } [], - { cpu_mips1 } [], - { cpu_mips2 } [], - { cpu_mips3 } [], - { cpu_mips4 } [], - { cpu_mips5 } [], - { cpu_mips32 } [], - { cpu_mips32r2 } [], - { cpu_pic32mx } [] + ( { cpu_none } [], + { cpu_mips1 } [], + { cpu_mips2 } [], + { cpu_mips3 } [], + { cpu_mips4 } [CPUMIPS_HAS_CMOV], + { cpu_mips5 } [CPUMIPS_HAS_CMOV], + { cpu_mips32 } [CPUMIPS_HAS_CMOV], + { cpu_mips32r2 } [CPUMIPS_HAS_CMOV,CPUMIPS_HAS_ISA32R2], + { cpu_pic32mx } [CPUMIPS_HAS_CMOV,CPUMIPS_HAS_ISA32R2] ); +{$ifndef MIPSEL} +type + tcontrollertype = + (ct_none + ); + + +Const + { Is there support for dealing with multiple microcontrollers available } + { for this platform? } + ControllerSupport = false; + + { We know that there are fields after sramsize + but we don't care about this warning } + {$PUSH} + {$WARN 3177 OFF} + embedded_controllers : array [tcontrollertype] of tcontrollerdatatype = + ( + (controllertypestr:''; controllerunitstr:''; flashbase:0; flashsize:0; srambase:0; sramsize:0)); + {$POP} +{$ELSE MIPSEL} + { Is there support for dealing with multiple microcontrollers available } + { for this platform? } + ControllerSupport = true; type tcontrollertype = diff --git a/compiler/mips/ncpuadd.pas b/compiler/mips/ncpuadd.pas index e1c17336e4..f55c89f452 100644 --- a/compiler/mips/ncpuadd.pas +++ b/compiler/mips/ncpuadd.pas @@ -37,27 +37,30 @@ type procedure cmp64_lt(left_reg, right_reg: TRegister64;unsigned:boolean); procedure cmp64_le(left_reg, right_reg: TRegister64;unsigned:boolean); procedure second_generic_cmp32(unsigned: boolean); + procedure second_mul64bit; protected procedure second_addfloat; override; procedure second_cmpfloat; override; procedure second_cmpboolean; override; procedure second_cmpsmallset; override; + procedure second_add64bit; override; procedure second_cmp64bit; override; procedure second_cmpordinal; override; procedure second_addordinal; override; public - function pass_1: tnode; override; function use_generic_mul32to64: boolean; override; + function use_generic_mul64bit: boolean; override; end; implementation uses - systems, + systems, globtype, globals, cutils, verbose, paramgr, aasmtai, aasmcpu, aasmdata, defutil, + cpuinfo, {cgbase,} cgcpu, cgutils, cpupara, procinfo, @@ -91,6 +94,15 @@ begin end; +procedure tmipsaddnode.second_add64bit; +begin + if (nodetype=muln) then + second_mul64bit + else + inherited second_add64bit; +end; + + const cmpops: array[boolean] of TOpCmp = (OC_LT,OC_B); @@ -185,21 +197,6 @@ begin end; -function tmipsaddnode.pass_1 : tnode; - begin - result:=inherited pass_1; - - if not(assigned(result)) then - begin - if (nodetype in [ltn,lten,gtn,gten,equaln,unequaln]) then - begin - if (left.resultdef.typ=floatdef) or (right.resultdef.typ=floatdef) then - expectloc:=LOC_JUMP; - end; - end; - end; - - procedure tmipsaddnode.second_addfloat; var op: TAsmOp; @@ -273,7 +270,7 @@ begin hlcg.location_force_fpureg(current_asmdata.CurrAsmList, left.location, left.resultdef, True); hlcg.location_force_fpureg(current_asmdata.CurrAsmList, right.location, right.resultdef, True); - location_reset(location, LOC_JUMP, OS_NO); + location_reset(location, LOC_FLAGS, OS_NO); op:=ops_cmpfloat[left.location.size=OS_F64,nodetype]; @@ -289,14 +286,11 @@ begin end; current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,lreg,rreg)); - ai:=taicpu.op_sym(A_BC,current_procinfo.CurrTrueLabel); + location.resflags.reg1:=NR_FCC0; if (nodetype=unequaln) then - ai.SetCondition(C_COP1FALSE) + location.resflags.cond:=OC_EQ else - ai.SetCondition(C_COP1TRUE); - current_asmdata.CurrAsmList.concat(ai); - current_asmdata.CurrAsmList.concat(TAiCpu.Op_none(A_NOP)); - cg.a_jmp_always(current_asmdata.CurrAsmList,current_procinfo.CurrFalseLabel); + location.resflags.cond:=OC_NE; end; @@ -345,12 +339,90 @@ begin inherited second_addordinal; end; +procedure tmipsaddnode.second_mul64bit; +var + list: TAsmList; + hreg1,hreg2,tmpreg: TRegister; +begin + list:=current_asmdata.CurrAsmList; + pass_left_right; + location_reset(location,LOC_REGISTER,def_cgsize(resultdef)); + hlcg.location_force_reg(list,left.location,left.resultdef,left.resultdef,true); + { calculate 32-bit terms lo(right)*hi(left) and hi(left)*lo(right) } + hreg1:=NR_NO; + hreg2:=NR_NO; + tmpreg:=NR_NO; + if (right.location.loc=LOC_CONSTANT) then + begin + { Omit zero terms, if any } + if hi(right.location.value64)<>0 then + begin + hreg2:=cg.getintregister(list,OS_INT); + tmpreg:=cg.getintregister(list,OS_INT); + cg.a_load_const_reg(list,OS_INT,longint(hi(right.location.value64)),tmpreg); + list.concat(taicpu.op_reg_reg_reg(A_MUL,hreg2,tmpreg,left.location.register64.reglo)); + end; + tmpreg:=NR_NO; + if lo(right.location.value64)<>0 then + begin + hreg1:=cg.getintregister(list,OS_INT); + tmpreg:=cg.getintregister(list,OS_INT); + cg.a_load_const_reg(list,OS_INT,longint(lo(right.location.value64)),tmpreg); + list.concat(taicpu.op_reg_reg_reg(A_MUL,hreg1,tmpreg,left.location.register64.reghi)); + end; + end + else + begin + hlcg.location_force_reg(list,right.location,right.resultdef,right.resultdef,true); + tmpreg:=right.location.register64.reglo; + hreg1:=cg.getintregister(list,OS_INT); + hreg2:=cg.getintregister(list,OS_INT); + list.concat(taicpu.op_reg_reg_reg(A_MUL,hreg1,right.location.register64.reglo,left.location.register64.reghi)); + list.concat(taicpu.op_reg_reg_reg(A_MUL,hreg2,right.location.register64.reghi,left.location.register64.reglo)); + end; + + { At this point, tmpreg is either lo(right) or NR_NO if lo(left)*lo(right) is zero } + if (tmpreg=NR_NO) then + begin + if (hreg2<>NR_NO) and (hreg1<>NR_NO) then + begin + location.register64.reghi:=cg.getintregister(list,OS_INT); + list.concat(taicpu.op_reg_reg_reg(A_ADDU,location.register64.reghi,hreg1,hreg2)); + end + else if (hreg2<>NR_NO) then + location.register64.reghi:=hreg2 + else if (hreg1<>NR_NO) then + location.register64.reghi:=hreg1 + else + InternalError(2014122701); + location.register64.reglo:=NR_R0; + end + else + begin + list.concat(taicpu.op_reg_reg(A_MULTU,left.location.register64.reglo,tmpreg)); + location.register64.reghi:=cg.getintregister(list,OS_INT); + location.register64.reglo:=cg.getintregister(list,OS_INT); + current_asmdata.CurrAsmList.Concat(taicpu.op_reg(A_MFLO,location.register64.reglo)); + current_asmdata.CurrAsmList.Concat(taicpu.op_reg(A_MFHI,location.register64.reghi)); + if (hreg2<>NR_NO) then + list.concat(taicpu.op_reg_reg_reg(A_ADDU,location.register64.reghi,location.register64.reghi,hreg2)); + if (hreg1<>NR_NO) then + list.concat(taicpu.op_reg_reg_reg(A_ADDU,location.register64.reghi,location.register64.reghi,hreg1)); + end; +end; function tmipsaddnode.use_generic_mul32to64: boolean; begin result:=false; end; +function tmipsaddnode.use_generic_mul64bit: boolean; +begin + result:=(cs_check_overflow in current_settings.localswitches) or + (not (CPUMIPS_HAS_ISA32R2 in cpu_capabilities[current_settings.cputype])); +end; + + begin caddnode := tmipsaddnode; end. diff --git a/compiler/mips/ncpucnv.pas b/compiler/mips/ncpucnv.pas index de219edec9..4101753442 100644 --- a/compiler/mips/ncpucnv.pas +++ b/compiler/mips/ncpucnv.pas @@ -156,12 +156,11 @@ begin current_asmdata.getdatalabel(l1); current_asmdata.getjumplabel(l2); reference_reset_symbol(href, l1, 0, sizeof(aint)); - hregister := cg.getintregister(current_asmdata.CurrAsmList, OS_32); - hlcg.a_load_loc_reg(current_asmdata.CurrAsmList, left.resultdef, u32inttype, left.location, hregister); + hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true); { Always load into 64-bit FPU register } loadsigned(s64real); - cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList, OS_INT, OC_GTE, 0, hregister, l2); + cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList, OS_INT, OC_GTE, 0, left.location.register, l2); case tfloatdef(resultdef).floattype of { converting dword to s64real first and cut off at the end avoids precision loss } diff --git a/compiler/mips/rgcpu.pas b/compiler/mips/rgcpu.pas index b022cd5c7c..2378045de0 100644 --- a/compiler/mips/rgcpu.pas +++ b/compiler/mips/rgcpu.pas @@ -131,22 +131,50 @@ implementation result:=false; { Replace 'move orgreg,src' with 'sw src,spilltemp' and 'move dst,orgreg' with 'lw dst,spilltemp' } - { TODO: A_MOV_S and A_MOV_D for float registers are also replaceable } - if (instr.opcode<>A_MOVE) or (abs(spilltemp.offset)>32767) then + + if (not (instr.opcode in [A_MOVE,A_MOV_S,A_MOV_D,A_MTC1])) or (abs(spilltemp.offset)>32767) then exit; if (instr.ops<>2) or (instr.oper[0]^.typ<>top_reg) or - (instr.oper[1]^.typ<>top_reg) or - (getregtype(instr.oper[0]^.reg)<>regtype) or - (getregtype(instr.oper[1]^.reg)<>regtype) then + (instr.oper[1]^.typ<>top_reg) then InternalError(2013061001); + if (getregtype(instr.oper[0]^.reg)<>regtype) or + (getregtype(instr.oper[1]^.reg)<>regtype) then + begin + if (instr.opcode=A_MTC1) then + begin + { TODO: MTC1 src,orgreg ==> SW src,0/4(spilltemp) (endian-dependent!!) } + if (regtype=R_FPUREGISTER) then + exit; + end + else + InternalError(2013061003); + end; if get_alias(getsupreg(instr.oper[1]^.reg))=orgreg then begin - instr.opcode:=A_LW; + case instr.opcode of + A_MOVE: instr.opcode:=A_LW; + A_MOV_S: instr.opcode:=A_LWC1; + A_MOV_D: instr.opcode:=A_LDC1; + else + InternalError(2013061004); + end; end else if get_alias(getsupreg(instr.oper[0]^.reg))=orgreg then begin - instr.opcode:=A_SW; + case instr.opcode of + A_MOVE: instr.opcode:=A_SW; + A_MOV_S: instr.opcode:=A_SWC1; + A_MOV_D: instr.opcode:=A_SDC1; + A_MTC1: + begin + if (getregtype(instr.oper[0]^.reg)<>R_INTREGISTER) then + InternalError(2013061006); + instr.opcode:=A_LWC1; + end + else + InternalError(2013061005); + end; instr.oper[0]^:=instr.oper[1]^; end else |