diff options
Diffstat (limited to 'compiler/mips/aoptcpu.pas')
-rw-r--r-- | compiler/mips/aoptcpu.pas | 316 |
1 files changed, 271 insertions, 45 deletions
diff --git a/compiler/mips/aoptcpu.pas b/compiler/mips/aoptcpu.pas index 8ae44180c2..451d0e040d 100644 --- a/compiler/mips/aoptcpu.pas +++ b/compiler/mips/aoptcpu.pas @@ -28,15 +28,18 @@ unit aoptcpu; Interface uses - cgbase, cpubase, aoptobj, aoptcpub, aopt, aasmtai; + cgbase, cpubase, aoptobj, aoptcpub, aopt, aasmtai, aasmcpu; Type + TAsmOpSet = set of TAsmOp; + TCpuAsmOptimizer = class(TAsmOptimizer) + function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; override; function GetNextInstructionUsingReg(Current: tai; var Next: tai; reg: TRegister): Boolean; - function RegUsedAfterInstruction(reg: Tregister; p: tai; - var AllUsedRegs: TAllUsedRegs): Boolean; function TryRemoveMov(var p: tai; opcode: TAsmOp): boolean; + function TryRemoveMovToRefIndex(var p: tai; next: taicpu): boolean; + function TryRemoveMovBeforeStore(var p: tai; next: taicpu; const storeops: TAsmOpSet): boolean; function PeepHoleOptPass1Cpu(var p: tai): boolean; override; procedure PeepHoleOptPass2; override; End; @@ -44,7 +47,7 @@ unit aoptcpu; Implementation uses - globals,aasmbase,aasmcpu,cpuinfo,verbose; + cutils,globtype,globals,aasmbase,cpuinfo,verbose; function MatchInstruction(const instr: tai; const op: TAsmOp): boolean; @@ -93,10 +96,15 @@ unit aoptcpu; end; - function CanBeCMOV(p: tai): boolean; + function CanBeCMOV(p: tai; condreg: tregister): boolean; begin result:=assigned(p) and (p.typ=ait_instruction) and - (taicpu(p).opcode in [A_MOV_D,A_MOV_S,A_MOVE]); + ((taicpu(p).opcode in [A_MOV_D,A_MOV_S]) or + ( + { register with condition must not be overwritten } + (taicpu(p).opcode=A_MOVE) and + (taicpu(p).oper[0]^.reg<>condreg) + )); end; @@ -170,6 +178,20 @@ unit aoptcpu; end; + { Returns true when some register operand of p1 equals Reg and the operation type reported for that operand is operand_write or operand_readwrite; returns false otherwise. p1 is cast to taicpu without a type check, so the caller must pass an instruction. } + function TCpuAsmOptimizer.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; + var + i : Longint; + begin + result:=false; + for i:=0 to taicpu(p1).ops-1 do + if (taicpu(p1).oper[i]^.typ=top_reg) and (taicpu(p1).oper[i]^.reg=Reg) and (taicpu(p1).spilling_get_operation_type(i) in
[operand_write,operand_readwrite]) then + begin + result:=true; + exit; + end; + end; + + function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai; var Next: tai; reg: TRegister): Boolean; begin @@ -178,7 +200,7 @@ unit aoptcpu; Result:=GetNextInstruction(Next,Next); until {not(cs_opt_level3 in current_settings.optimizerswitches) or} not(Result) or (Next.typ<>ait_instruction) or (RegInInstruction(reg,Next)) or (is_calljmp(taicpu(Next).opcode)); - if Result and is_calljmp(taicpu(next).opcode) then + if Result and (next.typ=ait_instruction) and is_calljmp(taicpu(next).opcode) then begin result:=false; next:=nil; @@ -186,21 +208,6 @@ unit aoptcpu; end; - function TCpuAsmOptimizer.RegUsedAfterInstruction(reg: Tregister; p: tai; - var AllUsedRegs: TAllUsedRegs): Boolean; - begin - AllUsedRegs[getregtype(reg)].Update(tai(p.Next),true); - RegUsedAfterInstruction := - AllUsedRegs[getregtype(reg)].IsUsed(reg) and - not(regLoadedWithNewValue(reg,p)) and - ( - not(GetNextInstruction(p,p)) or - instructionLoadsFromReg(reg,p) or - not(regLoadedWithNewValue(reg,p)) - ); - end; - - function TCpuAsmOptimizer.TryRemoveMov(var p: tai; opcode: TAsmOp): boolean; var next,hp1: tai; @@ -215,7 +222,7 @@ unit aoptcpu; opcode may be A_MOVE, A_MOV_s, A_MOV_d, etc. } result:=false; - if (taicpu(p).ops>1) and + if (taicpu(p).ops>0) and GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and MatchInstruction(next,opcode) and MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) and @@ -256,21 +263,96 @@ unit aoptcpu; taicpu(p).loadreg(0,taicpu(next).oper[0]^.reg); asml.remove(next); next.free; + result:=true; + end + else // no dealloc found + begin + { try to optimize the typical call sequence + lw $reg, (whatever) + <alloc volatile registers> + move $t9,$reg + jalr $t9 + Do not do so if the used register might contain a + register variable.
} + if (opcode=A_MOVE) and + not(cs_opt_regvar in current_settings.optimizerswitches) and + (taicpu(next).oper[0]^.reg=NR_R25) and + GetNextInstruction(next,hp1) and + MatchInstruction(hp1,A_JALR) and + MatchOperand(taicpu(hp1).oper[0]^,NR_R25) then + begin + taicpu(p).loadreg(0,taicpu(next).oper[0]^.reg); + asml.remove(next); + next.free; + result:=true; + end; end; end; end; + { Folds "mov Rx,Ry; store Rx,ref" into "store Ry,ref". p is the move, next is the candidate store. The fold happens only when next.opcode is in storeops, operand 0 of next is Rx, Ry is not modified between p and next, and FindRegDealloc reports a deallocation of Rx starting at next.next. On success p is removed and freed, p is set to next, and the result is true. } + function TCpuAsmOptimizer.TryRemoveMovBeforeStore(var p: tai; next: taicpu; const storeops: TAsmOpSet): boolean; + begin + result:=(next.opcode in storeops) and + MatchOperand(next.oper[0]^,taicpu(p).oper[0]^.reg) and + { Ry cannot be modified between move and store } + (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next)) and + Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))); + if result then + begin + next.loadreg(0,taicpu(p).oper[1]^.reg); + { the move is deleted below, so an address based on Rx (store Rx,ofs(Rx)) must switch to Ry as well; otherwise the store would address memory through a register that is never loaded } + if (next.ops>1) and (next.oper[1]^.typ=top_ref) and + (next.oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) then + next.oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg; + asml.remove(p); + p.free; + p:=next; + end; + end; + + + { Folds "mov Rx,Ry; op Rz,ofs(Rx)" into "op Rz,ofs(Ry)". p is the move, next is the instruction whose operand 1 is a reference with base Rx. The fold happens only when Ry is not modified between p and next and FindRegDealloc reports a deallocation of Rx starting at next.next. On success p is removed and freed, p is set to next, and the result is true. } + function TCpuAsmOptimizer.TryRemoveMovToRefIndex(var p: tai; next: taicpu): boolean; + begin + { operand 1 is inspected only when next reports more than one operand } + result:=(next.ops>1) and + (next.oper[1]^.typ=top_ref) and + (next.oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and + (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next)) and + Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))); + if result then + begin + next.oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg; + asml.remove(p); + p.free; + p:=next; + end; + end; + + function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean; var next,next2: tai; - TmpUsedRegs: TAllUsedRegs; begin result:=false; case p.typ of ait_instruction: begin case taicpu(p).opcode of + A_SEH: + begin + if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and + MatchInstruction(next,A_SH) and + MatchOperand(taicpu(next).oper[0]^,taicpu(p).oper[0]^.reg) and + (not RegUsedBetween(taicpu(p).oper[1]^.reg,p,next)) and + Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then + begin + taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg); + asml.remove(p); + p.free; + p:=next; + end + else +
TryRemoveMov(p,A_MOVE); + end; + A_SEB: + { TODO: can be handled similar to A_SEH, but it's almost never encountered } + TryRemoveMov(p,A_MOVE); + A_SLL: begin { if this is a sign extension... } @@ -289,10 +371,7 @@ unit aoptcpu; { the initial register may not be reused } (not RegUsedBetween(taicpu(p).oper[1]^.reg,next,next2)) then begin - CopyUsedRegs(TmpUsedRegs); - UpdateUsedRegs(TmpUsedRegs, tai(p.next)); - UpdateUsedRegs(TmpUsedRegs, tai(next.next)); - if not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,next2,TmpUsedRegs) then + if Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next2.next))) then begin taicpu(next2).loadreg(0,taicpu(p).oper[1]^.reg); asml.remove(p); @@ -301,7 +380,6 @@ unit aoptcpu; next.free; p:=next2; end; - ReleaseUsedRegs(TmpUsedRegs); end else TryRemoveMov(p,A_MOVE); @@ -309,6 +387,7 @@ unit aoptcpu; A_SRL: begin + { TODO: also kill sign-extensions that follow, both SLL+SRA and SEB/SEH versions } { Remove 'andi' in sequences srl Rx,Ry,16 andi Rx,Rx,65535 @@ -368,37 +447,184 @@ unit aoptcpu; { NOTE(review): 'and' binds tighter than 'or' in Pascal, so unless parentheses outside this hunk enclose both alternatives, the register and dealloc checks below apply only to the A_SH alternative - confirm against the full file } ((taicpu(p).oper[2]^.val=255) and MatchInstruction(next,A_SB)) or ((taicpu(p).oper[2]^.val=65535) and MatchInstruction(next,A_SH)) and (taicpu(next).oper[0]^.typ=top_reg) and - (taicpu(next).oper[0]^.reg=taicpu(p).oper[0]^.reg) then + (taicpu(next).oper[0]^.reg=taicpu(p).oper[0]^.reg) and + assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then begin - CopyUsedRegs(TmpUsedRegs); - UpdateUsedRegs(TmpUsedRegs, tai(p.next)); - if not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,next,TmpUsedRegs) then + taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg); + asml.remove(p); + p.free; + p:=next; + end + else + TryRemoveMov(p,A_MOVE); + end; + + A_MOV_S: + begin + if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and + (next.typ=ait_instruction) then + begin + if TryRemoveMovBeforeStore(p,taicpu(next),[A_SWC1]) then + { optimization successful }; + end; + end; + + A_MOV_D: + begin + if
GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and + (next.typ=ait_instruction) then + begin + if TryRemoveMovBeforeStore(p,taicpu(next),[A_SDC1]) then + { optimization successful }; + end; + end; + + A_MOVE: + begin + if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and + (next.typ=ait_instruction) and + (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next)) then + begin + { MOVE Rx,Ry; store Rx,(ref); dealloc Rx ==> store Ry,(ref) } + if TryRemoveMovBeforeStore(p,taicpu(next),[A_SB,A_SH,A_SW]) then + { optimization successful } + else if TryRemoveMovToRefIndex(p,taicpu(next)) then + { successful as well } + { MOVE Rx,Ry; opcode Rx,Rx,any ==> opcode Rx,Ry,any + MOVE Rx,Ry; opcode Rx,Rz,Rx ==> opcode Rx,Rz,Ry } + else if (taicpu(next).opcode in [A_ADD,A_ADDU,A_ADDI,A_ADDIU,A_SUB,A_SUBU]) and + MatchOperand(taicpu(next).oper[0]^,taicpu(p).oper[0]^.reg) then begin - taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg); - asml.remove(p); - p.free; - p:=next; + if MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) and + Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then + begin + taicpu(next).loadreg(1,taicpu(p).oper[1]^.reg); + { Rx may sit in both source slots (opcode Rx,Rx,Rx); the move is removed, so the second slot must be rewritten too } + if MatchOperand(taicpu(next).oper[2]^,taicpu(p).oper[0]^.reg) then + taicpu(next).loadreg(2,taicpu(p).oper[1]^.reg); + asml.remove(p); + p.free; + p:=next; + end + { TODO: if Ry=NR_R0, this effectively changes instruction into MOVE, + providing further optimization possibilities } + else if MatchOperand(taicpu(next).oper[2]^,taicpu(p).oper[0]^.reg) and + Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then + begin + taicpu(next).loadreg(2,taicpu(p).oper[1]^.reg); + asml.remove(p); + p.free; + p:=next; + end; + end + { MOVE Rx,Ry; opcode Rz,Rx,any; dealloc Rx ==> opcode Rz,Ry,any } + else if (taicpu(next).opcode in [A_ADD,A_ADDU,A_ADDI,A_ADDIU,A_SUB,A_SUBU,A_SLT,A_SLTU,A_DIV,A_DIVU]) and + Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then + begin + if MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) then + begin + taicpu(next).loadreg(1,taicpu(p).oper[1]^.reg); +
{ opcode Rz,Rx,Rx: the second source slot reads Rx as well and must follow } + if MatchOperand(taicpu(next).oper[2]^,taicpu(p).oper[0]^.reg) then + taicpu(next).loadreg(2,taicpu(p).oper[1]^.reg); + asml.remove(p); + p.free; + p:=next; + end + else if MatchOperand(taicpu(next).oper[2]^,taicpu(p).oper[0]^.reg) then + begin + taicpu(next).loadreg(2,taicpu(p).oper[1]^.reg); + asml.remove(p); + p.free; + p:=next; + end; + end + { MULT[U] must be handled separately due to different operand numbers } + else if (taicpu(next).opcode in [A_MULT,A_MULTU]) and + Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then + begin + if MatchOperand(taicpu(next).oper[0]^,taicpu(p).oper[0]^.reg) then + begin + taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg); + { mult Rx,Rx: both operands read Rx, so rewrite the second one too } + if MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) then + taicpu(next).loadreg(1,taicpu(p).oper[1]^.reg); + asml.remove(p); + p.free; + p:=next; + end + else if MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) then + begin + taicpu(next).loadreg(1,taicpu(p).oper[1]^.reg); + asml.remove(p); + p.free; + p:=next; + end; + end + else if TryRemoveMov(p,A_MOVE) then + begin + { Ended up with move between same register? Suicide then. } + if (taicpu(p).oper[0]^.reg=taicpu(p).oper[1]^.reg) then + begin + GetNextInstruction(p,next); + asml.remove(p); + p.free; + p:=next; + end; end; - ReleaseUsedRegs(TmpUsedRegs); + { TODO: MOVE Rx,Ry; Bcc Rx,Rz,label; dealloc Rx ==> Bcc Ry,Rz,label } + end; + end; + + A_ADDIU: + begin + { ADDIU Rx,Ry,const; load/store Rz,(Rx); dealloc Rx ==> load/store Rz,const(Ry) + ADDIU Rx,Ry,%lo(sym); load/store Rz,(Rx); dealloc Rx ==> load/store Rz,%lo(sym)(Ry) + ADDIU Rx,Ry,const; load Rx,(Rx) ==> load Rx,const(Ry) + ADDIU Rx,Ry,%lo(sym); load Rx,(Rx) ==> load Rx,%lo(sym)(Ry) } + if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and + (next.typ=ait_instruction) and + (taicpu(next).opcode in [A_LB,A_LBU,A_LH,A_LHU,A_LW,A_SB,A_SH,A_SW]) and + { operand 1 must really be a reference before ref^ is read } + (taicpu(next).oper[1]^.typ=top_ref) and + (taicpu(p).oper[0]^.reg=taicpu(next).oper[1]^.ref^.base) and + (taicpu(next).oper[1]^.ref^.offset=0) and + (taicpu(next).oper[1]^.ref^.symbol=nil) and + ( + Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) or + ( + (taicpu(p).oper[0]^.reg=taicpu(next).oper[0]^.reg) and + (taicpu(next).opcode in [A_LB,A_LBU,A_LH,A_LHU,A_LW]) + ) + )
and + { a store that writes out Rx itself still needs the ADDIU result, so it cannot be folded } + ((taicpu(next).opcode in [A_LB,A_LBU,A_LH,A_LHU,A_LW]) or + (taicpu(next).oper[0]^.reg<>taicpu(p).oper[0]^.reg)) and + (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next)) then + begin + case taicpu(p).oper[2]^.typ of + top_const: + taicpu(next).oper[1]^.ref^.offset:=taicpu(p).oper[2]^.val; + + top_ref: + taicpu(next).oper[1]^.ref^:=taicpu(p).oper[2]^.ref^; + else + InternalError(2014100401); + end; + taicpu(next).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg; + asml.remove(p); + p.free; + p:=next; + result:=true; end else - TryRemoveMov(p,A_MOVE); + result:=TryRemoveMov(p,A_MOVE); end; + A_LB,A_LBU,A_LH,A_LHU,A_LW, A_ADD,A_ADDU, - A_ADDI,A_ADDIU, + A_ADDI, A_SUB,A_SUBU, A_SRA,A_SRAV, A_SRLV, A_SLLV, + A_MFLO,A_MFHI, A_AND,A_OR,A_XOR,A_ORI,A_XORI: TryRemoveMov(p,A_MOVE); + A_LWC1, A_ADD_s, A_SUB_s, A_MUL_s, A_DIV_s, A_ABS_s, A_NEG_s, A_SQRT_s, A_CVT_s_w, A_CVT_s_l, A_CVT_s_d: TryRemoveMov(p,A_MOV_s); + A_LDC1, A_ADD_d, A_SUB_d, A_MUL_d, A_DIV_d, A_ABS_d, A_NEG_d, A_SQRT_d, A_CVT_d_w, A_CVT_d_l, A_CVT_d_s: @@ -452,7 +678,7 @@ unit aoptcpu; } l:=0; GetNextInstruction(p, hp1); - while CanBeCMOV(hp1) do // CanBeCMOV returns False for nil or labels + while CanBeCMOV(hp1,condreg) do // CanBeCMOV returns False for nil or labels begin inc(l); GetNextInstruction(hp1,hp1); @@ -470,7 +696,7 @@ unit aoptcpu; repeat ChangeToCMOV(taicpu(hp1),condition,condreg); GetNextInstruction(hp1,hp1); - until not CanBeCMOV(hp1); + until not CanBeCMOV(hp1,condreg); { wait with removing else GetNextInstruction could ignore the label if it was the only usage in the jump moved away } @@ -508,7 +734,7 @@ unit aoptcpu; l:=0; { skip hp1 to <several moves 2> } GetNextInstruction(hp1, hp1); - while CanBeCMOV(hp1) do + while CanBeCMOV(hp1,condreg) do begin inc(l); GetNextInstruction(hp1, hp1); @@ -524,7 +750,7 @@ unit aoptcpu; repeat ChangeToCMOV(taicpu(hp1),condition,condreg); GetNextInstruction(hp1,hp1); - until not CanBeCMOV(hp1); + until not CanBeCMOV(hp1,condreg); { hp2 is still at b yyy } GetNextInstruction(hp2,hp1); { hp2 is now at xxx: } @@ -534,7 +760,7 @@ unit aoptcpu; repeat
ChangeToCMOV(taicpu(hp1),condition,condreg); GetNextInstruction(hp1,hp1); - until not CanBeCMOV(hp1); + until not CanBeCMOV(hp1,condreg); { remove bCC } tasmlabel(taicpu(hp3).oper[taicpu(hp3).ops-1]^.ref^.symbol).decrefs; RemoveDelaySlot(hp3); |