summary | refs | log | tree | commit | diff
path: root/compiler/arm/aoptcpu.pas
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/arm/aoptcpu.pas')
-rw-r--r-- compiler/arm/aoptcpu.pas 102
1 files changed, 81 insertions, 21 deletions
diff --git a/compiler/arm/aoptcpu.pas b/compiler/arm/aoptcpu.pas
index 229a63816f..2cca31ec52 100644
--- a/compiler/arm/aoptcpu.pas
+++ b/compiler/arm/aoptcpu.pas
@@ -30,7 +30,7 @@ Unit aoptcpu;
Interface
-uses cgbase, cpubase, aasmtai, aasmcpu,aopt, aoptobj;
+uses cgbase, cgutils, cpubase, aasmtai, aasmcpu,aopt, aoptobj;
Type
TCpuAsmOptimizer = class(TAsmOptimizer)
@@ -49,7 +49,8 @@ Type
change in program flow.
If there is none, it returns false and
sets p1 to nil }
- Function GetNextInstructionUsingReg(Current: tai; Var Next: tai;reg : TRegister): Boolean;
+ Function GetNextInstructionUsingReg(Current: tai; Out Next: tai; reg: TRegister): Boolean;
+ Function GetNextInstructionUsingRef(Current: tai; Out Next: tai; const ref: TReference; StopOnStore: Boolean = true): Boolean;
{ outputs a debug message into the assembler file }
procedure DebugMsg(const s: string; p: tai);
@@ -79,7 +80,7 @@ Implementation
cutils,verbose,globtype,globals,
systems,
cpuinfo,
- cgobj,cgutils,procinfo,
+ cgobj,procinfo,
aasmbase,aasmdata;
function CanBeCond(p : tai) : boolean;
@@ -317,15 +318,46 @@ Implementation
RegLoadedWithNewValue(reg,p);
end;
-
function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
- var Next: tai; reg: TRegister): Boolean;
+ Out Next: tai; reg: TRegister): Boolean;
begin { Scans forward from Current. Result is whatever the last GetNextInstruction call returned, so a true Result alone does not show that Next involves reg; callers still have to inspect Next }
Next:=Current; { the scan starts at Current; the first GetNextInstruction call below moves past it }
repeat
Result:=GetNextInstruction(Next,Next); { advance Next in place and remember whether a successor was found }
- until not(cs_opt_level3 in current_settings.optimizerswitches) or not(Result) or (Next.typ<>ait_instruction) or (RegInInstruction(reg,Next)) or
- (is_calljmp(taicpu(Next).opcode)) or (RegInInstruction(NR_PC,Next));
+ until not (Result) or { GetNextInstruction returned false }
+ not(cs_opt_level3 in current_settings.optimizerswitches) or { without cs_opt_level3 the loop ends after a single step }
+ (Next.typ<>ait_instruction) or { tested before the taicpu(Next) casts that follow }
+ RegInInstruction(reg,Next) or { stop once RegInInstruction reports reg for Next }
+ is_calljmp(taicpu(Next).opcode) or { stop when is_calljmp holds for the opcode of Next }
+ RegModifiedByInstruction(NR_PC,Next); { stop when RegModifiedByInstruction reports NR_PC for Next }
+ end;
+
+ function TCpuAsmOptimizer.GetNextInstructionUsingRef(Current: tai;
+ Out Next: tai; const ref: TReference; StopOnStore: Boolean = true): Boolean;
+ begin { Scans forward from Current for an A_LDR/A_STR whose reference operand is RefsEqual to ref. Returns true with Next on that instruction; in every other case returns false and Next is left wherever the scan stopped }
+ Next:=Current; { the scan starts at Current; the first GetNextInstruction call below moves past it }
+ repeat
+ Result:=GetNextInstruction(Next,Next); { advance Next in place }
+ if Result and
+ (Next.typ=ait_instruction) and { tested before the taicpu(Next) casts that follow }
+ (taicpu(Next).opcode in [A_LDR, A_STR]) and
+ (
+ ((taicpu(Next).ops = 2) and { two-operand form: the reference is looked for in oper[1] }
+ (taicpu(Next).oper[1]^.typ = top_ref) and
+ RefsEqual(taicpu(Next).oper[1]^.ref^,ref)) or
+ ((taicpu(Next).ops = 3) and { LDRD/STRD }
+ (taicpu(Next).oper[2]^.typ = top_ref) and { three-operand form: the reference is looked for in oper[2] }
+ RefsEqual(taicpu(Next).oper[2]^.ref^,ref))
+ ) then
+ {We've found an instruction LDR or STR with the same reference}
+ exit; { Result is still true here because the condition above requires it }
+ until not(Result) or { GetNextInstruction returned false }
+ (Next.typ<>ait_instruction) or { tested before the taicpu(Next) casts that follow }
+ not(cs_opt_level3 in current_settings.optimizerswitches) or { without cs_opt_level3 only the immediate successor is examined }
+ is_calljmp(taicpu(Next).opcode) or { stop when is_calljmp holds for the opcode of Next }
+ (StopOnStore and (taicpu(Next).opcode in [A_STR, A_STM])) or { evaluated after the match test above, so an A_STR with an equal reference is returned instead of ending the scan here }
+ RegModifiedByInstruction(NR_PC,Next); { stop when RegModifiedByInstruction reports NR_PC for Next }
+ Result:=false; { the loop ended without finding a matching reference }
end;
{$ifdef DEBUG_AOPTCPU}
@@ -482,7 +514,8 @@ Implementation
hp1 : tai;
begin
Result:=false;
- if (p.oper[1]^.ref^.addressmode=AM_OFFSET) and
+ if (p.oper[1]^.typ = top_ref) and
+ (p.oper[1]^.ref^.addressmode=AM_OFFSET) and
(p.oper[1]^.ref^.index=NR_NO) and
(p.oper[1]^.ref^.offset=0) and
GetNextInstructionUsingReg(p, hp1, p.oper[1]^.ref^.base) and
@@ -538,6 +571,7 @@ Implementation
TmpUsedRegs: TAllUsedRegs;
tempop: tasmop;
oldreg: tregister;
+ dealloc: tai_regalloc;
function IsPowerOf2(const value: DWord): boolean; inline;
begin
@@ -607,12 +641,17 @@ Implementation
str reg1,ref
mov reg2,reg1
}
- if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
+ if (taicpu(p).oper[1]^.typ = top_ref) and
+ (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
(taicpu(p).oppostfix=PF_None) and
- GetNextInstruction(p,hp1) and
- MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [PF_None]) and
- RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
- (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
+ (taicpu(p).condition=C_None) and
+ GetNextInstructionUsingRef(p,hp1,taicpu(p).oper[1]^.ref^) and
+ MatchInstruction(hp1, A_LDR, [taicpu(p).condition], [PF_None]) and
+ (taicpu(hp1).oper[1]^.typ=top_ref) and
+ (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
+ not(RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
+ ((taicpu(hp1).oper[1]^.ref^.index=NR_NO) or not (RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.index, p, hp1))) and
+ ((taicpu(hp1).oper[1]^.ref^.base=NR_NO) or not (RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.base, p, hp1))) then
begin
if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
begin
@@ -633,7 +672,7 @@ Implementation
str reg1,ref
str reg2,ref
into
- strd reg1,ref
+ strd reg1,reg2,ref
}
else if (GenerateARMCode or GenerateThumb2Code) and
(CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
@@ -654,6 +693,9 @@ Implementation
begin
DebugMsg('Peephole StrStr2Strd done', p);
taicpu(p).oppostfix:=PF_D;
+ taicpu(p).loadref(2,taicpu(p).oper[1]^.ref^);
+ taicpu(p).loadreg(1, taicpu(hp1).oper[0]^.reg);
+ taicpu(p).ops:=3;
asml.remove(hp1);
hp1.free;
result:=true;
@@ -667,7 +709,8 @@ Implementation
ldr reg2,ref
into ...
}
- if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
+ if (taicpu(p).oper[1]^.typ = top_ref) and
+ (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
GetNextInstruction(p,hp1) and
{ ldrd is not allowed here }
MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [taicpu(p).oppostfix,PF_None]-[PF_D]) then
@@ -700,7 +743,7 @@ Implementation
end
{
...
- ldrd reg1,ref
+ ldrd reg1,reg1+1,ref
}
else if (GenerateARMCode or GenerateThumb2Code) and
(CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
@@ -718,6 +761,9 @@ Implementation
AlignedToQWord(taicpu(p).oper[1]^.ref^) then
begin
DebugMsg('Peephole LdrLdr2Ldrd done', p);
+ taicpu(p).loadref(2,taicpu(p).oper[1]^.ref^);
+ taicpu(p).loadreg(1, taicpu(hp1).oper[0]^.reg);
+ taicpu(p).ops:=3;
taicpu(p).oppostfix:=PF_D;
asml.remove(hp1);
hp1.free;
@@ -1200,6 +1246,7 @@ Implementation
(taicpu(p).oppostfix = PF_NONE) and
GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
MatchInstruction(hp1, [A_LDR, A_STR], [taicpu(p).condition], []) and
+ (taicpu(hp1).oper[1]^.typ = top_ref) and
{ We can change the base register only when the instruction uses AM_OFFSET }
((taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) or
((taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) and
@@ -1222,6 +1269,13 @@ Implementation
if taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg then
taicpu(hp1).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
+ dealloc:=FindRegDeAlloc(taicpu(p).oper[1]^.reg, taicpu(p.Next));
+ if Assigned(dealloc) then
+ begin
+ asml.remove(dealloc);
+ asml.InsertAfter(dealloc,hp1);
+ end;
+
GetNextInstruction(p, hp1);
asml.remove(p);
p.free;
@@ -1583,13 +1637,14 @@ Implementation
and reg1,reg0,2^n-1
mov reg2,reg1, lsl imm1
=>
- mov reg2,reg1, lsl imm1
+ mov reg2,reg0, lsl imm1
if imm1>i
}
- else if i>32-taicpu(hp1).oper[2]^.shifterop^.shiftimm then
+ else if (i>32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
+ not(RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) then
begin
DebugMsg('Peephole AndLsl2Lsl done', p);
- taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[0]^.reg;
+ taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
GetNextInstruction(p, hp1);
asml.Remove(p);
p.free;
@@ -1616,6 +1671,7 @@ Implementation
while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) and
{ we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
MatchInstruction(hp1, [A_LDR, A_STR], [C_None], []) and
+ (taicpu(hp1).oper[1]^.typ = top_ref) and
(taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
{ don't optimize if the register is stored/overwritten }
(taicpu(hp1).oper[0]^.reg<>taicpu(p).oper[1]^.reg) and
@@ -2389,7 +2445,7 @@ Implementation
begin
result:=true;
- list:=TAsmList.create_without_marker;
+ list:=TAsmList.create;
p:=BlockStart;
while p<>BlockEnd Do
begin
@@ -2410,6 +2466,7 @@ Implementation
) or
{ try to prove that the memory accesses don't overlapp }
((taicpu(p).opcode in [A_STRB,A_STRH,A_STR]) and
+ (taicpu(p).oper[1]^.typ = top_ref) and
(taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
(taicpu(p).oppostfix=PF_None) and
(taicpu(hp1).oppostfix=PF_None) and
@@ -2435,7 +2492,10 @@ Implementation
{ first instruction might not change the register used as index }
((taicpu(hp1).oper[1]^.ref^.index=NR_NO) or
not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.index,p))
- ) then
+ ) and
+ { if we modify the basereg AND the first instruction used that reg, we can not schedule }
+ ((taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) or
+ not(instructionLoadsFromReg(taicpu(hp1).oper[1]^.ref^.base,p))) then
begin
hp3:=tai(p.Previous);
hp5:=tai(p.next);