summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorlaksen <laksen@3ad0048d-3df7-0310-abae-a5850022a9f2>2018-09-20 20:27:58 +0000
committerlaksen <laksen@3ad0048d-3df7-0310-abae-a5850022a9f2>2018-09-20 20:27:58 +0000
commitbc1b01077062ffab38a84aae6b2a1ffcbbe1bf82 (patch)
tree2dd2138a69cf9c52e37b92e5ab92af047e1bef30
parentc041e65b3d2f03a695b7843aa8446ab1838595f9 (diff)
downloadfpc-bc1b01077062ffab38a84aae6b2a1ffcbbe1bf82.tar.gz
Add a number of optimizations.
Don't do CSE restructuring when full boolean evaluation is enabled. git-svn-id: https://svn.freepascal.org/svn/fpc/branches/laksen@39781 3ad0048d-3df7-0310-abae-a5850022a9f2
-rw-r--r--riscv_new/compiler/optcse.pas5
-rw-r--r--riscv_new/compiler/riscv64/aoptcpu.pas179
2 files changed, 183 insertions, 1 deletions
diff --git a/riscv_new/compiler/optcse.pas b/riscv_new/compiler/optcse.pas
index f04a65fd94..32db6b26bc 100644
--- a/riscv_new/compiler/optcse.pas
+++ b/riscv_new/compiler/optcse.pas
@@ -320,7 +320,8 @@ unit optcse;
{ either if fastmath is on }
((cs_opt_fastmath in current_settings.optimizerswitches) or
{ or for the logical operators, they cannot overflow }
- (n.nodetype in [andn,orn]) or
+ ((n.nodetype in [andn,orn]) and
+ (n.localswitches*[cs_full_boolean_eval]=[])) or
{ or for integers if range checking is off }
((is_integer(n.resultdef) and
(n.localswitches*[cs_check_range,cs_check_overflow]=[]) and
@@ -329,6 +330,8 @@ unit optcse;
(is_set(n.resultdef))
) then
while (n.nodetype=tbinarynode(n).left.nodetype) and
+ { don't swap elements with full boolean evaluation. this might not be safe }
+ (tbinarynode(n).left.localswitches*[cs_full_boolean_eval]=[]) and
{ the resulttypes of the operands we'll swap must be equal,
required in case of a 32x32->64 multiplication, then we
cannot swap out one of the 32 bit operands for a 64 bit one
diff --git a/riscv_new/compiler/riscv64/aoptcpu.pas b/riscv_new/compiler/riscv64/aoptcpu.pas
index 5deff9617e..2de9f6b8a6 100644
--- a/riscv_new/compiler/riscv64/aoptcpu.pas
+++ b/riscv_new/compiler/riscv64/aoptcpu.pas
@@ -186,6 +186,7 @@ implementation
DebugMsg('Peephole AddiMem2Mem performed', hp1);
GetNextInstruction(p,hp1);
+
AsmL.Remove(p);
p.Free;
p:=hp1;
@@ -193,6 +194,184 @@ implementation
result:=true;
end;
end;
+ A_SUB:
+ begin
+ {
+ Turn
+ sub x,y,z
+ bgeu X0,x,...
+ dealloc x
+ Into
+ beq y,z,...
+ }
+ if (taicpu(p).ops=3) and
+ GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
+ (hp1.typ=ait_instruction) and
+ (taicpu(hp1).opcode=A_Bxx) and
+ (taicpu(hp1).ops=3) and
+ (taicpu(hp1).oper[0]^.typ=top_reg) and
+ (taicpu(hp1).oper[0]^.reg=NR_X0) and
+ (taicpu(hp1).oper[1]^.typ=top_reg) and
+ (taicpu(hp1).oper[1]^.reg=taicpu(p).oper[0]^.reg) and
+ (taicpu(hp1).condition=C_GEU) and
+ (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
+ (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,hp1)) and
+ RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
+ begin
+ taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
+ taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg);
+ taicpu(hp1).condition:=C_EQ;
+
+ DebugMsg('Peephole SubBgeu2Bne performed', hp1);
+
+ GetNextInstruction(p,hp1);
+
+ asml.remove(p);
+ p.Free;
+
+ p:=hp1;
+
+ result:=true;
+ end;
+ end;
+ A_SLTU:
+ begin
+ {
+ Turn
+ sltu x,X0,y
+ beq/bne x, X0, ...
+ dealloc x
+ Into
+ bgeu/bltu X0, y, ...
+ }
+ if (taicpu(p).ops=3) and
+ (taicpu(p).oper[1]^.typ=top_reg) and
+ (taicpu(p).oper[1]^.reg=NR_X0) and
+ GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
+ (hp1.typ=ait_instruction) and
+ (taicpu(hp1).opcode=A_Bxx) and
+ (taicpu(hp1).ops=3) and
+ (taicpu(hp1).oper[0]^.typ=top_reg) and
+ (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
+ (taicpu(hp1).oper[1]^.typ=top_reg) and
+ (taicpu(hp1).oper[1]^.reg=NR_X0) and
+ (taicpu(hp1).condition in [C_NE,C_EQ]) and
+ (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,hp1)) and
+ RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
+ begin
+ taicpu(hp1).loadreg(0,NR_X0);
+ taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg);
+
+ if taicpu(hp1).condition=C_NE then
+ taicpu(hp1).condition:=C_LTU
+ else
+ taicpu(hp1).condition:=C_GEU;
+
+ DebugMsg('Peephole SltuB2B performed', hp1);
+
+ if not GetLastInstruction(p,hp1) then
+ GetNextInstruction(p,hp1);
+
+ asml.remove(p);
+ p.Free;
+
+ p:=hp1;
+
+ result:=true;
+ end;
+ end;
+ A_SLTIU:
+ begin
+ {
+ Turn
+ sltiu x,y,1
+ beq/ne x,x0,...
+ dealloc x
+ Into
+ bne/eq y,x0,...
+ }
+ if (taicpu(p).ops=3) and
+ (taicpu(p).oper[2]^.typ=top_const) and
+ (taicpu(p).oper[2]^.val=1) and
+ GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
+ (hp1.typ=ait_instruction) and
+ (taicpu(hp1).opcode=A_Bxx) and
+ (taicpu(hp1).ops=3) and
+ (taicpu(hp1).oper[0]^.typ=top_reg) and
+ (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
+ (taicpu(hp1).oper[1]^.typ=top_reg) and
+ (taicpu(hp1).oper[1]^.reg=NR_X0) and
+ (taicpu(hp1).condition in [C_NE,C_EQ]) and
+ (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
+ RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
+ begin
+ taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
+ taicpu(hp1).condition:=inverse_cond(taicpu(hp1).condition);
+
+ DebugMsg('Peephole Sltiu0B2B performed', hp1);
+
+ if not GetLastInstruction(p,hp1) then
+ GetNextInstruction(p,hp1);
+
+ asml.remove(p);
+ p.Free;
+
+ p:=hp1;
+
+ result:=true;
+ end;
+ end;
+ A_SLTI:
+ begin
+ {
+ Turn
+ slti x,y,0
+ beq/ne x,x0,...
+ dealloc x
+ Into
+ bge/lt y,x0,...
+ }
+ if (taicpu(p).ops=3) and
+ (taicpu(p).oper[2]^.typ=top_const) and
+ (taicpu(p).oper[2]^.val=0) and
+ GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
+ (hp1.typ=ait_instruction) and
+ (taicpu(hp1).opcode=A_Bxx) and
+ (taicpu(hp1).ops=3) and
+ (taicpu(hp1).oper[0]^.typ=top_reg) and
+ (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
+ (taicpu(hp1).oper[1]^.typ=top_reg) and
+ (taicpu(hp1).oper[1]^.reg=NR_X0) and
+ (taicpu(hp1).condition in [C_NE,C_EQ]) and
+ (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and
+ RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
+ begin
+ if taicpu(hp1).condition=C_NE then
+ begin
+ taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
+ taicpu(hp1).loadreg(1,NR_X0);
+ taicpu(hp1).condition:=C_LT;
+ end
+ else
+ begin
+ taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg);
+ taicpu(hp1).loadreg(1,NR_X0);
+ taicpu(hp1).condition:=C_GE;
+ end;
+
+ DebugMsg('Peephole Slti0B2B performed', hp1);
+
+ if not GetLastInstruction(p,hp1) then
+ GetNextInstruction(p,hp1);
+
+ asml.remove(p);
+ p.Free;
+
+ p:=hp1;
+
+ result:=true;
+ end;
+ end;
end;
end;
end;