diff options
author | laksen <laksen@3ad0048d-3df7-0310-abae-a5850022a9f2> | 2018-09-20 20:27:58 +0000 |
---|---|---|
committer | laksen <laksen@3ad0048d-3df7-0310-abae-a5850022a9f2> | 2018-09-20 20:27:58 +0000 |
commit | bc1b01077062ffab38a84aae6b2a1ffcbbe1bf82 (patch) | |
tree | 2dd2138a69cf9c52e37b92e5ab92af047e1bef30 | |
parent | c041e65b3d2f03a695b7843aa8446ab1838595f9 (diff) | |
download | fpc-bc1b01077062ffab38a84aae6b2a1ffcbbe1bf82.tar.gz |
Add a number of optimizations.
Don't do CSE restructuring when full boolean evaluation is enabled.
git-svn-id: https://svn.freepascal.org/svn/fpc/branches/laksen@39781 3ad0048d-3df7-0310-abae-a5850022a9f2
-rw-r--r-- | riscv_new/compiler/optcse.pas | 5 | ||||
-rw-r--r-- | riscv_new/compiler/riscv64/aoptcpu.pas | 179 |
2 files changed, 183 insertions, 1 deletions
diff --git a/riscv_new/compiler/optcse.pas b/riscv_new/compiler/optcse.pas index f04a65fd94..32db6b26bc 100644 --- a/riscv_new/compiler/optcse.pas +++ b/riscv_new/compiler/optcse.pas @@ -320,7 +320,8 @@ unit optcse; { either if fastmath is on } ((cs_opt_fastmath in current_settings.optimizerswitches) or { or for the logical operators, they cannot overflow } - (n.nodetype in [andn,orn]) or + ((n.nodetype in [andn,orn]) and + (n.localswitches*[cs_full_boolean_eval]=[])) or { or for integers if range checking is off } ((is_integer(n.resultdef) and (n.localswitches*[cs_check_range,cs_check_overflow]=[]) and @@ -329,6 +330,8 @@ unit optcse; (is_set(n.resultdef)) ) then while (n.nodetype=tbinarynode(n).left.nodetype) and + { don't swap elements with full boolean evaluation. this might not be safe } + (tbinarynode(n).left.localswitches*[cs_full_boolean_eval]=[]) and { the resulttypes of the operands we'll swap must be equal, required in case of a 32x32->64 multiplication, then we cannot swap out one of the 32 bit operands for a 64 bit one diff --git a/riscv_new/compiler/riscv64/aoptcpu.pas b/riscv_new/compiler/riscv64/aoptcpu.pas index 5deff9617e..2de9f6b8a6 100644 --- a/riscv_new/compiler/riscv64/aoptcpu.pas +++ b/riscv_new/compiler/riscv64/aoptcpu.pas @@ -186,6 +186,7 @@ implementation DebugMsg('Peephole AddiMem2Mem performed', hp1); GetNextInstruction(p,hp1); + AsmL.Remove(p); p.Free; p:=hp1; @@ -193,6 +194,184 @@ implementation result:=true; end; end; + A_SUB: + begin + { + Turn + sub x,y,z + bgeu X0,x,... + dealloc x + Into + beq y,z,... 
+ } + if (taicpu(p).ops=3) and + GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and + (hp1.typ=ait_instruction) and + (taicpu(hp1).opcode=A_Bxx) and + (taicpu(hp1).ops=3) and + (taicpu(hp1).oper[0]^.typ=top_reg) and + (taicpu(hp1).oper[0]^.reg=NR_X0) and + (taicpu(hp1).oper[1]^.typ=top_reg) and + (taicpu(hp1).oper[1]^.reg=taicpu(p).oper[0]^.reg) and + (taicpu(hp1).condition=C_GEU) and + (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and + (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,hp1)) and + RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then + begin + taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg); + taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg); + taicpu(hp1).condition:=C_EQ; + + DebugMsg('Peephole SubBgeu2Bne performed', hp1); + + GetNextInstruction(p,hp1); + + asml.remove(p); + p.Free; + + p:=hp1; + + result:=true; + end; + end; + A_SLTU: + begin + { + Turn + sltu x,X0,y + beq/bne x, X0, ... + dealloc x + Into + bltu/geu X0, y, ... + } + if (taicpu(p).ops=3) and + (taicpu(p).oper[1]^.typ=top_reg) and + (taicpu(p).oper[1]^.reg=NR_X0) and + GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and + (hp1.typ=ait_instruction) and + (taicpu(hp1).opcode=A_Bxx) and + (taicpu(hp1).ops=3) and + (taicpu(hp1).oper[0]^.typ=top_reg) and + (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and + (taicpu(hp1).oper[1]^.typ=top_reg) and + (taicpu(hp1).oper[1]^.reg=NR_X0) and + (taicpu(hp1).condition in [C_NE,C_EQ]) and + (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p,hp1)) and + RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then + begin + taicpu(hp1).loadreg(0,NR_X0); + taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg); + + if taicpu(hp1).condition=C_NE then + taicpu(hp1).condition:=C_LTU + else + taicpu(hp1).condition:=C_GEU; + + DebugMsg('Peephole SltuB2B performed', hp1); + + if not GetLastInstruction(p,hp1) then + GetNextInstruction(p,hp1); + + asml.remove(p); + p.Free; + + p:=hp1; + + result:=true; + end; + end; + A_SLTIU: + begin + { 
+ Turn + sltiu x,y,1 + beq/ne x,x0,... + dealloc x + Into + bne/beq y,x0,... + } + if (taicpu(p).ops=3) and + (taicpu(p).oper[2]^.typ=top_const) and + (taicpu(p).oper[2]^.val=1) and + GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and + (hp1.typ=ait_instruction) and + (taicpu(hp1).opcode=A_Bxx) and + (taicpu(hp1).ops=3) and + (taicpu(hp1).oper[0]^.typ=top_reg) and + (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and + (taicpu(hp1).oper[1]^.typ=top_reg) and + (taicpu(hp1).oper[1]^.reg=NR_X0) and + (taicpu(hp1).condition in [C_NE,C_EQ]) and + (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and + RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then + begin + taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg); + taicpu(hp1).condition:=inverse_cond(taicpu(hp1).condition); + + DebugMsg('Peephole Sltiu0B2B performed', hp1); + + if not GetLastInstruction(p,hp1) then + GetNextInstruction(p,hp1); + + asml.remove(p); + p.Free; + + p:=hp1; + + result:=true; + end; + end; + A_SLTI: + begin + { + Turn + slti x,y,0 + beq/ne x,x0,... + dealloc x + Into + bge/blt y,x0,... 
+ } + if (taicpu(p).ops=3) and + (taicpu(p).oper[2]^.typ=top_const) and + (taicpu(p).oper[2]^.val=0) and + GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and + (hp1.typ=ait_instruction) and + (taicpu(hp1).opcode=A_Bxx) and + (taicpu(hp1).ops=3) and + (taicpu(hp1).oper[0]^.typ=top_reg) and + (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and + (taicpu(hp1).oper[1]^.typ=top_reg) and + (taicpu(hp1).oper[1]^.reg=NR_X0) and + (taicpu(hp1).condition in [C_NE,C_EQ]) and + (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p,hp1)) and + RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then + begin + if taicpu(hp1).condition=C_NE then + begin + taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg); + taicpu(hp1).loadreg(1,NR_X0); + taicpu(hp1).condition:=C_LT; + end + else + begin + taicpu(hp1).loadreg(0,taicpu(p).oper[1]^.reg); + taicpu(hp1).loadreg(1,NR_X0); + taicpu(hp1).condition:=C_GE; + end; + + DebugMsg('Peephole Slti0B2B performed', hp1); + + if not GetLastInstruction(p,hp1) then + GetNextInstruction(p,hp1); + + asml.remove(p); + p.Free; + + p:=hp1; + + result:=true; + end; + end; end; end; end; |