diff options
author | florian <florian@3ad0048d-3df7-0310-abae-a5850022a9f2> | 2020-01-12 09:20:00 +0000 |
---|---|---|
committer | florian <florian@3ad0048d-3df7-0310-abae-a5850022a9f2> | 2020-01-12 09:20:00 +0000 |
commit | ada96d677013f4b6ca030454ccc9631d88aaa187 (patch) | |
tree | 245e2177338c342d51151414cc858078c5d00404 | |
parent | 2a69f5b1e7d060b0ed5658f995cd1b15c8ce54bc (diff) | |
download | fpc-ada96d677013f4b6ca030454ccc9631d88aaa187.tar.gz |
* patch by J. Gareth Moreton: EAX -> EDX:EAX sign extension shortcuts, and MOVSX shortcuts for AX register, part 1 of #36551
git-svn-id: https://svn.freepascal.org/svn/fpc/trunk@43917 3ad0048d-3df7-0310-abae-a5850022a9f2
-rw-r--r-- | compiler/x86/aoptx86.pas | 292 |
1 file changed, 289 insertions, 3 deletions
diff --git a/compiler/x86/aoptx86.pas b/compiler/x86/aoptx86.pas index 465d7b5bd0..73949b044a 100644 --- a/compiler/x86/aoptx86.pas +++ b/compiler/x86/aoptx86.pas @@ -3363,9 +3363,10 @@ unit aoptx86; end; var - hp1,hp2: tai; -{$ifdef x86_64} - hp3: tai; + hp1,hp2,hp3: tai; +{$ifndef x86_64} + hp4: tai; + OperIdx: Integer; {$endif x86_64} begin Result:=false; @@ -3510,6 +3511,291 @@ unit aoptx86; Result:=true; exit; end + else if MatchOpType(taicpu(p),top_reg,top_reg) and + MatchInstruction(hp1, A_SAR, []) then + begin + if MatchOperand(taicpu(hp1).oper[0]^, 31) then + begin + { the use of %edx also covers the opsize being S_L } + if MatchOperand(taicpu(hp1).oper[1]^, NR_EDX) then + begin + { Note it has to be specifically "movl %eax,%edx", and those specific sub-registers } + if (taicpu(p).oper[0]^.reg = NR_EAX) and + (taicpu(p).oper[1]^.reg = NR_EDX) then + begin + { Change: + movl %eax,%edx + sarl $31,%edx + To: + cltd + } + DebugMsg(SPeepholeOptimization + 'MovSar2Cltd', p); + Asml.Remove(hp1); + hp1.Free; + taicpu(p).opcode := A_CDQ; + taicpu(p).opsize := S_NO; + taicpu(p).clearop(1); + taicpu(p).clearop(0); + taicpu(p).ops:=0; + Result := True; + end + else if (cs_opt_size in current_settings.optimizerswitches) and + (taicpu(p).oper[0]^.reg = NR_EDX) and + (taicpu(p).oper[1]^.reg = NR_EAX) then + begin + { Change: + movl %edx,%eax + sarl $31,%edx + To: + movl %edx,%eax + cltd + + Note that this creates a dependency between the two instructions, + so only perform if optimising for size. 
+ } + DebugMsg(SPeepholeOptimization + 'MovSar2MovCltd', p); + taicpu(hp1).opcode := A_CDQ; + taicpu(hp1).opsize := S_NO; + taicpu(hp1).clearop(1); + taicpu(hp1).clearop(0); + taicpu(hp1).ops:=0; + end; +{$ifndef x86_64} + end + { Don't bother if CMOV is supported, because a more optimal + sequence would have been generated for the Abs() intrinsic } + else if not(CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) and + { the use of %eax also covers the opsize being S_L } + MatchOperand(taicpu(hp1).oper[1]^, NR_EAX) and + (taicpu(p).oper[0]^.reg = NR_EAX) and + (taicpu(p).oper[1]^.reg = NR_EDX) and + GetNextInstruction(hp1, hp2) and + MatchInstruction(hp2, A_XOR, [S_L]) and + MatchOperand(taicpu(hp2).oper[0]^, NR_EAX) and + MatchOperand(taicpu(hp2).oper[1]^, NR_EDX) and + + GetNextInstruction(hp2, hp3) and + MatchInstruction(hp3, A_SUB, [S_L]) and + MatchOperand(taicpu(hp3).oper[0]^, NR_EAX) and + MatchOperand(taicpu(hp3).oper[1]^, NR_EDX) then + begin + { Change: + movl %eax,%edx + sarl $31,%eax + xorl %eax,%edx + subl %eax,%edx + (Instruction that uses %edx) + (%eax deallocated) + (%edx deallocated) + To: + cltd + xorl %edx,%eax <-- Note the registers have swapped + subl %edx,%eax + (Instruction that uses %eax) <-- %eax rather than %edx + } + + TransferUsedRegs(TmpUsedRegs); + UpdateUsedRegs(TmpUsedRegs, tai(p.Next)); + UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next)); + UpdateUsedRegs(TmpUsedRegs, tai(hp2.Next)); + + if not RegUsedAfterInstruction(NR_EAX, hp3, TmpUsedRegs) then + begin + if GetNextInstruction(hp3, hp4) and + not RegModifiedByInstruction(NR_EDX, hp4) and + not RegUsedAfterInstruction(NR_EDX, hp4, TmpUsedRegs) then + begin + DebugMsg(SPeepholeOptimization + 'abs() intrinsic optimisation', p); + + taicpu(p).opcode := A_CDQ; + taicpu(p).clearop(1); + taicpu(p).clearop(0); + taicpu(p).ops:=0; + + AsmL.Remove(hp1); + hp1.Free; + + taicpu(hp2).loadreg(0, NR_EDX); + taicpu(hp2).loadreg(1, NR_EAX); + + taicpu(hp3).loadreg(0, NR_EDX); + 
taicpu(hp3).loadreg(1, NR_EAX); + + AllocRegBetween(NR_EAX, hp3, hp4, TmpUsedRegs); + { Convert references in the following instruction (hp4) from %edx to %eax } + for OperIdx := 0 to taicpu(hp4).ops - 1 do + with taicpu(hp4).oper[OperIdx]^ do + case typ of + top_reg: + if reg = NR_EDX then + reg := NR_EAX; + top_ref: + begin + if ref^.base = NR_EDX then + ref^.base := NR_EAX; + if ref^.index = NR_EDX then + ref^.index := NR_EAX; + end; + else + ; + end; + end; + end; +{$else x86_64} + end; + end + else if MatchOperand(taicpu(hp1).oper[0]^, 63) and + { the use of %rdx also covers the opsize being S_Q } + MatchOperand(taicpu(hp1).oper[1]^, NR_RDX) then + begin + { Note it has to be specifically "movq %rax,%rdx", and those specific sub-registers } + if (taicpu(p).oper[0]^.reg = NR_RAX) and + (taicpu(p).oper[1]^.reg = NR_RDX) then + begin + { Change: + movq %rax,%rdx + sarq $63,%rdx + To: + cqto + } + DebugMsg(SPeepholeOptimization + 'MovSar2Cqto', p); + Asml.Remove(hp1); + hp1.Free; + taicpu(p).opcode := A_CQO; + taicpu(p).opsize := S_NO; + taicpu(p).clearop(1); + taicpu(p).clearop(0); + taicpu(p).ops:=0; + Result := True; + end + else if (cs_opt_size in current_settings.optimizerswitches) and + (taicpu(p).oper[0]^.reg = NR_RDX) and + (taicpu(p).oper[1]^.reg = NR_RAX) then + begin + { Change: + movq %rdx,%rax + sarq $63,%rdx + To: + movq %rdx,%rax + cqto + + Note that this creates a dependency between the two instructions, + so only perform if optimising for size. 
+ } + DebugMsg(SPeepholeOptimization + 'MovSar2MovCqto', p); + taicpu(hp1).opcode := A_CQO; + taicpu(hp1).opsize := S_NO; + taicpu(hp1).clearop(1); + taicpu(hp1).clearop(0); + taicpu(hp1).ops:=0; +{$endif x86_64} + end; + end; + end + else if MatchInstruction(hp1, A_MOV, []) and + (taicpu(hp1).oper[1]^.typ = top_reg) then + { Though "GetNextInstruction" could be factored out, along with + the instructions that depend on hp2, it is an expensive call that + should be delayed for as long as possible, hence we do cheaper + checks first that are likely to be False. [Kit] } + begin + + if MatchOperand(taicpu(p).oper[1]^, NR_EDX) and + ( + ( + (taicpu(hp1).oper[1]^.reg = NR_EAX) and + ( + MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^) or + MatchOperand(taicpu(hp1).oper[0]^, NR_EDX) + ) + ) or + ( + (taicpu(hp1).oper[1]^.reg = NR_EDX) and + ( + MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^) or + MatchOperand(taicpu(hp1).oper[0]^, NR_EAX) + ) + ) + ) and + GetNextInstruction(hp1, hp2) and + MatchInstruction(hp2, A_SAR, []) and + MatchOperand(taicpu(hp2).oper[0]^, 31) then + begin + if MatchOperand(taicpu(hp2).oper[1]^, NR_EDX) then + begin + { Change: + movl r/m,%edx movl r/m,%eax movl r/m,%edx movl r/m,%eax + movl %edx,%eax or movl %eax,%edx or movl r/m,%eax or movl r/m,%edx + sarl $31,%edx sarl $31,%edx sarl $31,%edx sarl $31,%edx + To: + movl r/m,%eax <- Note the change in register + cltd + } + DebugMsg(SPeepholeOptimization + 'MovMovSar2MovCltd', p); + + AllocRegBetween(NR_EAX, p, hp1, UsedRegs); + taicpu(p).loadreg(1, NR_EAX); + + taicpu(hp1).opcode := A_CDQ; + taicpu(hp1).clearop(1); + taicpu(hp1).clearop(0); + taicpu(hp1).ops:=0; + + AsmL.Remove(hp2); + hp2.Free; +(* +{$ifdef x86_64} + end + else if MatchOperand(taicpu(hp2).oper[1]^, NR_RDX) and + { This code sequence does not get generated - however it might become useful + if and when 128-bit signed integer types make an appearance, so the code + is kept here for when it is eventually needed. 
[Kit] } + ( + ( + (taicpu(hp1).oper[1]^.reg = NR_RAX) and + ( + MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^) or + MatchOperand(taicpu(hp1).oper[0]^, NR_RDX) + ) + ) or + ( + (taicpu(hp1).oper[1]^.reg = NR_RDX) and + ( + MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^) or + MatchOperand(taicpu(hp1).oper[0]^, NR_RAX) + ) + ) + ) and + GetNextInstruction(hp1, hp2) and + MatchInstruction(hp2, A_SAR, [S_Q]) and + MatchOperand(taicpu(hp2).oper[0]^, 63) and + MatchOperand(taicpu(hp2).oper[1]^, NR_RDX) then + begin + { Change: + movq r/m,%rdx movq r/m,%rax movq r/m,%rdx movq r/m,%rax + movq %rdx,%rax or movq %rax,%rdx or movq r/m,%rax or movq r/m,%rdx + sarq $63,%rdx sarq $63,%rdx sarq $63,%rdx sarq $63,%rdx + To: + movq r/m,%rax <- Note the change in register + cqto + } + DebugMsg(SPeepholeOptimization + 'MovMovSar2MovCqto', p); + + AllocRegBetween(NR_RAX, p, hp1, UsedRegs); + taicpu(p).loadreg(1, NR_RAX); + + taicpu(hp1).opcode := A_CQO; + taicpu(hp1).clearop(1); + taicpu(hp1).clearop(0); + taicpu(hp1).ops:=0; + + AsmL.Remove(hp2); + hp2.Free; +{$endif x86_64} +*) + end; + end; + end else if (taicpu(p).oper[0]^.typ = top_ref) and (hp1.typ = ait_instruction) and { while the GetNextInstruction(hp1,hp2) call could be factored out, |