summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: florian <florian@3ad0048d-3df7-0310-abae-a5850022a9f2> 2020-01-12 09:20:00 +0000
committer: florian <florian@3ad0048d-3df7-0310-abae-a5850022a9f2> 2020-01-12 09:20:00 +0000
commit: ada96d677013f4b6ca030454ccc9631d88aaa187 (patch)
tree: 245e2177338c342d51151414cc858078c5d00404
parent: 2a69f5b1e7d060b0ed5658f995cd1b15c8ce54bc (diff)
download: fpc-ada96d677013f4b6ca030454ccc9631d88aaa187.tar.gz
* patch by J. Gareth Moreton: EAX -> EDX:EAX sign extension shortcuts, and MOVSX shortcuts for AX register, part 1 of #36551
git-svn-id: https://svn.freepascal.org/svn/fpc/trunk@43917 3ad0048d-3df7-0310-abae-a5850022a9f2
-rw-r--r--  compiler/x86/aoptx86.pas | 292
1 file changed, 289 insertions(+), 3 deletions(-)
diff --git a/compiler/x86/aoptx86.pas b/compiler/x86/aoptx86.pas
index 465d7b5bd0..73949b044a 100644
--- a/compiler/x86/aoptx86.pas
+++ b/compiler/x86/aoptx86.pas
@@ -3363,9 +3363,10 @@ unit aoptx86;
end;
var
- hp1,hp2: tai;
-{$ifdef x86_64}
- hp3: tai;
+ hp1,hp2,hp3: tai;
+{$ifndef x86_64}
+ hp4: tai;
+ OperIdx: Integer;
{$endif x86_64}
begin
Result:=false;
@@ -3510,6 +3511,291 @@ unit aoptx86;
Result:=true;
exit;
end
+ else if MatchOpType(taicpu(p),top_reg,top_reg) and
+ MatchInstruction(hp1, A_SAR, []) then
+ begin
+ if MatchOperand(taicpu(hp1).oper[0]^, 31) then
+ begin
+ { the use of %edx also covers the opsize being S_L }
+ if MatchOperand(taicpu(hp1).oper[1]^, NR_EDX) then
+ begin
+ { Note it has to be specifically "movl %eax,%edx", and those specific sub-registers }
+ if (taicpu(p).oper[0]^.reg = NR_EAX) and
+ (taicpu(p).oper[1]^.reg = NR_EDX) then
+ begin
+ { Change:
+ movl %eax,%edx
+ sarl $31,%edx
+ To:
+ cltd
+ }
+ DebugMsg(SPeepholeOptimization + 'MovSar2Cltd', p);
+ Asml.Remove(hp1);
+ hp1.Free;
+ taicpu(p).opcode := A_CDQ;
+ taicpu(p).opsize := S_NO;
+ taicpu(p).clearop(1);
+ taicpu(p).clearop(0);
+ taicpu(p).ops:=0;
+ Result := True;
+ end
+ else if (cs_opt_size in current_settings.optimizerswitches) and
+ (taicpu(p).oper[0]^.reg = NR_EDX) and
+ (taicpu(p).oper[1]^.reg = NR_EAX) then
+ begin
+ { Change:
+ movl %edx,%eax
+ sarl $31,%edx
+ To:
+ movl %edx,%eax
+ cltd
+
+ Note that this creates a dependency between the two instructions,
+ so only perform if optimising for size.
+ }
+ DebugMsg(SPeepholeOptimization + 'MovSar2MovCltd', p);
+ taicpu(hp1).opcode := A_CDQ;
+ taicpu(hp1).opsize := S_NO;
+ taicpu(hp1).clearop(1);
+ taicpu(hp1).clearop(0);
+ taicpu(hp1).ops:=0;
+ end;
+{$ifndef x86_64}
+ end
+ { Don't bother if CMOV is supported, because a more optimal
+ sequence would have been generated for the Abs() intrinsic }
+ else if not(CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) and
+ { the use of %eax also covers the opsize being S_L }
+ MatchOperand(taicpu(hp1).oper[1]^, NR_EAX) and
+ (taicpu(p).oper[0]^.reg = NR_EAX) and
+ (taicpu(p).oper[1]^.reg = NR_EDX) and
+ GetNextInstruction(hp1, hp2) and
+ MatchInstruction(hp2, A_XOR, [S_L]) and
+ MatchOperand(taicpu(hp2).oper[0]^, NR_EAX) and
+ MatchOperand(taicpu(hp2).oper[1]^, NR_EDX) and
+
+ GetNextInstruction(hp2, hp3) and
+ MatchInstruction(hp3, A_SUB, [S_L]) and
+ MatchOperand(taicpu(hp3).oper[0]^, NR_EAX) and
+ MatchOperand(taicpu(hp3).oper[1]^, NR_EDX) then
+ begin
+ { Change:
+ movl %eax,%edx
+ sarl $31,%eax
+ xorl %eax,%edx
+ subl %eax,%edx
+ (Instruction that uses %edx)
+ (%eax deallocated)
+ (%edx deallocated)
+ To:
+ cltd
+ xorl %edx,%eax <-- Note the registers have swapped
+ subl %edx,%eax
+ (Instruction that uses %eax) <-- %eax rather than %edx
+ }
+
+ TransferUsedRegs(TmpUsedRegs);
+ UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
+ UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
+ UpdateUsedRegs(TmpUsedRegs, tai(hp2.Next));
+
+ if not RegUsedAfterInstruction(NR_EAX, hp3, TmpUsedRegs) then
+ begin
+ if GetNextInstruction(hp3, hp4) and
+ not RegModifiedByInstruction(NR_EDX, hp4) and
+ not RegUsedAfterInstruction(NR_EDX, hp4, TmpUsedRegs) then
+ begin
+ DebugMsg(SPeepholeOptimization + 'abs() intrinsic optimisation', p);
+
+ taicpu(p).opcode := A_CDQ;
+ taicpu(p).clearop(1);
+ taicpu(p).clearop(0);
+ taicpu(p).ops:=0;
+
+ AsmL.Remove(hp1);
+ hp1.Free;
+
+ taicpu(hp2).loadreg(0, NR_EDX);
+ taicpu(hp2).loadreg(1, NR_EAX);
+
+ taicpu(hp3).loadreg(0, NR_EDX);
+ taicpu(hp3).loadreg(1, NR_EAX);
+
+ AllocRegBetween(NR_EAX, hp3, hp4, TmpUsedRegs);
+ { Convert references in the following instruction (hp4) from %edx to %eax }
+ for OperIdx := 0 to taicpu(hp4).ops - 1 do
+ with taicpu(hp4).oper[OperIdx]^ do
+ case typ of
+ top_reg:
+ if reg = NR_EDX then
+ reg := NR_EAX;
+ top_ref:
+ begin
+ if ref^.base = NR_EDX then
+ ref^.base := NR_EAX;
+ if ref^.index = NR_EDX then
+ ref^.index := NR_EAX;
+ end;
+ else
+ ;
+ end;
+ end;
+ end;
+{$else x86_64}
+ end;
+ end
+ else if MatchOperand(taicpu(hp1).oper[0]^, 63) and
+ { the use of %rdx also covers the opsize being S_Q }
+ MatchOperand(taicpu(hp1).oper[1]^, NR_RDX) then
+ begin
+ { Note it has to be specifically "movq %rax,%rdx", and those specific sub-registers }
+ if (taicpu(p).oper[0]^.reg = NR_RAX) and
+ (taicpu(p).oper[1]^.reg = NR_RDX) then
+ begin
+ { Change:
+ movq %rax,%rdx
+ sarq $63,%rdx
+ To:
+ cqto
+ }
+ DebugMsg(SPeepholeOptimization + 'MovSar2Cqto', p);
+ Asml.Remove(hp1);
+ hp1.Free;
+ taicpu(p).opcode := A_CQO;
+ taicpu(p).opsize := S_NO;
+ taicpu(p).clearop(1);
+ taicpu(p).clearop(0);
+ taicpu(p).ops:=0;
+ Result := True;
+ end
+ else if (cs_opt_size in current_settings.optimizerswitches) and
+ (taicpu(p).oper[0]^.reg = NR_RDX) and
+ (taicpu(p).oper[1]^.reg = NR_RAX) then
+ begin
+ { Change:
+ movq %rdx,%rax
+ sarq $63,%rdx
+ To:
+ movq %rdx,%rax
+ cqto
+
+ Note that this creates a dependency between the two instructions,
+ so only perform if optimising for size.
+ }
+ DebugMsg(SPeepholeOptimization + 'MovSar2MovCqto', p);
+ taicpu(hp1).opcode := A_CQO;
+ taicpu(hp1).opsize := S_NO;
+ taicpu(hp1).clearop(1);
+ taicpu(hp1).clearop(0);
+ taicpu(hp1).ops:=0;
+{$endif x86_64}
+ end;
+ end;
+ end
+ else if MatchInstruction(hp1, A_MOV, []) and
+ (taicpu(hp1).oper[1]^.typ = top_reg) then
+ { Though "GetNextInstruction" could be factored out, along with
+ the instructions that depend on hp2, it is an expensive call that
+ should be delayed for as long as possible, hence we do cheaper
+ checks first that are likely to be False. [Kit] }
+ begin
+
+ if MatchOperand(taicpu(p).oper[1]^, NR_EDX) and
+ (
+ (
+ (taicpu(hp1).oper[1]^.reg = NR_EAX) and
+ (
+ MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^) or
+ MatchOperand(taicpu(hp1).oper[0]^, NR_EDX)
+ )
+ ) or
+ (
+ (taicpu(hp1).oper[1]^.reg = NR_EDX) and
+ (
+ MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^) or
+ MatchOperand(taicpu(hp1).oper[0]^, NR_EAX)
+ )
+ )
+ ) and
+ GetNextInstruction(hp1, hp2) and
+ MatchInstruction(hp2, A_SAR, []) and
+ MatchOperand(taicpu(hp2).oper[0]^, 31) then
+ begin
+ if MatchOperand(taicpu(hp2).oper[1]^, NR_EDX) then
+ begin
+ { Change:
+ movl r/m,%edx movl r/m,%eax movl r/m,%edx movl r/m,%eax
+ movl %edx,%eax or movl %eax,%edx or movl r/m,%eax or movl r/m,%edx
+ sarl $31,%edx sarl $31,%edx sarl $31,%edx sarl $31,%edx
+ To:
+ movl r/m,%eax <- Note the change in register
+ cltd
+ }
+ DebugMsg(SPeepholeOptimization + 'MovMovSar2MovCltd', p);
+
+ AllocRegBetween(NR_EAX, p, hp1, UsedRegs);
+ taicpu(p).loadreg(1, NR_EAX);
+
+ taicpu(hp1).opcode := A_CDQ;
+ taicpu(hp1).clearop(1);
+ taicpu(hp1).clearop(0);
+ taicpu(hp1).ops:=0;
+
+ AsmL.Remove(hp2);
+ hp2.Free;
+(*
+{$ifdef x86_64}
+ end
+ else if MatchOperand(taicpu(hp2).oper[1]^, NR_RDX) and
+ { This code sequence does not get generated - however it might become useful
+ if and when 128-bit signed integer types make an appearance, so the code
+ is kept here for when it is eventually needed. [Kit] }
+ (
+ (
+ (taicpu(hp1).oper[1]^.reg = NR_RAX) and
+ (
+ MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^) or
+ MatchOperand(taicpu(hp1).oper[0]^, NR_RDX)
+ )
+ ) or
+ (
+ (taicpu(hp1).oper[1]^.reg = NR_RDX) and
+ (
+ MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^) or
+ MatchOperand(taicpu(hp1).oper[0]^, NR_RAX)
+ )
+ )
+ ) and
+ GetNextInstruction(hp1, hp2) and
+ MatchInstruction(hp2, A_SAR, [S_Q]) and
+ MatchOperand(taicpu(hp2).oper[0]^, 63) and
+ MatchOperand(taicpu(hp2).oper[1]^, NR_RDX) then
+ begin
+ { Change:
+ movq r/m,%rdx movq r/m,%rax movq r/m,%rdx movq r/m,%rax
+ movq %rdx,%rax or movq %rax,%rdx or movq r/m,%rax or movq r/m,%rdx
+ sarq $63,%rdx sarq $63,%rdx sarq $63,%rdx sarq $63,%rdx
+ To:
+ movq r/m,%rax <- Note the change in register
+ cqto
+ }
+ DebugMsg(SPeepholeOptimization + 'MovMovSar2MovCqto', p);
+
+ AllocRegBetween(NR_RAX, p, hp1, UsedRegs);
+ taicpu(p).loadreg(1, NR_RAX);
+
+ taicpu(hp1).opcode := A_CQO;
+ taicpu(hp1).clearop(1);
+ taicpu(hp1).clearop(0);
+ taicpu(hp1).ops:=0;
+
+ AsmL.Remove(hp2);
+ hp2.Free;
+{$endif x86_64}
+*)
+ end;
+ end;
+ end
else if (taicpu(p).oper[0]^.typ = top_ref) and
(hp1.typ = ait_instruction) and
{ while the GetNextInstruction(hp1,hp2) call could be factored out,