summaryrefslogtreecommitdiff
path: root/compiler/x86/aoptx86.pas
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/x86/aoptx86.pas')
-rw-r--r--compiler/x86/aoptx86.pas88
1 files changed, 79 insertions, 9 deletions
diff --git a/compiler/x86/aoptx86.pas b/compiler/x86/aoptx86.pas
index 646f27d526..c68280130e 100644
--- a/compiler/x86/aoptx86.pas
+++ b/compiler/x86/aoptx86.pas
@@ -3076,6 +3076,55 @@ unit aoptx86;
Result:=true;
exit;
end;
+
+{$ifdef x86_64}
+ { Convert:
+ movq x(ref),%reg64
+ shrq y,%reg64
+ To:
+ movq x+4(ref),%reg32
+ shrq y-32,%reg32 (Remove if y = 32)
+ }
+ if (taicpu(p).opsize = S_Q) and
+ (taicpu(p).oper[0]^.typ = top_ref) and { Second operand will be a register }
+ (taicpu(p).oper[0]^.ref^.offset <= $7FFFFFFB) and
+ MatchInstruction(hp1, A_SHR, [taicpu(p).opsize]) and
+ MatchOpType(taicpu(hp1), top_const, top_reg) and
+ (taicpu(hp1).oper[0]^.val >= 32) and
+ (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
+ begin
+ RegName1 := debug_regname(taicpu(hp1).oper[1]^.reg);
+ PreMessage := 'movq ' + debug_operstr(taicpu(p).oper[0]^) + ',' + RegName1 + '; ' +
+ 'shrq $' + debug_tostr(taicpu(hp1).oper[0]^.val) + ',' + RegName1 + ' -> movl ';
+
+ { Convert to 32-bit }
+ setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
+ taicpu(p).opsize := S_L;
+
+ Inc(taicpu(p).oper[0]^.ref^.offset, 4);
+
+ PreMessage := PreMessage + debug_operstr(taicpu(p).oper[0]^) + ',' + debug_regname(taicpu(p).oper[1]^.reg);
+ if (taicpu(hp1).oper[0]^.val = 32) then
+ begin
+ DebugMsg(SPeepholeOptimization + PreMessage + ' (MovShr2Mov)', p);
+ RemoveInstruction(hp1);
+ end
+ else
+ begin
+ { This will potentially open up more arithmetic operations since
+ the peephole optimizer now has a big hint that only the lower
+ 32 bits are currently in use (and opcodes are smaller in size) }
+ setsubreg(taicpu(hp1).oper[1]^.reg, R_SUBD);
+ taicpu(hp1).opsize := S_L;
+
+ Dec(taicpu(hp1).oper[0]^.val, 32);
+ DebugMsg(SPeepholeOptimization + PreMessage +
+ '; shrl $' + debug_tostr(taicpu(hp1).oper[0]^.val) + ',' + debug_regname(taicpu(hp1).oper[1]^.reg) + ' (MovShr2MovShr)', p);
+ end;
+ Result := True;
+ Exit;
+ end;
+{$endif x86_64}
end;
@@ -7076,19 +7125,26 @@ unit aoptx86;
movl/q %reg1,%reg2
To:
leal/q $-x(%reg1),%reg2
- subl/q $x,%reg1
+ subl/q $x,%reg1 (can be removed if %reg1 or the flags are not used afterwards)
Breaks the dependency chain and potentially permits the removal of
a CMP instruction if one follows.
}
Result := False;
- if not (cs_opt_size in current_settings.optimizerswitches) and
- (taicpu(p).opsize in [S_L{$ifdef x86_64}, S_Q{$endif x86_64}]) and
+ if (taicpu(p).opsize in [S_L{$ifdef x86_64}, S_Q{$endif x86_64}]) and
MatchOpType(taicpu(p),top_const,top_reg) and
GetNextInstruction(p, hp1) and
MatchInstruction(hp1, A_MOV, [taicpu(p).opsize]) and
(taicpu(hp1).oper[1]^.typ = top_reg) and
- MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^.reg) then
+ MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^.reg) and
+ (
+ { Don't do SubMov2LeaSub under -Os, but do allow SubMov2Lea }
+ not (cs_opt_size in current_settings.optimizerswitches) or
+ (
+ not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs) and
+ RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp1, TmpUsedRegs)
+ )
+ ) then
begin
{ Change the MOV instruction to a LEA instruction, and update the
first operand }
@@ -7100,12 +7156,26 @@ unit aoptx86;
taicpu(hp1).opcode := A_LEA;
taicpu(hp1).loadref(0, NewRef);
- { Move what is now the LEA instruction to before the SUB instruction }
- Asml.Remove(hp1);
- Asml.InsertBefore(hp1, p);
- AllocRegBetween(taicpu(hp1).oper[1]^.reg, hp1, p, UsedRegs);
+ TransferUsedRegs(TmpUsedRegs);
+ UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
+ if RegUsedAfterInstruction(NewRef.base, hp1, TmpUsedRegs) or
+ RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp1, TmpUsedRegs) then
+ begin
+ { Move what is now the LEA instruction to before the SUB instruction }
+ Asml.Remove(hp1);
+ Asml.InsertBefore(hp1, p);
+ AllocRegBetween(taicpu(hp1).oper[1]^.reg, hp1, p, UsedRegs);
+
+ DebugMsg(SPeepholeOptimization + 'SubMov2LeaSub', p);
+ p := hp1;
+ end
+ else
+ begin
+ { Since %reg1 or the flags aren't used afterwards, we can delete p completely }
+ RemoveCurrentP(p, hp1);
+ DebugMsg(SPeepholeOptimization + 'SubMov2Lea', p);
+ end;
- DebugMsg(SPeepholeOptimization + 'SubMov2LeaSub', p);
Result := True;
end;
end;