summaryrefslogtreecommitdiff
path: root/compiler/nadd.pas
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/nadd.pas')
-rw-r--r--compiler/nadd.pas238
1 files changed, 217 insertions, 21 deletions
diff --git a/compiler/nadd.pas b/compiler/nadd.pas
index bf7b7f2a4e..ab89377400 100644
--- a/compiler/nadd.pas
+++ b/compiler/nadd.pas
@@ -73,6 +73,10 @@ interface
{ full 64 bit multiplies. }
function use_generic_mul64bit: boolean; virtual;
+ { shall be overridden if the target cpu supports
+ an fma instruction
+ }
+ function use_fma : boolean; virtual;
{ This routine calls internal runtime library helpers
for all floating point arithmetic in the case
where the emulation switches is on. Otherwise
@@ -80,18 +84,22 @@ interface
the code generation phase.
}
function first_addfloat : tnode; virtual;
- private
- { checks whether a muln can be calculated as a 32bit }
- { * 32bit -> 64 bit }
- function try_make_mul32to64: boolean;
- { Match against the ranges, i.e.:
- var a:1..10;
- begin
- if a>0 then
- ...
- always evaluates to true. (DM)
- }
- function cmp_of_disjunct_ranges(var res : boolean) : boolean;
+ private
+ { checks whether a muln can be calculated as a 32bit }
+ { * 32bit -> 64 bit }
+ function try_make_mul32to64: boolean;
+
+ { Match against the ranges, i.e.:
+ var a:1..10;
+ begin
+ if a>0 then
+ ...
+ always evaluates to true. (DM)
+ }
+ function cmp_of_disjunct_ranges(var res : boolean) : boolean;
+
+ { tries to replace the current node by a fma node }
+ function try_fma(ld,rd : tdef) : tnode;
end;
taddnodeclass = class of taddnode;
@@ -401,8 +409,7 @@ implementation
end;
{ both are int constants }
- if (
- (
+ if (
is_constintnode(left) and
is_constintnode(right)
) or
@@ -414,7 +421,7 @@ implementation
(
is_constenumnode(left) and
is_constenumnode(right) and
- allowenumop(nodetype))
+ (allowenumop(nodetype) or (nf_internal in flags))
) or
(
(lt = pointerconstn) and
@@ -676,6 +683,50 @@ implementation
result:=t;
exit;
end;
+{$if FPC_FULLVERSION>20700}
+ { bestrealrec is 2.7.1+ only }
+
+ { replace .../const by a multiplication, but only if fastmath is enabled or
+ the division is done by a power of 2, do not mess with special floating point values like Inf etc.
+
+ do this after constant folding to avoid unnecessary precision loss if
+ a slash expression would first be converted into a multiplication and later
+ folded }
+ if (nodetype=slashn) and
+ { do not mess with currency types }
+ (not(is_currency(right.resultdef))) and
+ (((cs_opt_fastmath in current_settings.optimizerswitches) and (rt=ordconstn)) or
+ ((cs_opt_fastmath in current_settings.optimizerswitches) and (rt=realconstn) and
+ (bestrealrec(trealconstnode(right).value_real).SpecialType in [fsPositive,fsNegative])
+ ) or
+ ((rt=realconstn) and
+ (bestrealrec(trealconstnode(right).value_real).SpecialType in [fsPositive,fsNegative]) and
+ { mantissa returns the mantissa/fraction without the hidden 1, so power of two means only the hidden
+ bit is set => mantissa must be 0 }
+ (bestrealrec(trealconstnode(right).value_real).Mantissa=0)
+ )
+ ) then
+ case rt of
+ ordconstn:
+ begin
+ { the normal code handles div/0 }
+ if (tordconstnode(right).value<>0) then
+ begin
+ nodetype:=muln;
+ t:=crealconstnode.create(1/tordconstnode(right).value,resultdef);
+ right.free;
+ right:=t;
+ exit;
+ end;
+ end;
+ realconstn:
+ begin
+ nodetype:=muln;
+ trealconstnode(right).value_real:=1.0/trealconstnode(right).value_real;
+ exit;
+ end;
+ end;
+{$endif FPC_FULLVERSION>20700}
{ first, we handle widestrings, so we can check later for }
{ stringconstn only }
@@ -1013,6 +1064,14 @@ implementation
change : boolean;
{$endif}
+ function maybe_cast_ordconst(var n: tnode; adef: tdef): boolean;
+   { n is accessed as a tordconstnode and adef as a torddef without any
+     check here. If the constant value of n lies within
+     [low..high] of adef, a type conversion of n to adef is inserted and
+     true is returned; otherwise n is left untouched and the result is
+     false. }
+   begin
+     result:=false;
+     { below the lower bound of the target type: nothing to do }
+     if tordconstnode(n).value<torddef(adef).low then
+       exit;
+     { above the upper bound of the target type: nothing to do }
+     if tordconstnode(n).value>torddef(adef).high then
+       exit;
+     inserttypeconv(n,adef);
+     result:=true;
+   end;
+
begin
result:=nil;
rlow:=0;
@@ -1420,6 +1479,18 @@ implementation
inserttypeconv(right,nd);
end;
end
+ { don't extend (sign-mismatched) comparisons if either side is a constant
+ whose value is within range of opposite side }
+ else if is_integer(ld) and is_integer(rd) and
+ (nodetype in [equaln,unequaln,gtn,gten,ltn,lten]) and
+ (is_signed(ld)<>is_signed(rd)) and
+ (
+ ((lt=ordconstn) and maybe_cast_ordconst(left,rd)) or
+ ((rt=ordconstn) and maybe_cast_ordconst(right,ld))
+ ) then
+ begin
+ { done here }
+ end
{ is there a signed 64 bit type ? }
else if ((torddef(rd).ordtype=s64bit) or (torddef(ld).ordtype=s64bit)) then
begin
@@ -1967,7 +2038,7 @@ implementation
end
else
resultdef:=right.resultdef;
- inserttypeconv(left,get_int_type_for_pointer_arithmetic(rd));
+ inserttypeconv(left,tpointerdef(right.resultdef).pointer_arithmetic_int_type);
if nodetype=addn then
begin
if (rt=niln) then
@@ -1981,7 +2052,7 @@ implementation
(tpointerdef(rd).pointeddef.size>1) then
begin
left:=caddnode.create(muln,left,
- cordconstnode.create(tpointerdef(rd).pointeddef.size,get_int_type_for_pointer_arithmetic(rd),true));
+ cordconstnode.create(tpointerdef(rd).pointeddef.size,tpointerdef(right.resultdef).pointer_arithmetic_int_type,true));
typecheckpass(left);
end;
end
@@ -2000,7 +2071,7 @@ implementation
else
resultdef:=left.resultdef;
- inserttypeconv(right,get_int_type_for_pointer_arithmetic(ld));
+ inserttypeconv(right,tpointerdef(left.resultdef).pointer_arithmetic_int_type);
if nodetype in [addn,subn] then
begin
if (lt=niln) then
@@ -2017,7 +2088,7 @@ implementation
if (tpointerdef(ld).pointeddef.size>1) then
begin
right:=caddnode.create(muln,right,
- cordconstnode.create(tpointerdef(ld).pointeddef.size,get_int_type_for_pointer_arithmetic(ld),true));
+ cordconstnode.create(tpointerdef(ld).pointeddef.size,tpointerdef(left.resultdef).pointer_arithmetic_int_type,true));
typecheckpass(right);
end
end else
@@ -2025,7 +2096,7 @@ implementation
(tarraydef(ld).elementdef.size>1) then
begin
right:=caddnode.create(muln,right,
- cordconstnode.create(tarraydef(ld).elementdef.size,get_int_type_for_pointer_arithmetic(ld),true));
+ cordconstnode.create(tarraydef(ld).elementdef.size,tpointerdef(left.resultdef).pointer_arithmetic_int_type,true));
typecheckpass(right);
end;
end
@@ -2068,7 +2139,7 @@ implementation
{ enums }
else if (ld.typ=enumdef) and (rd.typ=enumdef) then
begin
- if allowenumop(nodetype) then
+ if allowenumop(nodetype) or (nf_internal in flags) then
inserttypeconv(right,left.resultdef)
else
CGMessage3(type_e_operator_not_supported_for_types,node2opstr(nodetype),ld.typename,rd.typename);
@@ -2592,6 +2663,127 @@ implementation
end;
+ function taddnode.use_fma : boolean;
+   begin
+     { default answer of this virtual method: no fma instruction is used;
+       try_fma does nothing as long as this returns false }
+     use_fma:=false;
+   end;
+
+
+ function taddnode.try_fma(ld,rd : tdef) : tnode;
+   { Tries to replace the current add/sub node by a fused multiply-add
+     inline node.
+
+     ld, rd: the definitions of the left and right operand as passed by the
+             caller.
+
+     Returns nil if no replacement is done. Otherwise the result is a new
+     inline node fma(a,b,c); operands which were moved into it are detached
+     from the current node (the corresponding references are set to nil).
+
+     Handled shapes (only with fastmath enabled, use_fma returning true and
+     both operands being the same single or double type):
+       a*b + c   -> fma(a,b,c)       a*b - c   -> fma(a,b,-c)
+       c + a*b   -> fma(a,b,c)       c - a*b   -> fma(a,-b,c)
+       sqr(a)+c  -> fma(a,a,c)       sqr(a)-c  -> fma(a,a,-c)
+       c+sqr(a)  -> fma(a,a,c)       c-sqr(a)  -> fma(a,-a,c)
+   }
+   var
+     inlinennr : Integer;
+
+   { Returns true if n is a sqr() inline node on a real operand whose
+     operand may be duplicated. sqr(x) is rewritten as fma(x,x,...) by
+     copying x twice, so an x with side effects (e.g. a function call)
+     would be evaluated twice; such nodes are rejected. }
+   function is_fusable_sqr(n : tnode) : boolean;
+     begin
+       result:=(n.nodetype=inlinen) and
+         (tinlinenode(n).inlinenumber=in_sqr_real) and
+         not(might_have_sideeffects(tinlinenode(n).left));
+     end;
+
+   begin
+     result:=nil;
+     if (cs_opt_fastmath in current_settings.optimizerswitches) and
+       use_fma and
+       (nodetype in [addn,subn]) and
+       (rd.typ=floatdef) and (ld.typ=floatdef) and
+       (is_single(rd) or is_double(rd)) and
+       equal_defs(rd,ld) and
+       { transforming a*b+c into fma(a,b,c) makes only sense if c can be
+         calculated easily. Consider a*b+c*d which results in
+
+         fmul
+         fmul
+         fadd
+
+         and in
+
+         fmul
+         fma
+
+         when using the fma optimization. On a super scalar architecture, the first instruction
+         sequence requires clock_cycles(fmul)+clock_cycles(fadd) clock cycles because the fmuls can be executed in parallel.
+         The second sequence requires clock_cycles(fmul)+clock_cycles(fma) because the fma has to wait for the
+         result of the fmul. Since typically clock_cycles(fma)>clock_cycles(fadd) applies, the first sequence is better.
+       }
+       (((left.nodetype=muln) and (node_complexity(right)<3)) or
+        ((right.nodetype=muln) and (node_complexity(left)<3)) or
+        (is_fusable_sqr(left) and (node_complexity(right)<3)) or
+        (is_fusable_sqr(right) and (node_complexity(left)<3))
+       ) then
+       begin
+         { select the fma inline number matching the operand type }
+         case tfloatdef(ld).floattype of
+           s32real:
+             inlinennr:=in_fma_single;
+           s64real:
+             inlinennr:=in_fma_double;
+           s80real:
+             inlinennr:=in_fma_extended;
+           s128real:
+             inlinennr:=in_fma_float128;
+           else
+             internalerror(2014042601);
+         end;
+         if left.nodetype=muln then
+           begin
+             { a*b+c -> fma(a,b,c), a*b-c -> fma(a,b,-c) }
+             if nodetype=subn then
+               result:=cinlinenode.create(inlinennr,false,ccallparanode.create(cunaryminusnode.create(right),
+                 ccallparanode.create(taddnode(left).right,
+                 ccallparanode.create(taddnode(left).left,nil
+                 ))))
+             else
+               result:=cinlinenode.create(inlinennr,false,ccallparanode.create(right,
+                 ccallparanode.create(taddnode(left).right,
+                 ccallparanode.create(taddnode(left).left,nil
+                 ))));
+             { the operands are referenced by the new node now }
+             right:=nil;
+             taddnode(left).right:=nil;
+             taddnode(left).left:=nil;
+           end
+         else if right.nodetype=muln then
+           begin
+             { c+a*b -> fma(a,b,c), c-a*b -> fma(a,-b,c) }
+             if nodetype=subn then
+               result:=cinlinenode.create(inlinennr,false,ccallparanode.create(left,
+                 ccallparanode.create(cunaryminusnode.create(taddnode(right).right),
+                 ccallparanode.create(taddnode(right).left,nil
+                 ))))
+             else
+               result:=cinlinenode.create(inlinennr,false,ccallparanode.create(left,
+                 ccallparanode.create(taddnode(right).right,
+                 ccallparanode.create(taddnode(right).left,nil
+                 ))));
+             { the operands are referenced by the new node now }
+             left:=nil;
+             taddnode(right).right:=nil;
+             taddnode(right).left:=nil;
+           end
+         else if is_fusable_sqr(left) then
+           begin
+             { sqr(a)+c -> fma(a,a,c), sqr(a)-c -> fma(a,a,-c);
+               a is copied twice, the sqr node itself stays in left }
+             if nodetype=subn then
+               result:=cinlinenode.create(inlinennr,false,ccallparanode.create(cunaryminusnode.create(right),
+                 ccallparanode.create(tinlinenode(left).left.getcopy,
+                 ccallparanode.create(tinlinenode(left).left.getcopy,nil
+                 ))))
+             else
+               result:=cinlinenode.create(inlinennr,false,ccallparanode.create(right,
+                 ccallparanode.create(tinlinenode(left).left.getcopy,
+                 ccallparanode.create(tinlinenode(left).left.getcopy,nil
+                 ))));
+             right:=nil;
+           end
+         { we get here only if right is a sqr node }
+         else if is_fusable_sqr(right) then
+           begin
+             { c+sqr(a) -> fma(a,a,c), c-sqr(a) -> fma(a,-a,c);
+               a is copied twice, the sqr node itself stays in right }
+             if nodetype=subn then
+               result:=cinlinenode.create(inlinennr,false,ccallparanode.create(left,
+                 ccallparanode.create(cunaryminusnode.create(tinlinenode(right).left.getcopy),
+                 ccallparanode.create(tinlinenode(right).left.getcopy,nil
+                 ))))
+             else
+               result:=cinlinenode.create(inlinennr,false,ccallparanode.create(left,
+                 ccallparanode.create(tinlinenode(right).left.getcopy,
+                 ccallparanode.create(tinlinenode(right).left.getcopy,nil
+                 ))));
+             left:=nil;
+           end;
+       end;
+   end;
+
+
function taddnode.first_add64bitint: tnode;
var
procname: string[31];
@@ -3089,6 +3281,10 @@ implementation
expectloc:=LOC_FPUREGISTER
else
expectloc:=LOC_FLAGS;
+
+ result:=try_fma(ld,rd);
+ if assigned(result) then
+ exit;
end
{ pointer comperation and subtraction }