From 8c973bfb5a227352b00bb0725dd5778262efc28a Mon Sep 17 00:00:00 2001 From: nickysn Date: Thu, 24 Jul 2014 13:28:19 +0000 Subject: * taddnode.pass_typecheck_internal: always pass a tpointerdef to get_int_type_for_pointer_arithmetic, even when adding a zero-based array with a number git-svn-id: http://svn.freepascal.org/svn/fpc/trunk@28257 3ad0048d-3df7-0310-abae-a5850022a9f2 --- compiler/nadd.pas | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'compiler/nadd.pas') diff --git a/compiler/nadd.pas b/compiler/nadd.pas index bf7b7f2a4e..2afee16eb4 100644 --- a/compiler/nadd.pas +++ b/compiler/nadd.pas @@ -1967,7 +1967,7 @@ implementation end else resultdef:=right.resultdef; - inserttypeconv(left,get_int_type_for_pointer_arithmetic(rd)); + inserttypeconv(left,get_int_type_for_pointer_arithmetic(right.resultdef)); if nodetype=addn then begin if (rt=niln) then @@ -1981,7 +1981,7 @@ implementation (tpointerdef(rd).pointeddef.size>1) then begin left:=caddnode.create(muln,left, - cordconstnode.create(tpointerdef(rd).pointeddef.size,get_int_type_for_pointer_arithmetic(rd),true)); + cordconstnode.create(tpointerdef(rd).pointeddef.size,get_int_type_for_pointer_arithmetic(right.resultdef),true)); typecheckpass(left); end; end @@ -2000,7 +2000,7 @@ implementation else resultdef:=left.resultdef; - inserttypeconv(right,get_int_type_for_pointer_arithmetic(ld)); + inserttypeconv(right,get_int_type_for_pointer_arithmetic(left.resultdef)); if nodetype in [addn,subn] then begin if (lt=niln) then @@ -2017,7 +2017,7 @@ implementation if (tpointerdef(ld).pointeddef.size>1) then begin right:=caddnode.create(muln,right, - cordconstnode.create(tpointerdef(ld).pointeddef.size,get_int_type_for_pointer_arithmetic(ld),true)); + cordconstnode.create(tpointerdef(ld).pointeddef.size,get_int_type_for_pointer_arithmetic(left.resultdef),true)); typecheckpass(right); end end else @@ -2025,7 +2025,7 @@ implementation (tarraydef(ld).elementdef.size>1) then begin right:=caddnode.create(muln,right, - cordconstnode.create(tarraydef(ld).elementdef.size,get_int_type_for_pointer_arithmetic(ld),true)); + cordconstnode.create(tarraydef(ld).elementdef.size,get_int_type_for_pointer_arithmetic(left.resultdef),true)); typecheckpass(right); end; end -- cgit v1.2.1 From 69e87c4f5f2ba151e9887e0cfbb305b25845a9a8 Mon Sep 17 00:00:00 2001 From: nickysn Date: Thu, 24 Jul 2014 14:18:57 +0000 Subject: * defutil.get_int_type_for_pointer_arithmetic replaced with a virtual method tpointerdef.pointer_arithmetic_int_type git-svn-id: http://svn.freepascal.org/svn/fpc/trunk@28258 3ad0048d-3df7-0310-abae-a5850022a9f2 --- compiler/nadd.pas | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'compiler/nadd.pas') diff --git a/compiler/nadd.pas b/compiler/nadd.pas index 2afee16eb4..2929ba8711 100644 --- a/compiler/nadd.pas +++ b/compiler/nadd.pas @@ -1967,7 +1967,7 @@ implementation end else resultdef:=right.resultdef; - inserttypeconv(left,get_int_type_for_pointer_arithmetic(right.resultdef)); + inserttypeconv(left,tpointerdef(right.resultdef).pointer_arithmetic_int_type); if nodetype=addn then begin if (rt=niln) then @@ -1981,7 +1981,7 @@ implementation (tpointerdef(rd).pointeddef.size>1) then begin left:=caddnode.create(muln,left, - cordconstnode.create(tpointerdef(rd).pointeddef.size,get_int_type_for_pointer_arithmetic(right.resultdef),true)); + cordconstnode.create(tpointerdef(rd).pointeddef.size,tpointerdef(right.resultdef).pointer_arithmetic_int_type,true)); typecheckpass(left); end; end @@ -2000,7 +2000,7 @@ implementation else resultdef:=left.resultdef; - inserttypeconv(right,get_int_type_for_pointer_arithmetic(left.resultdef)); + inserttypeconv(right,tpointerdef(left.resultdef).pointer_arithmetic_int_type); if nodetype in [addn,subn] then begin if (lt=niln) then @@ -2017,7 +2017,7 @@ implementation if (tpointerdef(ld).pointeddef.size>1) then begin right:=caddnode.create(muln,right, - cordconstnode.create(tpointerdef(ld).pointeddef.size,get_int_type_for_pointer_arithmetic(left.resultdef),true)); + cordconstnode.create(tpointerdef(ld).pointeddef.size,tpointerdef(left.resultdef).pointer_arithmetic_int_type,true)); typecheckpass(right); end end else @@ -2025,7 +2025,7 @@ implementation (tarraydef(ld).elementdef.size>1) then begin right:=caddnode.create(muln,right, - cordconstnode.create(tarraydef(ld).elementdef.size,get_int_type_for_pointer_arithmetic(left.resultdef),true)); + cordconstnode.create(tarraydef(ld).elementdef.size,tpointerdef(left.resultdef).pointer_arithmetic_int_type,true)); typecheckpass(right); end; end -- cgit v1.2.1 From f9bf974b69c09f1723605dcfc8ee3742a53a646a Mon Sep 17 00:00:00 2001 From: sergei Date: Fri, 25 Jul 2014 21:40:22 +0000 Subject: * Do not promote integer comparison operations to native size if one side is a constant and its value is within range of opposite side. Instead, cast the constant to type of the opposite side. Comparisons are already not promoted if sides are equally signed, but this wasn't true for "unsigned vs constant" case, because constants are preferably parsed as signed. The change considerably improves generated code for i386 and even more considerably for m68k. Tested on i386-win32. git-svn-id: http://svn.freepascal.org/svn/fpc/trunk@28266 3ad0048d-3df7-0310-abae-a5850022a9f2 --- compiler/nadd.pas | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'compiler/nadd.pas') diff --git a/compiler/nadd.pas b/compiler/nadd.pas index 2929ba8711..696148df38 100644 --- a/compiler/nadd.pas +++ b/compiler/nadd.pas @@ -1013,6 +1013,14 @@ implementation change : boolean; {$endif} + function maybe_cast_ordconst(var n: tnode; adef: tdef): boolean; + begin + result:=(tordconstnode(n).value>=torddef(adef).low) and + (tordconstnode(n).value<=torddef(adef).high); + if result then + inserttypeconv(n,adef); + end; + begin result:=nil; rlow:=0; @@ -1420,6 +1428,18 @@ implementation inserttypeconv(right,nd); end; end + { don't extend (sign-mismatched) comparisons if either side is a constant + whose value is within range of opposite side } + else if is_integer(ld) and is_integer(rd) and + (nodetype in [equaln,unequaln,gtn,gten,ltn,lten]) and + (is_signed(ld)<>is_signed(rd)) and + ( + ((lt=ordconstn) and maybe_cast_ordconst(left,rd)) or + ((rt=ordconstn) and maybe_cast_ordconst(right,ld)) + ) then + begin + { done here } + end { is there a signed 64 bit type ? } else if ((torddef(rd).ordtype=s64bit) or (torddef(ld).ordtype=s64bit)) then begin -- cgit v1.2.1 From 2009bf87145a3898cf3a0f878f2437fcaf0c3a1e Mon Sep 17 00:00:00 2001 From: florian Date: Mon, 11 Aug 2014 20:50:21 +0000 Subject: + automatically insert fma inlines into floating point code if possible and fastmath is activated git-svn-id: http://svn.freepascal.org/svn/fpc/trunk@28382 3ad0048d-3df7-0310-abae-a5850022a9f2 --- compiler/nadd.pas | 157 +++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 145 insertions(+), 12 deletions(-) (limited to 'compiler/nadd.pas') diff --git a/compiler/nadd.pas b/compiler/nadd.pas index 696148df38..da5c15525d 100644 --- a/compiler/nadd.pas +++ b/compiler/nadd.pas @@ -73,6 +73,10 @@ interface { full 64 bit multiplies. } function use_generic_mul64bit: boolean; virtual; + { shall be overriden if the target cpu supports + an fma instruction + } + function use_fma : boolean; virtual; { This routine calls internal runtime library helpers for all floating point arithmetic in the case where the emulation switches is on. Otherwise @@ -80,18 +84,22 @@ interface the code generation phase. } function first_addfloat : tnode; virtual; - private - { checks whether a muln can be calculated as a 32bit } - { * 32bit -> 64 bit } - function try_make_mul32to64: boolean; - { Match against the ranges, i.e.: - var a:1..10; - begin - if a>0 then - ... - always evaluates to true. (DM) - } - function cmp_of_disjunct_ranges(var res : boolean) : boolean; + private + { checks whether a muln can be calculated as a 32bit } + { * 32bit -> 64 bit } + function try_make_mul32to64: boolean; + + { Match against the ranges, i.e.: + var a:1..10; + begin + if a>0 then + ... + always evaluates to true. (DM) + } + function cmp_of_disjunct_ranges(var res : boolean) : boolean; + + { tries to replace the current node by a fma node } + function try_fma(ld,rd : tdef) : tnode; end; taddnodeclass = class of taddnode; @@ -2612,6 +2620,127 @@ implementation end; + function taddnode.use_fma : boolean; + begin + result:=false; + end; + + + function taddnode.try_fma(ld,rd : tdef) : tnode; + var + inlinennr : Integer; + begin + result:=nil; + if (cs_opt_fastmath in current_settings.optimizerswitches) and + use_fma and + (nodetype in [addn,subn]) and + (rd.typ=floatdef) and (ld.typ=floatdef) and + (is_single(rd) or is_double(rd)) and + equal_defs(rd,ld) and + { transforming a*b+c into fma(a,b,c) makes only sense if c can be + calculated easily. Consider a*b+c*d which results in + + fmul + fmul + fadd + + and in + + fmul + fma + + when using the fma optimization. On a super scalar architecture, the first instruction + sequence requires clock_cycles(fmul)+clock_cycles(fadd) clock cycles because the fmuls can be executed in parallel. + The second sequence requires clock_cycles(fmul)+clock_cycles(fma) because the fma has to wait for the + result of the fmul. Since typically clock_cycles(fma)>clock_cycles(fadd) applies, the first sequence is better. + } + (((left.nodetype=muln) and (node_complexity(right)<3)) or + ((right.nodetype=muln) and (node_complexity(left)<3)) or + ((left.nodetype=inlinen) and + (tinlinenode(left).inlinenumber=in_sqr_real) and + (node_complexity(right)<3)) or + ((right.nodetype=inlinen) and + (tinlinenode(right).inlinenumber=in_sqr_real) and + (node_complexity(left)<3)) + ) then + begin + case tfloatdef(ld).floattype of + s32real: + inlinennr:=in_fma_single; + s64real: + inlinennr:=in_fma_double; + s80real: + inlinennr:=in_fma_extended; + s128real: + inlinennr:=in_fma_float128; + else + internalerror(2014042601); + end; + if left.nodetype=muln then + begin + if nodetype=subn then + result:=cinlinenode.create(inlinennr,false,ccallparanode.create(cunaryminusnode.create(right), + ccallparanode.create(taddnode(left).right, + ccallparanode.create(taddnode(left).left,nil + )))) + else + result:=cinlinenode.create(inlinennr,false,ccallparanode.create(right, + ccallparanode.create(taddnode(left).right, + ccallparanode.create(taddnode(left).left,nil + )))); + right:=nil; + taddnode(left).right:=nil; + taddnode(left).left:=nil; + end + else if right.nodetype=muln then + begin + if nodetype=subn then + result:=cinlinenode.create(inlinennr,false,ccallparanode.create(left, + ccallparanode.create(cunaryminusnode.create(taddnode(right).right), + ccallparanode.create(taddnode(right).left,nil + )))) + else + result:=cinlinenode.create(inlinennr,false,ccallparanode.create(left, + ccallparanode.create(taddnode(right).right, + ccallparanode.create(taddnode(right).left,nil + )))); + left:=nil; + taddnode(right).right:=nil; + taddnode(right).left:=nil; + end + else if (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) then + begin + if nodetype=subn then + result:=cinlinenode.create(inlinennr,false,ccallparanode.create(cunaryminusnode.create(right), + ccallparanode.create(tinlinenode(left).left.getcopy, + ccallparanode.create(tinlinenode(left).left.getcopy,nil + )))) + else + result:=cinlinenode.create(inlinennr,false,ccallparanode.create(right, + ccallparanode.create(tinlinenode(left).left.getcopy, + ccallparanode.create(tinlinenode(left).left.getcopy,nil + )))); + right:=nil; + end + { we get here only if right is a sqr node } + else if (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then + begin + if nodetype=subn then + result:=cinlinenode.create(inlinennr,false,ccallparanode.create(left, + ccallparanode.create(cunaryminusnode.create(tinlinenode(right).left.getcopy), + ccallparanode.create(tinlinenode(right).left.getcopy,nil + )))) + else + result:=cinlinenode.create(inlinennr,false,ccallparanode.create(left, + ccallparanode.create(tinlinenode(right).left.getcopy, + ccallparanode.create(tinlinenode(right).left.getcopy,nil + )))); + left:=nil; + end; + end; + end; + + function taddnode.first_add64bitint: tnode; var procname: string[31]; @@ -3109,6 +3238,10 @@ implementation expectloc:=LOC_FPUREGISTER else expectloc:=LOC_FLAGS; + + result:=try_fma(ld,rd); + if assigned(result) then + exit; end { pointer comperation and subtraction } -- cgit v1.2.1 From 8ecbbb9b635324481ba82d6ac0b5bfdbbd5a81f3 Mon Sep 17 00:00:00 2001 From: florian Date: Thu, 13 Nov 2014 21:13:12 +0000 Subject: + convert floating point division by a constant into a multiplication if fastmath is turned on git-svn-id: http://svn.freepascal.org/svn/fpc/trunk@29069 3ad0048d-3df7-0310-abae-a5850022a9f2 --- compiler/nadd.pas | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'compiler/nadd.pas') diff --git a/compiler/nadd.pas b/compiler/nadd.pas index da5c15525d..fd3082b978 100644 --- a/compiler/nadd.pas +++ b/compiler/nadd.pas @@ -685,6 +685,31 @@ implementation exit; end; + { replace .../const by a multiplication, but only if fastmath is enabled, + do this after constant folding to avoid unnecessary precision loss if + an slash expresion would be first converted into a multiplication and later + folded } + if (nodetype=slashn) and + { do not mess with currency types } + (not(is_currency(right.resultdef))) and + (cs_opt_fastmath in current_settings.optimizerswitches) then + case rt of + ordconstn: + begin + nodetype:=muln; + t:=crealconstnode.create(1/tordconstnode(right).value,resultdef); + right.free; + right:=t; + exit; + end; + realconstn: + begin + nodetype:=muln; + trealconstnode(right).value_real:=1.0/trealconstnode(right).value_real; + exit; + end; + end; + { first, we handle widestrings, so we can check later for } { stringconstn only } -- cgit v1.2.1 From e4827c36fd5d9a718899819afbc78837c81198b9 Mon Sep 17 00:00:00 2001 From: florian Date: Sun, 16 Nov 2014 20:47:38 +0000 Subject: + change always floating point divisions into multiplications if they are a power of two, this is an exact operation so it is always allowed * change only divisions by normal numbers into multiplications git-svn-id: http://svn.freepascal.org/svn/fpc/trunk@29085 3ad0048d-3df7-0310-abae-a5850022a9f2 --- compiler/nadd.pas | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) (limited to 'compiler/nadd.pas') diff --git a/compiler/nadd.pas b/compiler/nadd.pas index fd3082b978..9424c4e82b 100644 --- a/compiler/nadd.pas +++ b/compiler/nadd.pas @@ -684,23 +684,41 @@ implementation result:=t; exit; end; +{$if FPC_FULLVERSION>20700} + { bestrealrec is 2.7.1+ only } + + { replace .../const by a multiplication, but only if fastmath is enabled or + the division is done by a power of 2, do not mess with special floating point values like Inf etc. - { replace .../const by a multiplication, but only if fastmath is enabled, do this after constant folding to avoid unnecessary precision loss if an slash expresion would be first converted into a multiplication and later folded } if (nodetype=slashn) and { do not mess with currency types } (not(is_currency(right.resultdef))) and - (cs_opt_fastmath in current_settings.optimizerswitches) then + (((cs_opt_fastmath in current_settings.optimizerswitches) and (rt=ordconstn)) or + ((cs_opt_fastmath in current_settings.optimizerswitches) and (rt=realconstn) and + (bestrealrec(trealconstnode(right).value_real).SpecialType in [fsPositive,fsNegative]) + ) or + ((rt=realconstn) and + (bestrealrec(trealconstnode(right).value_real).SpecialType in [fsPositive,fsNegative]) and + { mantissa returns the mantissa/fraction without the hidden 1, so power of two means only the hidden + bit is set => mantissa must be 0 } + (bestrealrec(trealconstnode(right).value_real).Mantissa=0) + ) + ) then case rt of ordconstn: begin - nodetype:=muln; - t:=crealconstnode.create(1/tordconstnode(right).value,resultdef); - right.free; - right:=t; - exit; + { the normal code handles div/0 } + if (tordconstnode(right).value<>0) then + begin + nodetype:=muln; + t:=crealconstnode.create(1/tordconstnode(right).value,resultdef); + right.free; + right:=t; + exit; + end; end; realconstn: begin @@ -709,6 +727,7 @@ implementation exit; end; end; +{$endif FPC_FULLVERSION>20700} { first, we handle widestrings, so we can check later for } { stringconstn only } -- cgit v1.2.1 From 73fce9e62bb41be30ee07328054f34eeea22d99c Mon Sep 17 00:00:00 2001 From: florian Date: Mon, 12 Jan 2015 20:56:17 +0000 Subject: * allow subtraction/addition of internally generated nodes, resolves #27256 git-svn-id: http://svn.freepascal.org/svn/fpc/trunk@29456 3ad0048d-3df7-0310-abae-a5850022a9f2 --- compiler/nadd.pas | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'compiler/nadd.pas') diff --git a/compiler/nadd.pas b/compiler/nadd.pas index 9424c4e82b..ab89377400 100644 --- a/compiler/nadd.pas +++ b/compiler/nadd.pas @@ -409,8 +409,7 @@ implementation end; { both are int constants } - if ( - ( + if ( is_constintnode(left) and is_constintnode(right) ) or @@ -422,7 +421,7 @@ implementation ( is_constenumnode(left) and is_constenumnode(right) and - allowenumop(nodetype)) + (allowenumop(nodetype) or (nf_internal in flags)) ) or ( (lt = pointerconstn) and @@ -2140,7 +2139,7 @@ implementation { enums } else if (ld.typ=enumdef) and (rd.typ=enumdef) then begin - if allowenumop(nodetype) then + if allowenumop(nodetype) or (nf_internal in flags) then inserttypeconv(right,left.resultdef) else CGMessage3(type_e_operator_not_supported_for_types,node2opstr(nodetype),ld.typename,rd.typename); -- cgit v1.2.1