From 8c973bfb5a227352b00bb0725dd5778262efc28a Mon Sep 17 00:00:00 2001
From: nickysn <nickysn@3ad0048d-3df7-0310-abae-a5850022a9f2>
Date: Thu, 24 Jul 2014 13:28:19 +0000
Subject: * taddnode.pass_typecheck_internal: always pass a tpointerdef to  
 get_int_type_for_pointer_arithmetic, even when adding a zero-based array with
   a number

git-svn-id: http://svn.freepascal.org/svn/fpc/trunk@28257 3ad0048d-3df7-0310-abae-a5850022a9f2
---
 compiler/nadd.pas | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'compiler/nadd.pas')

diff --git a/compiler/nadd.pas b/compiler/nadd.pas
index bf7b7f2a4e..2afee16eb4 100644
--- a/compiler/nadd.pas
+++ b/compiler/nadd.pas
@@ -1967,7 +1967,7 @@ implementation
               end
             else
               resultdef:=right.resultdef;
-            inserttypeconv(left,get_int_type_for_pointer_arithmetic(rd));
+            inserttypeconv(left,get_int_type_for_pointer_arithmetic(right.resultdef));
             if nodetype=addn then
               begin
                 if (rt=niln) then
@@ -1981,7 +1981,7 @@ implementation
                    (tpointerdef(rd).pointeddef.size>1) then
                    begin
                      left:=caddnode.create(muln,left,
-                       cordconstnode.create(tpointerdef(rd).pointeddef.size,get_int_type_for_pointer_arithmetic(rd),true));
+                       cordconstnode.create(tpointerdef(rd).pointeddef.size,get_int_type_for_pointer_arithmetic(right.resultdef),true));
                      typecheckpass(left);
                    end;
               end
@@ -2000,7 +2000,7 @@ implementation
              else
                resultdef:=left.resultdef;
 
-             inserttypeconv(right,get_int_type_for_pointer_arithmetic(ld));
+             inserttypeconv(right,get_int_type_for_pointer_arithmetic(left.resultdef));
              if nodetype in [addn,subn] then
                begin
                  if (lt=niln) then
@@ -2017,7 +2017,7 @@ implementation
                    if (tpointerdef(ld).pointeddef.size>1) then
                    begin
                      right:=caddnode.create(muln,right,
-                       cordconstnode.create(tpointerdef(ld).pointeddef.size,get_int_type_for_pointer_arithmetic(ld),true));
+                       cordconstnode.create(tpointerdef(ld).pointeddef.size,get_int_type_for_pointer_arithmetic(left.resultdef),true));
                      typecheckpass(right);
                    end
                  end else
@@ -2025,7 +2025,7 @@ implementation
                       (tarraydef(ld).elementdef.size>1) then
                      begin
                        right:=caddnode.create(muln,right,
-                         cordconstnode.create(tarraydef(ld).elementdef.size,get_int_type_for_pointer_arithmetic(ld),true));
+                         cordconstnode.create(tarraydef(ld).elementdef.size,get_int_type_for_pointer_arithmetic(left.resultdef),true));
                        typecheckpass(right);
                      end;
                end
-- 
cgit v1.2.1


From 69e87c4f5f2ba151e9887e0cfbb305b25845a9a8 Mon Sep 17 00:00:00 2001
From: nickysn <nickysn@3ad0048d-3df7-0310-abae-a5850022a9f2>
Date: Thu, 24 Jul 2014 14:18:57 +0000
Subject: * defutil.get_int_type_for_pointer_arithmetic replaced with a virtual
 method   tpointerdef.pointer_arithmetic_int_type

git-svn-id: http://svn.freepascal.org/svn/fpc/trunk@28258 3ad0048d-3df7-0310-abae-a5850022a9f2
---
 compiler/nadd.pas | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'compiler/nadd.pas')

diff --git a/compiler/nadd.pas b/compiler/nadd.pas
index 2afee16eb4..2929ba8711 100644
--- a/compiler/nadd.pas
+++ b/compiler/nadd.pas
@@ -1967,7 +1967,7 @@ implementation
               end
             else
               resultdef:=right.resultdef;
-            inserttypeconv(left,get_int_type_for_pointer_arithmetic(right.resultdef));
+            inserttypeconv(left,tpointerdef(right.resultdef).pointer_arithmetic_int_type);
             if nodetype=addn then
               begin
                 if (rt=niln) then
@@ -1981,7 +1981,7 @@ implementation
                    (tpointerdef(rd).pointeddef.size>1) then
                    begin
                      left:=caddnode.create(muln,left,
-                       cordconstnode.create(tpointerdef(rd).pointeddef.size,get_int_type_for_pointer_arithmetic(right.resultdef),true));
+                       cordconstnode.create(tpointerdef(rd).pointeddef.size,tpointerdef(right.resultdef).pointer_arithmetic_int_type,true));
                      typecheckpass(left);
                    end;
               end
@@ -2000,7 +2000,7 @@ implementation
              else
                resultdef:=left.resultdef;
 
-             inserttypeconv(right,get_int_type_for_pointer_arithmetic(left.resultdef));
+             inserttypeconv(right,tpointerdef(left.resultdef).pointer_arithmetic_int_type);
              if nodetype in [addn,subn] then
                begin
                  if (lt=niln) then
@@ -2017,7 +2017,7 @@ implementation
                    if (tpointerdef(ld).pointeddef.size>1) then
                    begin
                      right:=caddnode.create(muln,right,
-                       cordconstnode.create(tpointerdef(ld).pointeddef.size,get_int_type_for_pointer_arithmetic(left.resultdef),true));
+                       cordconstnode.create(tpointerdef(ld).pointeddef.size,tpointerdef(left.resultdef).pointer_arithmetic_int_type,true));
                      typecheckpass(right);
                    end
                  end else
@@ -2025,7 +2025,7 @@ implementation
                       (tarraydef(ld).elementdef.size>1) then
                      begin
                        right:=caddnode.create(muln,right,
-                         cordconstnode.create(tarraydef(ld).elementdef.size,get_int_type_for_pointer_arithmetic(left.resultdef),true));
+                         cordconstnode.create(tarraydef(ld).elementdef.size,tpointerdef(left.resultdef).pointer_arithmetic_int_type,true));
                        typecheckpass(right);
                      end;
                end
-- 
cgit v1.2.1


From f9bf974b69c09f1723605dcfc8ee3742a53a646a Mon Sep 17 00:00:00 2001
From: sergei <sergei@3ad0048d-3df7-0310-abae-a5850022a9f2>
Date: Fri, 25 Jul 2014 21:40:22 +0000
Subject: * Do not promote integer comparison operations to native size if one
 side is a constant and its value is within range of opposite side. Instead,
 cast the constant to type of the opposite side. Comparisons are already not
 promoted if sides are equally signed, but this wasn't true for "unsigned vs
 constant" case, because constants are preferably parsed as signed. The change
 considerably improves generated code for i386 and even more considerably for
 m68k. Tested on i386-win32.

git-svn-id: http://svn.freepascal.org/svn/fpc/trunk@28266 3ad0048d-3df7-0310-abae-a5850022a9f2
---
 compiler/nadd.pas | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'compiler/nadd.pas')

diff --git a/compiler/nadd.pas b/compiler/nadd.pas
index 2929ba8711..696148df38 100644
--- a/compiler/nadd.pas
+++ b/compiler/nadd.pas
@@ -1013,6 +1013,14 @@ implementation
         change      : boolean;
 {$endif}
 
+        function maybe_cast_ordconst(var n: tnode; adef: tdef): boolean; { true if n's constant value lies within adef's low..high range; n then gets a type conversion to adef }
+          begin
+            result:=(tordconstnode(n).value>=torddef(adef).low) and { n is accessed as tordconstnode and adef as torddef without any check here; the caller must guarantee both }
+              (tordconstnode(n).value<=torddef(adef).high);
+            if result then
+              inserttypeconv(n,adef); { n is a var parameter, so the conversion is applied to the caller's node }
+          end;
+
       begin
          result:=nil;
          rlow:=0;
@@ -1420,6 +1428,18 @@ implementation
                      inserttypeconv(right,nd);
                    end;
                end
+             { don't extend (sign-mismatched) comparisons if either side is a constant
+               whose value is within range of opposite side }
+             else if is_integer(ld) and is_integer(rd) and
+                     (nodetype in [equaln,unequaln,gtn,gten,ltn,lten]) and
+                     (is_signed(ld)<>is_signed(rd)) and
+                     (
+                       ((lt=ordconstn) and maybe_cast_ordconst(left,rd)) or
+                       ((rt=ordconstn) and maybe_cast_ordconst(right,ld))
+                     ) then
+               begin
+                 { done here }
+               end
              { is there a signed 64 bit type ? }
              else if ((torddef(rd).ordtype=s64bit) or (torddef(ld).ordtype=s64bit)) then
                begin
-- 
cgit v1.2.1


From 2009bf87145a3898cf3a0f878f2437fcaf0c3a1e Mon Sep 17 00:00:00 2001
From: florian <florian@3ad0048d-3df7-0310-abae-a5850022a9f2>
Date: Mon, 11 Aug 2014 20:50:21 +0000
Subject: + automatically insert fma inlines into floating point code if
 possible and fastmath is activated

git-svn-id: http://svn.freepascal.org/svn/fpc/trunk@28382 3ad0048d-3df7-0310-abae-a5850022a9f2
---
 compiler/nadd.pas | 157 +++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 145 insertions(+), 12 deletions(-)

(limited to 'compiler/nadd.pas')

diff --git a/compiler/nadd.pas b/compiler/nadd.pas
index 696148df38..da5c15525d 100644
--- a/compiler/nadd.pas
+++ b/compiler/nadd.pas
@@ -73,6 +73,10 @@ interface
           { full 64 bit multiplies.                                }
           function use_generic_mul64bit: boolean; virtual;
 
+          { shall be overridden if the target cpu supports
+            an fma instruction
+          }
+          function use_fma : boolean; virtual;
           { This routine calls internal runtime library helpers
             for all floating point arithmetic in the case
             where the emulation switches is on. Otherwise
@@ -80,18 +84,22 @@ interface
             the code generation phase.
           }
           function first_addfloat : tnode; virtual;
-         private
-           { checks whether a muln can be calculated as a 32bit }
-           { * 32bit -> 64 bit                                  }
-           function try_make_mul32to64: boolean;
-           { Match against the ranges, i.e.:
-             var a:1..10;
-             begin
-               if a>0 then
-                 ...
-             always evaluates to true. (DM)
-           }
-           function cmp_of_disjunct_ranges(var res : boolean) : boolean;
+       private
+          { checks whether a muln can be calculated as a 32bit }
+          { * 32bit -> 64 bit                                  }
+          function try_make_mul32to64: boolean;
+
+          { Match against the ranges, i.e.:
+            var a:1..10;
+            begin
+              if a>0 then
+                ...
+            always evaluates to true. (DM)
+          }
+          function cmp_of_disjunct_ranges(var res : boolean) : boolean;
+
+          { tries to replace the current node by a fma node }
+          function try_fma(ld,rd : tdef) : tnode;
        end;
        taddnodeclass = class of taddnode;
 
@@ -2612,6 +2620,127 @@ implementation
       end;
 
 
+    function taddnode.use_fma : boolean;
+      begin
+        result:=false; { base implementation reports no fma support; the method is declared virtual so cpu-specific descendants can override it }
+      end;
+
+
+    function taddnode.try_fma(ld,rd : tdef) : tnode; { returns a new fma inline node built from this add/sub node's operands, or nil if the conditions below are not met; ld/rd are the operand types }
+      var
+        inlinennr : Integer;
+      begin
+        result:=nil;
+        if (cs_opt_fastmath in current_settings.optimizerswitches) and
+          use_fma and
+          (nodetype in [addn,subn]) and
+          (rd.typ=floatdef) and (ld.typ=floatdef) and
+          (is_single(rd) or is_double(rd)) and
+          equal_defs(rd,ld) and
+          { transforming a*b+c into fma(a,b,c) only makes sense if c can be
+            calculated easily. Consider a*b+c*d, which results in
+
+            fmul
+            fmul
+            fadd
+
+            and in
+
+            fmul
+            fma
+
+            when using the fma optimization. On a superscalar architecture, the first instruction
+            sequence requires clock_cycles(fmul)+clock_cycles(fadd) clock cycles because the fmuls can be executed in parallel.
+            The second sequence requires clock_cycles(fmul)+clock_cycles(fma) because the fma has to wait for the
+            result of the fmul. Since typically clock_cycles(fma)>clock_cycles(fadd) applies, the first sequence is better.
+          }
+          (((left.nodetype=muln) and (node_complexity(right)<3)) or
+           ((right.nodetype=muln) and (node_complexity(left)<3)) or
+           ((left.nodetype=inlinen) and
+            (tinlinenode(left).inlinenumber=in_sqr_real) and
+             (node_complexity(right)<3)) or
+           ((right.nodetype=inlinen) and
+            (tinlinenode(right).inlinenumber=in_sqr_real) and
+            (node_complexity(left)<3))
+          ) then
+          begin
+            case tfloatdef(ld).floattype of { select the fma inline number matching the operand float type }
+              s32real:
+               inlinennr:=in_fma_single;
+              s64real:
+               inlinennr:=in_fma_double;
+              s80real: { NOTE(review): the guard above requires is_single(rd) or is_double(rd), so this branch and the next look unreachable - confirm }
+               inlinennr:=in_fma_extended;
+              s128real:
+               inlinennr:=in_fma_float128;
+              else
+                internalerror(2014042601);
+            end;
+            if left.nodetype=muln then { left is the product; right is the addend }
+              begin
+                if nodetype=subn then { subtraction: the addend is wrapped in a unary minus }
+                  result:=cinlinenode.create(inlinennr,false,ccallparanode.create(cunaryminusnode.create(right),
+                    ccallparanode.create(taddnode(left).right,
+                    ccallparanode.create(taddnode(left).left,nil
+                    ))))
+                else
+                  result:=cinlinenode.create(inlinennr,false,ccallparanode.create(right,
+                    ccallparanode.create(taddnode(left).right,
+                    ccallparanode.create(taddnode(left).left,nil
+                    ))));
+                right:=nil; { the operand nodes were handed to the new node without copying; presumably cleared so they are not released together with this node - confirm }
+                taddnode(left).right:=nil;
+                taddnode(left).left:=nil;
+              end
+            else if right.nodetype=muln then { right is the product; left is the addend }
+              begin
+                if nodetype=subn then { subtraction: one factor of the product is wrapped in a unary minus }
+                  result:=cinlinenode.create(inlinennr,false,ccallparanode.create(left,
+                    ccallparanode.create(cunaryminusnode.create(taddnode(right).right),
+                    ccallparanode.create(taddnode(right).left,nil
+                    ))))
+                else
+                  result:=cinlinenode.create(inlinennr,false,ccallparanode.create(left,
+                    ccallparanode.create(taddnode(right).right,
+                    ccallparanode.create(taddnode(right).left,nil
+                    ))));
+                left:=nil; { same hand-over as in the branch above, mirrored for the right-hand product }
+                taddnode(right).right:=nil;
+                taddnode(right).left:=nil;
+              end
+            else if (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) then { left is sqr(x): x is used for both factors }
+              begin
+                if nodetype=subn then
+                  result:=cinlinenode.create(inlinennr,false,ccallparanode.create(cunaryminusnode.create(right),
+                    ccallparanode.create(tinlinenode(left).left.getcopy, { two copies of the sqr operand are made; NOTE(review): an operand with side effects would be evaluated twice - confirm this cannot occur here }
+                    ccallparanode.create(tinlinenode(left).left.getcopy,nil
+                    ))))
+                else
+                  result:=cinlinenode.create(inlinennr,false,ccallparanode.create(right,
+                    ccallparanode.create(tinlinenode(left).left.getcopy,
+                    ccallparanode.create(tinlinenode(left).left.getcopy,nil
+                    ))));
+                right:=nil; { only right is cleared: the sqr node on the left was copied from, not moved }
+              end
+            { we get here only if right is a sqr node }
+            else if (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then
+              begin
+                if nodetype=subn then { subtraction: one of the two copies of the sqr operand is negated }
+                  result:=cinlinenode.create(inlinennr,false,ccallparanode.create(left,
+                    ccallparanode.create(cunaryminusnode.create(tinlinenode(right).left.getcopy),
+                    ccallparanode.create(tinlinenode(right).left.getcopy,nil
+                    ))))
+                else
+                  result:=cinlinenode.create(inlinennr,false,ccallparanode.create(left,
+                    ccallparanode.create(tinlinenode(right).left.getcopy,
+                    ccallparanode.create(tinlinenode(right).left.getcopy,nil
+                    ))));
+                left:=nil; { only left is cleared: the sqr node on the right was copied from, not moved }
+              end;
+          end;
+      end;
+
+
     function taddnode.first_add64bitint: tnode;
       var
         procname: string[31];
@@ -3109,6 +3238,10 @@ implementation
                 expectloc:=LOC_FPUREGISTER
               else
                 expectloc:=LOC_FLAGS;
+
+              result:=try_fma(ld,rd);
+              if assigned(result) then
+                exit;
             end
 
          { pointer comperation and subtraction }
-- 
cgit v1.2.1


From 8ecbbb9b635324481ba82d6ac0b5bfdbbd5a81f3 Mon Sep 17 00:00:00 2001
From: florian <florian@3ad0048d-3df7-0310-abae-a5850022a9f2>
Date: Thu, 13 Nov 2014 21:13:12 +0000
Subject: + convert floating point division by a constant into a multiplication
 if fastmath is turned on

git-svn-id: http://svn.freepascal.org/svn/fpc/trunk@29069 3ad0048d-3df7-0310-abae-a5850022a9f2
---
 compiler/nadd.pas | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'compiler/nadd.pas')

diff --git a/compiler/nadd.pas b/compiler/nadd.pas
index da5c15525d..fd3082b978 100644
--- a/compiler/nadd.pas
+++ b/compiler/nadd.pas
@@ -685,6 +685,31 @@ implementation
              exit;
           end;
 
+        { replace .../const by a multiplication, but only if fastmath is enabled,
+          do this after constant folding to avoid unnecessary precision loss if
+          an slash expresion would be first converted into a multiplication and later
+          folded }
+        if (nodetype=slashn) and
+          { do not mess with currency types }
+          (not(is_currency(right.resultdef))) and
+          (cs_opt_fastmath in current_settings.optimizerswitches) then
+          case rt of
+            ordconstn:
+              begin
+                nodetype:=muln;
+                t:=crealconstnode.create(1/tordconstnode(right).value,resultdef);
+                right.free;
+                right:=t;
+                exit;
+              end;
+            realconstn:
+              begin
+                nodetype:=muln;
+                trealconstnode(right).value_real:=1.0/trealconstnode(right).value_real;
+                exit;
+              end;
+          end;
+
         { first, we handle widestrings, so we can check later for }
         { stringconstn only                                       }
 
-- 
cgit v1.2.1


From e4827c36fd5d9a718899819afbc78837c81198b9 Mon Sep 17 00:00:00 2001
From: florian <florian@3ad0048d-3df7-0310-abae-a5850022a9f2>
Date: Sun, 16 Nov 2014 20:47:38 +0000
Subject: + always change floating point divisions into multiplications if the
 divisor is a power of two; this is an exact operation, so it is always allowed
 * only change divisions by normal numbers into multiplications

git-svn-id: http://svn.freepascal.org/svn/fpc/trunk@29085 3ad0048d-3df7-0310-abae-a5850022a9f2
---
 compiler/nadd.pas | 33 ++++++++++++++++++++++++++-------
 1 file changed, 26 insertions(+), 7 deletions(-)

(limited to 'compiler/nadd.pas')

diff --git a/compiler/nadd.pas b/compiler/nadd.pas
index fd3082b978..9424c4e82b 100644
--- a/compiler/nadd.pas
+++ b/compiler/nadd.pas
@@ -684,23 +684,41 @@ implementation
              result:=t;
              exit;
           end;
+{$if FPC_FULLVERSION>20700}
+        { bestrealrec is 2.7.1+ only }
+
+        { replace .../const by a multiplication, but only if fastmath is enabled or
+          the division is done by a power of 2, do not mess with special floating point values like Inf etc.
 
-        { replace .../const by a multiplication, but only if fastmath is enabled,
           do this after constant folding to avoid unnecessary precision loss if
           an slash expresion would be first converted into a multiplication and later
           folded }
         if (nodetype=slashn) and
           { do not mess with currency types }
           (not(is_currency(right.resultdef))) and
-          (cs_opt_fastmath in current_settings.optimizerswitches) then
+          (((cs_opt_fastmath in current_settings.optimizerswitches) and (rt=ordconstn)) or
+           ((cs_opt_fastmath in current_settings.optimizerswitches) and (rt=realconstn) and
+            (bestrealrec(trealconstnode(right).value_real).SpecialType in [fsPositive,fsNegative])
+           ) or
+           ((rt=realconstn) and
+            (bestrealrec(trealconstnode(right).value_real).SpecialType in [fsPositive,fsNegative]) and
+            { mantissa returns the mantissa/fraction without the hidden 1, so power of two means only the hidden
+              bit is set => mantissa must be 0 }
+            (bestrealrec(trealconstnode(right).value_real).Mantissa=0)
+           )
+          ) then
           case rt of
             ordconstn:
               begin
-                nodetype:=muln;
-                t:=crealconstnode.create(1/tordconstnode(right).value,resultdef);
-                right.free;
-                right:=t;
-                exit;
+                { the normal code handles div/0 }
+                if (tordconstnode(right).value<>0) then
+                  begin
+                    nodetype:=muln;
+                    t:=crealconstnode.create(1/tordconstnode(right).value,resultdef);
+                    right.free;
+                    right:=t;
+                    exit;
+                  end;
               end;
             realconstn:
               begin
@@ -709,6 +727,7 @@ implementation
                 exit;
               end;
           end;
+{$endif FPC_FULLVERSION>20700}
 
         { first, we handle widestrings, so we can check later for }
         { stringconstn only                                       }
-- 
cgit v1.2.1


From 73fce9e62bb41be30ee07328054f34eeea22d99c Mon Sep 17 00:00:00 2001
From: florian <florian@3ad0048d-3df7-0310-abae-a5850022a9f2>
Date: Mon, 12 Jan 2015 20:56:17 +0000
Subject: * allow subtraction/addition of internally generated nodes, resolves
 #27256

git-svn-id: http://svn.freepascal.org/svn/fpc/trunk@29456 3ad0048d-3df7-0310-abae-a5850022a9f2
---
 compiler/nadd.pas | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'compiler/nadd.pas')

diff --git a/compiler/nadd.pas b/compiler/nadd.pas
index 9424c4e82b..ab89377400 100644
--- a/compiler/nadd.pas
+++ b/compiler/nadd.pas
@@ -409,8 +409,7 @@ implementation
           end;
 
         { both are int constants }
-        if (
-            (
+        if  (
              is_constintnode(left) and
              is_constintnode(right)
             ) or
@@ -422,7 +421,7 @@ implementation
             (
              is_constenumnode(left) and
              is_constenumnode(right) and
-             allowenumop(nodetype))
+             (allowenumop(nodetype) or (nf_internal in flags))
             ) or
             (
              (lt = pointerconstn) and
@@ -2140,7 +2139,7 @@ implementation
          { enums }
          else if (ld.typ=enumdef) and (rd.typ=enumdef) then
           begin
-            if allowenumop(nodetype) then
+            if allowenumop(nodetype) or (nf_internal in flags) then
               inserttypeconv(right,left.resultdef)
             else
               CGMessage3(type_e_operator_not_supported_for_types,node2opstr(nodetype),ld.typename,rd.typename);
-- 
cgit v1.2.1