summaryrefslogtreecommitdiff
path: root/nodeopt/compiler/optunrol.pas
blob: 49f1fadde762da6b1b8ef9a3723b2c97d35e9f24 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
unit optunrol;

{$i fpcdefs.inc}

  interface

    uses
      node;

    type
      tunrollinfo = record
        { if count isn divisable by unrolls then
          the for loop must jump to this label to get the correct
          number of executions }
        entrylabel : tnode;
      end;

    function unroll_loop(node : tnode;out unrollinfo : tunrollinfo) : tnode;

  implementation

    uses
      globtype,globals,
      cpuinfo,
      nutils,
      nbas,nflw,ncon,ninl,ncal;

    var
      nodecount : aint;

    function donodecount(var n: tnode; arg: pointer): foreachnoderesult;
      begin
        inc(nodecount);
        result:=fen_false;
      end;


    { rough estimation how large the tree "node" is }
    function countnodes(node : tnode) : aint;
      begin
        nodecount:=0;
        foreachnodestatic(node,@donodecount,nil);
        result:=nodecount;
      end;


    function number_unrolls(node : tnode) : integer;
      begin
        { multiply by 2 for CPUs with a long pipeline }
        if aktoptprocessor in [ClassPentium4] then
          number_unrolls:=60 div countnodes(node)
        else
          number_unrolls:=30 div countnodes(node);

        if number_unrolls=0 then
          number_unrolls:=1;
      end;


    function unroll_loop(node : tnode) : tnode;
      var
        unrolls,i : integer;
        counts : qword;
        unrollstatement : tstatementnode;
        unrollblock : tblocknode;
        entrylabel : tlabelnode;
      begin
        result:=nil;
        if (cs_littlesize in aktglobalswitches) then
          exit;
        if not(node.nodetype in [forn]) then
          exit;
        fillchar(tfornode(node).unrollinfo,sizeof(tunrollinfo),0);
        unrolls:=number_unrolls(tfornode(node).t2);
        if unrolls>1 then
          begin
            { number of executions known? }
            if (tfornode(node).right.nodetype=ordconstn) and (tfornode(node).t1.nodetype=ordconstn) then
              begin
                if lnf_backward in tfornode(node).loopflags then
                  counts:=tordconstnode(tfornode(node).right).value-tordconstnode(tfornode(node).t1).value+1
                else
                  counts:=tordconstnode(tfornode(node).t1).value-tordconstnode(tfornode(node).right).value+1;

                { don't unroll more than we need }
                if unrolls>counts then
                  unrolls:=counts;

                { create block statement }
                unrollblock:=internalstatements(unrollstatement);

                { let's unroll (and rock of course) }
                for i:=1 to unrolls do
                  begin
                    { set and insert entry label? }
                    if (counts mod unrolls<>0) and
                      ((counts mod unrolls)=unrolls-i+1) then
                      begin
                        tfornode(node).unrollinfo.entrylabel:=clabelnode.createcase(
                      end;
                    { create and insert copy of the statement block }
                    addstatement(unrollstatement,tfornode(tfornode(node).t2).getcopy);

                    { for itself increases at the last iteration }
                    if i<unrolls then
                      begin
                        { insert incrementation of counter var }
                        addstatement(unrollstatement,
                          geninlinenode(in_inc_x,false,ccallparanode.create(tfornode(node).left.getcopy,nil)));
                      end;
                  end;
                { can we get rid of the for statement? }
                if unrolls=counts then
                  result:=unrollblock;
              end
            else
              begin
                { for now, we can't handle this }
                exit;
              end;
            if not(assigned(result)) then
              begin
                tfornode(node).t2.free;
                tfornode(node).t2:=unrollblock;
              end;
          end;
      end;

end.