1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
|
unit optunrol;
{$i fpcdefs.inc}
interface
uses
node;
type
tunrollinfo = record
{ if count isn divisable by unrolls then
the for loop must jump to this label to get the correct
number of executions }
entrylabel : tnode;
end;
function unroll_loop(node : tnode;out unrollinfo : tunrollinfo) : tnode;
implementation
uses
globtype,globals,
cpuinfo,
nutils,
nbas,nflw,ncon,ninl,ncal;
var
nodecount : aint;
function donodecount(var n: tnode; arg: pointer): foreachnoderesult;
begin
inc(nodecount);
result:=fen_false;
end;
{ rough estimation how large the tree "node" is }
function countnodes(node : tnode) : aint;
begin
nodecount:=0;
foreachnodestatic(node,@donodecount,nil);
result:=nodecount;
end;
function number_unrolls(node : tnode) : integer;
begin
{ multiply by 2 for CPUs with a long pipeline }
if aktoptprocessor in [ClassPentium4] then
number_unrolls:=60 div countnodes(node)
else
number_unrolls:=30 div countnodes(node);
if number_unrolls=0 then
number_unrolls:=1;
end;
function unroll_loop(node : tnode) : tnode;
var
unrolls,i : integer;
counts : qword;
unrollstatement : tstatementnode;
unrollblock : tblocknode;
entrylabel : tlabelnode;
begin
result:=nil;
if (cs_littlesize in aktglobalswitches) then
exit;
if not(node.nodetype in [forn]) then
exit;
fillchar(tfornode(node).unrollinfo,sizeof(tunrollinfo),0);
unrolls:=number_unrolls(tfornode(node).t2);
if unrolls>1 then
begin
{ number of executions known? }
if (tfornode(node).right.nodetype=ordconstn) and (tfornode(node).t1.nodetype=ordconstn) then
begin
if lnf_backward in tfornode(node).loopflags then
counts:=tordconstnode(tfornode(node).right).value-tordconstnode(tfornode(node).t1).value+1
else
counts:=tordconstnode(tfornode(node).t1).value-tordconstnode(tfornode(node).right).value+1;
{ don't unroll more than we need }
if unrolls>counts then
unrolls:=counts;
{ create block statement }
unrollblock:=internalstatements(unrollstatement);
{ let's unroll (and rock of course) }
for i:=1 to unrolls do
begin
{ set and insert entry label? }
if (counts mod unrolls<>0) and
((counts mod unrolls)=unrolls-i+1) then
begin
tfornode(node).unrollinfo.entrylabel:=clabelnode.createcase(
end;
{ create and insert copy of the statement block }
addstatement(unrollstatement,tfornode(tfornode(node).t2).getcopy);
{ for itself increases at the last iteration }
if i<unrolls then
begin
{ insert incrementation of counter var }
addstatement(unrollstatement,
geninlinenode(in_inc_x,false,ccallparanode.create(tfornode(node).left.getcopy,nil)));
end;
end;
{ can we get rid of the for statement? }
if unrolls=counts then
result:=unrollblock;
end
else
begin
{ for now, we can't handle this }
exit;
end;
if not(assigned(result)) then
begin
tfornode(node).t2.free;
tfornode(node).t2:=unrollblock;
end;
end;
end;
end.
|