summaryrefslogtreecommitdiff
path: root/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs
blob: 5f3f0ac495cb470d6fea032525fb33ca1645c86b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
{-# OPTIONS -fno-warn-unused-binds #-}

module RegAlloc.Graph.TrivColorable (
	trivColorable,
)

where

#include "HsVersions.h"

import RegClass
import Reg

import GraphBase

import UniqFM
import FastTypes


-- trivColorable ---------------------------------------------------------------

-- trivColorable function for the graph coloring allocator
--
--	This gets hammered by scanGraph during register allocation,
--	so needs to be fairly efficient.
--
--	NOTE: 	This only works for arcitectures with just RcInteger and RcDouble
--		(which are disjoint) ie. x86, x86_64 and ppc
--
--	BL 2007/09
--	Doing a nice fold over the UniqSet makes trivColorable use
--	32% of total compile time and 42% of total alloc when compiling SHA1.lhs from darcs.
--
-- 	The number of allocatable regs is hard coded here so we can do a fast
-- 		comparision in trivColorable. 
--
-- 	It's ok if these numbers are _less_ than the actual number of free regs, 
--		but they can't be more or the register conflict graph won't color.
--
-- 	If the graph doesn't color then the allocator will panic, but it won't 
--		generate bad object code or anything nasty like that.
--
-- 	There is an allocatableRegsInClass :: RegClass -> Int, but doing the unboxing
-- 	is too slow for us here.
--
-- 	Look at includes/MachRegs.h to get these numbers.
--

#if i386_TARGET_ARCH
#define ALLOCATABLE_REGS_INTEGER (_ILIT(3))
#define ALLOCATABLE_REGS_DOUBLE  (_ILIT(6))
#define ALLOCATABLE_REGS_FLOAT   (_ILIT(0))


#elif x86_64_TARGET_ARCH
#define ALLOCATABLE_REGS_INTEGER (_ILIT(5))
#define ALLOCATABLE_REGS_DOUBLE  (_ILIT(2))
#define ALLOCATABLE_REGS_FLOAT   (_ILIT(0))


#elif powerpc_TARGET_ARCH
#define ALLOCATABLE_REGS_INTEGER (_ILIT(16))
#define ALLOCATABLE_REGS_DOUBLE  (_ILIT(26))
#define ALLOCATABLE_REGS_FLOAT   (_ILIT(0))


#elif sparc_TARGET_ARCH
#define ALLOCATABLE_REGS_INTEGER (_ILIT(14))
#define ALLOCATABLE_REGS_DOUBLE  (_ILIT(11))
#define ALLOCATABLE_REGS_FLOAT   (_ILIT(22))


#else
#error ToDo: choose which trivColorable function to use for this architecture.
#endif



-- Disjoint registers ----------------------------------------------------------
--	
--	The definition has been unfolded into individual cases for speed.
-- 	Each architecture has a different register setup, so we use a
--	different regSqueeze function for each.
--
accSqueeze 
	:: FastInt 
	-> FastInt
	-> (reg -> FastInt) 
	-> UniqFM reg
	-> FastInt

accSqueeze count maxCount squeeze ufm 
 = case ufm of
 	NodeUFM _ _ left right
	 -> case accSqueeze count maxCount squeeze right of
	 	count' -> case count' >=# maxCount of
				False -> accSqueeze count' maxCount squeeze left
				True  -> count'
				
	LeafUFM _ reg	-> count +# squeeze reg
 	EmptyUFM	-> count


trivColorable
	:: (RegClass -> VirtualReg -> FastInt)
	-> (RegClass -> RealReg    -> FastInt)
	-> Triv VirtualReg RegClass RealReg

trivColorable virtualRegSqueeze realRegSqueeze RcInteger conflicts exclusions
	| count2	<- accSqueeze (_ILIT(0)) ALLOCATABLE_REGS_INTEGER 
				(virtualRegSqueeze RcInteger)
				conflicts
				
	, count3	<- accSqueeze  count2    ALLOCATABLE_REGS_INTEGER
				(realRegSqueeze   RcInteger)
				exclusions

	= count3 <# ALLOCATABLE_REGS_INTEGER

trivColorable virtualRegSqueeze realRegSqueeze RcFloat conflicts exclusions
	| count2	<- accSqueeze (_ILIT(0)) ALLOCATABLE_REGS_FLOAT
				(virtualRegSqueeze RcFloat)
				conflicts
				
	, count3	<- accSqueeze  count2    ALLOCATABLE_REGS_FLOAT
				(realRegSqueeze   RcFloat)
				exclusions

	= count3 <# ALLOCATABLE_REGS_FLOAT

trivColorable virtualRegSqueeze realRegSqueeze RcDouble conflicts exclusions
	| count2	<- accSqueeze (_ILIT(0)) ALLOCATABLE_REGS_DOUBLE
				(virtualRegSqueeze RcDouble)
				conflicts
				
	, count3	<- accSqueeze  count2    ALLOCATABLE_REGS_DOUBLE
				(realRegSqueeze   RcDouble)
				exclusions

	= count3 <# ALLOCATABLE_REGS_DOUBLE


-- Specification Code ----------------------------------------------------------
--
--	The trivColorable function for each particular architecture should
--	implement the following function, but faster.
--

{-
trivColorable :: RegClass -> UniqSet Reg -> UniqSet Reg -> Bool
trivColorable classN conflicts exclusions
 = let

	acc :: Reg -> (Int, Int) -> (Int, Int)
	acc r (cd, cf)	
	 = case regClass r of
		RcInteger	-> (cd+1, cf)
		RcFloat		-> (cd,   cf+1)
		_		-> panic "Regs.trivColorable: reg class not handled"

	tmp			= foldUniqSet acc (0, 0) conflicts
	(countInt,  countFloat)	= foldUniqSet acc tmp    exclusions

	squeese		= worst countInt   classN RcInteger
			+ worst countFloat classN RcFloat

   in	squeese < allocatableRegsInClass classN

-- | Worst case displacement
--	node N of classN has n neighbors of class C.
--
--	We currently only have RcInteger and RcDouble, which don't conflict at all.
--	This is a bit boring compared to what's in RegArchX86.
--
worst :: Int -> RegClass -> RegClass -> Int
worst n classN classC
 = case classN of
 	RcInteger
	 -> case classC of
	 	RcInteger	-> min n (allocatableRegsInClass RcInteger)
		RcFloat		-> 0
		
	RcDouble
	 -> case classC of
	 	RcFloat		-> min n (allocatableRegsInClass RcFloat)
		RcInteger	-> 0

-- allocatableRegs is allMachRegNos with the fixed-use regs removed.
-- i.e., these are the regs for which we are prepared to allow the
-- register allocator to attempt to map VRegs to.
allocatableRegs :: [RegNo]
allocatableRegs
   = let isFree i = isFastTrue (freeReg i)
     in  filter isFree allMachRegNos


-- | The number of regs in each class.
--	We go via top level CAFs to ensure that we're not recomputing
--	the length of these lists each time the fn is called.
allocatableRegsInClass :: RegClass -> Int
allocatableRegsInClass cls
 = case cls of
 	RcInteger	-> allocatableRegsInteger
	RcFloat		-> allocatableRegsDouble

allocatableRegsInteger :: Int
allocatableRegsInteger	
	= length $ filter (\r -> regClass r == RcInteger) 
		 $ map RealReg allocatableRegs

allocatableRegsFloat :: Int
allocatableRegsFloat
	= length $ filter (\r -> regClass r == RcFloat 
		 $ map RealReg allocatableRegs
-}