summaryrefslogtreecommitdiff
path: root/src/cmd/compile/internal/ssa/tighten.go
blob: 85b6a84cc3f426c905160af30fe6622a360fc1c7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import "cmd/compile/internal/base"

// tighten moves Values closer to the Blocks in which they are used.
// This can reduce the amount of register spilling required,
// if it doesn't also create more live values.
// A Value can be moved to any block that
// dominates all blocks in which it is used.
func tighten(f *Func) {
	if base.Flag.N != 0 && len(f.Blocks) < 10000 {
		// Skip the optimization in -N mode, except for huge functions.
		// Too many values live across blocks can cause pathological
		// behavior in the register allocator (see issue 52180).
		return
	}

	canMove := f.Cache.allocBoolSlice(f.NumValues())
	defer f.Cache.freeBoolSlice(canMove)

	// Compute the memory states of each block.
	startMem := f.Cache.allocValueSlice(f.NumBlocks())
	defer f.Cache.freeValueSlice(startMem)
	endMem := f.Cache.allocValueSlice(f.NumBlocks())
	defer f.Cache.freeValueSlice(endMem)
	memState(f, startMem, endMem)

	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if v.Op.isLoweredGetClosurePtr() {
				// Must stay in the entry block.
				continue
			}
			switch v.Op {
			case OpPhi, OpArg, OpArgIntReg, OpArgFloatReg, OpSelect0, OpSelect1, OpSelectN:
				// Phis need to stay in their block.
				// Arg must stay in the entry block.
				// Tuple selectors must stay with the tuple generator.
				// SelectN is typically, ultimately, a register.
				continue
			}
			// Count arguments which will need a register.
			narg := 0
			for _, a := range v.Args {
				// SP and SB are special registers and have no effect on
				// the allocation of general-purpose registers.
				if a.needRegister() && a.Op != OpSB && a.Op != OpSP {
					narg++
				}
			}
			if narg >= 2 && !v.Type.IsFlags() {
				// Don't move values with more than one input, as that may
				// increase register pressure.
				// We make an exception for flags, as we want flag generators
				// moved next to uses (because we only have 1 flag register).
				continue
			}
			canMove[v.ID] = true
		}
	}

	// Build data structure for fast least-common-ancestor queries.
	lca := makeLCArange(f)

	// For each moveable value, record the block that dominates all uses found so far.
	target := f.Cache.allocBlockSlice(f.NumValues())
	defer f.Cache.freeBlockSlice(target)

	// Grab loop information.
	// We use this to make sure we don't tighten a value into a (deeper) loop.
	idom := f.Idom()
	loops := f.loopnest()
	loops.calculateDepths()

	changed := true
	for changed {
		changed = false

		// Reset target
		for i := range target {
			target[i] = nil
		}

		// Compute target locations (for moveable values only).
		// target location = the least common ancestor of all uses in the dominator tree.
		for _, b := range f.Blocks {
			for _, v := range b.Values {
				for i, a := range v.Args {
					if !canMove[a.ID] {
						continue
					}
					use := b
					if v.Op == OpPhi {
						use = b.Preds[i].b
					}
					if target[a.ID] == nil {
						target[a.ID] = use
					} else {
						target[a.ID] = lca.find(target[a.ID], use)
					}
				}
			}
			for _, c := range b.ControlValues() {
				if !canMove[c.ID] {
					continue
				}
				if target[c.ID] == nil {
					target[c.ID] = b
				} else {
					target[c.ID] = lca.find(target[c.ID], b)
				}
			}
		}

		// If the target location is inside a loop,
		// move the target location up to just before the loop head.
		for _, b := range f.Blocks {
			origloop := loops.b2l[b.ID]
			for _, v := range b.Values {
				t := target[v.ID]
				if t == nil {
					continue
				}
				targetloop := loops.b2l[t.ID]
				for targetloop != nil && (origloop == nil || targetloop.depth > origloop.depth) {
					t = idom[targetloop.header.ID]
					target[v.ID] = t
					targetloop = loops.b2l[t.ID]
				}
			}
		}

		// Move values to target locations.
		for _, b := range f.Blocks {
			for i := 0; i < len(b.Values); i++ {
				v := b.Values[i]
				t := target[v.ID]
				if t == nil || t == b {
					// v is not moveable, or is already in correct place.
					continue
				}
				if mem := v.MemoryArg(); mem != nil {
					if startMem[t.ID] != mem {
						// We can't move a value with a memory arg unless the target block
						// has that memory arg as its starting memory.
						continue
					}
				}
				if f.pass.debug > 0 {
					b.Func.Warnl(v.Pos, "%v is moved", v.Op)
				}
				// Move v to the block which dominates its uses.
				t.Values = append(t.Values, v)
				v.Block = t
				last := len(b.Values) - 1
				b.Values[i] = b.Values[last]
				b.Values[last] = nil
				b.Values = b.Values[:last]
				changed = true
				i--
			}
		}
	}
}

// phiTighten moves constants closer to phi users.
// This pass avoids having lots of constants live for lots of the program.
// See issue 16407.
func phiTighten(f *Func) {
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if v.Op != OpPhi {
				continue
			}
			for i, a := range v.Args {
				if !a.rematerializeable() {
					continue // not a constant we can move around
				}
				if a.Block == b.Preds[i].b {
					continue // already in the right place
				}
				// Make a copy of a, put in predecessor block.
				v.SetArg(i, a.copyInto(b.Preds[i].b))
			}
		}
	}
}

// memState computes the memory state at the beginning and end of each block of
// the function. The memory state is represented by a value of mem type.
// The returned result is stored in startMem and endMem, and endMem is nil for
// blocks with no successors (Exit,Ret,RetJmp blocks). This algorithm is not
// suitable for infinite loop blocks that do not contain any mem operations.
// For example:
// b1:
//
//	(some values)
//
// plain -> b2
// b2: <- b1 b2
// Plain -> b2
//
// Algorithm introduction:
//  1. The start memory state of a block is InitMem, a Phi node of type mem or
//     an incoming memory value.
//  2. The start memory state of a block is consistent with the end memory state
//     of its parent nodes. If the start memory state of a block is a Phi value,
//     then the end memory state of its parent nodes is consistent with the
//     corresponding argument value of the Phi node.
//  3. The algorithm first obtains the memory state of some blocks in the tree
//     in the first step. Then floods the known memory state to other nodes in
//     the second step.
func memState(f *Func, startMem, endMem []*Value) {
	// This slice contains the set of blocks that have had their startMem set but this
	// startMem value has not yet been propagated to the endMem of its predecessors
	changed := make([]*Block, 0)
	// First step, init the memory state of some blocks.
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			var mem *Value
			if v.Op == OpPhi {
				if v.Type.IsMemory() {
					mem = v
				}
			} else if v.Op == OpInitMem {
				mem = v // This is actually not needed.
			} else if a := v.MemoryArg(); a != nil && a.Block != b {
				// The only incoming memory value doesn't belong to this block.
				mem = a
			}
			if mem != nil {
				if old := startMem[b.ID]; old != nil {
					if old == mem {
						continue
					}
					f.Fatalf("func %s, startMem[%v] has different values, old %v, new %v", f.Name, b, old, mem)
				}
				startMem[b.ID] = mem
				changed = append(changed, b)
			}
		}
	}

	// Second step, floods the known memory state of some blocks to others.
	for len(changed) != 0 {
		top := changed[0]
		changed = changed[1:]
		mem := startMem[top.ID]
		for i, p := range top.Preds {
			pb := p.b
			if endMem[pb.ID] != nil {
				continue
			}
			if mem.Op == OpPhi && mem.Block == top {
				endMem[pb.ID] = mem.Args[i]
			} else {
				endMem[pb.ID] = mem
			}
			if startMem[pb.ID] == nil {
				startMem[pb.ID] = endMem[pb.ID]
				changed = append(changed, pb)
			}
		}
	}
}