summaryrefslogtreecommitdiff
path: root/i965_drv_video/shaders/h264/mc/scoreboard.asm
blob: 6fb41cf0524319ed944470d6e7409a7a7e22a406 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
/*
 * Dependency control scoreboard kernel
 * Copyright © <2010>, Intel Corporation.
 *
 * This program is licensed under the terms and conditions of the
 * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at
 * http://www.opensource.org/licenses/eclipse-1.0.php.
 *
 */
// Kernel name: scoreboard.asm
//
// Dependency control scoreboard kernel
//
//  $Revision: 16 $
//  $Date: 10/18/06 4:10p $
//

// ----------------------------------------------------
//  Main: scoreboard
// ----------------------------------------------------
// ----------------------------------------------------
//  Scoreboard structure
// ----------------------------------------------------
//
//	1 DWORD per thread
//
//	Bit 31:	"Checking" thread, i.e. an intra MB that sends "check dependency" message
//	Bit 30: "Completed" thread. This bit set by an "update" message from intra/inter MB.
//	Bits 29:28:	Must set to 0
//	Bits 27:24:	EUID
//	Bits 23:18: Reserved
//	Bits 17:16: TID
//	Bits 15:8:	X offset of current MB
//	Bits 7:5:	Reserved
//	Bits 4:0: 5 bits of available neighbor MB flags

// Kernel entry: decide between a fresh start and a restart after an interrupt
// (AS_ENABLED builds only), then fill the scoreboard with its idle pattern.
.kernel scoreboard
SCOREBOARD:

#ifdef _DEBUG
// WA for FULSIM so we'll know which kernel is being debugged
// (writes a recognizable marker value into acc0; debug builds only)
mov (1) acc0:ud 0xf0aa55a5:ud
#endif

#include "header.inc"
#include "scoreboard_header.inc"

//
//  Now, begin source code....
//

.code

#ifdef	AS_ENABLED
	// f0.1 is set when (r0.2 & TH_RES) == 0, i.e. this is NOT a restarted thread.
	// f0.1 is tested again further down, before Scoreboard_State_Init.
	and.z.f0.1	(1)	NULLREG	r0.2<0;1,0>:ud	TH_RES	// Is this a restarted thread previously interrupted?
	(f0.1) jmpi	(1)	Scoreboard_Init

	// Restart path: the included file is expected to reload the saved scoreboard
	// contents (its body is not visible here), so the fill below is skipped.
	#include "scoreboard_restore_AS.asm"

	jmpi (1)	Scoreboard_OpenGW
Scoreboard_Init:
#endif	// End AS_ENABLED

// Scoreboard must be initialized to 0xc000ffff, meaning all "completed"
// And it also avoids message mis-handling for the first MB
// (0xc000ffff has bits 31 and 30 both set, which is the 0b11 "No Message" /
// "inter complete" pattern described ahead of MB_Loop.)
// The $for loops step by 2 and each body is a compressed 16-wide dword mov,
// so CMD_SB(0) .. CMD_SB(31) are covered (CMD_SB(63) when DOUBLE_SB is defined).
    $for(0; <32; 2) {
	mov (16)	CMD_SB(%1)<1>	0xc000ffff:ud {Compr}
	}
#ifdef	DOUBLE_SB					// Scoreboard size needs to be doubled
    $for(32; <64; 2) {
	mov (16)	CMD_SB(%1)<1>	0xc000ffff:ud {Compr}
	}
#endif	// DOUBLE_SB

//----------------------------------------------------------
//	Open message gateway for the scoreboard thread
//
//	RegBase = r4 (0x04)
//	Gateway Size = selected by build flags (see the three constants below):
//		AS_ENABLED            : 0x7 = 128 GRF registers
//		DOUBLE_SB (no AS)     : 0x6 =  64 GRF registers
//		neither               : 0x5 =  32 GRF registers
//	Dispatch ID = r0.20:ub
//	Scoreboard Thread Key = 0
//----------------------------------------------------------
Scoreboard_OpenGW:
    mov (8)	MSGHDRY0<1>:ud	0x00000000:ud			// Initialize message header payload with 0

	// Send a message with register base RegBase=0x04(r4) and Gateway size = 0x6 = 64 GRF reg and Key = 0
	// 000 00000100 00000 00000 110 00000000 ==> 0000 0000 1000 0000 0000 0110 0000 0000
	// (The bit pattern above is the 0x00800600 / 64-GRF variant; the other two
	// variants differ only in the 3-bit size field.)
	// In every variant the dispatch ID byte r0.20 is added to the constant, so it
	// lands in the low byte of MSGHDRY0.5.
#ifdef	AS_ENABLED
	add (1) MSGHDRY0.5<1>:ud r0.20:ub	0x00800700:ud	// Allocate 128 GRFs for message gateway - for SIP to send notification MSG
#else
  #ifdef	DOUBLE_SB
	add (1) MSGHDRY0.5<1>:ud r0.20:ub	0x00800600:ud	// 64 GRF's for CTG-B
  #else
	add (1) MSGHDRY0.5<1>:ud r0.20:ub	0x00800500:ud	// 32 GRF's for CTG-A
  #endif	// DOUBLE_SB
#endif
	// Issue the gateway message; no response register is requested (NULLREG destination).
	send (8)	NULLREG  MSGHDRY0	null:ud    MSG_GW	OGWMSGDSC

//------------------------------------------------------------------------
//	Send Thread Spawning Message to start dispatching macroblock threads
//
//------------------------------------------------------------------------
#ifdef	AS_ENABLED
	// Reads the last register of the (non-doubled) scoreboard into acc0; the value
	// itself is never used afterwards in this file - the read is only there to
	// wait for the restore to land.
	mov (8)	acc0<1>:ud	CMD_SB(31)<8;8,1>			// Ensure scoreboard data have been completely restored
#endif	// End AS_ENABLED
    mov (8)	MSGHDRY1<1>:ud		r0<8;8,1>:ud		// Initialize message header payload with R0
    mov (1)	MSGHDRY1.4<1>:ud	0x00000400:ud		// Dispatch URB length = 1

	send (8)	NULLREG  MSGHDRY1	null:ud    TS	TSMSGDSC

	// MSGHDRY0 is cleared again because it is reused as the header of the
	// notification message sent from Notify_MSG (only dword 4 is filled in per MB).
    mov (8)	MSGHDRY0<1>:ud		0x00000000:ud		// Initialize message header payload with 0

//------------------------------------------------------------------------
//	Scoreboard control data initialization
//
//	Fresh start: falls into / jumps to Scoreboard_State_Init and sets up the
//	wavefront state, the message-handler offset table and the address registers.
//	Restart (AS_ENABLED only): reloads the saved control registers and address
//	registers, then either terminates (no MB left), resumes at Wavefront_Walk,
//	or re-runs Scoreboard_State_Init.
//------------------------------------------------------------------------
#ifdef	AS_ENABLED
	or	(1)	cr0.1:ud	cr0.1:ud	AS_INT_EN		// Enable interrupt
	// f0.1 still carries the TH_RES test done at kernel entry.
	// NOTE(review): this assumes nothing executed in between (including
	// scoreboard_restore_AS.asm on the restart path) rewrites f0.1 - confirm.
	(f0.1) jmpi	(1)	Scoreboard_State_Init	// Jump if not restarted thread

	// Restore scoreboard kernel control data to r1 - r3
	// (m4.1 / m4.2 carry the same two values that the save sequence under
	// No_Message writes to MSGHDR.1 / MSGHDR.2.)
    mov (1)	m4.1:ud	64:ud				// Starting r1
    mov (1)	m4.2:ud	0x0002001f:ud		// for 3 registers
    send (8)	r1.0<1>:ud	m4	null:ud	DWBRMSGDSC_SC+0x00030000+AS_SAVE	// Restore r1 - r3
	mov	(8)	a0.0<1>:uw	AR_SAVE<8;8,1>:uw				// Restore all address registers

// Check whether all MBs have been decoded
	// f0.0 is set when TotalMB == 0; in that case fall through to END_THREAD.
	cmp.e.f0.0 (1)	NULLREG	TotalMB<0;1,0>:w	0:w	// Set "Last MB" flag
	(-f0.0) jmpi (1)	Before_First_MB
    END_THREAD

// Check whether it is before the first MB
// If the restored AVAILFLAGD already equals the value written by
// Scoreboard_State_Init, the state is taken as initialized and the walk is
// resumed directly; otherwise the state is (re)initialized below.
Before_First_MB:
	cmp.e.f0.0 (1)	NULLREG	AVAILFLAGD<1>:ud	0x08020401:ud	// in ACBD order
	(f0.0) jmpi (1)	Wavefront_Walk

Scoreboard_State_Init:
#endif	// End AS_ENABLED
	// Writes HEIGHTINMB_1 to two words, stride 2, starting at WFLen_B.
	mov (2) WFLen_B<2>:w		HEIGHTINMB_1<0;1,0>:w
	// One flag byte per neighbor: bytes 0..3 are 0x01, 0x04, 0x02, 0x08.
	mov (1)	AVAILFLAGD<1>:ud	0x08020401:ud	// in ACBD order
	// Message-handler offsets, all relative to No_Message_IP. MB_Loop jumps
	// through one of them, selected by the two MSBs of the scoreboard entry.
	// NOTE(review): presumably the CASExxPTR dwords are laid out consecutively at
	// the location MB_Loop indexes with PMSGSEL - confirm in scoreboard_header.inc.
	mov	(1) CASE00PTR<1>:ud	Notify_MSG_IP-No_Message_IP:ud		// Inter kernel starts
	mov	(1) CASE10PTR<1>:ud	Dependency_Check_IP-No_Message_IP:ud	// Intra kernel starts
#ifdef	AS_ENABLED
	// Offset 0: the 0b11 case lands on No_Message itself, where the AS polling code lives.
	mov	(1) CASE11PTR<1>:ud	0:ud		// No message
#else
	// Without AS there is no code at No_Message, so the 0b11 case goes straight back to MB_Loop.
	mov	(1) CASE11PTR<1>:ud	MB_Loop_IP-No_Message_IP:ud		// No message
#endif	// End AS_ENABLED
	mov	(1) StartXD<1>:ud	0:ud
	// Four signed bytes (0, -1, -1, +1 from byte 0 up); added to the four words
	// starting at WFLen when a wavefront ends on the picture right boundary
	// (see the "Start new wavefront" code after Update_CurMB).
	mov	(1) NewWFOffsetD<1>:ud	0x01ffff00:ud

	// WFStart(0) = 0, WFStart(0,1..3) = 0xffff
	mov (4) WFStart(0)<1>	0xffff:w
	mov (1) WFStart(0)<1>	0:w

	mov	(8)	a0.0<1>:uw	0x0:uw						// Initialize all pointers to 0

//------------------------------------------------------------------------
//	Scoreboard message handling loop
//
//	Executed once per wavefront: picks the wavefront length, builds the MB index
//	group from WFStart, advances WFStart, and converts the indices into wrapped
//	byte offsets (DEPPTR / DEPPTRL) into the scoreboard.
//------------------------------------------------------------------------
//
Scoreboard_Loop:
	// Calculate current wavefront length
	// The add loads StartX into acc0 (and sets f0.1 when StartX >= 0); the mac then
	// evaluates acc0 + WFLenY * -2, setting f0.0 when the result is > 0.
	add.ge.f0.1 (16)	acc0<1>:w	StartX<0;1,0>:w	0:w		// Used for x>2*y check
	mac.g.f0.0 (16)	NULLREGW	WFLenY<0;1,0>:w	-2:w		// X - 2*Y > 0 ??
	(f0.0) mov (1)	WFLen<1>:w	WFLenY<0;1,0>:w				// Use smaller vertical wavefront length
	(-f0.0) asr.sat (1)	WFLen<1>:uw	StartX<0;1,0>:w	1:w		// Horizontal wavefront length is smaller

	// Initialize 5-MB group
	// MBINDEX(0,0..1) are copied from WFStart(0,0..1); MBINDEX(0,2..5) are taken
	// from four consecutive WFStart words minus 1.
#ifdef ONE_MB_WA
	// f0.1 (StartX >= 0, from the add above) selects the WFStart offset used for
	// the neighbor indices; when it is clear StartX is also reset to 0.
	mov (2) MBINDEX(0)<1>		WFStart(0)<2;2,1>
	(f0.1) add (4) MBINDEX(0,2)<1>		WFStart(0,1)<4;4,1>	-1:w
	(-f0.1) add (4) MBINDEX(0,2)<1>		WFStart(0,0)<4;4,1>	-1:w
	(-f0.1) mov (1) StartX<1>:w		0:w					// WA for 1-MB wide pictures
#else
	// The two instructions write disjoint parts of MBINDEX(0); the
	// {NoDDClr}/{NoDDChk} pair must stay adjacent and in this order.
	mov (2) MBINDEX(0)<1>		WFStart(0)<2;2,1>			{NoDDClr}
	add (4) MBINDEX(0,2)<1>		WFStart(0,1)<4;4,1>	-1:w	{NoDDChk}
#endif

	// Update WFStart
	mov (8)	acc0<1>:w	WFStart(0)<0;1,0>					// Move WFStart(0) to acc0 to remove dependency later
	mov (4)	WFStart(0,1)<1>	WFStart(0)<4;4,1>	{NoDDClr}	// Shift WFStart(0:2) to WFStart(1:3)
	add (1)	WFStart(0)<1>	acc0.0<0;1,0>:w		WFLen<0;1,0>:w	{NoDDChk}	// WFStart(0) = WFStart(0) + WFLen

	// The two `and`s read acc0 rather than MBINDEX: the code relies on the
	// accumulator holding the result of the mul directly above, so no instruction
	// may be inserted between the mul and the ands.
	mul (8)	MBINDEX(0)<1>	MBINDEX(0)<8;8,1>	4:w	// Adjust MB order # to be DWORD aligned
	and (1)	DEPPTR<1>:uw	acc0<0;1,0>:w	SB_MASK*4:uw {NoDDClr}	// Wrap around scoreboard entries for current MB
	and (4)	DEPPTRL<1>:uw	acc0.1<4;4,1>:w	SB_MASK*4:uw {NoDDChk}	// Wrap around scoreboard entries for neighbor MBs

// Wait for a notification, then dispatch on the scoreboard entry addressed by
// CMDPTR. MB_Loop is also re-entered directly (without waiting) from the
// "no message" case.
Wavefront_Walk:
	wait	n0:ud
//	Check for combined "checking" or "completed" threads in forwarded message
//	2 MSB of scoreboard message indicate:
//	0b00 = "inter start" message
//	0b10 = "intra start" message
//	0b11 = "No Message" or "inter complete" message
//	0b01 = Reserved (should never occur)
//
//	The upper word of the entry shifted right by 12 leaves bits 31:28; since
//	bits 29:28 are defined as 0, the result is (2 MSB) * 4, i.e. a byte offset
//	of 0, 4, 8 or 12 used to pick the handler offset for the jmpi below.
//
MB_Loop:
	shr	(1)	PMSGSEL<1>:uw	r[CMDPTR,CMD_SB_REG_OFF*GRFWIB+2]<0;1,0>:uw	12:w					// DWORD aligned pointer to message handler
	// Low byte of the entry (neighbor availability flags) is tested against the
	// four AVAILFLAG bytes, one per channel.
	and.nz.f0.1 (4) NULLREG	r[CMDPTR,CMD_SB_REG_OFF*GRFWIB]<0;1,0>:ub	AVAILFLAG<4;4,1>:ub		// f0.1 4 LSB will have the available flags in ACBD order
	mov (1) MSGHDRY0.4<1>:ud	r[CMDPTR,CMD_SB_REG_OFF*GRFWIB]<0;1,0>:ud		// Copy MB thread info from scoreboard
	// Indirect relative jump: the offset is loaded from the dword selected by
	// PMSGSEL. The stored offsets are differences against No_Message_IP (see the
	// CASExxPTR setup), so this jmpi must remain the instruction directly ahead
	// of No_Message.
	jmpi (1)	r[PMSGSEL, INLINE_REG_OFF*GRFWIB+16]<0;1,0>:d

//	Now determine whether this is "inter done" or "no message"
//	through checking debug_counter
//	NOTE(review): the code below polls the notification register n0 and the
//	interrupt bit in cr0.1; no "debug_counter" is referenced - the wording above
//	looks stale.
//
//	Without AS_ENABLED this label carries no instructions of its own: it only
//	anchors the handler offsets (No_Message_IP), and the 0b11 case is routed to
//	MB_Loop instead.
//
No_Message:
#ifdef	AS_ENABLED
	// f0.1 set when n0 == 0 (nothing pending); f0.0 set when the AS_INT bit of
	// cr0.1 is clear (no interrupt).
	cmp.z.f0.1 (1)	NULLREG	n0:ud	0	// Are all messages handled?
	and.z.f0.0 (1)	NULLREG	cr0.1:ud	AS_INT	// Poll interrupt bit
	(-f0.1) jmpi (1)	MB_Loop			// Continue polling the remaining message from current thread

// All messages have been handled
	(f0.0) jmpi (1) Wavefront_Walk		// No interrupt occurs. Wait for next one

// Interrupt has been detected
// Save all contents and terminate the scoreboard
//
	#include "scoreboard_save_AS.asm"

	// Save scoreboard control data as well
	//
	mov (8) AR_SAVE<1>:uw	a0.0<8;8,1>:uw		// All address registers needs to be saved
	// MSGHDR.1 / MSGHDR.2 use the same two values as m4.1 / m4.2 in the restore
	// sequence, so save and restore address the same location.
    mov (1)	MSGHDR.1:ud		64:ud
    mov (1)	MSGHDR.2:ud		0x0002001f:ud	// for 3 registers
	// Copies CMD_SB(-3), CMD_SB(-2), CMD_SB(-1) - the three registers directly
	// below the scoreboard - into the message payload.
	$for(0; <3; 1) {
	mov (8)	MSGPAYLOADD(%1)<1>	CMD_SB(%1-3)REGION(8,1)
	}
    send (8)	NULLREG	MSGHDR	null:ud	DWBWMSGDSC+0x00300000+AS_SAVE	// Save r1 - r3

	// End of thread; execution does not continue into Dependency_Check.
	send (8) NULLREG MSGHDR r0:ud EOTMSGDSC+TH_INT	// Terminate with "Thread Interrupted" bit set
#endif	// End AS_ENABLED

// Entered through the handler table for the 0b10 ("intra start") case, with
// f0.1 holding the neighbor-availability bits computed in MB_Loop.
Dependency_Check:
//	Current thread is "checking" but not "completed" (0b10 case).
//	Check for dependency clear using all availability bits
//
//	Only channels still set in f0.1 are evaluated. For those, byte 3 (bits 31:24)
//	of the neighbor's scoreboard entry, addressed through DEPPTRL, is ANDed with
//	DONEFLAG and the flag is set when the result is zero, i.e. the neighbor is not
//	yet marked done. The loop spins until no channel remains set.
//	NOTE(review): relies on predicated-off channels leaving their f0.1 bits
//	unchanged (already 0) - confirm against the EU flag-update rules.
//
	(f0.1) and.z.f0.1 (4)	NULLREG	r[DEPPTRL,CMD_SB_REG_OFF*GRFWIB+3]<1,0>:ub	DONEFLAG:uw	// f0.1 4 LSB contains dependency clear
	(f0.1.any4h) jmpi (1)	Dependency_Check		// Dependency not clear, keep polling..

//	"Checking" thread and dependency cleared, send a message to let the thread go
//
//	Also the direct target of the 0b00 ("inter start") case, which needs no
//	dependency check. MSGHDRY0.4 was loaded in MB_Loop with the MB thread's
//	scoreboard entry.
//
Notify_MSG:
	send (8)	NULLREG  MSGHDRY0	null:ud    MSG_GW	FWDMSGDSC+NOTIFYMSG

//	Current macroblock has been serviced. Update to next macroblock in special zig-zag order
//
//	Decrements the remaining-MB and wavefront counters, advances all MB indices
//	by one scoreboard entry, and either continues the current wavefront, starts
//	a new one, or ends the thread when no MB is left.
//
Update_CurMB:
#if 0
	// Disabled two-instruction form, kept for reference; the #else branch does
	// both decrements in one instruction.
	add.ge.f0.0 (1)	WFLen<1>:w	WFLen<0;1,0>:w	-1:w 			// Set "End of wavefront" flag
	add (1)	TotalMB<1>:w	TotalMB<0;1,0>:w	-1:w 			// Decrement "TotalMB"
#else
	// Decrements two words at stride 2 starting at TotalMB; f0.0 gets one bit per
	// word, set when the decremented value is >= 0.
	// NOTE(review): presumably the second word is WFLen (per the disabled form
	// above) - confirm the register layout in scoreboard_header.inc.
	add.ge.f0.0 (2)	TotalMB<2>:w	TotalMB<4;2,2>:w	-1:w 	// Set "End of wavefront" flag and decrement "TotalMB"
#endif
	// As in Scoreboard_Loop, the two `and`s read acc0 and rely on it holding the
	// result of the add directly above; keep these three instructions together.
	add (8)	MBINDEX(0)<1>	MBINDEX(0)<8;8,1>	4:w				// Increment MB indices
	and (1)	DEPPTR<1>:uw	acc0<0;1,0>:w	SB_MASK*4:uw {NoDDClr}	// Wrap around 256 scoreboard entries for current MB
	and (4)	DEPPTRL<1>:uw	acc0.1<4;4,1>:w	SB_MASK*4:uw {NoDDChk}	// Wrap around 256 scoreboard entries for neighbor MBs
	// 16-wide compare of two scalars: every f0.1 bit gets the same value, which
	// the 4-wide predicated adds below depend on.
	cmp.e.f0.1 (16)	NULLREGW  StartX<0;1,0>:uw	WIDTHINMB_1<0;1,0>:uw	// Set "on picture right boundary" flag
#if 0
	(f0.0) jmpi (1) Wavefront_Walk			// Continue wavefront walking
#else
	// Continue only when both decremented counters are still >= 0.
	(f0.0.all2h) jmpi (1) Wavefront_Walk	// Continue wavefront walking
#endif

//	Start new wavefront
//
//	On the picture right boundary (f0.1 set): the four words starting at WFLen
//	are adjusted by the signed bytes of NewWFOffset and WFStart(0,0..3) are each
//	incremented. Otherwise (f0.1 clear): StartX and WFStart(0) move one MB right.
//
	cmp.e.f0.0 (1)	NULLREG	TotalMB<0;1,0>:w		0:w	// Set "Last MB" flag
	(f0.1) add (4)	WFLen<1>:w	WFLen<4;4,1>:w		NewWFOffset<4;4,1>:b
	(f0.1) add (4)	WFStart(0)<1>	WFStart(0)<4;4,1>	1:w
	(-f0.1) add (1)	StartX<1>:w		StartX<0;1,0>:w	1:w		// Move to right MB
	(-f0.1) add (1)	WFStart(0)<1>	WFStart(0)<0;1,0>	1:w

	(-f0.0) jmpi (1)	Scoreboard_Loop				// Not last MB, start new wavefront walking

// All MBs have decoded. Terminate the thread now
//
    END_THREAD

// When this file is pulled into a combined kernel, the enclosing file is
// expected to close the code section and kernel itself.
#if !defined(COMBINED_KERNEL)		// For standalone kernel only
.end_code

.end_kernel
#endif

// End of scoreboard