summaryrefslogtreecommitdiff
path: root/i965_drv_video/shaders/h264/ildb/AVC_ILDB_Root_Y.asm
blob: 9b9b1a8952a3d701248a25cd922d25a0f8c235b1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
/*
 * Copyright © <2010>, Intel Corporation.
 *
 * This program is licensed under the terms and conditions of the
 * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at
 * http://www.opensource.org/licenses/eclipse-1.0.php.
 *
 */
// Kernel name: AVC_ILDB_Root_Y.asm
//
//  Root kernel serves as a scheduler for child threads
//
//  $Revision: 1 $
//  $Date: 10/19/06 5:06p $
//

// ----------------------------------------------------
//  AVC_ILDB_ROOT_Y
// ----------------------------------------------------
#define AVC_ILDB

.kernel AVC_ILDB_ROOT_Y
#if defined(COMBINED_KERNEL)
ILDB_LABEL(AVC_ILDB_ROOT_Y):
#endif

#include "SetupVPKernel.asm"
#include "AVC_ILDB.inc"

/////////////////////////////////////////////////////////////////////////////////////
#if defined(_DEBUG) 

// Init URB space for running on RTL.  It satisfies reading an unwritten URB entries.  
// Will remove it for production release.

mov (8) m1:ud 		0x11111111:ud
mov (8) m2:ud 		0x22222222:ud 
mov (8) m3:ud 		0x33333333:ud
mov (8) m4:ud 		0x44444444:ud 

mov (1)	Temp1_W:w	0:w

ILDB_LABEL(ILDB_INIT_URB_Y):
//mul (1)	Temp2_W:w				Temp1_W:w		4:w		// URBOffset
//shl (1) URBWriteMsgDescLow:uw 	Temp2_W:w		4:w		// Msg descriptor: URB write dest offset (9:4)
//mov (1) URBWriteMsgDescHigh:uw 	0x0650:uw				// Msg descriptor: URB write 5 MRFs (m0 - m4)

//mul (1)	URBOffset:uw			Temp1_W:uw		4:w		// Each thread uses 4 URB entries (1 r0 + 1 inline + 2 data)
mul (1)	URBOffset:uw			Temp1_W:uw		2:w		// Each thread uses 2 URB entries (1 r0 + 1 inline)
mov (1) URBWriteMsgDesc:ud		MSG_LEN(2)+URBWMSGDSC:ud			// Msg descriptor: URB write msg length = 3
#include "writeURB.asm"

add		(1)		Temp1_W:w	Temp1_W:w	1:w				// Increase block count
cmp.l.f0.0 (1) 	null		Temp1_W:w	MBsCntY:w		// Check the block count limit
(f0.0) jmpi		ILDB_LABEL(ILDB_INIT_URB_Y)							// Loop back

mov		(1)		EntrySignature:w			0xFFF0:w

#endif
/////////////////////////////////////////////////////////////////////////////////////


// Set global variable
mov (32) 	ChildParam:uw			0:uw								// Reset local variables, 2 GRFs
//mul	(1)	 	TotalBlocks:w			MBsCntX:w		MBsCntY:w			// Total # of blocks
//add	(1)	 	GatewayApertureE:w		MBsCntY:w 		GatewayApertureB:w	// Aperture End = aperture Head + BlockCntY

// 4 URB entries for Y:
// Entry 0 - Child thread R0Hdr
// Entry 1 - input parameter to child kernel (child r1)
// Entry 2 - Prev MB data Y 4x16, col 1 and col 0
// Entry 3 - Prev MB data Y 4x16, col 3 and col 2

#undef 		URB_ENTRIES_PER_MB
#define 	URB_ENTRIES_PER_MB		4

// URB_ENTRIES_PER_MB in differnt form, the final desired format is (URB_ENTRIES_PER_MB-1) << 10
mov (1) 	URB_EntriesPerMB_2:w		URB_ENTRIES_PER_MB-1:w
shl (1) 	URB_EntriesPerMB_2:w		URB_EntriesPerMB_2:w	10:w

shr (1)		ThreadLimit:w		MaxThreads:w	1:w			// Initial luma thread limit to 50%
mul	(1)	 	TotalBlocks:w		MBsCntX:w		MBsCntY:w	// MBs to be processed count down from TotalBlocks

//***** Init CT_R0Hdr fields that are common to all threads *************************
mov (8)		CT_R0Hdr.0:ud		r0.0<8;8,1>:ud				// Init to root R0 header
mov (1) 	CT_R0Hdr.7:ud		r0.6:ud						// Copy Parent Thread Cnt; JJ did the change on 06/20/2006
mov (1) 	CT_R0Hdr.31:ub		0:w							// Reset the highest byte
mov (1) 	CT_R0Hdr.3:ud 		0x00000000	 
mov (1) 	CT_R0Hdr.6:uw 		sr0.0:uw					// sr0.0: state reg contains general thread states, e.g. EUID/TID.

//***** Init ChildParam fields that are common to all threads ***********************
mov (8) 	ChildParam<1>:ud	RootParam<8;8,1>:ud		// Copy all root parameters
mov (4)		CurCol<1>:w			0:w						// Reset CurCol, CurRow, 
add	(2)		LastCol<1>:w		MBsCntX<2;2,1>:w		-1:w	// Get LastCol and LastRow
	
mov (1) 	URBWriteMsgDesc:ud		MSG_LEN(2)+URBWMSGDSC:ud

//===================================================================================

#include "AVC_ILDB_OpenGateway.asm"		// Open gateway for receiving notification 

#if defined(DEV_CL)	
	mov	(1)		URBOffset:uw		240:uw	// Use chroma URB offset to spawn chroma root
#else
	mov	(1)		URBOffset:uw		320:uw	// Use chroma URB offset to spawn chroma root
#endif

#include "AVC_ILDB_SpawnChromaRoot.asm"	// Spawn chroma root

mov	(1)		URBOffset:uw		0:uw	// Use luma URB offset to spawn luma child 
mov	(1)		ChildThreadsID:uw	2:uw	// Starting ChildThreadsID for luma child threads

#include "AVC_ILDB_Dep_Check.asm"  		// Check dependency and spawn all luma child threads in parallel with chroma root


// Wait for UV root thread to finish
ILDB_LABEL(WAIT_FOR_UV):
cmp.l.f0.0 (1) null:w	ThreadLimit:w		MaxThreads:w
(f0.0) 	jmpi 	ILDB_LABEL(WAIT_FOR_UV)


#include "AVC_ILDB_CloseGateway.asm"	// Close root thread gateway 

END_THREAD								// End of root thread

#if !defined(COMBINED_KERNEL)		// For standalone kernel only
.end_code

.end_kernel
#endif