summaryrefslogtreecommitdiff
path: root/i965_drv_video/shaders/post_processing/Common/common.inc
blob: a0a66a0084533cd2137801ee438296d458a5c523 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
/*
 * All Video Processing kernels 
 * Copyright © <2010>, Intel Corporation.
 *
 * This program is licensed under the terms and conditions of the
 * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at
 * http://www.opensource.org/licenses/eclipse-1.0.php.
 *
 */

#ifndef COMMON_INC
#define COMMON_INC

// Module name: common.inc
//
// Common header file for all Video-Processing kernels
//
// The COMMON_INC include guard opened here is closed by the last #endif of
// this file, so including the header more than once is harmless.

// Assembler-wide defaults: presumably applied to any instruction/operand that
// does not spell out its own execution size or register type.
.default_execution_size (16)
.default_register_type  :ub

// Register budget. 80 matches the highest GRF used by the region definitions
// in this file (r79).
// NOTE(review): payload count 4 presumably covers the r1-r4 static
// parameters -- confirm against the assembler documentation.
.reg_count_total        80
.reg_count_payload      4


//========== Common constants ==========

// Bit position constants 
// BITn is a mask with only bit n set (BITn == 1 << n) for n = 0..31.
// The literals are zero-padded to 2, 4 or 8 hex digits depending on n.
#define BIT0    0x01
#define BIT1    0x02
#define BIT2    0x04
#define BIT3    0x08
#define BIT4    0x10
#define BIT5    0x20
#define BIT6    0x40
#define BIT7    0x80
#define BIT8    0x0100
#define BIT9    0x0200
#define BIT10   0x0400
#define BIT11   0x0800
#define BIT12   0x1000
#define BIT13   0x2000
#define BIT14   0x4000
#define BIT15   0x8000
#define BIT16   0x00010000
#define BIT17   0x00020000
#define BIT18   0x00040000
#define BIT19   0x00080000
#define BIT20   0x00100000
#define BIT21   0x00200000
#define BIT22   0x00400000
#define BIT23   0x00800000
#define BIT24   0x01000000
#define BIT25   0x02000000
#define BIT26   0x04000000
#define BIT27   0x08000000
#define BIT28   0x10000000
#define BIT29   0x20000000
#define BIT30   0x40000000
#define BIT31   0x80000000

// Size of one GRF register in three units (32 bytes == 16 words == 8 dwords).
#define nGRFWIB             32      // GRF register width in byte
#define nGRFWIW             16      // GRF register width in word
#define nGRFWID             8       // GRF register width in dword

// Field parity selectors
#define nTOP_FIELD          0
#define nBOTTOM_FIELD       1

// Temporal frame selectors
#define nPREVIOUS_FRAME     0       // Previous frame
#define nCURRENT_FRAME      1       // Current frame
#define nNEXT_FRAME         2       // Next frame

// Placeholder for platform-specific DI kernel settings: both branches hold
// only a comment, so this conditional currently contributes nothing.
#ifdef GT
// GT DI Kernel
#else // ILK
// ILK DI Kernel
#endif

//===================================

//========== Macros ==========
// REGION(Width,HStride) expands to the region text <Width*HStride;Width,HStride>.
// The preprocessor only substitutes text, so the product Width*HStride is
// left in the output as an expression (e.g. REGION(16,1) -> <16*1;16,1>).
#define REGION(Width,HStride) <Width*HStride;Width,HStride> // Region definition when ExecSize = Width

// REG(r,n)    -> r<n>.0    e.g. REG(r,nTOP_Y) -> r10.0
// REG2(r,n,s) -> r<n>.<s>
// The _REG/__REG (and _REG2/__REG2) layers exist so that macro arguments such
// as nTOP_Y are fully expanded to their numeric value before ## pastes them
// onto the register-file prefix. RegFile(a) is an identity wrapper around
// the register-file prefix.
// NOTE(review): in __REG2 the "." is token-pasted onto s; with a standard C
// preprocessor that is only well-formed when s starts with a digit -- confirm
// callers pass numeric sub-register indices.
#define RegFile(a) a
#define REG(r,n) _REG(RegFile(r),n)
#define _REG(r,n) __REG(r,n)
#define __REG(r,n) r##n.0
#define REG2(r,n,s) _REG2(RegFile(r),n,s)
#define _REG2(r,n,s) __REG2(r,n,s)
#define __REG2(r,n,s) r##n.##s

// Null-register destination operands with unit stride:
// dNULLREG is typed :d (dword), wNULLREG is typed :w (word).
#define dNULLREG     null<1>:d
#define wNULLREG     null<1>:w

// KERNEL_ID(kernel_ID): expands to a mov of the immediate <kernel_ID>:ud into
// the null register -- presumably a marker instruction that tags the kernel
// with an identifier without touching any real register.
// The destination is dNULLREG because this header defines no plain NULLREG;
// the previous spelling left an unresolved NULLREG token in the expansion
// unless some other file happened to define it.
#define KERNEL_ID(kernel_ID)    mov dNULLREG kernel_ID:ud


// Instruction-option placeholders. All three are currently defined empty, so
// appending them to an instruction adds nothing. The commented-out
// definitions below are the variants that would expand to the actual
// { NoDDClr } / { NoDDChk } option lists.
#define NODDCLR 			
#define NODDCLR_NODDCHK 	
#define NODDCHK			    

//#define NODDCLR 			{ NoDDClr }
//#define NODDCLR_NODDCHK 	{ NoDDClr, NoDDChk }
//#define NODDCHK				{ NoDDChk } 


//========== Defines ====================


//========== GRF partition ==========
// r0 header            :   r0          (1 GRF)
// Static parameters    :   r1 - r4     (4 GRFs)
// Inline parameters    :   r5 - r6     (2 GRFs)
// MSGSRC               :   r8          (1 GRF)
// Top IO region        :   r10 - r33   (24 GRFs, 8 for each component Y,U,V 16X8:w)
// Free space           :   r34 - r55   (22 GRFs)
// Bottom IO region     :   r56 - r79   (24 GRFs, 8 for each component Y,U,V 16X8:w)
//===================================


//========== Static Parameters ==========
// Naming convention used throughout: the prefix (f, w, uw, ub, ud, d) names
// the type the field is meant to be accessed with, and the sub-register
// number is counted in units of that type. The same DWORD therefore shows up
// under different numbers, e.g. DWORD 3 is r1.3 as :f and r1.6 as :w.
// Several names deliberately alias the same storage (e.g. fVIDEO_STEP_Y and
// the ubSTMM_* bytes both live in DWORD 6) -- presumably because different
// kernels interpret the parameter block differently.
// r1
#define fPROCAMP_C0             r1.0    // DWORD 0, Procamp constant C0 in :f
#define wPROCAMP_C0             r1.0    // DWORD 0, Procamp constant C0 in :w
#define NUMBER_0002							r1.1		// DWORD 0, 0x0002 used in procamp for GT (word 1 of the DWORD)
#define udCP_MessageFormat      r1.0    // DWORD 0, bits 2:3 of DWORD. (CE)
#define udCP_StatePointer       r1.0    // DWORD 0, bits 31:5 of DWORD.(CE)

#define ubSRC_CF_OFFSET         r1.4    // DWORD 1, byte 0-2. SRC packed color format YUV offset in :ub

#define ubDEST_RGB_FORMAT        r1.8    // DWORD 2, byte 0. Dest RGB color format (0:ARGB FF:XRGB)
#define ubDEST_CF_OFFSET        r1.8    // DWORD 2, byte 0-2. Dest packed color format YUV offset in :ub

#define fPROCAMP_C1             r1.3    // DWORD 3, Procamp constant C1 in :f
#define wPROCAMP_C1             r1.6    // DWORD 3, Procamp constant C1 in :w
#define NUMBER_0100							r1.7		// DWORD 3, 0x0100 used in procamp for GT (word 1 of the DWORD)

#define fPROCAMP_C2             r1.4    // DWORD 4, Procamp constant C2 in :f
#define wPROCAMP_C2             r1.8    // DWORD 4, Procamp constant C2 in :w

#define uwSPITCH_DIV2           r1.10   // DWORD 5, byte 0-1. statistics surface pitch divided by 2

#define fVIDEO_STEP_Y           r1.6    // DWORD 6, :f, AVS normalized reciprocal of Y Scaling factor
#define ubSTMM_SHIFT            r1.24   // DWORD 6, byte 0. Amount of right shift for the DI blending equation
#define ubSTMM_MIN              r1.25   // DWORD 6, byte 1. Min STMM for DI blending equation
#define ubSTMM_MAX              r1.26   // DWORD 6, byte 2. Max STMM for DI blending equation
#define ubTFLD_FIRST            r1.27   // DWORD 6, byte 3. Field parity order

// NOTE(review): the macro is named ..._C5 but the original comment calls it
// "constant C3" -- confirm which numbering is authoritative.
#define fPROCAMP_C5             r1.7    // DWORD 7, Procamp constant C3 in :f
#define wPROCAMP_C5             r1.14   // DWORD 7, Procamp constant C3 in :w

// r2
// Static parameters held in r2: two Procamp constants interleaved with the
// WG+CSC constants C5, C8 and C9. f* names index the register in dwords,
// w* names in words (word index == 2 * dword index).
// NOTE(review): the macro names (_C3, _C4) and the constant numbers in the
// original comments (C4, C5) disagree -- confirm which is authoritative.
#define fPROCAMP_C3             r2.0    // DWORD 0, Procamp constant C4 in :f
#define wPROCAMP_C3             r2.0    // DWORD 0, Procamp constant C4 in :w
                    
#define fCSC_C5					r2.2	// DWORD 2. WG+CSC constant C5
#define wCSC_C5					r2.4	// DWORD 2. WG+CSC constant C5

#define fPROCAMP_C4             r2.3    // DWORD 3, Procamp constant C5 in :f
#define wPROCAMP_C4             r2.6    // DWORD 3, Procamp constant C5 in :w

#define fCSC_C8					r2.4	// DWORD 4. WG+CSC constant C8
#define wCSC_C8					r2.8	// DWORD 4. WG+CSC constant C8
#define fCSC_C9					r2.7	// DWORD 7. WG+CSC constant C9
#define wCSC_C9					r2.14	// DWORD 7. WG+CSC constant C9

// r3
// Static parameters held in r3: WG+CSC constants C0-C4, blending scale
// steps, source origin offsets and the colorfill pixel.
// Aliases to be aware of: fSCALING_STEP_RATIO and fALPHA_STEP_X both name
// DWORD 1; dCOLOR_PIXEL and fCSC_C2 both name DWORD 6.
#define fCSC_C0					r3.0	// DWORD 0. WG+CSC constant C0
#define wCSC_C0					r3.0	// DWORD 0. WG+CSC constant C0

#define fSCALING_STEP_RATIO     r3.1    // DWORD 1, = Alpha_X_Scaling_Step / Video_X_scaling_Step :f (blending)
#define fALPHA_STEP_X           r3.1    // DWORD 1, = 1/Scale X, 0.5 = 2x, in :f (blending)

#define fALPHA_STEP_Y           r3.2    // DWORD 2, = 1/Scale Y, in :f

#define fCSC_C4					r3.3	// DWORD 3. WG+CSC constant C4
#define wCSC_C4					r3.6	// DWORD 3. WG+CSC constant C4
#define fCSC_C1					r3.4	// DWORD 4. WG+CSC constant C1
#define wCSC_C1					r3.8	// DWORD 4. WG+CSC constant C1

#define wSRC_H_ORI_OFFSET       r3.10   // DWORD 5, bytes 0,1 :w
#define wSRC_V_ORI_OFFSET       r3.11   // DWORD 5, bytes 2,3 :w

#define dCOLOR_PIXEL            r3.6    // DWORD 6. Color pixel for Colorfill

#define fCSC_C2					r3.6	// DWORD 6. WG+CSC constant C2
#define wCSC_C2					r3.12	// DWORD 6. WG+CSC constant C2
#define fCSC_C3					r3.7	// DWORD 7. WG+CSC constant C3
#define wCSC_C3					r3.14	// DWORD 7. WG+CSC constant C3

// r4
// Static parameters held in r4: WG+CSC constants C6, C7, C10, C11 plus the
// multi-block / frame-origin fields. Word-typed names (w*, uw*) are indexed
// in words, f* names in dwords, so "r4.3", "r4.4" and "r4.5" below refer to
// different storage depending on the prefix.
#define fCSC_C6					r4.0	// DWORD 0. WG+CSC constant C6
#define wCSC_C6					r4.0	// DWORD 0. WG+CSC constant C6

#define wFRAME_ENDX             r4.2    // DWORD 1, word 0. Horizontal end = Origin+Width (in pixels)(for multiple blocks)
#define wNUM_BLKS               r4.3    // DWORD 1, word 1. Number of blocks to process (for multiple blocks)

#define wCOPY_ORIX              r4.5    // DWORD 2, word 1. A copy of X origin (for multiple blocks)
#define uwNLAS_ENABLE           r4.4    // DWORD 2, bit 15, NLAS enable bit

#define fCSC_C7					r4.3	// DWORD 3. WG+CSC constant C7
#define wCSC_C7					r4.6	// DWORD 3. WG+CSC constant C7
#define fCSC_C10				r4.4	// DWORD 4. WG+CSC constant C10
#define wCSC_C10				r4.8	// DWORD 4. WG+CSC constant C10

#define fFRAME_VID_ORIX         r4.5    // DWORD 5, Frame horizontal origin normalized for scale kernel

#define fFRAME_ALPHA_ORIX       r4.6    // DWORD 6. Normalized alpha horiz origin for the frame

#define fCSC_C11				r4.7	// DWORD 7. WG+CSC constant C11
#define wCSC_C11				r4.14	// DWORD 7. WG+CSC constant C11

//========================================

//========================================

//========== Inline parameters ===========
// Per-block parameters held in r5 and r6. As with the static parameters, the
// sub-register number is counted in units of the prefix type: wORIY (r5.1 as
// a word) is the upper half of DWORD 0, whereas fSRC_VID_H_ORI (r5.1 as a
// float) is DWORD 1.
// udBLOCK_MASK names the whole of DWORD 6; uwBLOCK_MASK_H, ubBLOCK_MASK_V and
// ubNUM_BLKS name its individual word/bytes.
// r5
#define wORIX                   r5.0    // DWORD 0, byte 0-1. :w, Destination Block Horizontal Origin in pel
#define wORIY                   r5.1    // DWORD 0, byte 2-3. :w, Destination Block Vertical   Origin in pel

#define fSRC_VID_H_ORI          r5.1    // DWORD 1, :f, SRC Y horizontal origin normalized for scale kernel

#define fSRC_VID_V_ORI          r5.2    // DWORD 2, :f, SRC Y vertical origin normalized for scale kernel

#define fSRC_ALPHA_H_ORI        r5.3    // DWORD 3, :f, Normalized alpha horizontal origin

#define fSRC_ALPHA_V_ORI        r5.4    // DWORD 4, :f, Normalized alpha vertical origin

#define uwALPHA_MASK_X          r5.10   // DWORD 5, byte 0-1 :w, H. alpha mask
#define ubALPHA_MASK_Y          r5.22   // DWORD 5, byte 2.  :ub,V. alpha mask
#define ubBLK_CNT_X             r5.23   // DWORD 5, byte 3,  :ub, Horizontal Block Count per thread

#define udBLOCK_MASK            r5.6    // DWORD 6
#define uwBLOCK_MASK_H          r5.12   // DWORD 6, byte 0-1 :uw, Block horizontal mask used in non-DWord aligned kernels
#define ubBLOCK_MASK_V          r5.26   // DWORD 6, byte 2   :ub, Block vertical mask used in non-DWord aligned kernels
#define ubNUM_BLKS              r5.27   // DWORD 6, byte 3,  :ub, Total Block Count per thread

#define fVIDEO_STEP_X           r5.7    // DWORD 7. :f, AVS normalized reciprocal of X Scaling factor

// r6
#define fVIDEO_STEP_DELTA       r6.0    // DWORD 0. :f, AVS normalized delta between 2 adjacent scaling steps (used for non-linear scaling)


//====================== Binding table =========================================

// Legacy per-configuration binding tables (DNDI, INPUT_PL3, INPUT_PL2,
// INPUT_PA/COLORFILL). Every #define inside these conditionals is commented
// out, so none of the four blocks defines anything; they are kept only as a
// record of the older index assignments. The active indices are the
// unconditional nBI_* definitions that follow in this file.

#if defined(DNDI)
    // DNDI Surface Binding Table
    //#define nBI_SRC_CURR        0       // Current input frame surface
    //#define nBI_SRC_PRIV        1       // Denoised previous input frame surface
    //#define nBI_SRC_STAT        2       // Statistics input surface (STMM / Noise motion history)
    //#define nBI_DEST_1ST        3       // 1st deinterlaced output frame surface
//    #define nBI_DEST_YUV        3       // Dest frame YUV (for DN only)
    //#define nBI_DEST_Y          3       // Dest frame Y (for DN only)
    //#define nBI_DEST_2ND        4       // 2nd deinterlaced output frame surface
    //#define nBI_DEST_DN_CURR    6       // Denoised current output frame surface
    //#define nBI_DEST_STAT       7       // Statistics output surface (STMM / Noise motion history)
//    #define nBI_DEST_U          8       // Dest frame U (for DN only)
//    #define nBI_DEST_V          9       // Dest frame V (for DN only)
//    #define nBI_SRC_U          10       // Src frame U (for DN only)
//    #define nBI_SRC_V          11       // Src frame V (for DN only)
//    #define nBI_SRC_UV         10       // Current src frame for UV
    
#endif

#if defined(INPUT_PL3)
    // PL3 Surface Binding Table
//    #define nBI_SRC_ALPHA       0       // Alpha
//    #define nBI_SRC_Y           1       // Current src frame
//    #define nBI_SRC_U           2       // Current src frame
//    #define nBI_SRC_V           3       // Current src frame
//    #define nBI_DEST_Y         10       // Dest frame
//    #define nBI_DEST_U         11       // Dest frame
//    #define nBI_DEST_V         12       // Dest frame
//    #define nBI_DEST_YUV        7       // Dest frame
//    #define nBI_DEST_RGB        7       // same num as BI_DEST_YUV, never used at the same time
#endif

#if defined(INPUT_PL2)
    // PL2 Surface Binding Table
//    #define nBI_SRC_ALPHA       0       // Alpha
//    #define nBI_SRC_Y           1       // Current src frame for Y + offseted UV
//    #define nBI_SRC_YUV         1       // Current src frame for YUV in case of NV12_AVS
//    #define nBI_SRC_UV          2       // Current src frame for UV
//    #define nBI_DEST_YUV        7       // Current dest frame for Y + offseted UV
//    #define nBI_DEST_RGB        7       // same num as BI_DEST_YUV, never used at the same time
//    #define nBI_DEST_Y         10       // Dest frame
//    #define nBI_DEST_U         11       // Dest frame
//    #define nBI_DEST_V         12       // Dest frame
#endif

#if defined(INPUT_PA) || defined(COLORFILL)
    // Packed Surface Binding Table 
//    #define nBI_SRC_ALPHA       0       // Alpha    
//    #define nBI_SRC_YUV         1       // Current src frame
//    #define nBI_DEST_YUV        3       // Dest frame
//    #define nBI_DEST_RGB        3       // same num as BI_DEST_YUV, never used at the same time
#endif


// Super binding table
// One unconditional set of binding-table indices shared by all kernels.
// Several names intentionally map to the same index (e.g. the packed YUV,
// planar Y and RGB views of the current source all use index 1), so two names
// with the same value denote the same binding-table slot.
// Indices 16-19 and 24-25 are not assigned by any name here.
#define nBI_ALPHA_SRC                   0
#define nBI_CURRENT_SRC_YUV             1 
#define nBI_FIELD_COPY_SRC_1_YUV        1 
#define nBI_CURRENT_SRC_Y               1 
#define nBI_FIELD_COPY_SRC_1_Y          1
#define nBI_CURRENT_SRC_RGB             1  
#define nBI_CURRENT_SRC_UV              2 
#define nBI_FIELD_COPY_SRC_1_UV         2 
#define nBI_CURRENT_SRC_U               2 
#define nBI_FIELD_COPY_SRC_1_U          2 
#define nBI_CURRENT_SRC_V               3
#define nBI_FIELD_COPY_SRC_1_V          3 
#define nBI_TEMPORAL_REFERENCE_YUV      4 
#define nBI_FIELD_COPY_SRC_2_YUV        4 
#define nBI_TEMPORAL_REFERENCE_Y        4 
#define nBI_FIELD_COPY_SRC_2_Y          4 
#define nBI_CURRENT_SRC_YUV_HW_DI       4 
#define nBI_TEMPORAL_REFERENCE_UV       5 
#define nBI_FIELD_COPY_SRC_2_UV         5 
#define nBI_TEMPORAL_REFERENCE_U        5 
#define nBI_FIELD_COPY_SRC_2_U          5 
#define nBI_DENOISED_PREV_HW_DI         5 
#define nBI_TEMPORAL_REFERENCE_V        6 
#define nBI_FIELD_COPY_SRC_2_V          6 
#define nBI_STMM_HISTORY                6 
#define nBI_DESTINATION_YUV             7
#define nBI_DESTINATION_RGB             7
#define nBI_DESTINATION_Y               7
#define nBI_DESTINATION_UV              8
#define nBI_DESTINATION_U               8
#define nBI_DESTINATION_V               9
#define nBI_DESTINATION_1_YUV           10
#define nBI_DESTINATION_1_Y             10
#define nBI_DESTINATION_1_UV            11
#define nBI_DESTINATION_1_U             11
#define nBI_DESTINATION_1_V             12
#define nBI_DESTINATION_2_YUV           13
#define nBI_DESTINATION_2_Y             13
#define nBI_DESTINATION_2_UV            14
#define nBI_DESTINATION_2_U             14
#define nBI_DESTINATION_2_V             15
#define nBI_STMM_HISTORY_OUTPUT         20
#define nBI_TEMPORAL_REFERENCE_YUV_PDI  21 
#define nBI_TEMPORAL_REFERENCE_Y_PDI    21 
#define nBI_TEMPORAL_REFERENCE_UV_PDI   22 
#define nBI_TEMPORAL_REFERENCE_U_PDI    22 
#define nBI_TEMPORAL_REFERENCE_V_PDI    23 
#define nBI_SUBVIDEO_YUV                26
#define nBI_SUBVIDEO_Y                  26
#define nBI_SUBVIDEO_UV                 27
#define nBI_SUBVIDEO_U                  27
#define nBI_SUBVIDEO_V                  28
#define nBI_SUBPICTURE_YUV              29
#define nBI_SUBPICTURE_P8               29
#define nBI_SUBPICTURE_A8               30
#define nBI_GRAPHIC_YUV                 31
#define nBI_GRAPHIC_P8                  31
#define nBI_GRAPHIC_A8                  32



//========== Planar Sampler State Table Index ==========
// Two encodings of the same three sampler slots (1 = Y/YUV/RGB, 2 = U/UV,
// 3 = V; alpha is 0 in both):
//   - AVS/IEF values are the slot number shifted left by 10 (0x400, 0x800, 0xC00)
//   - SIMD16 values are the slot number shifted left by 8  (0x100, 0x200, 0x300)
// NOTE(review): the shifts presumably place the index in the sampler-index
// field of the respective message descriptor -- confirm against the PRM.
#define nSI_SRC_ALPHA           0x000   // Sampler State for Alpha

//Sampler Index for AVS/IEF messages
#define nSI_SRC_Y               0x400   // Sampler State for Y
#define nSI_SRC_U               0x800   // Sampler State for U
#define nSI_SRC_V               0xC00   // Sampler State for V
#define nSI_SRC_UV              0x800   // For NV12 surfaces
#define nSI_SRC_YUV             0x400   // For Packed surfaces  
#define nSI_SRC_RGB             0x400   // For ARGB surfaces

//Sampler Index for SIMD16 sampler messages
#define nSI_SRC_SIMD16_Y        0x100   // Sampler State for Y
#define nSI_SRC_SIMD16_U        0x200   // Sampler State for U
#define nSI_SRC_SIMD16_V        0x300   // Sampler State for V
#define nSI_SRC_SIMD16_UV       0x200   // For NV12 surfaces
#define nSI_SRC_SIMD16_YUV      0x100   // For Packed surfaces  
#define nSI_SRC_SIMD16_RGB      0x100   // For ARGB surfaces



// Common Registers
// Address-register sub-registers a0.4 - a0.6 are reserved for the per-channel
// (Y, U, V) offsets. a0.0 is separately used as the message descriptor
// register (pMSGDSC) later in this file.
#define pCF_Y_OFFSET            a0.4    // Address register holding Y offset
#define pCF_U_OFFSET            a0.5    // Address register holding U offset
#define pCF_V_OFFSET            a0.6    // Address register holding V offset

// #define YUV_ORI             ORIX    // Used by writing packed data to dport


//================= Message Payload Header fields ==============================
// IDP is a complete typed operand (r0.2:ud), unlike most names in this file,
// which expand to an untyped register reference.
#define IDP     r0.2:ud     // Interface Descriptor Pointer

//================= Common Message Descriptor  TBD add common load and save =====
// Message descriptor for dataport media write
// nDPMW_MSGDSC / nDPMR_MSGDSC are the base descriptors for data-port media
// block write / read. Both platform variants already contain a message length
// of 1 (0x02000000) and the header-present bit (0x00080000); the binding
// table index in the low byte is left as 0 to be filled in by the kernel.
// The message-type bits differ between GT and ILK, hence the conditional.
#ifdef GT
        // Message Descriptors
                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)         
                //                  1 (header present 1) 0 0 1010 (media block write) 00000
                //                  00000000 (binding table index - set later)
                //                = 0x02094000
        #define nDPMW_MSGDSC      0x02094000
        #define nDPMR_MSGDSC      0x02098000  // Data Port Media Block Read Message Descriptor
        // TBD
#else // ILK
        // Message Descriptors
                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)         
                //                  1 (header present 1) 000 0 010 (media block write) 0000
                //                  00000000 (binding table index - set later)
                //                = 0x02082000
        #define nDPMW_MSGDSC      0x02082000  // Data Port Media Block Write Message Descriptor
        #define nDPMR_MSGDSC      0x0208A000  // Data Port Media Block Read Message Descriptor
#endif

// Message Length defines
// nMSGLEN_n == n << 25 (message-length field of a send descriptor).
#define nMSGLEN_1      0x02000000 // Message Length of 1 GRF for Send
#define nMSGLEN_2      0x04000000 // Message Length of 2 GRF for Send
#define nMSGLEN_4      0x08000000 // Message Length of 4 GRF for Send
#define nMSGLEN_8      0x10000000 // Message Length of 8 GRF for Send

// Response Length defines
// nRESLEN_n == n << 20 (response-length field of a send descriptor).
// Only the lengths listed here are provided (1-5, 8, 9, 11, 12, 16).
#define nRESLEN_1      0x00100000 // Message Response Length of 1  GRF from Send
#define nRESLEN_2      0x00200000 // Message Response Length of 2  GRF from Send
#define nRESLEN_3      0x00300000 // Message Response Length of 3  GRF from Send
#define nRESLEN_4      0x00400000 // Message Response Length of 4  GRF from Send
#define nRESLEN_5      0x00500000 // Message Response Length of 5  GRF from Send
#define nRESLEN_8      0x00800000 // Message Response Length of 8  GRF from Send
#define nRESLEN_9      0x00900000 // Message Response Length of 9  GRF from Send
#define nRESLEN_11     0x00B00000 // Message Response Length of 11 GRF from Send
#define nRESLEN_12     0x00C00000 // Message Response Length of 12 GRF from Send
#define nRESLEN_16     0x01000000 // Message Response Length of 16 GRF from Send

// Block Width and Height Size defines
// Both are stored minus one: nBLOCK_WIDTH_n == n - 1 in the low bits and
// nBLOCK_HEIGHT_n == (n - 1) << 16, so a width and a height constant can be
// combined into one dword (presumably the value written to udMH_SIZE).
#define nBLOCK_WIDTH_4   0x00000003      // Block Width  4
#define nBLOCK_WIDTH_5   0x00000004      // Block Width  5
#define nBLOCK_WIDTH_8   0x00000007      // Block Width  8
#define nBLOCK_WIDTH_9   0x00000008      // Block Width  9
#define nBLOCK_WIDTH_12  0x0000000B      // Block Width  12
#define nBLOCK_WIDTH_16  0x0000000F      // Block Width  16
#define nBLOCK_WIDTH_20  0x00000013      // Block Width  20
#define nBLOCK_WIDTH_32  0x0000001F      // Block Width  32
#define nBLOCK_HEIGHT_1  0x00000000      // Block Height 1
#define nBLOCK_HEIGHT_2  0x00010000      // Block Height 2
#define nBLOCK_HEIGHT_4  0x00030000      // Block Height 4
#define nBLOCK_HEIGHT_5  0x00040000      // Block Height 5
#define nBLOCK_HEIGHT_8  0x00070000      // Block Height 8

// Extended Message Descriptors
// Values used as the extended descriptor operand of send (see END_THREAD in
// this file, which passes nTS_EOT). nTS_EOT is 0x07 with bit 5 (0x20) set,
// the End-Of-Thread bit per the original comment.
#define nEXTENDED_MATH      0x1
#define nSMPL_ENGINE        0x2
#define nMESSAGE_GATEWAY    0x3
#define nDATAPORT_READ      0x4
#define nDATAPORT_WRITE     0x5
#define nURB                0x6
#define nTS_EOT             0x27    // with End-Of-Thread bit ON

// Common message descriptors:
// nEOT_MSGDSC: descriptor used by END_THREAD; both variants carry a message
// length of 1 (0x02000000), GT additionally sets bit 4 (0x10).
// IF_NULL: extra operands for the `if` instruction -- three null:uw operands
// on GT, nothing on ILK.
#ifdef GT
	#define nEOT_MSGDSC       0x02000010  // End of Thread Message Descriptor    
	#define IF_NULL						null:uw null:uw null:uw 	//for different if instructions on ILK and Gen6
#else //ILK
	#define nEOT_MSGDSC       0x02000000  // End of Thread Message Descriptor        
	#define IF_NULL
#endif    


//===================== Math Function Control ===================================
// Function selector codes for the extended math unit. Code 0x9 is not
// assigned a name here.
#define mfcINV                  0x1     // reciprocal
#define mfcLOG                  0x2     // log
#define mfcEXP                  0x3     // exponent
#define mfcSQRT                 0x4     // square root
#define mfcRSQ                  0x5     // reciprocal square root
#define mfcSIN                  0x6     // sine (in radians)
#define mfcCOS                  0x7     // cosine (in radians)
#define mfcSINCOS               0x8     // dst0 = sin of src0, dst1 = cosine of src0 (in radians) - GT+ ONLY
#define mfcPOW                  0xA     // abs(src0) raised to the src1 power
#define mfcINT_DIV_QR           0xB     // return quotient and remainder
#define mfcINT_DIV_Q            0xC     // return quotient
#define mfcINT_DIV_R            0xD     // return remainder


//=================== Message related registers =================================

// udDUMMY_NULL: placeholder src0 operand for send. It expands to nothing on
// GT and to null:ud on ILK, so the same send line can be written for both
// platforms.
#ifdef GT
        #define udDUMMY_NULL
#else   // _ILK
        #define udDUMMY_NULL    null:ud         // Used in send inst as src0
#endif

 
//----------- Message Registers ------------
// Named views of message registers m1-m6 and m15. The names overlap on
// purpose: mMSGHDR, mMSGHDRY and mMSGHDRY1 are all m1; mMSGHDRU/mMSGHDRY2 are
// m2; mMSGHDRV/mMSGHDRY3 are m3; mMSGHDRYA/mMSGHDRY4 are m4;
// mMSGHDRH/mMSGHDRY5 are m5.
#define mMSGHDR      m1     // Message Payload Header
#define mMSGHDRY     m1     // Message Payload Header register for Y data
#define mMSGHDRU     m2     // Message Payload Header register for U data
#define mMSGHDRV     m3     // Message Payload Header register for V data
#define mMSGHDRYA    m4     // Second Message Payload Header register for Y data
#define mMSGHDRH     m5     // Message Payload Header register for motion history
#define mMSGHDRY1    m1     // Message Payload Header register for first  Y data
#define mMSGHDRY2    m2     // Message Payload Header register for second Y data
#define mMSGHDRY3    m3     // Message Payload Header register for third  Y data
#define mMSGHDRY4    m4     // Message Payload Header register for fourth Y data
#define mMSGHDRY5    m5     // Message Payload Header register for fifth Y data
#define mMSGHDRY6    m6     // Message Payload Header register for sixth Y data
#define mMSGHDR_EOT  m15    // Dummy Message Register for EOT

// rMSGSRC (r8) is the GRF in which a message header is assembled; the udMH_*
// names are its first three dwords. udMH_ORI and udMH_ORIX both name dword 0.
#define rMSGSRC     r8      // Message source register
#define pMSGDSC     a0.0:ud // Message Descriptor register (type DWORD)

#define udMH_ORI    rMSGSRC.0   // Data Port Media Block R/W message header block offset
#define udMH_ORIX   rMSGSRC.0   // Data Port Media Block R/W message header X offset
#define udMH_ORIY   rMSGSRC.1   // Data Port Media Block R/W message header Y offset
#define udMH_SIZE   rMSGSRC.2   // Data Port Media Block R/W message header block width & height

//  M2 - M9 for message data payload
// Four typed views (ub / uw / ud / f) of the payload area starting at m2.
// Note that m2 is also the register named by mMSGHDRU / mMSGHDRY2 above.
.declare    mubMSGPAYLOAD  Base=m2 ElementSize=1 SrcRegion=REGION(16,1) Type=ub
.declare    muwMSGPAYLOAD  Base=m2 ElementSize=2 SrcRegion=REGION(16,1) Type=uw
.declare    mudMSGPAYLOAD  Base=m2 ElementSize=4 SrcRegion=REGION(8,1) Type=ud
.declare    mfMSGPAYLOAD   Base=m2 ElementSize=4 SrcRegion=REGION(8,1) Type=f

//=================== End of thread instruction ===========================
// END_THREAD expands to two instructions: copy the 8 dwords of r0 into the
// dummy message register m15 (mMSGHDR_EOT), then issue a send with the
// nTS_EOT extended descriptor and the nEOT_MSGDSC descriptor.
// The GT form gives send no src0 operand; the ILK form passes udDUMMY_NULL
// as src0 and types the descriptor as :ud.
// The trailing "\" continues the macro onto the next line, while the literal
// "\n" stays in the expansion -- presumably a later build step turns it into
// a real line break between the two instructions; confirm in the Makefile.
#ifdef GT
	#define END_THREAD          mov  (8) mMSGHDR_EOT<1>:ud    r0.0<8;8,1>:ud \n\
								send (1) null<1>:d mMSGHDR_EOT nTS_EOT nEOT_MSGDSC 
#else   // ILK				This should be changed to 1 instruction; I have tested it and it works - vK
	#define END_THREAD          mov  (8) mMSGHDR_EOT<1>:ud    r0.0<8;8,1>:ud \n\
                            	send (1) dNULLREG mMSGHDR_EOT udDUMMY_NULL  nTS_EOT nEOT_MSGDSC:ud
#endif


//=======================================================================
// Region declarations for SRC and DEST as TOP and BOT

// Common I/O regions
#define nREGION_1       1
#define nREGION_2       2

//*** These region base GRFs are fixed regardless planar/packed, and data alignment.
//*** Each kernel is responsible to select the correct region declaration below.
//*** YUV regions are not necessarily next to each other.
// The values are bare GRF numbers (no "r" prefix); they are turned into
// register names with REG(r,n), e.g. REG(r,nTOP_Y) -> r10.0.
#define nTOP_Y          10      // r10 - r17  (8 GRFs)
#define nTOP_U          18      // r18 - r25 (8 GRFs)
#define nTOP_V          26      // r26 - r33 (8 GRFs)

#define nBOT_Y          56      // r56 - r63 (8 GRFs)
#define nBOT_U          64      // r64 - r71 (8 GRFs)
#define nBOT_V          72      // r72 - r79 (8 GRFs)

// Define temp space for any usages
// nTEMPk is GRF number 34 + k. Only some k are named (0-8, 10, 12, 14, 16,
// 17, 18, 24).
#define nTEMP0          34
#define nTEMP1          35
#define nTEMP2          36
#define nTEMP3          37
#define nTEMP4          38
#define nTEMP5          39
#define nTEMP6          40
#define nTEMP7          41
#define nTEMP8          42
#define nTEMP10         44
#define nTEMP12         46
#define nTEMP14         48
#define nTEMP16         50
#define nTEMP17         51
#define nTEMP18         52

// NOTE(review): r58 lies inside the bottom Y region (nBOT_Y, r56 - r63), not
// in the free space below it -- confirm users of nTEMP24 do not need BOT_Y
// at the same time.
#define nTEMP24			58

// Common region 1
// Typed views of the TOP I/O region. Name prefix encodes the view:
//   ub  / uw  : byte / word elements, unit horizontal stride
//   ub2 / ub4 : byte elements read with horizontal stride 2 / 4
//   ud..._IO  : dword view, used as the operand of send
// Y views use REGION(16,1) or REGION(16,2); U and V views use a width of 8.
// The ub4* and ubTOP_ARGB declarations give no DstRegion, unlike the others.
.declare ubTOP_Y        Base=REG(r,nTOP_Y) ElementSize=1 SrcRegion=REGION(16,1) DstRegion=<1> Type=ub
.declare ubTOP_U        Base=REG(r,nTOP_U) ElementSize=1 SrcRegion=REGION(8,1) DstRegion=<1> Type=ub
.declare ubTOP_V        Base=REG(r,nTOP_V) ElementSize=1 SrcRegion=REGION(8,1) DstRegion=<1> Type=ub
                        
.declare uwTOP_Y        Base=REG(r,nTOP_Y) ElementSize=2 SrcRegion=REGION(16,1) DstRegion=<1> Type=uw
.declare uwTOP_U        Base=REG(r,nTOP_U) ElementSize=2 SrcRegion=REGION(8,1) DstRegion=<1> Type=uw
.declare uwTOP_V        Base=REG(r,nTOP_V) ElementSize=2 SrcRegion=REGION(8,1) DstRegion=<1> Type=uw
.declare ub2TOP_Y       Base=REG(r,nTOP_Y) ElementSize=1 SrcRegion=REGION(16,2) DstRegion=<1> Type=ub
.declare ub2TOP_U       Base=REG(r,nTOP_U) ElementSize=1 SrcRegion=REGION(8,2) DstRegion=<1> Type=ub
.declare ub2TOP_V       Base=REG(r,nTOP_V) ElementSize=1 SrcRegion=REGION(8,2) DstRegion=<1> Type=ub

.declare ub4TOP_Y       Base=REG(r,nTOP_Y) ElementSize=1 SrcRegion=REGION(8,4) Type=ub
.declare ub4TOP_U       Base=REG(r,nTOP_U) ElementSize=1 SrcRegion=REGION(8,4) Type=ub
.declare ub4TOP_V       Base=REG(r,nTOP_V) ElementSize=1 SrcRegion=REGION(8,4) Type=ub

// Packed ARGB view: same base as the Y region, bytes read with stride 4.
.declare ubTOP_ARGB     Base=REG(r,nTOP_Y) ElementSize=1 SrcRegion=REGION(8,4) Type=ub

// Used by "send" instruction
.declare udTOP_Y_IO     Base=REG(r,nTOP_Y) ElementSize=4 SrcRegion=REGION(8,1) Type=ud
.declare udTOP_U_IO     Base=REG(r,nTOP_U) ElementSize=4 SrcRegion=REGION(8,1) Type=ud
.declare udTOP_V_IO     Base=REG(r,nTOP_V) ElementSize=4 SrcRegion=REGION(8,1) Type=ud

// Common region 2
// Typed views of the BOT I/O region, mirroring the TOP declarations: ub / uw
// unit-stride views, ub2 stride-2 byte views, a packed ARGB view and the
// ud..._IO dword views for send. There are no ub4BOT_* counterparts to the
// ub4TOP_* views.
.declare ubBOT_Y        Base=REG(r,nBOT_Y) ElementSize=1 SrcRegion=REGION(16,1) DstRegion=<1> Type=ub
.declare ubBOT_U        Base=REG(r,nBOT_U) ElementSize=1 SrcRegion=REGION(8,1) DstRegion=<1> Type=ub
.declare ubBOT_V        Base=REG(r,nBOT_V) ElementSize=1 SrcRegion=REGION(8,1) DstRegion=<1> Type=ub
                        
.declare uwBOT_Y        Base=REG(r,nBOT_Y) ElementSize=2 SrcRegion=REGION(16,1) DstRegion=<1> Type=uw
.declare uwBOT_U        Base=REG(r,nBOT_U) ElementSize=2 SrcRegion=REGION(8,1) DstRegion=<1> Type=uw
.declare uwBOT_V        Base=REG(r,nBOT_V) ElementSize=2 SrcRegion=REGION(8,1) DstRegion=<1> Type=uw
.declare ub2BOT_Y       Base=REG(r,nBOT_Y) ElementSize=1 SrcRegion=REGION(16,2) DstRegion=<1> Type=ub
.declare ub2BOT_U       Base=REG(r,nBOT_U) ElementSize=1 SrcRegion=REGION(8,2) DstRegion=<1> Type=ub
.declare ub2BOT_V       Base=REG(r,nBOT_V) ElementSize=1 SrcRegion=REGION(8,2) DstRegion=<1> Type=ub

// Packed ARGB view: same base as the Y region, bytes read with stride 4.
.declare ubBOT_ARGB     Base=REG(r,nBOT_Y) ElementSize=1 SrcRegion=REGION(8,4) Type=ub

// Used by "send" instruction
.declare udBOT_Y_IO     Base=REG(r,nBOT_Y) ElementSize=4 SrcRegion=REGION(8,1) Type=ud
.declare udBOT_U_IO     Base=REG(r,nBOT_U) ElementSize=4 SrcRegion=REGION(8,1) Type=ud
.declare udBOT_V_IO     Base=REG(r,nBOT_V) ElementSize=4 SrcRegion=REGION(8,1) Type=ud

// End of common.inc

// Closes the COMMON_INC include guard opened at the top of the file.
#endif    // COMMON_INC