Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5361 serge 1
/*
2
 *  Copyright 2000-2011 Intel Corporation All Rights Reserved
3
 *
4
 *  Licensed under the Apache License, Version 2.0 (the "License");
5
 *  you may not use this file except in compliance with the License.
6
 *  You may obtain a copy of the License at
7
 *
8
 *      http://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 *  Unless required by applicable law or agreed to in writing, software
11
 *  distributed under the License is distributed on an "AS IS" BASIS,
12
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 *  See the License for the specific language governing permissions and
14
 *  limitations under the License.
15
 */
16
//   44    // Total instruction count
17
//    1    // Total kernel count
18
 
19
 
20
 
21
// Module name: common.inc
22
//
23
// Common header file for all Video-Processing kernels
24
//
25
 
26
// Assembler-wide defaults: instructions that do not spell out an execution
// size use 16, and operands without an explicit type are treated as :ub.
.default_execution_size (16)
27
.default_register_type  :ub
28
 
29
// Register budget declared for this kernel: 128 registers in total,
// 7 of which are counted as payload.
.reg_count_total        128
30
.reg_count_payload      7
31
 
32
//========== Common constants ==========
33
 
34
 
35
//========== Macros ==========
36
 
37
 
38
//Fast Jump, For more details see "Set_Layer_N.asm"
39
 
40
 
41
//========== Defines ====================
42
 
43
//========== Static Parameters (Common To All) ==========
44
//r1
45
 
46
 
47
//r2
48
 
49
                                    //  e.g.            byte0   byte1  byte2
50
                                    // YUYV               0       1      3
51
                                    // YVYU               0       3      1
52
 
53
//Color Pipe (IECP) parameters
54
 
55
 
56
//ByteCopy
57
 
58
 
59
//r4
60
 
61
                                    //  e.g.              byte0           byte1           byte2
62
                                    // YUYV                 0               1               3
63
                                    // YVYU                 0               3               1
64
 
65
 
66
//========== Inline parameters (Common To All) ===========
67
 
68
 
69
//============== Binding Index Table===========
70
//Common between DNDI and DNUV
71
 
72
 
73
//================= Common Message Descriptor =====
74
// Message descriptor for thread spawning
75
// Message Descriptors
76
//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
77
//                  0000,0000,0000
78
//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
79
//                = 0x02000011
80
// Thread Spawner Message Descriptor
81
 
82
 
83
// Message descriptor for atomic operation add
84
// Message Descriptors
85
//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
86
//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
87
//                  0000,0000 (Binding table index, added later)
88
//                = 0x0C098700
89
 
90
// Atomic Operation Add Message Descriptor
91
 
92
 
93
// Message descriptor for dataport media write
94
        // Message Descriptors
95
                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
96
                //                  1 (header present 1) 0 1010 (media block write) 000000
97
                //                  00000000 (binding table index - set later)
98
                //                = 0x020A8000
99
 
100
 
101
// Message Length defines
102
 
103
 
104
// Response Length defines
105
 
106
 
107
// Block Width and Height Size defines
108
 
109
 
110
// Extended Message Descriptors
111
 
112
 
113
// Common message descriptors:
114
 
115
 
116
//===================== Math Function Control ===================================
117
 
118
 
119
//============ Message Registers ===============
120
                             // buf4 starts from r28
121
 
122
 
123
//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
124
 
125
 
126
// Four typed aliases of the same storage starting at r30 (byte, word, dword
// and float views). The word/byte views use a <16;16,1> source region and the
// 4-byte views use <8;8,1>.
.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
127
.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
128
.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
129
.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
130
 
131
//=================== End of thread instruction ===========================
132
 
133
 
134
//=====================Pointers Used=====================================
135
 
136
 
137
//=======================================================================
138
 
139
 
140
//r9-r17
141
// Define temp space for any usages
142
 
143
 
144
// Common Buffers
145
 
146
 
147
// temp space for rotation
148
 
149
// Typed aliases of the rotation scratch area based at r9.0: float, dword,
// word and byte views of the same registers.
.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f
150
 
151
.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud
152
 
153
.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
154
 
155
.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub
156
 
157
// Strided byte view: source region <32;8,4> and destination stride <4>, i.e.
// it addresses one byte out of every four.
.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub
158
 
159
 
160
// End of common.inc
161
 
162
 
163
// FileName:		PL2_AVS_Buf_0.asm
164
// Author:			Tatiya, Rupesh
165
// Description:		Loads 8x8 AVS/IEF PL2 data into Buffer 0
166
 
167
 
168
 
169
// FileName     :   PL2_AVS_Buf.asm
170
// Author       :   Tatiya, Rupesh
171
// Description  :   Loads 8x8 AVS/IEF PL2 data into Buffer N
172
 
173
 
174
 
175
// Module name: Scaling.inc
176
 
177
 
178
 
179
 
180
// Description: Includes all definitions explicit to Fast Composite.
181
 
182
 
183
 
184
 
185
// End of common.inc
186
 
187
 
188
//========== GRF partition ==========
189
     // r0 header            :   r0          (1 GRF)
190
     // Static parameters    :   r1 - r6     (6 GRFS)
191
     // Inline parameters    :   r7 - r8     (2 GRFs)
192
     // MSGSRC               :   r27         (1 GRF)
193
//===================================
194
 
195
//Interface:
196
//========== Static Parameters (Explicit To Fast Composite) ==========
197
//r1
198
//CSC Set 0
199
 
200
 
201
// Dword (ud) alias starting at r1.0, the register the surrounding comments
// list as holding the "CSC Set 0" static parameters.
.declare udCSC_CURBE    Base=r1.0      ElementSize=4       Type=ud
202
 
203
//Constant alpha
204
 
205
 
206
//r2
207
 
208
 
209
// Gen7 AVS WA
210
 
211
 
212
// WiDi Definitions
213
 
214
 
215
//Colorfill
216
 
217
 
218
                                      // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
219
 
220
// Unsigned-byte alias starting at byte 20 of r2 (declared under the
// "//Colorfill" heading of the r2 parameters). The <0;1,0> source region has
// zero vertical and horizontal stride, so a read repeats a single element.
.declare ubCOLOR_PIXEL_VAL      Base=r2.20      ElementSize=1       SrcRegion=<0;1,0>       DstRegion=<1>       Type=ub
221
 
222
//r3
223
//Normalised Ratio of Horizontal step size with main video for all layers
224
 
225
 
226
    //Normalised Ratio of Horizontal step size with main video for all layers becomes
227
    //Normalised Horizontal step size for all layers in VP_Setup.asm
228
 
229
 
230
//r4
231
//Normalised Vertical step size for all layers
232
 
233
 
234
//r5
235
//Normalised Vertical Frame Origin for all layers
236
 
237
 
238
//r6
239
//Normalised Horizontal Frame Origin for all layers
240
 
241
 
242
//========== Inline Parameters (Explicit To Fast Composite) ==========
243
 
244
 
245
//Main video Step X
246
 
247
 
248
//====================== Binding table (Explicit To Fast Composite)=========================================
249
 
250
 
251
//Used by Interlaced Scaling Kernels
252
 
253
 
254
//========== Sampler State Table Index (Explicit To Fast Composite)==========
255
//Sampler Index for AVS/IEF messages
256
 
257
 
258
//Sampler Index for SIMD16 sampler messages
259
 
260
 
261
//=============================================================================
262
 
263
// Typed aliases for the six data buffers. Each buffer N has the same base
// register in every view:
//   BUFFER_0 = r64, BUFFER_1 = r80, BUFFER_2 = r96, BUFFER_3 = r112,
//   BUFFER_4 = r28, BUFFER_5 = r46.
// Only the element type/size and the default regions differ between views.

// Float views (4-byte elements, <8;8,1> source region).
.declare fBUFFER_0      Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
264
.declare fBUFFER_1      Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
265
.declare fBUFFER_2      Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
266
.declare fBUFFER_3      Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
267
.declare fBUFFER_4      Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
268
.declare fBUFFER_5      Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
269
 
270
// Unsigned dword views (4-byte elements, <8;8,1> source region).
.declare udBUFFER_0     Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
271
.declare udBUFFER_1     Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
272
.declare udBUFFER_2     Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
273
.declare udBUFFER_3     Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
274
.declare udBUFFER_4     Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
275
.declare udBUFFER_5     Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
276
 
277
// Unsigned word views (2-byte elements, <16;16,1> source region).
.declare uwBUFFER_0     Base=r64.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
278
.declare uwBUFFER_1     Base=r80.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
279
.declare uwBUFFER_2     Base=r96.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
280
.declare uwBUFFER_3     Base=r112.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
281
.declare uwBUFFER_4     Base=r28.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
282
.declare uwBUFFER_5     Base=r46.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
283
 
284
// Unsigned byte views (1-byte elements, <16;16,1> source region).
.declare ubBUFFER_0     Base=r64.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
285
.declare ubBUFFER_1     Base=r80.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
286
.declare ubBUFFER_2     Base=r96.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
287
.declare ubBUFFER_3     Base=r112.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
288
.declare ubBUFFER_4     Base=r28.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
289
.declare ubBUFFER_5     Base=r46.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
290
 
291
// Strided byte views: source region <32;8,4> and destination stride <4>,
// i.e. one byte out of every four.
.declare ub4BUFFER_0    Base=r64.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
292
.declare ub4BUFFER_1    Base=r80.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
293
.declare ub4BUFFER_2    Base=r96.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
294
.declare ub4BUFFER_3    Base=r112.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
295
.declare ub4BUFFER_4    Base=r28.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
296
.declare ub4BUFFER_5    Base=r46.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
297
 
298
//Pointer to mask reg
299
 
300
 
301
//r18
302
 
303
 
304
//Always keep Channel Pointers and Offsets in same GRF, so that we can use
305
// NODDCLR, NODDCHK flags. -rT
306
 
307
 
308
// Dword alias covering r18 (first of the two CSC coefficient registers).
.declare udCSC_COEFF_0  Base=r18.0    ElementSize=4 Type=ud       // 1 GRF
309
 
310
//r19
311
 
312
 
313
// Dword alias covering r19 (second CSC coefficient register).
.declare udCSC_COEFF_1  Base=r19.0    ElementSize=4 Type=ud       // 1 GRF
314
 
315
 
316
//r20
317
 
318
// Word alias covering r20; by its name a scratch copy of the alpha mask
// (presumably paired with uwALPHA_MASK_REG below - confirm against users).
.declare uwALPHA_MASK_REG_TEMP  Base=r20.0    ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
319
 
320
//r21
321
 
322
// Word alias covering r21, same element size and source region as the
// TEMP alias on r20.
.declare uwALPHA_MASK_REG       Base=r21.0         ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
323
 
324
//r22
325
 
326
 
327
//Always keep Channel Pointers and Offsets in same GRF, so that we can use
328
// NODDCLR, NODDCHK flags. -rT
329
 
330
 
331
//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
332
//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
333
 
334
//r23
335
 
336
 
337
//Lumakey
338
 
339
 
340
//r24
341
 
342
 
343
//r25
344
 
345
 
346
//r26
347
 
348
 
349
//defines to generate LABELS during compile time.
350
 
351
 
352
        // Message Header
353
        // m0.7         31:0    Debug
354
        // m0.6         31:0    Debug
355
        // m0.5         31:0    Ignored
356
        // m0.4         31:0    Ignored
357
        // m0.3         31:0    Ignored
358
        // m0.2         31:16   Ignored
359
        //              15      Alpha Write Channel Mask        enable=0, disable=1
360
        //              14      Blue Write Channel Mask  (U)
361
        //              13      Green Write Channel Mask (Y)
362
        //              12      Red Write Channel Mask   (V)
363
        //              11:0    Ignored
364
        // m0.1                 Ignored
365
        // m0.0                 Ignored
366
 
367
 
368
        // AVS payload
369
        // m1.7                 Group ID Number
370
        // m1.6                 U 2nd Derivative        ---> NLAS dx
371
        // m1.5                 Delta V                 ---> Step Y
372
        // m1.4                 Delta U                 ---> Step X
373
        // m1.3                 Pixel 0 V Address       ---> ORIY (Y0)
374
        // m1.2                 Pixel 0 U Address       ---> ORIX (X0)
375
        // m1.1                 Vertical Block Number
376
        // m1.0                 Reserved
377
 
378
        // Sampler Message Descriptor
379
        // 31:29        Reserved                        000
380
        // 28:25        Message length                  0010
381
        // 24:20        Response length                 xxxxx   ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm)
382
        // 19           Header Present                  1
383
        // 18:17        SIMD Mode                       11      ---> SIMD32/64
384
        // 16:12        Message Type                    xxxxx   ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix)
385
        // 11:8         Sampler Index                   xxxx
386
        // 7:0          Binding Table Index             xxxxxxxx
387
 
388
 
389
        // Msg Header M0.2
390
        // 15:15        Alpha Write Channel Mask, 0: written back, 1: not written back
391
        // 14:14        Blue  Write Channel Mask
392
        // 13:13        Green Write Channel Mask
393
        // 12:12        Red   Write Channel Mask
394
 
395
 
396
//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7
397
 
398
 
399
//used to generate LABELS at compile time.
400
 
401
 
402
        // 18:17        SIMD Mode                       10      ---> SIMD16
403
        // 16:12        Message Type                    xxxxx   ---> 00000 (SIMD16)
404
 
405
 
406
//r10-17  - 8 GRFs to load SIMD16 data (up to 4 channels)
406
//r18-19  - 2 GRFs to store sampler ramp.
407
// NOTE(review): the two lines above do not match the declarations below
// (payload aliases are based at r14.0, fSAMPLER_RAMP at r9.0) - confirm which
// register assignment is current.
 
408
    // Typed aliases of the Scaling 0X_0.34X message payload, all based at r14.0.
    // NOTE(review): the ud and ub aliases are spelled "...CALING..." (no 'S'),
    // unlike the f/uw aliases. Left unchanged because other files may
    // reference these names - confirm before renaming.
    .declare mfSCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=f
409
    .declare muwSCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
410
    .declare mudCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=ud
411
    .declare mubCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=1		SrcRegion=<32;32,1>		DstRegion=<1>		Type=ub
412
 
413
 
414
	// Typed aliases of the Scaling 0X_0.34X temporary area, all based at r9.0.
	// NOTE(review): the ub4 alias uses DstRegion=<1>, whereas the other ub4
	// aliases in this file (ub4ROBUF, ub4BUFFER_n) use DstRegion=<4> - confirm
	// this is intentional.
	.declare fSCALING_0X_34X_TEMP		Base=r9.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=f
415
	.declare udSCALING_0X_34X_TEMP		Base=r9.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=ud
416
	.declare ub4SCALING_0X_34X_TEMP		Base=r9.0	ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<1>		Type=ub
417
	.declare uwSCALING_0X_34X_TEMP		Base=r9.0	ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
418
 
419
	// Sampler ramp is used for Scaling 0X_0.34X
420
	// Float alias at r9.0, i.e. the same base register as the *_TEMP aliases above.
	.declare	fSAMPLER_RAMP  		Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f		// 1 GRFs, 8 elements
422
 
423
 
424
	//#define rMSGDSC_UV    		r23.0
425
 
426
 
427
//End of _SCALING_
428
 
429
 
430
        // Issues three sampler messages, each with header r16 and payload r17
        // (message length 2), and writes the responses to uwBUFFER_3 at register
        // offsets 0, 4 and 8. Between sends only the binding table index in the
        // descriptor (low byte: 0, 1, 2) and the channel write mask in the
        // header (r16.2) change.
        //
        // Base descriptor 0x044EB000, decoded with the "Sampler Message
        // Descriptor" table earlier in this file:
        //   28:25 message length  = 0010  (2 registers: r16 header + r17 payload)
        //   24:20 response length = 00100 (4 GRFs)
        //   19    header present  = 1
        //   18:17 SIMD mode       = 11
        //   16:12 message type    = 01011 (sample_8x8)
        //   11:8  sampler index   = 0, 7:0 binding table index = 0
        // r23.5 is added to the constant; its contents are set outside this
        // view (presumably per-layer sampler/binding index bits - confirm).
        //
        //NOTE: We need offsets for second half of LAYER 0 - even if we do not load it.
430
        //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0.
431
        mov (1)     r22.4<1>:ud       0x400040:ud
432
 
433
 
434
    // Copy dword 3 of the r0 header into dword 3 of the message header (r16).
    mov (1)     r16.3<1>:ud	r0.3<0;1,0>:ud
435
 
436
 
437
    //AVS_PAYLOAD already has all the data loaded at this point
438
    add (1)     a0.0<1>:ud     r23.5<0;1,0>:ud      0x44EB000:ud      //msg desc
439
 
440
    mov (1)     r16.2<1>:ud      0x0000D000:ud                                        // M0.2 bits 15:12 = 1101b (0 = enabled): only Green (Y) is enabled
441
 
442
 
443
 
444
    // set the vertical block number
445
 
446
      mov (1)   r25.1<1>:ud 3:ud
447
 
448
    mov (8)     r17.0<1>:ud      r25.0<8;8,1>:ud                                     // Copy msg payload mirrors to MRFs
449
 
450
    send (1)    uwBUFFER_3(0)<1>   r16    0x2    a0.0:ud
451
    // Returns Y data in 4 GRFs in scrambled order
452
 
453
    add (1)     a0.0<1>:ud                r23.5<0;1,0>:ud      0x44EB001:ud            // msg desc; 1 is added to change BI to UV
454
 
455
    mov (1)     r16.2<1>:ud      0x0000E000:ud                           // Enable Red channel (M0.2 bits 15:12 = 1110b, 0 = enabled)
456
 
457
    send (1)    uwBUFFER_3(4)<1>   r16    0x2    a0.0:ud
458
    // Returns U data in 4 GRFs in scrambled order
459
 
460
    add (1)     a0.0<1>:ud     r23.5<0;1,0>:ud      0x44EB002:ud     // msg desc; 2 is added to the binding table index (vs. 1 for the previous send)
461
    mov (1)     r16.2<1>:ud      0x0000E000:ud                           // Enable Red channel (M0.2 bits 15:12 = 1110b, 0 = enabled)
462
 
463
    send (1)    uwBUFFER_3(8)<1>   r16    0x2    a0.0:ud
464
    // Returns V data in 4 GRFs in scrambled order
465
 
466
// Label after the load sequence; nothing in this view jumps to it
// (presumably the target used when the AVS load is skipped - confirm).
SKIP_AVS_LOAD_L0_0_:
467
    nop
469