Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5361 | serge | 1 | /* |
2 | * Copyright 2000-2011 Intel Corporation All Rights Reserved |
||
3 | * |
||
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
||
5 | * you may not use this file except in compliance with the License. |
||
6 | * You may obtain a copy of the License at |
||
7 | * |
||
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
||
9 | * |
||
10 | * Unless required by applicable law or agreed to in writing, software |
||
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
||
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||
13 | * See the License for the specific language governing permissions and |
||
14 | * limitations under the License. |
||
15 | */ |
||
16 | // 44 // Total instruction count |
||
17 | // 1 // Total kernel count |
||
18 | |||
19 | |||
20 | |||
21 | // Module name: common.inc |
||
22 | // |
||
23 | // Common header file for all Video-Processing kernels |
||
24 | // |
||
25 | |||
// Assembler-wide defaults and register budget for this kernel.
// Presumably instructions/operands that omit an execution size or a type
// fall back to the two defaults below - TODO confirm against the assembler docs.
26 | .default_execution_size (16) |
||
27 | .default_register_type :ub |
||
28 | |||
// NOTE(review): a payload count of 7 agrees with the "GRF partition" comment
// later in this file (r0 header + r1-r6 static parameters = 7 GRFs).
29 | .reg_count_total 128 |
||
30 | .reg_count_payload 7 |
||
31 | |||
32 | //========== Common constants ========== |
||
33 | |||
34 | |||
35 | //========== Macros ========== |
||
36 | |||
37 | |||
38 | //Fast Jump, For more details see "Set_Layer_N.asm" |
||
39 | |||
40 | |||
41 | //========== Defines ==================== |
||
42 | |||
43 | //========== Static Parameters (Common To All) ========== |
||
44 | //r1 |
||
45 | |||
46 | |||
47 | //r2 |
||
48 | |||
49 | // e.g. byte0 byte1 byte2 |
||
50 | // YUYV 0 1 3 |
||
51 | // YVYU 0 3 1 |
||
52 | |||
53 | //Color Pipe (IECP) parameters |
||
54 | |||
55 | |||
56 | //ByteCopy |
||
57 | |||
58 | |||
59 | //r4 |
||
60 | |||
61 | // e.g. byte0 byte1 byte2 |
||
62 | // YUYV 0 1 3 |
||
63 | // YVYU 0 3 1 |
||
64 | |||
65 | |||
66 | //========== Inline parameters (Common To All) =========== |
||
67 | |||
68 | |||
69 | //============== Binding Index Table=========== |
||
70 | //Common between DNDI and DNUV |
||
71 | |||
72 | |||
73 | //================= Common Message Descriptor ===== |
||
74 | // Message descriptor for thread spawning |
||
75 | // Message Descriptors |
||
76 | // = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) |
||
77 | // 0000,0000,0000 |
||
78 | // 0001(Spawn a root thread),0001 (Root thread spawn thread) |
||
79 | // = 0x02000011 |
||
80 | // Thread Spawner Message Descriptor |
||
81 | |||
82 | |||
83 | // Message descriptor for atomic operation add |
||
84 | // Message Descriptors |
||
85 | // = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) |
||
86 | // 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) |
||
87 | // 0000,0000 (Binding table index, added later) |
||
88 | // = 0x0C098700 |
||
89 | |||
90 | // Atomic Operation Add Message Descriptor |
||
91 | |||
92 | |||
93 | // Message descriptor for dataport media write |
||
94 | // Message Descriptors |
||
95 | // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) |
||
96 | // 1 (header present 1) 0 1010 (media block write) 000000 |
||
97 | // 00000000 (binding table index - set later) |
||
98 | // = 0x020A8000 |
||
99 | |||
100 | |||
101 | // Message Length defines |
||
102 | |||
103 | |||
104 | // Response Length defines |
||
105 | |||
106 | |||
107 | // Block Width and Height Size defines |
||
108 | |||
109 | |||
110 | // Extended Message Descriptors |
||
111 | |||
112 | |||
113 | // Common message descriptors: |
||
114 | |||
115 | |||
116 | //===================== Math Function Control =================================== |
||
117 | |||
118 | |||
119 | //============ Message Registers =============== |
||
120 | // buf4 starts from r28 |
||
121 | |||
122 | |||
123 | //#define mMSGHDR_EOT r43 // Dummy Message Register for EOT |
||
124 | |||
125 | |||
// Four typed views of the message payload. All of them alias the same base
// register (r30): bytes (ub), words (uw), dwords (ud) and floats (f).
// The 1- and 2-byte views use source region <16;16,1>; the 4-byte views use <8;8,1>.
126 | .declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub |
||
127 | .declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw |
||
128 | .declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud |
||
129 | .declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f |
||
130 | |||
131 | //=================== End of thread instruction =========================== |
||
132 | |||
133 | |||
134 | //=====================Pointers Used===================================== |
||
135 | |||
136 | |||
137 | //======================================================================= |
||
138 | |||
139 | |||
140 | //r9-r17 |
||
141 | // Define temp space for any usages |
||
142 | |||
143 | |||
144 | // Common Buffers |
||
145 | |||
146 | |||
147 | // temp space for rotation |
||
148 | |||
// Typed views of the rotation scratch area. Every view is based at r9.0, so
// they all alias the same registers: float, dword, word and byte views, plus
// ub4, a byte view declared with SrcRegion=<32;8,4> and DstRegion=<4>.
149 | .declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f |
||
150 | |||
151 | .declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
152 | |||
153 | .declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
154 | |||
155 | .declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub |
||
156 | |||
157 | .declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub |
||
158 | |||
159 | |||
160 | // End of common.inc |
||
161 | |||
162 | |||
163 | // FileName: PL2_AVS_Buf_0.asm |
||
164 | // Author: Tatiya, Rupesh |
||
165 | // Description: Loads 8x8 AVS/IEF PL2 data into Buffer 0 |
||
166 | |||
167 | |||
168 | |||
169 | // FileName : PL2_AVS_Buf.asm |
||
170 | // Author : Tatiya, Rupesh |
||
171 | // Description : Loads 8x8 AVS/IEF PL2 data into Buffer N |
||
172 | |||
173 | |||
174 | |||
175 | // Module name: Scaling.inc |
||
176 | |||
177 | |||
178 | |||
179 | |||
180 | // Description: Includes all definitions explicit to Fast Composite. |
||
181 | |||
182 | |||
183 | |||
184 | |||
185 | // End of common.inc |
||
186 | |||
187 | |||
188 | //========== GRF partition ========== |
||
189 | // r0 header : r0 (1 GRF) |
||
190 | // Static parameters : r1 - r6 (6 GRFS) |
||
191 | // Inline parameters : r7 - r8 (2 GRFs) |
||
192 | // MSGSRC : r27 (1 GRF) |
||
193 | //=================================== |
||
194 | |||
195 | //Interface: |
||
196 | //========== Static Parameters (Explicit To Fast Composite) ========== |
||
197 | //r1 |
||
198 | //CSC Set 0 |
||
199 | |||
200 | |||
// Dword (ud) view starting at r1.0. The surrounding section comments label r1
// as a static-parameter register ("CSC Set 0").
201 | .declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud |
||
202 | |||
203 | //Constant alpha |
||
204 | |||
205 | |||
206 | //r2 |
||
207 | |||
208 | |||
209 | // Gen7 AVS WA |
||
210 | |||
211 | |||
212 | // WiDi Definitions |
||
213 | |||
214 | |||
215 | //Colorfill |
||
216 | |||
217 | |||
218 | // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. |
||
219 | |||
// Byte view at r2.20, declared with source region <0;1,0>.
// NOTE(review): presumably the colorfill pixel value, read as a single
// replicated byte - confirm against the kernels that use it.
220 | .declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub |
||
221 | |||
222 | //r3 |
||
223 | //Normalised Ratio of Horizontal step size with main video for all layers |
||
224 | |||
225 | |||
226 | //Normalised Ratio of Horizontal step size with main video for all layers becomes |
||
227 | //Normalised Horizontal step size for all layers in VP_Setup.asm |
||
228 | |||
229 | |||
230 | //r4 |
||
231 | //Normalised Vertical step size for all layers |
||
232 | |||
233 | |||
234 | //r5 |
||
235 | //Normalised Vertical Frame Origin for all layers |
||
236 | |||
237 | |||
238 | //r6 |
||
239 | //Normalised Horizontal Frame Origin for all layers |
||
240 | |||
241 | |||
242 | //========== Inline Parameters (Explicit To Fast Composite) ========== |
||
243 | |||
244 | |||
245 | //Main video Step X |
||
246 | |||
247 | |||
248 | //====================== Binding table (Explicit To Fast Composite)========================================= |
||
249 | |||
250 | |||
251 | //Used by Interlaced Scaling Kernels |
||
252 | |||
253 | |||
254 | //========== Sampler State Table Index (Explicit To Fast Composite)========== |
||
255 | //Sampler Index for AVS/IEF messages |
||
256 | |||
257 | |||
258 | //Sampler Index for SIMD16 sampler messages |
||
259 | |||
260 | |||
261 | //============================================================================= |
||
262 | |||
// Working buffers BUFFER_0 .. BUFFER_5.
// Each buffer is declared five times, once per element view (f, ud, uw, ub, ub4),
// and all five views of a buffer share one base register:
//   BUFFER_0 = r64, BUFFER_1 = r80, BUFFER_2 = r96, BUFFER_3 = r112,
//   BUFFER_4 = r28, BUFFER_5 = r46.
// Buffers 0-3 are spaced 16 registers apart; buffers 4 and 5 are 18 apart.

// Float views (4-byte elements, <8;8,1>).
263 | .declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f |
||
264 | .declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f |
||
265 | .declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f |
||
266 | .declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f |
||
267 | .declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f |
||
268 | .declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f |
||
269 | |||
// Unsigned dword views (4-byte elements, <8;8,1>).
270 | .declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
271 | .declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
272 | .declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
273 | .declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
274 | .declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
275 | .declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
276 | |||
// Unsigned word views (2-byte elements, <16;16,1>).
277 | .declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
278 | .declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
279 | .declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
280 | .declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
281 | .declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
282 | .declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
283 | |||
// Unsigned byte views (1-byte elements, <16;16,1>).
284 | .declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub |
||
285 | .declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub |
||
286 | .declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub |
||
287 | .declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub |
||
288 | .declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub |
||
289 | .declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub |
||
290 | |||
// Byte views declared with SrcRegion=<32;8,4> and DstRegion=<4>
// (i.e. every fourth byte on both the read and the write side).
291 | .declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub |
||
292 | .declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub |
||
293 | .declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub |
||
294 | .declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub |
||
295 | .declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub |
||
296 | .declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub |
||
297 | |||
298 | //Pointer to mask reg |
||
299 | |||
300 | |||
301 | //r18 |
||
302 | |||
303 | |||
304 | //Always keep Channel Pointers and Offsets in same GRF, so that we can use |
||
305 | // NODDCLR, NODDCHK flags. -rT |
||
306 | |||
307 | |||
// Per-register declarations for r18 .. r21: two dword views for the CSC
// coefficients (r18, r19) and two word views for the alpha mask (r20 scratch
// copy, r21).
// NOTE(review): a comment later in this file says "r18-19 - 2 GRFs to store
// sampler ramp", so r18/r19 appear to be reused by the SIMD16 scaling path -
// confirm the two uses are never live at the same time.
308 | .declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF |
||
309 | |||
310 | //r19 |
||
311 | |||
312 | |||
313 | .declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF |
||
314 | |||
315 | |||
316 | //r20 |
||
317 | |||
318 | .declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF |
||
319 | |||
320 | //r21 |
||
321 | |||
322 | .declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF |
||
323 | |||
324 | //r22 |
||
325 | |||
326 | |||
327 | //Always keep Channel Pointers and Offsets in same GRF, so that we can use |
||
328 | // NODDCLR, NODDCHK flags. -rT |
||
329 | |||
330 | |||
331 | //Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as |
||
332 | //sub registers of same GRF to enable using NODDCLR NODDCHK. -rT |
||
333 | |||
334 | //r23 |
||
335 | |||
336 | |||
337 | //Lumakey |
||
338 | |||
339 | |||
340 | //r24 |
||
341 | |||
342 | |||
343 | //r25 |
||
344 | |||
345 | |||
346 | //r26 |
||
347 | |||
348 | |||
349 | //defines to generate LABELS during compile time. |
||
350 | |||
351 | |||
352 | // Message Header |
||
353 | // m0.7 31:0 Debug |
||
354 | // m0.6 31:0 Debug |
||
355 | // m0.5 31:0 Ignored |
||
356 | // m0.4 31:0 Ignored |
||
357 | // m0.3 31:0 Ignored |
||
358 | // m0.2 31:16 Ignored |
||
359 | // 15 Alpha Write Channel Mask enable=0, disable=1 |
||
360 | // 14 Blue Write Channel Mask (U) |
||
361 | // 13 Green Write Channel Mask (Y) |
||
362 | // 12 Red Write Channel Mask (V) |
||
363 | // 11:0 Ignored |
||
364 | // m0.1 Ignored |
||
365 | // m0.0 Ignored |
||
366 | |||
367 | |||
368 | // AVS payload |
||
369 | // m1.7 Group ID Number |
||
370 | // m1.6 U 2nd Derivative ---> NLAS dx |
||
371 | // m1.5 Delta V ---> Step Y |
||
372 | // m1.4 Delta U ---> Step X |
||
373 | // m1.3 Pixel 0 V Address ---> ORIY (Y0) |
||
374 | // m1.2 Pixel 0 U Address ---> ORIX (X0) |
||
375 | // m1.1 Vertical Block Number |
||
376 | // m1.0 Reserved |
||
377 | |||
378 | // Sampler Message Descriptor |
||
379 | // 31:29 Reserved 000 |
||
380 | // 28:25 Message length 0010 |
||
381 | // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm) |
||
382 | // 19 Header Present 1 |
||
383 | // 18:17 SIMD Mode 11 ---> SIMD32/64 |
||
384 | // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix) |
||
385 | // 11:8 Sampler Index xxxx |
||
386 | // 7:0 Binding Table Index xxxxxxxx |
||
387 | |||
388 | |||
389 | // Msg Header M0.2 |
||
390 | // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back |
||
391 | // 14:14 Blue Write Channel Mask |
||
392 | // 13:13 Green Write Channel Mask |
||
393 | // 12:12 Red Write Channel Mask |
||
394 | |||
395 | |||
396 | //By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7 |
||
397 | |||
398 | |||
399 | //used to generate LABELS at compile time. |
||
400 | |||
401 | |||
402 | // 18:17 SIMD Mode 10 ---> SIMD16 |
||
403 | // 16:12 Message Type xxxxx ---> 00000 (SIMD16) |
||
404 | |||
405 | |||
406 | //r10-17 - 8 GRFs to load SIMD16 data (up to 4 channels) |
||
407 | //r18-19 - 2 GRFs to store sampler ramp. |
||
408 | |||
// Register views for the "Scaling 0X_0.34X" path.
// PAYLOAD views: four typed aliases of the same registers starting at r14.0.
// NOTE(review): the ud and ub views are spelled "mudCALING"/"mubCALING"
// (no 'S'), unlike the f and uw views. Left as-is because other files may
// reference these exact names - confirm before renaming.
// NOTE(review): the ub payload view uses SrcRegion=<32;32,1>, whereas the other
// ub views in this file use <16;16,1> - confirm this is intentional.
409 | .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f |
||
410 | .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
411 | .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
412 | .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub |
||
413 | |||
414 | |||
// TEMP views: typed aliases of the registers starting at r9.0.
// NOTE(review): the ub4 view here has DstRegion=<1>, while every other ub4 view
// in this file (ub4ROBUF, ub4BUFFER_n) has DstRegion=<4> - confirm.
415 | .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f |
||
416 | .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
417 | .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub |
||
418 | .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
419 | |||
420 | // The sampler ramp is used for Scaling 0X_0.34X. It is also based at r9.0, so it aliases the TEMP views above. |
||
421 | .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRF, 8 float elements |
||
422 | |||
423 | |||
424 | //#define rMSGDSC_UV r23.0 |
||
425 | |||
426 | |||
427 | //End of _SCALING_ |
||
428 | |||
429 | |||
430 | //NOTE: We need offsets for second half of LAYER 0 - even if we do not load it. |
||
431 | //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0. |
||
// Kernel body: three sampler sends that load the Y, U and V data into BUFFER_3.
// All three sends use the same message registers: r16 is the header (M0) and
// r17 is the payload (M1, copied from its mirror in r25). Between sends only
// two things change:
//   - r16.2 (M0.2), the write-channel mask, where a set bit DISABLES a channel:
//     bit 15 alpha, bit 14 blue, bit 13 green, bit 12 red;
//   - the low byte of the descriptor in a0.0, i.e. the binding table index.
// Decoded with the "Sampler Message Descriptor" layout documented in this file,
// the base immediate 0x44EB000 means: message length 2, response length 4 GRFs,
// header present, SIMD mode 11, message type 01011 (sample_8x8), sampler index 0,
// binding table index 0.
// NOTE(review): r23.5 is added to every descriptor; its contents are set up
// outside this listing - confirm what it contributes.
432 | mov (1) r22.4<1>:ud 0x400040:ud |
||
433 | |||
434 | |||
435 | mov (1) r16.3<1>:ud r0.3<0;1,0>:ud |
||
436 | |||
437 | |||
438 | //AVS_PAYLOAD already has all the data loaded at this point |
||
439 | add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB000:ud // msg desc, binding table index + 0 (Y) |
||
440 | |||
441 | mov (1) r16.2<1>:ud 0x0000D000:ud // Enable Green (Y) channel only: bits 15,14,12 set, bit 13 clear |
||
442 | |||
443 | |||
444 | |||
445 | // set the vertical block number (M1.1) in the payload mirror |
||
446 | |||
447 | mov (1) r25.1<1>:ud 3:ud |
||
448 | |||
449 | mov (8) r17.0<1>:ud r25.0<8;8,1>:ud // Copy msg payload mirrors to MRFs |
||
450 | |||
451 | send (1) uwBUFFER_3(0)<1> r16 0x2 a0.0:ud |
||
452 | // Returns Y data in 4 GRFs in scrambled order |
||
453 | |||
454 | add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB001:ud // msg desc; 1 is added to the binding table index (U) |
||
455 | |||
456 | mov (1) r16.2<1>:ud 0x0000E000:ud // Enable Red channel only: bits 15,14,13 set, bit 12 clear |
||
457 | |||
458 | send (1) uwBUFFER_3(4)<1> r16 0x2 a0.0:ud |
||
459 | // Returns U data in 4 GRFs in scrambled order |
||
460 | |||
461 | add (1) a0.0<1>:ud r23.5<0;1,0>:ud 0x44EB002:ud // msg desc; 2 is added to the binding table index (V) |
||
462 | mov (1) r16.2<1>:ud 0x0000E000:ud // Enable Red channel only (same mask as the U load) |
||
463 | |||
464 | send (1) uwBUFFER_3(8)<1> r16 0x2 a0.0:ud |
||
465 | // Returns V data in 4 GRFs in scrambled order |
||
466 | |||
// No branch to this label is visible in this listing.
467 | SKIP_AVS_LOAD_L0_0_: |
||
468 | nop |
||
469 |