Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
6146 | serge | 1 | /* |
2 | * Copyright 2000-2011 Intel Corporation All Rights Reserved |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the |
||
6 | * "Software"), to deal in the Software without restriction, including |
||
7 | * without limitation the rights to use, copy, modify, merge, publish, |
||
8 | * distribute, sub license, and/or sell copies of the Software, and to |
||
9 | * permit persons to whom the Software is furnished to do so, subject to |
||
10 | * the following conditions: |
||
11 | * |
||
12 | * The above copyright notice and this permission notice (including the |
||
13 | * next paragraph) shall be included in all copies or substantial portions |
||
14 | * of the Software. |
||
15 | * |
||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
||
17 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||
18 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
||
19 | * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR |
||
20 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||
21 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||
22 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
23 | * |
||
24 | * This file was originally licensed under the following license |
||
25 | * |
||
26 | * Licensed under the Apache License, Version 2.0 (the "License"); |
||
27 | * you may not use this file except in compliance with the License. |
||
28 | * You may obtain a copy of the License at |
||
29 | * |
||
30 | * http://www.apache.org/licenses/LICENSE-2.0 |
||
31 | * |
||
32 | * Unless required by applicable law or agreed to in writing, software |
||
33 | * distributed under the License is distributed on an "AS IS" BASIS, |
||
34 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||
35 | * See the License for the specific language governing permissions and |
||
36 | * limitations under the License. |
||
37 | */ |
||
38 | // 47 // Total instruction count |
||
39 | // 1 // Total kernel count |
||
40 | |||
41 | |||
42 | |||
43 | // Module name: common.inc |
||
44 | // |
||
45 | // Common header file for all Video-Processing kernels. |
||
46 | // The directives below set the assembler defaults: execution size 16, register type :ub, 128 registers in total, 7 of them payload. |
||
47 | |||
48 | .default_execution_size (16) |
||
49 | .default_register_type :ub |
||
50 | |||
51 | .reg_count_total 128 |
||
52 | .reg_count_payload 7 |
||
53 | |||
54 | //========== Common constants ========== |
||
55 | |||
56 | |||
57 | //========== Macros ========== |
||
58 | |||
59 | |||
60 | //Fast Jump, For more details see "Set_Layer_N.asm" |
||
61 | |||
62 | |||
63 | //========== Defines ==================== |
||
64 | |||
65 | //========== Static Parameters (Common To All) ========== |
||
66 | //r1 |
||
67 | |||
68 | |||
69 | //r2 |
||
70 | |||
71 | // e.g. byte0 byte1 byte2 |
||
72 | // YUYV 0 1 3 |
||
73 | // YVYU 0 3 1 |
||
74 | |||
75 | //Color Pipe (IECP) parameters |
||
76 | |||
77 | |||
78 | //ByteCopy |
||
79 | |||
80 | |||
81 | //r4 |
||
82 | |||
83 | // e.g. byte0 byte1 byte2 |
||
84 | // YUYV 0 1 3 |
||
85 | // YVYU 0 3 1 |
||
86 | |||
87 | |||
88 | //========== Inline parameters (Common To All) =========== |
||
89 | |||
90 | |||
91 | //============== Binding Index Table=========== |
||
92 | //Common between DNDI and DNUV |
||
93 | |||
94 | |||
95 | //================= Common Message Descriptor ===== |
||
96 | // Message descriptor for thread spawning |
||
97 | // Message Descriptors |
||
98 | // = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) |
||
99 | // 0000,0000,0000 |
||
100 | // 0001(Spawn a root thread),0001 (Root thread spawn thread) |
||
101 | // = 0x02000011 |
||
102 | // Thread Spawner Message Descriptor |
||
103 | |||
104 | |||
105 | // Message descriptor for atomic operation add |
||
106 | // Message Descriptors |
||
107 | // = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) |
||
108 | // 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) |
||
109 | // 0000,0000 (Binding table index, added later) |
||
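110 | // = 0x0C098700 (value implied by the bit fields listed above; the previous "0x02000011" repeated the thread-spawner descriptor) |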
||
111 | |||
112 | // Atomic Operation Add Message Descriptor |
||
113 | |||
114 | |||
115 | // Message descriptor for dataport media write |
||
116 | // Message Descriptors |
||
117 | // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) |
||
118 | // 1 (header present 1) 0 1010 (media block write) 000000 |
||
119 | // 00000000 (binding table index - set later) |
||
120 | // = 0x020A8000 |
||
121 | |||
122 | |||
123 | // Message Length defines |
||
124 | |||
125 | |||
126 | // Response Length defines |
||
127 | |||
128 | |||
129 | // Block Width and Height Size defines |
||
130 | |||
131 | |||
132 | // Extended Message Descriptors |
||
133 | |||
134 | |||
135 | // Common message descriptors: |
||
136 | |||
137 | |||
138 | //===================== Math Function Control =================================== |
||
139 | |||
140 | |||
141 | //============ Message Registers =============== |
||
142 | // buf4 starts from r28; the four MSGPAYLOAD declares below are ub/uw/ud/f typed views that all share Base=r30 |
||
143 | |||
144 | |||
145 | //#define mMSGHDR_EOT r43 // Dummy Message Register for EOT |
||
146 | |||
147 | |||
148 | .declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub |
||
149 | .declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw |
||
150 | .declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud |
||
151 | .declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f |
||
152 | |||
153 | //=================== End of thread instruction =========================== |
||
154 | |||
155 | |||
156 | //=====================Pointers Used===================================== |
||
157 | |||
158 | |||
159 | //======================================================================= |
||
160 | |||
161 | |||
162 | //r11-r17 |
||
163 | // Define temp space for any usages |
||
164 | |||
165 | |||
166 | // Common Buffers |
||
167 | |||
168 | |||
169 | // Temp space for rotation: f/ud/uw/ub views plus a byte view with stride 4 (ub4), all declared at Base=r11.0 |
||
170 | |||
171 | .declare fROBUF Base=r11.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f |
||
172 | |||
173 | .declare udROBUF Base=r11.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
174 | |||
175 | .declare uwROBUF Base=r11.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
176 | |||
177 | .declare ubROBUF Base=r11.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub |
||
178 | |||
179 | .declare ub4ROBUF Base=r11.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub |
||
180 | |||
181 | |||
182 | // End of common.inc |
||
183 | |||
184 | |||
185 | // FileName: PL3_AVS_Buf_0.asm |
||
186 | // Author: Tatiya, Rupesh |
||
187 | // Description: Loads 8x8 AVS/IEF PL3 data into Buffer 0 |
||
188 | |||
189 | |||
190 | |||
191 | // FileName : PL3_AVS_Buf.asm |
||
192 | // Author : Tatiya, Rupesh |
||
193 | // Description : Loads 8x8 AVS/IEF PL3 data into Buffer N |
||
194 | |||
195 | |||
196 | |||
197 | // Module name: Scaling.inc |
||
198 | |||
199 | |||
200 | |||
201 | |||
202 | // Description: Includes all definitions explicit to Fast Composite. |
||
203 | |||
204 | |||
205 | |||
206 | |||
207 | // End of common.inc |
||
208 | |||
209 | |||
210 | //========== GRF partition ========== |
||
211 | // r0 header : r0 (1 GRF) |
||
212 | // Static parameters : r1 - r6 (6 GRFS) |
||
213 | // Inline parameters : r7 - r8 (2 GRFs) |
||
214 | // MSGSRC : r27 (1 GRF) |
||
215 | //=================================== |
||
216 | |||
217 | //Interface: |
||
218 | //========== Static Parameters (Explicit To Fast Composite) ========== |
||
219 | //r1 |
||
220 | //CSC Set 0 -- udCSC_CURBE below is a ud (4-byte element) view starting at r1.0 |
||
221 | |||
222 | |||
223 | .declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud |
||
224 | |||
225 | //Constant alpha |
||
226 | |||
227 | |||
228 | //r2 |
||
229 | |||
230 | |||
231 | // Gen7 AVS WA |
||
232 | |||
233 | |||
234 | // WiDi Definitions |
||
235 | |||
236 | |||
237 | //Colorfill |
||
238 | |||
239 | |||
240 | // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. NOTE(review): this reads like the legend of a rotation field whose definition is not visible in this listing; it does not describe the ubCOLOR_PIXEL_VAL declare below (a ub view at r2.20 with a <0;1,0> source region) -- confirm. |
||
241 | |||
242 | .declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub |
||
243 | |||
244 | //r3 |
||
245 | //Normalised Ratio of Horizontal step size with main video for all layers |
||
246 | |||
247 | |||
248 | //Normalised Ratio of Horizontal step size with main video for all layers becomes |
||
249 | //Normalised Horizontal step size for all layers in VP_Setup.asm |
||
250 | |||
251 | |||
252 | //r4 |
||
253 | //Normalised Vertical step size for all layers |
||
254 | |||
255 | |||
256 | //r5 |
||
257 | //Normalised Vertical Frame Origin for all layers |
||
258 | |||
259 | |||
260 | //r6 |
||
261 | //Normalised Horizontal Frame Origin for all layers |
||
262 | |||
263 | |||
264 | //========== Inline Parameters (Explicit To Fast Composite) ========== |
||
265 | |||
266 | |||
267 | //Main video Step X |
||
268 | |||
269 | |||
270 | //====================== Binding table (Explicit To Fast Composite)========================================= |
||
271 | |||
272 | |||
273 | //Used by Interlaced Scaling Kernels |
||
274 | |||
275 | |||
276 | //========== Sampler State Table Index (Explicit To Fast Composite)========== |
||
277 | //Sampler Index for AVS/IEF messages |
||
278 | |||
279 | |||
280 | //Sampler Index for SIMD16 sampler messages |
||
281 | |||
282 | |||
283 | //===== Buffer views (f, ud, uw, ub, ub4 of the same registers): BUFFER_0=r64, BUFFER_1=r80, BUFFER_2=r96, BUFFER_3=r112, BUFFER_4=r28, BUFFER_5=r46 ===== |
||
284 | |||
285 | .declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f |
||
286 | .declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f |
||
287 | .declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f |
||
288 | .declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f |
||
289 | .declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f |
||
290 | .declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f |
||
291 | |||
292 | .declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
293 | .declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
294 | .declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
295 | .declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
296 | .declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
297 | .declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
298 | |||
299 | .declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
300 | .declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
301 | .declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
302 | .declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
303 | .declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
304 | .declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
305 | |||
306 | .declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub |
||
307 | .declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub |
||
308 | .declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub |
||
309 | .declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub |
||
310 | .declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub |
||
311 | .declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub |
||
312 | |||
313 | .declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub |
||
314 | .declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub |
||
315 | .declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub |
||
316 | .declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub |
||
317 | .declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub |
||
318 | .declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub |
||
319 | |||
320 | //Pointer to mask reg |
||
321 | |||
322 | |||
323 | //r18 |
||
324 | |||
325 | |||
326 | //Always keep Channel Pointers and Offsets in same GRF, so that we can use |
||
327 | // NODDCLR, NODDCHK flags. -rT |
||
328 | |||
329 | |||
330 | .declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF, at r18 |
||
331 | |||
332 | //r19 |
||
333 | |||
334 | |||
335 | .declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF, at r19 |
||
336 | |||
337 | |||
338 | //r20 |
||
339 | |||
340 | .declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF, at r20 |
||
341 | |||
342 | //r21 |
||
343 | |||
344 | .declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF, at r21 |
||
345 | |||
346 | //r22 |
||
347 | |||
348 | |||
349 | //Always keep Channel Pointers and Offsets in same GRF, so that we can use |
||
350 | // NODDCLR, NODDCHK flags. -rT |
||
351 | |||
352 | |||
353 | //Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as |
||
354 | //sub registers of same GRF to enable using NODDCLR NODDCHK. -rT |
||
355 | |||
356 | //r23 |
||
357 | |||
358 | |||
359 | //Lumakey |
||
360 | |||
361 | |||
362 | //r24 |
||
363 | |||
364 | |||
365 | //r25 |
||
366 | |||
367 | |||
368 | //r26 |
||
369 | |||
370 | |||
371 | //defines to generate LABELS during compile time. |
||
372 | |||
373 | |||
374 | // Message Header |
||
375 | // m0.7 31:0 Debug |
||
376 | // m0.6 31:0 Debug |
||
377 | // m0.5 31:0 Ignored |
||
378 | // m0.4 31:0 Ignored |
||
379 | // m0.3 31:0 Ignored |
||
380 | // m0.2 31:16 Ignored |
||
381 | // 15 Alpha Write Channel Mask enable=0, disable=1 |
||
382 | // 14 Blue Write Channel Mask (U) |
||
383 | // 13 Green Write Channel Mask (Y) |
||
384 | // 12 Red Write Channel Mask (V) |
||
385 | // 11:0 Ignored |
||
386 | // m0.1 Ignored |
||
387 | // m0.0 Ignored |
||
388 | |||
389 | |||
390 | // AVS payload |
||
391 | // m1.7 Group ID Number |
||
392 | // m1.6 U 2nd Derivative ---> NLAS dx |
||
393 | // m1.5 Delta V ---> Step Y |
||
394 | // m1.4 Delta U ---> Step X |
||
395 | // m1.3 Pixel 0 V Address ---> ORIY (Y0) |
||
396 | // m1.2 Pixel 0 U Address ---> ORIX (X0) |
||
397 | // m1.1 Vertical Block Number |
||
398 | // m1.0 Reserved |
||
399 | |||
400 | // Sampler Message Descriptor |
||
401 | // 31:29 Reserved 000 |
||
402 | // 28:25 Message length 0010 |
||
403 | // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm) |
||
404 | // 19 Header Present 1 |
||
405 | // 18:17 SIMD Mode 11 ---> SIMD32/64 |
||
406 | // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix) |
||
407 | // 11:8 Sampler Index xxxx |
||
408 | // 7:0 Binding Table Index xxxxxxxx |
||
409 | |||
410 | |||
411 | // Msg Header M0.2 |
||
412 | // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back |
||
413 | // 14:14 Blue Write Channel Mask |
||
414 | // 13:13 Green Write Channel Mask |
||
415 | // 12:12 Red Write Channel Mask |
||
416 | |||
417 | |||
418 | //By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7 |
||
419 | |||
420 | |||
421 | //used to generate LABELS at compile time. |
||
422 | |||
423 | |||
424 | // 18:17 SIMD Mode 10 ---> SIMD16 |
||
425 | // 16:12 Message Type xxxxx ---> 00000 (SIMD16) |
||
426 | |||
427 | |||
428 | //r10-17 - 8 GRFs to load SIMD16 data (up to 4 channels). NOTE(review): two of the PAYLOAD views below are spelled mudCALING_/mubCALING_ (no 'S') unlike mfSCALING_/muwSCALING_, and ub4SCALING_0X_34X_TEMP uses DstRegion=<1> where the other ub4 views in this file use <4> -- names and regions are left untouched here; confirm against their users. |
||
429 | //r18-19 - 2 GRFs to store sampler ramp. NOTE(review): fSAMPLER_RAMP below is declared at Base=r11.0 and commented as 1 GRF, which does not match this line -- confirm which register map is current. |
||
430 | |||
431 | .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f |
||
432 | .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
433 | .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
434 | .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub |
||
435 | |||
436 | |||
437 | .declare fSCALING_0X_34X_TEMP Base=r11.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f |
||
438 | .declare udSCALING_0X_34X_TEMP Base=r11.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
439 | .declare ub4SCALING_0X_34X_TEMP Base=r11.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub |
||
440 | .declare uwSCALING_0X_34X_TEMP Base=r11.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
441 | |||
442 | // Sampler ramp is used for Scaling 0X_0.34X |
||
443 | .declare fSAMPLER_RAMP Base=r11.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRF, 8 elements |
||
444 | |||
445 | |||
446 | //#define rMSGDSC_UV r23.0 |
||
447 | |||
448 | |||
449 | //End of _SCALING_ |
||
450 | |||
451 | |||
452 | //NOTE: We need offsets for second half of LAYER 0 - even if we do not load it. |
||
453 | //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0 (the mov below stores 0x0040 in each 16-bit half of r22.4:ud). |
||
454 | mov (1) r22.4<1>:ud 0x400040:ud |
||
455 | |||
456 | |||
457 | //Check if layer is to be skipped |
||
458 | |||
459 | |||
460 | // f0.1 pre-computed in Set_Layer_0; the predicated jump below is taken when f0.1 is clear and goes straight to SKIP_AVS_LOAD_L0_0_, past all three sampler sends. |
||
461 | (-f0.1) jmpi (1) SKIP_AVS_LOAD_L0_0_ |
||
462 | |||
463 | |||
464 | //AVS_PAYLOAD already has all the data loaded at this point |
||
465 | add (1) a0.0:ud r23.5<0;1,0>:ud 0x44EB400:ud //msg desc = r23.5 + constant; read with the Sampler Message Descriptor layout in this file the constant is: msg len 2, resp len 4, header present, SIMD mode 11, type 01011 (sample_8x8), sampler index 4, binding index +0 |
||
466 | |||
467 | mov (1) r16.2:ud 0x0000D000:ud // Write-channel mask 0xD in bits 15:12: only bit 13 (Green/Y) is 0, i.e. enabled, per the Msg Header M0.2 layout in this file. NOTE(review): the original comment read "Enable Red channel" -- confirm which is intended. |
||
468 | |||
469 | |||
470 | mov (1) r25.7<1>:ud r9.7:ud { NoDDClr } |
||
471 | mov (1) r25.1<1>:ud r9.12:uw { NoDDChk } |
||
472 | |||
473 | |||
474 | // set the vertical block number (r25.1 was written above from r9.12:uw; slot .1 is the Vertical Block Number in the AVS payload layout in this file) |
||
475 | |||
476 | |||
477 | mov (8) r17.0:ud r25.0<8;8,1>:ud // Copy msg payload mirrors to MRFs (r17 is presumably m1, following the header register r16 used by the sends) |
||
478 | |||
479 | // Gen7 AVS WA Only for YUV packed surfaces, NV12 and Y-channel only for Planar surfaces |
||
480 | // if ((int)(u_left*width + 5.0/256) > (int)(u_left*width)) |
||
481 | // { |
||
482 | // modified_u_coord = u_coord - 5.0/(256*width); //floating point |
||
483 | // } |
||
484 | // else if ((int)(u_left*width + 255.0/256) == (int)(u_left*width)) |
||
485 | // { |
||
486 | // modified_u_coord = u_coord + 1.0/(256*width); //floating point |
||
487 | // } |
||
488 | // else{ |
||
489 | // modified_u_coord = u_coord; |
||
490 | // } |
||
491 | // Where u_left = u - 2*du + 3*ddu for IEF On |
||
492 | // And u_left = u for IEF Off case |
||
493 | // |
||
494 | |||
495 | // check whether Gen7 AVS WA is enabled (bit 1 of r2.3:uw); when that bit is clear the code jumps to GEN7_PL3_AVS_WA_DONE_L0_0_. |
||
496 | mov (1) r14.8:uw f0.0:uw // save f0.0 (restored at GEN7_PL3_AVS_WA_DONE_L0_0_) |
||
497 | mov (1) r14.5:f r17.2<0;1,0>:f // save pixel 0 U for chroma (copied back into r17.2 after the Y send) |
||
498 | |||
499 | and.nz.f0.0 (1) null<1>:uw r2.3:uw 0x2:uw |
||
500 | (-f0.0)jmpi (1) GEN7_PL3_AVS_WA_DONE_L0_0_ |
||
501 | |||
502 | // Gen7 AVS WA, check if IEF is ON (bit 2 of r2.3:uw) for choosing Gen7 AVS WA formula. With IEF on, acc0 = r17.2 + r17.4*(-2.0) + r17.6*3.0, i.e. u - 2*du + 3*ddu taken from payload slots .2/.4/.6 (Pixel 0 U, Delta U, U 2nd Derivative in the AVS payload layout in this file). |
||
503 | |||
504 | and.nz.f0.0 (8) null<1>:uw r2.3<0;1,0>:uw 0x4:uw |
||
505 | (f0.0)mov (8) acc0.0:f r17.2<0;1,0>:f |
||
506 | (f0.0)mac (8) acc0.0:f r17.4<0;1,0>:f -2.0:f |
||
507 | (f0.0)mac (8) acc0.0:f r17.6<0;1,0>:f 3.0:f |
||
508 | (f0.0)mov (1) r14.2:f acc0:f // IEF ON, rTEMP3.2 = u_left |
||
509 | (-f0.0)mov (1) r14.2:f r17.2<0;1,0>:f // IEF OFF, rTEMP3.2 = u_left |
||
510 | |||
511 | and (1) r14.1:ud r2.3:uw 0xFFF8:uw |
||
512 | asr (1) r14.1:ud r14.1:ud 3:d |
||
513 | mov (1) r14.1:f r14.1:ud |
||
514 | |||
515 | // Gen7 AVS WA, if (int)(u_left*width + 5.0/256) > (int)(u_left*width); the three instructions above build r14.1 = (r2.3:uw & 0xFFF8) >> 3 converted to float, which is the `width` factor used below. |
||
516 | mul (1) r14.0:f r14.2:f r14.1:f // rTEMP3.0 = u_left*width |
||
517 | add (1) r14.2:f r14.0:f 0.01953125:f // rTEMP3.2 = u_left*width + 5.0/256 |
||
518 | add (1) r14.3:f r14.0:f 0.99609375:f // rTEMP3.3 = u_left*width + 255.0/256 |
||
519 | |||
520 | //Check if the values are < 0 and account for (int) cast of negative numbers. Each group below sets f0.0 when the float is < 0, converts it from :f to :d in place, then adds -1 when f0.0 is set. |
||
521 | |||
522 | //(int)(u_left*width) -> r14.0:d |
||
523 | cmp.l.f0.0 (1) null<1>:f r14.0:f 0.00000000:f |
||
524 | mov (1) r14.0:d r14.0:f |
||
525 | (f0.0)add (1) r14.0:d r14.0<0;1,0>:d -1:d |
||
526 | |||
527 | //(int)(u_left*width + 5.0/256) -> r14.2:d |
||
528 | cmp.l.f0.0 (1) null<1>:f r14.2:f 0.00000000:f |
||
529 | mov (1) r14.2:d r14.2:f |
||
530 | (f0.0)add (1) r14.2:d r14.2<0;1,0>:d -1:d |
||
531 | |||
532 | //(int)(u_left*width + 255.0/256) -> r14.3:d |
||
533 | cmp.l.f0.0 (1) null<1>:f r14.3:f 0.00000000:f |
||
534 | mov (1) r14.3:d r14.3:f |
||
535 | (f0.0)add (1) r14.3:d r14.3<0;1,0>:d -1:d |
||
536 | |||
537 | mov (1) f0.0:uw 0:uw // clear flag so the final predicated add only fires if the cmp.e below sets f0.0 |
||
538 | //if ((int)(u_left*width + 5.0/256) > (int)(u_left*width)) -> result in f1.0 |
||
539 | cmp.g.f1.0 (1) null<1>:d r14.2:d r14.0:d |
||
540 | // modified_u_coord = u_coord - 5.0/(256*width); //floating point (the add below subtracts r2.3:f, presumably the precomputed 5.0/(256*width) -- confirm against the static parameter layout) |
||
541 | (f1.0) add (1) r17.2:f r17.2<0;1,0>:f -r2.3:f |
||
542 | //else if ((int)(u_left*width + 255.0/256) == (int)(u_left*width)) -> evaluated only when f1.0 is clear |
||
543 | (-f1.0) cmp.e.f0.0 (1) null<1>:d r14.3:d r14.0:d |
||
544 | // modified_u_coord = u_coord + 1.0/(256*width); //floating point (the add below adds r2.2:f, presumably the precomputed 1.0/(256*width) -- confirm) |
||
545 | (f0.0) add (1) r17.2:f r17.2<0;1,0>:f r2.2:f |
||
546 | |||
547 | GEN7_PL3_AVS_WA_DONE_L0_0_: |
||
548 | mov (1) f0.0:uw r14.8:uw // restore f0.0 |
||
549 | |||
550 | |||
551 | send (1) uwBUFFER_0(0)<1> r16 0x2 a0.0:ud |
||
552 | // Returns Y data in 4 GRFs in scrambled order (destination uwBUFFER_0(0)) |
||
553 | |||
554 | mov (1) r17.2:f r14.5:f // restore pixel 0 U for chroma, No AVS WA for chroma |
||
555 | |||
556 | add (1) a0.0:ud r23.5<0;1,0>:ud 0x44EB801:ud // msg desc; the low byte 0x01 adds 1 to the binding index (U plane); the sampler-index field is 8 here versus 4 in the Y descriptor |
||
557 | mov (1) r16.2:ud 0x0000E000:ud // Enable Red channel (mask 0xE in bits 15:12: only bit 12 is 0) |
||
558 | |||
559 | send (1) uwBUFFER_0(4)<1> r16 0x2 a0.0:ud |
||
560 | // Returns U data in 4 GRFs in scrambled order (destination uwBUFFER_0(4)) |
||
561 | |||
562 | add (1) a0.0:ud r23.5<0;1,0>:ud 0x44EBC02:ud // msg desc; the low byte 0x02 adds 2 to the binding index (V plane) -- the original comment's "1 is added ... to UV" was copied from the U descriptor; the sampler-index field is 0xC here |
||
563 | mov (1) r16.2:ud 0x0000E000:ud // Enable Red channel (mask 0xE in bits 15:12: only bit 12 is 0) |
||
564 | |||
565 | send (1) uwBUFFER_0(8)<1> r16 0x2 a0.0:ud |
||
566 | // Returns V data in 4 GRFs in scrambled order (destination uwBUFFER_0(8)) |
||
567 | |||
568 | SKIP_AVS_LOAD_L0_0_: |
||
569 | nop // target of the (-f0.1) skip jump; the text that used to follow "nop" on this row was viewer residue (fragments of the <...> region specifiers), not assembly |
||
570 |