Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
6146 | serge | 1 | /* |
2 | * Copyright 2000-2011 Intel Corporation All Rights Reserved |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the |
||
6 | * "Software"), to deal in the Software without restriction, including |
||
7 | * without limitation the rights to use, copy, modify, merge, publish, |
||
8 | * distribute, sub license, and/or sell copies of the Software, and to |
||
9 | * permit persons to whom the Software is furnished to do so, subject to |
||
10 | * the following conditions: |
||
11 | * |
||
12 | * The above copyright notice and this permission notice (including the |
||
13 | * next paragraph) shall be included in all copies or substantial portions |
||
14 | * of the Software. |
||
15 | * |
||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
||
17 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||
18 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
||
19 | * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR |
||
20 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||
21 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||
22 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
23 | * |
||
24 | * This file was originally licensed under the following license |
||
25 | * |
||
26 | * Licensed under the Apache License, Version 2.0 (the "License"); |
||
27 | * you may not use this file except in compliance with the License. |
||
28 | * You may obtain a copy of the License at |
||
29 | * |
||
30 | * http://www.apache.org/licenses/LICENSE-2.0 |
||
31 | * |
||
32 | * Unless required by applicable law or agreed to in writing, software |
||
33 | * distributed under the License is distributed on an "AS IS" BASIS, |
||
34 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||
35 | * See the License for the specific language governing permissions and |
||
36 | * limitations under the License. |
||
37 | */ |
||
38 | // 7 // Total instruction count |
||
39 | // 1 // Total kernel count |
||
40 | |||
41 | |||
42 | |||
43 | // Module name: common.inc |
||
44 | // |
||
45 | // Common header file for all Video-Processing kernels |
||
46 | // |
||
47 | |||
48 | .default_execution_size (16) // execution size used when an instruction does not give one explicitly |
||
49 | .default_register_type :ub // operand type used when none is written: unsigned byte |
||
50 | |||
51 | .reg_count_total 128 // total register count declared for this kernel |
||
52 | .reg_count_payload 7 // presumably r0 header + r1-r6 static parameters (see "GRF partition" comment) - TODO confirm |
||
53 | |||
54 | //========== Common constants ========== |
||
55 | |||
56 | |||
57 | //========== Macros ========== |
||
58 | |||
59 | |||
60 | //Fast Jump, For more details see "Set_Layer_N.asm" |
||
61 | |||
62 | |||
63 | //========== Defines ==================== |
||
64 | |||
65 | //========== Static Parameters (Common To All) ========== |
||
66 | //r1 |
||
67 | |||
68 | |||
69 | //r2 |
||
70 | |||
71 | // e.g. byte0 byte1 byte2 |
||
72 | // YUYV 0 1 3 |
||
73 | // YVYU 0 3 1 |
||
74 | |||
75 | //Color Pipe (IECP) parameters |
||
76 | |||
77 | |||
78 | //ByteCopy |
||
79 | |||
80 | |||
81 | //r4 |
||
82 | |||
83 | // e.g. byte0 byte1 byte2 |
||
84 | // YUYV 0 1 3 |
||
85 | // YVYU 0 3 1 |
||
86 | |||
87 | |||
88 | //========== Inline parameters (Common To All) =========== |
||
89 | |||
90 | |||
91 | //============== Binding Index Table=========== |
||
92 | //Common between DNDI and DNUV |
||
93 | |||
94 | |||
95 | //================= Common Message Descriptor ===== |
||
96 | // Message descriptor for thread spawning |
||
97 | // Message Descriptors |
||
98 | // = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) |
||
99 | // 0000,0000,0000 |
||
100 | // 0001(Spawn a root thread),0001 (Root thread spawn thread) |
||
101 | // = 0x02000011 |
||
102 | // Thread Spawner Message Descriptor |
||
103 | |||
104 | |||
105 | // Message descriptor for atomic operation add |
||
106 | // Message Descriptors |
||
107 | // = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) |
||
108 | // 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) |
||
109 | // 0000,0000 (Binding table index, added later) |
||
110 | // = 0x0C098700 |
||
111 | |||
112 | // Atomic Operation Add Message Descriptor |
||
113 | |||
114 | |||
115 | // Message descriptor for dataport media write |
||
116 | // Message Descriptors |
||
117 | // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) |
||
118 | // 1 (header present 1) 0 1010 (media block write) 000000 |
||
119 | // 00000000 (binding table index - set later) |
||
120 | // = 0x020A8000 |
||
121 | |||
122 | |||
123 | // Message Length defines |
||
124 | |||
125 | |||
126 | // Response Length defines |
||
127 | |||
128 | |||
129 | // Block Width and Height Size defines |
||
130 | |||
131 | |||
132 | // Extended Message Descriptors |
||
133 | |||
134 | |||
135 | // Common message descriptors: |
||
136 | |||
137 | |||
138 | //===================== Math Function Control =================================== |
||
139 | |||
140 | |||
141 | //============ Message Registers =============== |
||
142 | // buf4 starts from r28 |
||
143 | |||
144 | |||
145 | //#define mMSGHDR_EOT r43 // Dummy Message Register for EOT |
||
146 | |||
147 | |||
148 | .declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub // unsigned-byte view of the message payload starting at r30 |
||
149 | .declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw // unsigned-word view of the same r30 payload |
||
150 | .declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud // unsigned-dword view of the same r30 payload |
||
151 | .declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f // float view of the same r30 payload |
||
152 | |||
153 | //=================== End of thread instruction =========================== |
||
154 | |||
155 | |||
156 | //=====================Pointers Used===================================== |
||
157 | |||
158 | |||
159 | //======================================================================= |
||
160 | |||
161 | |||
162 | //r11-r17 |
||
163 | // Define temp space for any usages |
||
164 | |||
165 | |||
166 | // Common Buffers |
||
167 | |||
168 | |||
169 | // temp space for rotation |
||
170 | |||
171 | .declare fROBUF Base=r11.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float view of the rotation temp space at r11 |
||
172 | |||
173 | .declare udROBUF Base=r11.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // unsigned-dword alias of the same r11 storage |
||
174 | |||
175 | .declare uwROBUF Base=r11.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw // unsigned-word alias of the same r11 storage |
||
176 | |||
177 | .declare ubROBUF Base=r11.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub // unsigned-byte alias, contiguous bytes (stride 1) |
||
178 | |||
179 | .declare ub4ROBUF Base=r11.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub // unsigned-byte alias with horizontal stride 4: one byte out of every four |
||
180 | |||
181 | |||
182 | // End of common.inc |
||
183 | |||
184 | |||
185 | //Module Name: Set_AVS_Buf_0123_YUVA.asm |
||
186 | |||
187 | |||
188 | |||
189 | // Module Name : Set_Buf_0123_VUYA |
||
190 | |||
191 | |||
192 | |||
193 | |||
194 | // Description: Includes all definitions explicit to Fast Composite. |
||
195 | |||
196 | |||
197 | |||
198 | |||
199 | // End of common.inc |
||
200 | |||
201 | |||
202 | //========== GRF partition ========== |
||
203 | // r0 header : r0 (1 GRF) |
||
204 | // Static parameters : r1 - r6 (6 GRFS) |
||
205 | // Inline parameters : r7 - r8 (2 GRFs) |
||
206 | // MSGSRC : r27 (1 GRF) |
||
207 | //=================================== |
||
208 | |||
209 | //Interface: |
||
210 | //========== Static Parameters (Explicit To Fast Composite) ========== |
||
211 | //r1 |
||
212 | //CSC Set 0 |
||
213 | |||
214 | |||
215 | .declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud // unsigned-dword view of static-parameter register r1 ("CSC Set 0" per the comment above) |
||
216 | |||
217 | //Constant alpha |
||
218 | |||
219 | |||
220 | //r2 |
||
221 | |||
222 | |||
223 | // Gen7 AVS WA |
||
224 | |||
225 | |||
226 | // WiDi Definitions |
||
227 | |||
228 | |||
229 | //Colorfill |
||
230 | |||
231 | |||
232 | // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. |
||
233 | |||
234 | .declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub // bytes starting at r2.20; the <0;1,0> source region reads a single element with zero strides (scalar replicate) |
||
235 | |||
236 | //r3 |
||
237 | //Normalised Ratio of Horizontal step size with main video for all layers |
||
238 | |||
239 | |||
240 | //Normalised Ratio of Horizontal step size with main video for all layers becomes |
||
241 | //Normalised Horizontal step size for all layers in VP_Setup.asm |
||
242 | |||
243 | |||
244 | //r4 |
||
245 | //Normalised Vertical step size for all layers |
||
246 | |||
247 | |||
248 | //r5 |
||
249 | //Normalised Vertical Frame Origin for all layers |
||
250 | |||
251 | |||
252 | //r6 |
||
253 | //Normalised Horizontal Frame Origin for all layers |
||
254 | |||
255 | |||
256 | //========== Inline Parameters (Explicit To Fast Composite) ========== |
||
257 | |||
258 | |||
259 | //Main video Step X |
||
260 | |||
261 | |||
262 | //====================== Binding table (Explicit To Fast Composite)========================================= |
||
263 | |||
264 | |||
265 | //Used by Interlaced Scaling Kernels |
||
266 | |||
267 | |||
268 | //========== Sampler State Table Index (Explicit To Fast Composite)========== |
||
269 | //Sampler Index for AVS/IEF messages |
||
270 | |||
271 | |||
272 | //Sampler Index for SIMD16 sampler messages |
||
273 | |||
274 | |||
275 | //============================================================================= |
||
276 | |||
277 | .declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float views: buffers 0-3 start at r64, r80, r96, r112 (16 GRFs apart) |
||
278 | .declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f |
||
279 | .declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f |
||
280 | .declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f |
||
281 | .declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // buffer 4 starts at r28 |
||
282 | .declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // buffer 5 starts at r46 |
||
283 | |||
284 | .declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // unsigned-dword aliases of the same six base registers |
||
285 | .declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
286 | .declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
287 | .declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
288 | .declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
289 | .declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
290 | |||
291 | .declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw // unsigned-word aliases of the same six base registers |
||
292 | .declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
293 | .declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
294 | .declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
295 | .declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
296 | .declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
297 | |||
298 | .declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub // unsigned-byte aliases, contiguous bytes (stride 1) |
||
299 | .declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub |
||
300 | .declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub |
||
301 | .declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub |
||
302 | .declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub |
||
303 | .declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub |
||
304 | |||
305 | .declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub // unsigned-byte aliases with horizontal stride 4: one byte out of every four |
||
306 | .declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub |
||
307 | .declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub |
||
308 | .declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub |
||
309 | .declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub |
||
310 | .declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub |
||
311 | |||
312 | //Pointer to mask reg |
||
313 | |||
314 | |||
315 | //r18 |
||
316 | |||
317 | |||
318 | //Always keep Channel Pointers and Offsets in same GRF, so that we can use |
||
319 | // NODDCLR, NODDCHK flags. -rT |
||
320 | |||
321 | |||
322 | .declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF; unsigned-dword view of r18. NOTE(review): the kernel instructions at the end of this file also write r18.0:w and r18.4:ud directly - confirm the two uses never overlap |
||
323 | |||
324 | //r19 |
||
325 | |||
326 | |||
327 | .declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF; unsigned-dword view of r19 |
||
328 | |||
329 | |||
330 | //r20 |
||
331 | |||
332 | .declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF; unsigned-word view of r20 |
||
333 | |||
334 | //r21 |
||
335 | |||
336 | .declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF; unsigned-word view of r21 |
||
337 | |||
338 | //r22 |
||
339 | |||
340 | |||
341 | //Always keep Channel Pointers and Offsets in same GRF, so that we can use |
||
342 | // NODDCLR, NODDCHK flags. -rT |
||
343 | |||
344 | |||
345 | //Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as |
||
346 | //sub registers of same GRF to enable using NODDCLR NODDCHK. -rT |
||
347 | |||
348 | //r23 |
||
349 | |||
350 | |||
351 | //Lumakey |
||
352 | |||
353 | |||
354 | //r24 |
||
355 | |||
356 | |||
357 | //r25 |
||
358 | |||
359 | |||
360 | //r26 |
||
361 | |||
362 | |||
363 | //defines to generate LABELS during compile time. |
||
364 | |||
365 | |||
366 | //For AVS: We use surface state as R8G8B8A8_UNORM and hence set pointers to VUYA. |
||
367 | //AVS LAYOUT:(VVUUYYAA) |
||
368 | //Assign buffer channel order for Buffer 0123 in the order AUYV a0.3>A, a0.2>U, a0.1>Y, a0.0>V |
||
369 | //V = 0, Y= 8, U = 4, A = 12. |
||
370 | mov (4) acc0.0<1>:w 0x6E2A:v // packed 4-bit elements, lowest nibble first: A,2,E,6 = -6, 2, -2, 6 when read as signed nibbles (offsets 0,8,4,12 biased by -6 so that 12 fits) |
||
371 | add (4) acc0.0<1>:w acc0<4;4,1>:w 70:uw // 70 = 64 + 6 removes the bias and adds base register 64: result 64, 72, 68, 76 for V, Y, U, A |
||
372 | shl (4) r22.0<1>:w acc0<4;4,1>:w 5:uw // multiply by 32 to turn register numbers into byte addresses; the four words land in r22.0..r22.3 |
||
373 | |||
374 | //Used by Shuffle. |
||
375 | //SU LAYOUT:(VUYAVUYA) |
||
376 | //V = 0, Y = 4, U = 2, A = 6 |
||
377 | mov (4) acc0.0<1>:w 0x6240:v // packed 4-bit elements, lowest nibble first: 0, 4, 2, 6 (all fit unbiased) |
||
378 | add (4) acc0.0<1>:w acc0<4;4,1>:w 64:uw // add base register 64: result 64, 68, 66, 70 for V, Y, U, A |
||
379 | shl (4) r18.0<1>:w acc0<4;4,1>:w 5:uw { NoDDClr } //Convert to BYTE address. NoDDClr pairs with the NoDDChk write to r18 below. |
||
380 | |||
381 | //OFFSET: |
||
382 | mov (1) r18.4<1>:ud 0x1000100:ud { NoDDChk } // one dword whose two 16-bit halves are both 0x0100; second write to r18 (sub-register 4 in :ud units, presumably bytes 16-19 - TODO confirm it stays clear of the four words written above) |
||
383 |