Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
6146 | serge | 1 | /* |
2 | * Copyright 2000-2013 Intel Corporation All Rights Reserved |
||
3 | * |
||
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
||
5 | * you may not use this file except in compliance with the License. |
||
6 | * You may obtain a copy of the License at |
||
7 | * |
||
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
||
9 | * |
||
10 | * Unless required by applicable law or agreed to in writing, software |
||
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
||
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||
13 | * See the License for the specific language governing permissions and |
||
14 | * limitations under the License. |
||
15 | * |
||
16 | * Authors: Zhao Yakui |
||
17 | */ |
||
18 | // 7 // Total instruction count |
||
19 | // 1 // Total kernel count |
||
20 | |||
21 | |||
22 | |||
23 | // Module name: common.inc |
||
24 | // |
||
25 | // Common header file for all Video-Processing kernels |
||
26 | // |
||
27 | |||
28 | .default_execution_size (16) // Default execution size (16) for instructions that do not specify one |
||
29 | .default_register_type :ub // Default operand type (:ub) for operands that do not specify one |
||
30 | |||
31 | .reg_count_total 128 // Total register count declared to the assembler |
||
32 | .reg_count_payload 7 // Payload register count; NOTE(review): presumably r0 plus static parameters r1-r6 - confirm |
||
33 | |||
34 | //========== Common constants ========== |
||
35 | |||
36 | |||
37 | //========== Macros ========== |
||
38 | |||
39 | |||
40 | //Fast Jump, For more details see "Set_Layer_N.asm" |
||
41 | |||
42 | |||
43 | //========== Defines ==================== |
||
44 | |||
45 | //========== Static Parameters (Common To All) ========== |
||
46 | //r1 |
||
47 | |||
48 | |||
49 | //r2 |
||
50 | |||
51 | // e.g. byte0 byte1 byte2 |
||
52 | // YUYV 0 1 3 |
||
53 | // YVYU 0 3 1 |
||
54 | |||
55 | //Color Pipe (IECP) parameters |
||
56 | |||
57 | |||
58 | //ByteCopy |
||
59 | |||
60 | |||
61 | //r4 |
||
62 | |||
63 | // e.g. byte0 byte1 byte2 |
||
64 | // YUYV 0 1 3 |
||
65 | // YVYU 0 3 1 |
||
66 | |||
67 | |||
68 | //========== Inline parameters (Common To All) =========== |
||
69 | |||
70 | |||
71 | //============== Binding Index Table=========== |
||
72 | //Common between DNDI and DNUV |
||
73 | |||
74 | |||
75 | //================= Common Message Descriptor ===== |
||
76 | // Message descriptor for thread spawning |
||
77 | // Message Descriptors |
||
78 | // = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) |
||
79 | // 0000,0000,0000 |
||
80 | // 0001(Spawn a root thread),0001 (Root thread spawn thread) |
||
81 | // = 0x02000011 |
||
82 | // Thread Spawner Message Descriptor |
||
83 | |||
84 | |||
85 | // Message descriptor for atomic operation add |
||
86 | // Message Descriptors |
||
87 | // = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) |
||
88 | // 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) |
||
89 | // 0000,0000 (Binding table index, added later) |
||
90 | // = 0x0C098700 |
||
91 | |||
92 | // Atomic Operation Add Message Descriptor |
||
93 | |||
94 | |||
95 | // Message descriptor for dataport media write |
||
96 | // Message Descriptors |
||
97 | // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) |
||
98 | // 1 (header present 1) 0 1010 (media block write) 000000 |
||
99 | // 00000000 (binding table index - set later) |
||
100 | // = 0x020A8000 |
||
101 | |||
102 | |||
103 | // Message Length defines |
||
104 | |||
105 | |||
106 | // Response Length defines |
||
107 | |||
108 | |||
109 | // Block Width and Height Size defines |
||
110 | |||
111 | |||
112 | // Extended Message Descriptors |
||
113 | |||
114 | |||
115 | // Common message descriptors: |
||
116 | |||
117 | |||
118 | //===================== Math Function Control =================================== |
||
119 | |||
120 | |||
121 | //============ Message Registers =============== |
||
122 | // buf4 starts from r28 |
||
123 | |||
124 | |||
125 | //#define mMSGHDR_EOT r43 // Dummy Message Register for EOT |
||
126 | |||
127 | |||
128 | .declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub // Message payload at r30 viewed as 1-byte ub elements |
||
129 | .declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw // Same r30 base viewed as 2-byte uw elements |
||
130 | .declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud // Same r30 base viewed as 4-byte ud elements |
||
131 | .declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f // Same r30 base viewed as 4-byte f elements |
||
132 | |||
133 | //=================== End of thread instruction =========================== |
||
134 | |||
135 | |||
136 | //=====================Pointers Used===================================== |
||
137 | |||
138 | |||
139 | //======================================================================= |
||
140 | |||
141 | |||
142 | //r11-r17 |
||
143 | // Define temp space for any usages |
||
144 | |||
145 | |||
146 | // Common Buffers |
||
147 | |||
148 | |||
149 | // temp space for rotation |
||
150 | |||
151 | .declare fROBUF Base=r11.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // Rotation temp space at r11.0 viewed as 4-byte f elements |
||
152 | |||
153 | .declare udROBUF Base=r11.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // Same r11.0 base viewed as 4-byte ud elements |
||
154 | |||
155 | .declare uwROBUF Base=r11.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw // Same r11.0 base viewed as 2-byte uw elements |
||
156 | |||
157 | .declare ubROBUF Base=r11.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub // Same r11.0 base viewed as 1-byte ub elements |
||
158 | |||
159 | .declare ub4ROBUF Base=r11.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub // Same r11.0 base, bytes accessed with a stride of 4 (<32;8,4> source, <4> destination) |
||
160 | |||
161 | |||
162 | // End of common.inc |
||
163 | |||
164 | |||
165 | //Module Name: Set_AVS_Buf_0123_BGRA.asm |
||
166 | |||
167 | |||
168 | |||
169 | //Module Name: Set_Buf_0123_BGRA |
||
170 | |||
171 | |||
172 | |||
173 | |||
174 | // Description: Includes all definitions explicit to Fast Composite. |
||
175 | |||
176 | |||
177 | |||
178 | |||
179 | // End of common.inc |
||
180 | |||
181 | |||
182 | //========== GRF partition ========== |
||
183 | // r0 header : r0 (1 GRF) |
||
184 | // Static parameters : r1 - r6 (6 GRFS) |
||
185 | // Inline parameters : r7 - r8 (2 GRFs) |
||
186 | // MSGSRC : r27 (1 GRF) |
||
187 | //=================================== |
||
188 | |||
189 | //Interface: |
||
190 | //========== Static Parameters (Explicit To Fast Composite) ========== |
||
191 | //r1 |
||
192 | //CSC Set 0 |
||
193 | |||
194 | |||
195 | .declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud // 4-byte ud view starting at r1.0; presumably the "CSC Set 0" static parameters - confirm |
||
196 | |||
197 | //Constant alpha |
||
198 | |||
199 | |||
200 | //r2 |
||
201 | |||
202 | |||
203 | // Gen7 AVS WA |
||
204 | |||
205 | |||
206 | // WiDi Definitions |
||
207 | |||
208 | |||
209 | //Colorfill |
||
210 | |||
211 | |||
212 | // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. |
||
213 | |||
214 | .declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub // Byte view at r2.20; the <0;1,0> source region has zero strides, so every channel reads the same byte. Presumably the colorfill pixel value - confirm |
||
215 | |||
216 | //r3 |
||
217 | //Normalised Ratio of Horizontal step size with main video for all layers |
||
218 | |||
219 | |||
220 | //Normalised Ratio of Horizontal step size with main video for all layers becomes |
||
221 | //Normalised Horizontal step size for all layers in VP_Setup.asm |
||
222 | |||
223 | |||
224 | //r4 |
||
225 | //Normalised Vertical step size for all layers |
||
226 | |||
227 | |||
228 | //r5 |
||
229 | //Normalised Vertical Frame Origin for all layers |
||
230 | |||
231 | |||
232 | //r6 |
||
233 | //Normalised Horizontal Frame Origin for all layers |
||
234 | |||
235 | |||
236 | //========== Inline Parameters (Explicit To Fast Composite) ========== |
||
237 | |||
238 | |||
239 | //Main video Step X |
||
240 | |||
241 | |||
242 | //====================== Binding table (Explicit To Fast Composite)========================================= |
||
243 | |||
244 | |||
245 | //Used by Interlaced Scaling Kernels |
||
246 | |||
247 | |||
248 | //========== Sampler State Table Index (Explicit To Fast Composite)========== |
||
249 | //Sampler Index for AVS/IEF messages |
||
250 | |||
251 | |||
252 | //Sampler Index for SIMD16 sampler messages |
||
253 | |||
254 | |||
255 | //============================================================================= |
||
256 | |||
257 | .declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // fBUFFER_n: 4-byte f views; buffers 0-5 are based at r64, r80, r96, r112, r28 and r46 |
||
258 | .declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f |
||
259 | .declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f |
||
260 | .declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f |
||
261 | .declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f |
||
262 | .declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f |
||
263 | |||
264 | .declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // udBUFFER_n: 4-byte ud views of the same six base registers |
||
265 | .declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
266 | .declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
267 | .declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
268 | .declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
269 | .declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
270 | |||
271 | .declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw // uwBUFFER_n: 2-byte uw views of the same six base registers |
||
272 | .declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
273 | .declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
274 | .declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
275 | .declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
276 | .declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
277 | |||
278 | .declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub // ubBUFFER_n: 1-byte ub views of the same six base registers |
||
279 | .declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub |
||
280 | .declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub |
||
281 | .declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub |
||
282 | .declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub |
||
283 | .declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub |
||
284 | |||
285 | .declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub // ub4BUFFER_n: byte views of the same bases accessed with a stride of 4 (<32;8,4> source, <4> destination) |
||
286 | .declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub |
||
287 | .declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub |
||
288 | .declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub |
||
289 | .declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub |
||
290 | .declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub |
||
291 | |||
292 | //Pointer to mask reg |
||
293 | |||
294 | |||
295 | //r18 |
||
296 | |||
297 | |||
298 | //Always keep Channel Pointers and Offsets in same GRF, so that we can use |
||
299 | // NODDCLR, NODDCHK flags. -rT |
||
300 | |||
301 | |||
302 | .declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF: ud view of r18. NOTE(review): r18.0 and r18.4 are also written by the shl/mov at the end of this file - confirm the aliasing is intended |
||
303 | |||
304 | //r19 |
||
305 | |||
306 | |||
307 | .declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF: ud view of r19 |
||
308 | |||
309 | |||
310 | //r20 |
||
311 | |||
312 | .declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF: uw view of r20 |
||
313 | |||
314 | //r21 |
||
315 | |||
316 | .declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF: uw view of r21 |
||
317 | |||
318 | //r22 |
||
319 | |||
320 | |||
321 | //Always keep Channel Pointers and Offsets in same GRF, so that we can use |
||
322 | // NODDCLR, NODDCHK flags. -rT |
||
323 | |||
324 | |||
325 | //Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as |
||
326 | //sub registers of same GRF to enable using NODDCLR NODDCHK. -rT |
||
327 | |||
328 | //r23 |
||
329 | |||
330 | |||
331 | //Lumakey |
||
332 | |||
333 | |||
334 | //r24 |
||
335 | |||
336 | |||
337 | //r25 |
||
338 | |||
339 | |||
340 | //r26 |
||
341 | |||
342 | |||
343 | //defines to generate LABELS during compile time. |
||
344 | |||
345 | |||
346 | //AVS LAYOUT:(UUYYVVAA) |
||
347 | //AVS RGBX LAYOUT (RRGGBBAA) |
||
348 | //Assign buffer channel order for Buffer 0123 in the order AUYV a0.3>A, a0.2>U, a0.1>Y, a0.0>V |
||
349 | // V = 8, Y= 0, U = 4, A = 12. |
||
350 | // And a0.x is used as indirect-register for RGBX. R=a0.1, G=a0.2, B=a0.0 |
||
351 | // B = 8, R= 0, G = 4, A = 12 |
||
352 | mov (4) acc0.0<1>:w 0x6EA2:v // Load 4 packed per-channel offsets into acc0 words 0-3. NOTE(review): assumes :v is a packed signed-nibble vector, giving {2,-6,-2,6} - confirm against the ISA reference |
||
353 | add (4) acc0.0<1>:w acc0<4;4,1>:w 70:uw // Add 70 to each of the 4 word lanes; under the assumption above this yields {72,64,68,76}, i.e. r64 + {8,0,4,12} as listed in the layout comment |
||
354 | shl (4) r22.0<1>:w acc0<4;4,1>:w 5:uw // Shift each lane left by 5 (x32) and store the 4 words at r22.0; presumably converts a GRF number to a byte address - confirm |
||
355 | |||
356 | //OPT: wAVS_SU_SHUFFLE_PTR_0 and udAVS_SU_SHUFFLE_OFF_0 are sub-regs of same GRF. -rT |
||
357 | |||
358 | //SU LAYOUT:(VYUAVYUA) |
||
359 | //V = 4, Y = 2, U = 0, A = 6 |
||
360 | //B = 4, G = 2, R = 0, A = 6 |
||
361 | mov (4) acc0.0<1>:w 0x6204:v // Load 4 packed per-channel offsets into acc0 words 0-3. NOTE(review): assumes :v is a packed signed-nibble vector, giving {4,0,2,6} - confirm against the ISA reference |
||
362 | add (4) acc0.0<1>:w acc0<4;4,1>:w 64:uw // Add 64 to each of the 4 word lanes |
||
363 | shl (4) r18.0<1>:w acc0<4;4,1>:w 5:uw { NoDDClr } //Convert to BYTE address. Shift left by 5 (x32); the 4 words are stored at r18.0 |
||
364 | |||
365 | //OFFSET: |
||
366 | mov (1) r18.4<1>:ud 0x1000100:ud { NoDDChk } // Write 0x01000100 to r18.4:ud, in the same GRF as the shl destination, which is why NoDDClr/NoDDChk are paired |
||
367 |