Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
6146 | serge | 1 | /* |
2 | * Copyright 2000-2011 Intel Corporation All Rights Reserved |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the |
||
6 | * "Software"), to deal in the Software without restriction, including |
||
7 | * without limitation the rights to use, copy, modify, merge, publish, |
||
8 | * distribute, sub license, and/or sell copies of the Software, and to |
||
9 | * permit persons to whom the Software is furnished to do so, subject to |
||
10 | * the following conditions: |
||
11 | * |
||
12 | * The above copyright notice and this permission notice (including the |
||
13 | * next paragraph) shall be included in all copies or substantial portions |
||
14 | * of the Software. |
||
15 | * |
||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
||
17 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||
18 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
||
19 | * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR |
||
20 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||
21 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||
22 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
23 | * |
||
24 | * This file was originally licensed under the following license |
||
25 | * |
||
26 | * Licensed under the Apache License, Version 2.0 (the "License"); |
||
27 | * you may not use this file except in compliance with the License. |
||
28 | * You may obtain a copy of the License at |
||
29 | * |
||
30 | * http://www.apache.org/licenses/LICENSE-2.0 |
||
31 | * |
||
32 | * Unless required by applicable law or agreed to in writing, software |
||
33 | * distributed under the License is distributed on an "AS IS" BASIS, |
||
34 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||
35 | * See the License for the specific language governing permissions and |
||
36 | * limitations under the License. |
||
37 | * Authors: |
||
38 | * Zhao Yakui |
||
39 | */ |
||
40 | |||
41 | |||
42 | |||
43 | // Module name: common.inc |
||
44 | // |
||
45 | // Common header file for all Video-Processing kernels |
||
46 | // |
||
47 | |||
48 | .default_execution_size (16) // execution size used when an instruction does not spell one out |
||
49 | .default_register_type :ub // operand type used when none is written: unsigned byte |
||
50 | |||
51 | .reg_count_total 128 // total register count declared for this kernel |
||
52 | .reg_count_payload 7 // payload register count declared for this kernel |
||
53 | |||
54 | //========== Common constants ========== |
||
55 | |||
56 | |||
57 | //========== Macros ========== |
||
58 | |||
59 | |||
60 | //Fast Jump, For more details see "Set_Layer_N.asm" |
||
61 | |||
62 | |||
63 | //========== Defines ==================== |
||
64 | |||
65 | //========== Static Parameters (Common To All) ========== |
||
66 | //r1 |
||
67 | |||
68 | |||
69 | //r2 |
||
70 | |||
71 | // e.g. byte0 byte1 byte2 |
||
72 | // YUYV 0 1 3 |
||
73 | // YVYU 0 3 1 |
||
74 | |||
75 | //Color Pipe (IECP) parameters |
||
76 | |||
77 | |||
78 | //ByteCopy |
||
79 | |||
80 | |||
81 | //r4 |
||
82 | |||
83 | // e.g. byte0 byte1 byte2 |
||
84 | // YUYV 0 1 3 |
||
85 | // YVYU 0 3 1 |
||
86 | |||
87 | |||
88 | //========== Inline parameters (Common To All) =========== |
||
89 | |||
90 | |||
91 | //============== Binding Index Table=========== |
||
92 | //Common between DNDI and DNUV |
||
93 | |||
94 | |||
95 | //================= Common Message Descriptor ===== |
||
96 | // Message descriptor for thread spawning |
||
97 | // Message Descriptors |
||
98 | // = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) |
||
99 | // 0000,0000,0000 |
||
100 | // 0001(Spawn a root thread),0001 (Root thread spawn thread) |
||
101 | // = 0x02000011 |
||
102 | // Thread Spawner Message Descriptor |
||
103 | |||
104 | |||
105 | // Message descriptor for atomic operation add |
||
106 | // Message Descriptors |
||
107 | // = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) |
||
108 | // 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) |
||
109 | // 0000,0000 (Binding table index, added later) |
||
110 | // = 0x0C098700 |
||
111 | |||
112 | // Atomic Operation Add Message Descriptor |
||
113 | |||
114 | |||
115 | // Message descriptor for dataport media write |
||
116 | // Message Descriptors |
||
117 | // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) |
||
118 | // 1 (header present 1) 0 1010 (media block write) 000000 |
||
119 | // 00000000 (binding table index - set later) |
||
120 | // = 0x020A8000 |
||
121 | |||
122 | |||
123 | // Message Length defines |
||
124 | |||
125 | |||
126 | // Response Length defines |
||
127 | |||
128 | |||
129 | // Block Width and Height Size defines |
||
130 | |||
131 | |||
132 | // Extended Message Descriptors |
||
133 | |||
134 | |||
135 | // Common message descriptors: |
||
136 | |||
137 | |||
138 | //===================== Math Function Control =================================== |
||
139 | |||
140 | |||
141 | //============ Message Registers =============== |
||
142 | // buf4 starts from r28 |
||
143 | |||
144 | |||
145 | //#define mMSGHDR_EOT r43 // Dummy Message Register for EOT |
||
146 | |||
147 | |||
148 | .declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub // unsigned-byte view of r30 |
||
149 | .declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw // unsigned-word view of the same register |
||
150 | .declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud // unsigned-dword view of the same register |
||
151 | .declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f // float view of the same register |
||
152 | |||
153 | //=================== End of thread instruction =========================== |
||
154 | |||
155 | |||
156 | //=====================Pointers Used===================================== |
||
157 | |||
158 | |||
159 | //======================================================================= |
||
160 | |||
161 | |||
162 | //r11-r17 |
||
163 | // Define temp space for any usages |
||
164 | |||
165 | |||
166 | // Common Buffers |
||
167 | |||
168 | |||
169 | // temp space for rotation |
||
170 | |||
171 | .declare fROBUF Base=r11.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float view of the temp space starting at r11 |
||
172 | |||
173 | .declare udROBUF Base=r11.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // unsigned-dword view of the same space |
||
174 | |||
175 | .declare uwROBUF Base=r11.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw // unsigned-word view of the same space |
||
176 | |||
177 | .declare ubROBUF Base=r11.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub // unsigned-byte view of the same space |
||
178 | |||
179 | .declare ub4ROBUF Base=r11.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub // byte view that steps 4 bytes per element (one byte out of every dword) |
||
180 | |||
181 | |||
182 | // End of common.inc |
||
183 | |||
184 | |||
185 | // Module name: Save_AVS_RGBX.asm |
||
186 | // |
||
187 | // Save packed ARGB 444 frame data block of size 16x16 |
||
188 | // |
||
189 | // To save 16x16 block (64x16 byte layout for ARGB8888) we issue 8 send instructions with 32x4 in each |
||
190 | // -------- |
||
191 | // | 0 | 1 | |
||
192 | // | 2 | 3 | |
||
193 | // --------- |
||
194 | // each 32x8 quadrant above is written by two 32x4 block sends (rows 0-3, then rows 4-7) |
||
195 | |||
196 | |||
197 | |||
198 | // Module name: Save.inc |
||
199 | |||
200 | |||
201 | |||
202 | |||
203 | // Description: Includes all definitions explicit to Fast Composite. |
||
204 | |||
205 | |||
206 | |||
207 | |||
208 | // End of common.inc |
||
209 | |||
210 | |||
211 | //========== GRF partition ========== |
||
212 | // r0 header : r0 (1 GRF) |
||
213 | // Static parameters : r1 - r6 (6 GRFS) |
||
214 | // Inline parameters : r7 - r8 (2 GRFs) |
||
215 | // MSGSRC : r27 (1 GRF) |
||
216 | //=================================== |
||
217 | |||
218 | //Interface: |
||
219 | //========== Static Parameters (Explicit To Fast Composite) ========== |
||
220 | //r1 |
||
221 | //CSC Set 0 |
||
222 | |||
223 | |||
224 | .declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud // unsigned-dword view starting at r1.0 (first static-parameter register) |
||
225 | |||
226 | //Constant alpha |
||
227 | |||
228 | |||
229 | //r2 |
||
230 | |||
231 | |||
232 | // Gen7 AVS WA |
||
233 | |||
234 | |||
235 | // WiDi Definitions |
||
236 | |||
237 | |||
238 | //Colorfill |
||
239 | |||
240 | |||
241 | // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. |
||
242 | |||
243 | .declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub // byte at r2.20; the <0;1,0> source region reads it as a replicated scalar |
||
244 | |||
245 | //r3 |
||
246 | //Normalised Ratio of Horizontal step size with main video for all layers |
||
247 | |||
248 | |||
249 | //Normalised Ratio of Horizontal step size with main video for all layers becomes |
||
250 | //Normalised Horizontal step size for all layers in VP_Setup.asm |
||
251 | |||
252 | |||
253 | //r4 |
||
254 | //Normalised Vertical step size for all layers |
||
255 | |||
256 | |||
257 | //r5 |
||
258 | //Normalised Vertical Frame Origin for all layers |
||
259 | |||
260 | |||
261 | //r6 |
||
262 | //Normalised Horizontal Frame Origin for all layers |
||
263 | |||
264 | |||
265 | //========== Inline Parameters (Explicit To Fast Composite) ========== |
||
266 | |||
267 | |||
268 | //Main video Step X |
||
269 | |||
270 | |||
271 | //====================== Binding table (Explicit To Fast Composite)========================================= |
||
272 | |||
273 | |||
274 | //Used by Interlaced Scaling Kernels |
||
275 | |||
276 | |||
277 | //========== Sampler State Table Index (Explicit To Fast Composite)========== |
||
278 | //Sampler Index for AVS/IEF messages |
||
279 | |||
280 | |||
281 | //Sampler Index for SIMD16 sampler messages |
||
282 | |||
283 | |||
284 | //============================================================================= |
||
285 | |||
286 | .declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float views; buffers 0-3 start 16 registers apart (r64, r80, r96, r112) |
||
287 | .declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f |
||
288 | .declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f |
||
289 | .declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f |
||
290 | .declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // NOTE(review): buffers 4/5 sit at r28/r46, which the save code below also uses as send message registers - confirm they are never live at the same time |
||
291 | .declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f |
||
292 | |||
293 | .declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // unsigned-dword views of the same six buffers |
||
294 | .declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
295 | .declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
296 | .declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
297 | .declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
298 | .declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
299 | |||
300 | .declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw // unsigned-word views of the same six buffers |
||
301 | .declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
302 | .declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
303 | .declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
304 | .declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
305 | .declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
306 | |||
307 | .declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub // unsigned-byte views of the same six buffers |
||
308 | .declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub |
||
309 | .declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub |
||
310 | .declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub |
||
311 | .declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub |
||
312 | .declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub |
||
313 | |||
314 | .declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub // byte views that step 4 bytes per element (one byte out of every dword) |
||
315 | .declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub |
||
316 | .declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub |
||
317 | .declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub |
||
318 | .declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub |
||
319 | .declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub |
||
320 | |||
321 | //Pointer to mask reg |
||
322 | |||
323 | |||
324 | //r18 |
||
325 | |||
326 | |||
327 | //Always keep Channel Pointers and Offsets in same GRF, so that we can use |
||
328 | // NODDCLR, NODDCHK flags. -rT |
||
329 | |||
330 | |||
331 | .declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF: unsigned-dword view of r18 |
||
332 | |||
333 | //r19 |
||
334 | |||
335 | |||
336 | .declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF: unsigned-dword view of r19 |
||
337 | |||
338 | |||
339 | //r20 |
||
340 | |||
341 | .declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF: 16 unsigned words in r20 |
||
342 | |||
343 | //r21 |
||
344 | |||
345 | .declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF: 16 unsigned words in r21 |
||
346 | |||
347 | //r22 |
||
348 | |||
349 | |||
350 | //Always keep Channel Pointers and Offsets in same GRF, so that we can use |
||
351 | // NODDCLR, NODDCHK flags. -rT |
||
352 | |||
353 | |||
354 | //Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as |
||
355 | //sub registers of same GRF to enable using NODDCLR NODDCHK. -rT |
||
356 | |||
357 | //r23 |
||
358 | |||
359 | |||
360 | //Lumakey |
||
361 | |||
362 | |||
363 | //r24 |
||
364 | |||
365 | |||
366 | //r25 |
||
367 | |||
368 | |||
369 | //r26 |
||
370 | |||
371 | |||
372 | //defines to generate LABELS during compile time. |
||
373 | |||
374 | |||
375 | //Msg payload buffers; upto 4 full-size messages can be written |
||
376 | |||
377 | |||
378 | .declare mudMSGPAYLOAD0 Base=r29.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud // dword views; payloads 0-3 start 9 registers apart, each one register above a send base (r28, r37, r46, r55) used by the save code |
||
379 | .declare mudMSGPAYLOAD1 Base=r38.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud |
||
380 | .declare mudMSGPAYLOAD2 Base=r47.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud |
||
381 | .declare mudMSGPAYLOAD3 Base=r56.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud |
||
382 | |||
383 | .declare muwMSGPAYLOAD0 Base=r29.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // word views of payloads 0-3 |
||
384 | .declare muwMSGPAYLOAD1 Base=r38.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw |
||
385 | .declare muwMSGPAYLOAD2 Base=r47.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw |
||
386 | .declare muwMSGPAYLOAD3 Base=r56.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw |
||
387 | |||
388 | .declare mubMSGPAYLOAD0 Base=r29.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub // byte views of payloads 0-3; these are the ones the save code writes |
||
389 | .declare mubMSGPAYLOAD1 Base=r38.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub |
||
390 | .declare mubMSGPAYLOAD2 Base=r47.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub |
||
391 | .declare mubMSGPAYLOAD3 Base=r56.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub |
||
392 | .declare mubMSGPAYLOAD4 Base=r32.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub // payloads 4-7 start 3 registers above payloads 0-3; not referenced by the save code visible in this listing |
||
393 | .declare mubMSGPAYLOAD5 Base=r41.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub |
||
394 | .declare mubMSGPAYLOAD6 Base=r50.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub |
||
395 | .declare mubMSGPAYLOAD7 Base=r59.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub |
||
396 | |||
397 | |||
398 | // the r17 register (nTEMP0) is originally defined from "Common.inc" |
||
399 | // instead of re-defining a nTEMP0 here, we use "SAVE_RGB" suffix for its naming |
||
400 | |||
401 | .declare uwTemp0 Base=r17.0 ElementSize=2 Type=uw // word scratch at r17.0; the save code uses it as the temporary for the channel-pointer swap |
||
402 | |||
403 | |||
404 | //_SAVE_INC_ |
||
405 | |||
406 | |||
407 | // At the save module we have all 8 address sub-registers available. |
||
408 | // So we will use PING-PONG type of scheme to save the data using |
||
409 | // pointers pBUF_CHNL_TOP_8x4 and pBUF_CHNL_BOT_8x4. This will help |
||
410 | // reduce dependency. - rT |
||
411 | |||
412 | //Internal LAYOUT:(RRGGBBAA) |
||
413 | //Assign buffer channel order for Buffer 0123 in the order RGBA a0.3>A, a0.2>B, a0.1>G, a0.0>R |
||
414 | // R = 0, G= 4, B = 8, A = 12 (register offsets from the start of the buffer; turned into byte addresses by the shl below). |
||
415 | mov (4) acc0.0<1>:w 0x62EA:v // packed 4-bit vector immediate; presumably decodes to -6, -2, 2, 6 - confirm against the EU ISA |
||
416 | add (4) acc0.0<1>:w acc0<4;4,1>:w 70:uw // bias by 70; with the decoding above this yields 64, 68, 72, 76 |
||
417 | shl (4) r22.0<1>:w acc0<4;4,1>:w 5:uw // multiply by 32 and keep the four results in r22.0..r22.3; 64 matches Base=r64 of the *BUFFER_0 declares |
||
418 | |||
419 | // if channel swap? |
||
420 | // This means that it should be BGRA(B is the LSB) or RGBA |
||
421 | // the internal format is always RGBA(MSB-A-B-G-R). |
||
422 | and.nz.f0.0 null<1>:w r2.3<0;1,0>:uw 0x01:w // f0.0 is set when bit 0 of r2.3:uw is non-zero; it predicates the pointer swap below |
||
423 | |||
424 | //wBUFF_CHNL_PTR points to either buffer 0 or buffer 4. |
||
425 | //Add appropriate offsets to get pointers for all buffers (1,2,3 or 5). |
||
426 | //Offsets are zero for buffer 0 and buffer 4. |
||
427 | add (4) a0.0:uw r22.0<4;4,1>:w 0:uw // a0.0..a0.3 = the four channel pointers from r22 (offset 0 added) |
||
428 | |||
429 | // pointer swap |
||
430 | (f0.0) mov (1) uwTemp0<1> a0.0<0;1,0>:uw // when f0.0 is set: exchange a0.0 and a0.2 through uwTemp0 (r17.0) |
||
431 | (f0.0) mov (1) a0.0<1>:uw a0.2<0;1,0>:uw |
||
432 | (f0.0) mov (1) a0.2<1>:uw uwTemp0<0;1,0> |
||
433 | |||
434 | shl (1) r27.0<1>:d r9.0<0;1,0>:w 2:w { NoDDClr } // Horizontal block origin = r9.0 * 4 (quadrupled) |
||
435 | mov (1) r27.1<1>:d r9.1<0;1,0>:w { NoDDClr, NoDDChk } // Block origin (1st quadrant) |
||
436 | mov (1) r27.2<1>:ud 0x3001F:ud { NoDDChk } // Block width and height (32x4) |
||
437 | |||
438 | mov (4) a0.4<1>:uw a0.0<4;4,1>:uw // keep a copy of the four channel pointers in a0.4..a0.7; later blocks rebuild a0.0..a0.3 from it |
||
439 | |||
440 | mov (8) r28<1>:ud r27<8;8,1>:ud // replicate the r27 header template into the four send base registers |
||
441 | mov (8) r37<1>:ud r27<8;8,1>:ud |
||
442 | mov (8) r46<1>:ud r27<8;8,1>:ud |
||
443 | mov (8) r55<1>:ud r27<8;8,1>:ud |
||
444 | |||
445 | mov (8) r31<1>:ud r27<8;8,1>:ud // NOTE(review): r31/r40/r49/r58 are row 2 of mubMSGPAYLOAD0..3 and appear to be fully rewritten by the payload movs before any send - these four copies look redundant, confirm |
||
446 | mov (8) r40<1>:ud r27<8;8,1>:ud |
||
447 | mov (8) r49<1>:ud r27<8;8,1>:ud |
||
448 | mov (8) r58<1>:ud r27<8;8,1>:ud |
||
449 | |||
450 | //Buffer 0/1 are written by using 4 32x4. |
||
451 | |||
452 | add (1) r37.0<1>:d r27.0<0;1,0>:d 32:d // r37 header: X origin + 32 |
||
453 | |||
454 | add (1) r46.1<1>:d r27.1<0;1,0>:d 4:d // r46 header: second origin dword (r27.1) + 4 |
||
455 | |||
456 | add (1) r55.1<1>:d r27.1<0;1,0>:d 4:d // r55 header: both offsets applied (+4 on r27.1, +32 on X) |
||
457 | add (1) r55.0<1>:d r27.0<0;1,0>:d 32:d |
458 | |||
459 | // write Buf_0 to 1st quarter of four horizontal output blocks |
||
460 | |||
461 | // Please note the scattered order of NODDCLR, NODDCHK flags. Since the sub-registers |
||
462 | // of destination reg are not updated at one place and hence even flags are scattered. -rT |
||
463 | |||
464 | /* Buffer 0: pack payload 0 (header r28) and payload 1 (header r37, X origin + 32) from the channel pointers in a0.0..a0.2 */ |
||
465 | mov (8) mubMSGPAYLOAD0(0, 0)<4> r[a0.0, 1]<16;8,2>:ub // payload row 0, byte 0 of each 4-byte group: 8 source bytes at stride 2 starting at byte offset 1 from a0.0 |
||
466 | mov (8) mubMSGPAYLOAD0(0, 1)<4> r[a0.1, 1]<16;8,2>:ub // byte 1 of each group, from the a0.1 channel |
||
467 | mov (8) mubMSGPAYLOAD0(0, 2)<4> r[a0.2, 1]<16;8,2>:ub // byte 2 of each group, from the a0.2 channel |
||
468 | mov (8) mubMSGPAYLOAD0(0, 3)<4> r2.31:ub // byte 3 of each group: the byte at r2.31 (presumably a constant alpha - confirm) |
||
469 | |||
470 | mov (8) mubMSGPAYLOAD0(1, 0)<4> r[a0.0, 33]<16;8,2>:ub // payload row 1: source offset 33 = 32 + 1, i.e. the next source register |
||
471 | mov (8) mubMSGPAYLOAD0(1, 1)<4> r[a0.1, 33]<16;8,2>:ub |
||
472 | mov (8) mubMSGPAYLOAD0(1, 2)<4> r[a0.2, 33]<16;8,2>:ub |
||
473 | mov (8) mubMSGPAYLOAD0(1, 3)<4> r2.31:ub |
||
474 | |||
475 | mov (8) mubMSGPAYLOAD1(0, 0)<4> r[a0.0, 17]<16;8,2>:ub // payload 1 row 0: source offset 17 = 16 + 1, the second half of the first source register |
||
476 | mov (8) mubMSGPAYLOAD1(0, 1)<4> r[a0.1, 17]<16;8,2>:ub |
||
477 | mov (8) mubMSGPAYLOAD1(0, 2)<4> r[a0.2, 17]<16;8,2>:ub |
||
478 | mov (8) mubMSGPAYLOAD1(0, 3)<4> r2.31:ub |
||
479 | |||
480 | mov (8) mubMSGPAYLOAD1(1, 0)<4> r[a0.0, 49]<16;8,2>:ub // payload 1 row 1: source offset 49 = 32 + 16 + 1 |
||
481 | mov (8) mubMSGPAYLOAD1(1, 1)<4> r[a0.1, 49]<16;8,2>:ub |
||
482 | mov (8) mubMSGPAYLOAD1(1, 2)<4> r[a0.2, 49]<16;8,2>:ub |
||
483 | mov (8) mubMSGPAYLOAD1(1, 3)<4> r2.31:ub |
||
484 | |||
485 | mov (8) mubMSGPAYLOAD0(2, 0)<4> r[a0.0, 65]<16;8,2>:ub // payload 0 row 2: source offset 65 = 2*32 + 1 |
||
486 | mov (8) mubMSGPAYLOAD0(2, 1)<4> r[a0.1, 65]<16;8,2>:ub |
||
487 | mov (8) mubMSGPAYLOAD0(2, 2)<4> r[a0.2, 65]<16;8,2>:ub |
||
488 | mov (8) mubMSGPAYLOAD0(2, 3)<4> r2.31:ub |
||
489 | |||
490 | mov (8) mubMSGPAYLOAD0(3, 0)<4> r[a0.0, 97]<16;8,2>:ub // payload 0 row 3: source offset 97 = 3*32 + 1 |
||
491 | mov (8) mubMSGPAYLOAD0(3, 1)<4> r[a0.1, 97]<16;8,2>:ub |
||
492 | mov (8) mubMSGPAYLOAD0(3, 2)<4> r[a0.2, 97]<16;8,2>:ub |
||
493 | mov (8) mubMSGPAYLOAD0(3, 3)<4> r2.31:ub |
||
494 | |||
495 | mov (8) mubMSGPAYLOAD1(2, 0)<4> r[a0.0, 81]<16;8,2>:ub // payload 1 row 2: source offset 81 = 2*32 + 16 + 1 |
||
496 | mov (8) mubMSGPAYLOAD1(2, 1)<4> r[a0.1, 81]<16;8,2>:ub |
||
497 | mov (8) mubMSGPAYLOAD1(2, 2)<4> r[a0.2, 81]<16;8,2>:ub |
||
498 | mov (8) mubMSGPAYLOAD1(2, 3)<4> r2.31:ub |
||
499 | |||
500 | mov (8) mubMSGPAYLOAD1(3, 0)<4> r[a0.0, 113]<16;8,2>:ub // payload 1 row 3: source offset 113 = 3*32 + 16 + 1 |
||
501 | mov (8) mubMSGPAYLOAD1(3, 1)<4> r[a0.1, 113]<16;8,2>:ub |
||
502 | mov (8) mubMSGPAYLOAD1(3, 2)<4> r[a0.2, 113]<16;8,2>:ub |
||
503 | mov (8) mubMSGPAYLOAD1(3, 3)<4> r2.31:ub |
||
504 | |||
505 | /* For Buffer 0: 0x0A0A8018 follows the media block write pattern 0x020A8000 described in common.inc, with message length 5 and binding table index 0x18 */ |
||
506 | send (16) null<1>:d r28 0x5 0x0A0A8018:ud // message = r28 header + payload 0 (r29-r32); no response (null destination) |
||
507 | send (16) null<1>:d r37 0x5 0x0A0A8018:ud // message = r37 header + payload 1 (r38-r41) |
||
508 | |||
509 | add (4) a0.0<1>:uw a0.4<4;4,1>:uw 512:uw // a0.0..a0.3 = saved channel pointers + 512 bytes (the next buffer, 16 registers further on) |
||
510 | mov (8) mubMSGPAYLOAD2(0, 0)<4> r[a0.0, 1]<16;8,2>:ub // same packing pattern as for buffer 0, now into payload 2 (header r46) and payload 3 (header r55) |
||
511 | mov (8) mubMSGPAYLOAD2(0, 1)<4> r[a0.1, 1]<16;8,2>:ub |
||
512 | mov (8) mubMSGPAYLOAD2(0, 2)<4> r[a0.2, 1]<16;8,2>:ub |
||
513 | mov (8) mubMSGPAYLOAD2(0, 3)<4> r2.31:ub // byte 3 of each 4-byte group comes from r2.31 |
||
514 | |||
515 | mov (8) mubMSGPAYLOAD2(1, 0)<4> r[a0.0, 33]<16;8,2>:ub |
||
516 | mov (8) mubMSGPAYLOAD2(1, 1)<4> r[a0.1, 33]<16;8,2>:ub |
||
517 | mov (8) mubMSGPAYLOAD2(1, 2)<4> r[a0.2, 33]<16;8,2>:ub |
||
518 | mov (8) mubMSGPAYLOAD2(1, 3)<4> r2.31:ub |
||
519 | |||
520 | mov (8) mubMSGPAYLOAD3(0, 0)<4> r[a0.0, 17]<16;8,2>:ub // payload 3 takes the second half of each source register (offsets 17, 49, 81, 113) |
||
521 | mov (8) mubMSGPAYLOAD3(0, 1)<4> r[a0.1, 17]<16;8,2>:ub |
||
522 | mov (8) mubMSGPAYLOAD3(0, 2)<4> r[a0.2, 17]<16;8,2>:ub |
||
523 | mov (8) mubMSGPAYLOAD3(0, 3)<4> r2.31:ub |
||
524 | |||
525 | mov (8) mubMSGPAYLOAD3(1, 0)<4> r[a0.0, 49]<16;8,2>:ub |
||
526 | mov (8) mubMSGPAYLOAD3(1, 1)<4> r[a0.1, 49]<16;8,2>:ub |
||
527 | mov (8) mubMSGPAYLOAD3(1, 2)<4> r[a0.2, 49]<16;8,2>:ub |
||
528 | mov (8) mubMSGPAYLOAD3(1, 3)<4> r2.31:ub |
||
529 | |||
530 | mov (8) mubMSGPAYLOAD2(2, 0)<4> r[a0.0, 65]<16;8,2>:ub |
||
531 | mov (8) mubMSGPAYLOAD2(2, 1)<4> r[a0.1, 65]<16;8,2>:ub |
||
532 | mov (8) mubMSGPAYLOAD2(2, 2)<4> r[a0.2, 65]<16;8,2>:ub |
||
533 | mov (8) mubMSGPAYLOAD2(2, 3)<4> r2.31:ub |
||
534 | |||
535 | mov (8) mubMSGPAYLOAD2(3, 0)<4> r[a0.0, 97]<16;8,2>:ub |
||
536 | mov (8) mubMSGPAYLOAD2(3, 1)<4> r[a0.1, 97]<16;8,2>:ub |
||
537 | mov (8) mubMSGPAYLOAD2(3, 2)<4> r[a0.2, 97]<16;8,2>:ub |
||
538 | mov (8) mubMSGPAYLOAD2(3, 3)<4> r2.31:ub |
||
539 | |||
540 | mov (8) mubMSGPAYLOAD3(2, 0)<4> r[a0.0, 81]<16;8,2>:ub |
||
541 | mov (8) mubMSGPAYLOAD3(2, 1)<4> r[a0.1, 81]<16;8,2>:ub |
||
542 | mov (8) mubMSGPAYLOAD3(2, 2)<4> r[a0.2, 81]<16;8,2>:ub |
||
543 | mov (8) mubMSGPAYLOAD3(2, 3)<4> r2.31:ub |
||
544 | |||
545 | mov (8) mubMSGPAYLOAD3(3, 0)<4> r[a0.0, 113]<16;8,2>:ub |
||
546 | mov (8) mubMSGPAYLOAD3(3, 1)<4> r[a0.1, 113]<16;8,2>:ub |
||
547 | mov (8) mubMSGPAYLOAD3(3, 2)<4> r[a0.2, 113]<16;8,2>:ub |
||
548 | mov (8) mubMSGPAYLOAD3(3, 3)<4> r2.31:ub |
||
549 | // send Buffer 1: same descriptor as buffer 0; the r46 and r55 headers carry the +4 offset on their second origin dword |
||
550 | send (16) null<1>:d r46 0x5 0x0A0A8018:ud // message = r46 header + payload 2 (r47-r50) |
||
551 | send (16) null<1>:d r55 0x5 0x0A0A8018:ud // message = r55 header + payload 3 (r56-r59) |
||
552 | |||
553 | |||
554 | /* for Buffer 2/3: rebuild the four headers from the r27 template, then apply the new offsets (+8 / +12 on r27.1, +32 on X for r37 and r55) */ |
||
555 | mov (8) r28<1>:ud r27<8;8,1>:ud |
||
556 | mov (8) r37<1>:ud r27<8;8,1>:ud |
||
557 | mov (8) r46<1>:ud r27<8;8,1>:ud |
||
558 | mov (8) r55<1>:ud r27<8;8,1>:ud |
||
559 | |||
560 | add (1) r28.1<1>:d r27.1<0;1,0>:d 8:d // r28 header: r27.1 + 8 |
||
561 | |||
562 | add (1) r37.0<1>:d r27.0<0;1,0>:d 32:d // r37 header: X + 32 and r27.1 + 8 |
||
563 | add (1) r37.1<1>:d r27.1<0;1,0>:d 8:d |
||
564 | |||
565 | add (1) r46.1<1>:d r27.1<0;1,0>:d 12:d // r46 header: r27.1 + 12 (consumed by the buffer 3 sends) |
||
566 | |||
567 | add (1) r55.1<1>:d r27.1<0;1,0>:d 12:d // r55 header: r27.1 + 12 and X + 32 (consumed by the buffer 3 sends) |
||
568 | add (1) r55.0<1>:d r27.0<0;1,0>:d 32:d |
||
569 | |||
570 | add (4) a0.0<1>:uw a0.4<4;4,1>:uw 1024:uw // a0.0..a0.3 = saved channel pointers + 1024 bytes (32 registers further on) |
||
571 | |||
572 | mov (8) mubMSGPAYLOAD0(0, 0)<4> r[a0.0, 1]<16;8,2>:ub // same packing pattern as for buffer 0; payloads 0 and 1 are reused |
||
573 | mov (8) mubMSGPAYLOAD0(0, 1)<4> r[a0.1, 1]<16;8,2>:ub |
||
574 | mov (8) mubMSGPAYLOAD0(0, 2)<4> r[a0.2, 1]<16;8,2>:ub |
||
575 | mov (8) mubMSGPAYLOAD0(0, 3)<4> r2.31:ub // byte 3 of each 4-byte group comes from r2.31 |
||
576 | |||
577 | mov (8) mubMSGPAYLOAD0(1, 0)<4> r[a0.0, 33]<16;8,2>:ub |
||
578 | mov (8) mubMSGPAYLOAD0(1, 1)<4> r[a0.1, 33]<16;8,2>:ub |
||
579 | mov (8) mubMSGPAYLOAD0(1, 2)<4> r[a0.2, 33]<16;8,2>:ub |
||
580 | mov (8) mubMSGPAYLOAD0(1, 3)<4> r2.31:ub |
||
581 | |||
582 | mov (8) mubMSGPAYLOAD1(0, 0)<4> r[a0.0, 17]<16;8,2>:ub // payload 1 takes the second half of each source register (offsets 17, 49, 81, 113) |
||
583 | mov (8) mubMSGPAYLOAD1(0, 1)<4> r[a0.1, 17]<16;8,2>:ub |
||
584 | mov (8) mubMSGPAYLOAD1(0, 2)<4> r[a0.2, 17]<16;8,2>:ub |
||
585 | mov (8) mubMSGPAYLOAD1(0, 3)<4> r2.31:ub |
||
586 | |||
587 | mov (8) mubMSGPAYLOAD1(1, 0)<4> r[a0.0, 49]<16;8,2>:ub |
||
588 | mov (8) mubMSGPAYLOAD1(1, 1)<4> r[a0.1, 49]<16;8,2>:ub |
||
589 | mov (8) mubMSGPAYLOAD1(1, 2)<4> r[a0.2, 49]<16;8,2>:ub |
||
590 | mov (8) mubMSGPAYLOAD1(1, 3)<4> r2.31:ub |
||
591 | |||
592 | mov (8) mubMSGPAYLOAD0(2, 0)<4> r[a0.0, 65]<16;8,2>:ub |
||
593 | mov (8) mubMSGPAYLOAD0(2, 1)<4> r[a0.1, 65]<16;8,2>:ub |
||
594 | mov (8) mubMSGPAYLOAD0(2, 2)<4> r[a0.2, 65]<16;8,2>:ub |
||
595 | mov (8) mubMSGPAYLOAD0(2, 3)<4> r2.31:ub |
||
596 | |||
597 | mov (8) mubMSGPAYLOAD0(3, 0)<4> r[a0.0, 97]<16;8,2>:ub |
||
598 | mov (8) mubMSGPAYLOAD0(3, 1)<4> r[a0.1, 97]<16;8,2>:ub |
||
599 | mov (8) mubMSGPAYLOAD0(3, 2)<4> r[a0.2, 97]<16;8,2>:ub |
||
600 | mov (8) mubMSGPAYLOAD0(3, 3)<4> r2.31:ub |
||
601 | |||
602 | mov (8) mubMSGPAYLOAD1(2, 0)<4> r[a0.0, 81]<16;8,2>:ub |
||
603 | mov (8) mubMSGPAYLOAD1(2, 1)<4> r[a0.1, 81]<16;8,2>:ub |
||
604 | mov (8) mubMSGPAYLOAD1(2, 2)<4> r[a0.2, 81]<16;8,2>:ub |
||
605 | mov (8) mubMSGPAYLOAD1(2, 3)<4> r2.31:ub |
||
606 | |||
607 | mov (8) mubMSGPAYLOAD1(3, 0)<4> r[a0.0, 113]<16;8,2>:ub |
||
608 | mov (8) mubMSGPAYLOAD1(3, 1)<4> r[a0.1, 113]<16;8,2>:ub |
||
609 | mov (8) mubMSGPAYLOAD1(3, 2)<4> r[a0.2, 113]<16;8,2>:ub |
||
610 | mov (8) mubMSGPAYLOAD1(3, 3)<4> r2.31:ub |
||
611 | |||
612 | // Send Buffer 2: same descriptor as the earlier sends; the r28 and r37 headers now carry the +8 offset |
||
613 | send (16) null<1>:d r28 0x5 0x0A0A8018:ud // message = r28 header + payload 0 (r29-r32) |
||
614 | send (16) null<1>:d r37 0x5 0x0A0A8018:ud // message = r37 header + payload 1 (r38-r41) |
||
615 | |||
616 | add (4) a0.0<1>:uw a0.4<4;4,1>:uw 1536:uw // a0.0..a0.3 = saved channel pointers + 1536 bytes (48 registers further on) |
||
617 | mov (8) mubMSGPAYLOAD2(0, 0)<4> r[a0.0, 1]<16;8,2>:ub // same packing pattern as the earlier buffers, into payload 2 (header r46) and payload 3 (header r55) |
||
618 | mov (8) mubMSGPAYLOAD2(0, 1)<4> r[a0.1, 1]<16;8,2>:ub |
||
619 | mov (8) mubMSGPAYLOAD2(0, 2)<4> r[a0.2, 1]<16;8,2>:ub |
||
620 | mov (8) mubMSGPAYLOAD2(0, 3)<4> r2.31:ub // byte 3 of each 4-byte group comes from r2.31 |
||
621 | |||
622 | mov (8) mubMSGPAYLOAD2(1, 0)<4> r[a0.0, 33]<16;8,2>:ub |
||
623 | mov (8) mubMSGPAYLOAD2(1, 1)<4> r[a0.1, 33]<16;8,2>:ub |
||
624 | mov (8) mubMSGPAYLOAD2(1, 2)<4> r[a0.2, 33]<16;8,2>:ub |
||
625 | mov (8) mubMSGPAYLOAD2(1, 3)<4> r2.31:ub |
||
626 | |||
627 | mov (8) mubMSGPAYLOAD3(0, 0)<4> r[a0.0, 17]<16;8,2>:ub // payload 3 takes the second half of each source register (offsets 17, 49, 81, 113) |
||
628 | mov (8) mubMSGPAYLOAD3(0, 1)<4> r[a0.1, 17]<16;8,2>:ub |
||
629 | mov (8) mubMSGPAYLOAD3(0, 2)<4> r[a0.2, 17]<16;8,2>:ub |
||
630 | mov (8) mubMSGPAYLOAD3(0, 3)<4> r2.31:ub |
||
631 | |||
632 | mov (8) mubMSGPAYLOAD3(1, 0)<4> r[a0.0, 49]<16;8,2>:ub |
||
633 | mov (8) mubMSGPAYLOAD3(1, 1)<4> r[a0.1, 49]<16;8,2>:ub |
||
634 | mov (8) mubMSGPAYLOAD3(1, 2)<4> r[a0.2, 49]<16;8,2>:ub |
||
635 | mov (8) mubMSGPAYLOAD3(1, 3)<4> r2.31:ub |
||
636 | |||
637 | mov (8) mubMSGPAYLOAD2(2, 0)<4> r[a0.0, 65]<16;8,2>:ub |
||
638 | mov (8) mubMSGPAYLOAD2(2, 1)<4> r[a0.1, 65]<16;8,2>:ub |
||
639 | mov (8) mubMSGPAYLOAD2(2, 2)<4> r[a0.2, 65]<16;8,2>:ub |
||
640 | mov (8) mubMSGPAYLOAD2(2, 3)<4> r2.31:ub |
||
641 | |||
642 | mov (8) mubMSGPAYLOAD2(3, 0)<4> r[a0.0, 97]<16;8,2>:ub |
||
643 | mov (8) mubMSGPAYLOAD2(3, 1)<4> r[a0.1, 97]<16;8,2>:ub |
||
644 | mov (8) mubMSGPAYLOAD2(3, 2)<4> r[a0.2, 97]<16;8,2>:ub |
||
645 | mov (8) mubMSGPAYLOAD2(3, 3)<4> r2.31:ub |
||
646 | |||
647 | mov (8) mubMSGPAYLOAD3(2, 0)<4> r[a0.0, 81]<16;8,2>:ub |
||
648 | mov (8) mubMSGPAYLOAD3(2, 1)<4> r[a0.1, 81]<16;8,2>:ub |
||
649 | mov (8) mubMSGPAYLOAD3(2, 2)<4> r[a0.2, 81]<16;8,2>:ub |
||
650 | mov (8) mubMSGPAYLOAD3(2, 3)<4> r2.31:ub |
||
651 | |||
652 | mov (8) mubMSGPAYLOAD3(3, 0)<4> r[a0.0, 113]<16;8,2>:ub |
||
653 | mov (8) mubMSGPAYLOAD3(3, 1)<4> r[a0.1, 113]<16;8,2>:ub |
||
654 | mov (8) mubMSGPAYLOAD3(3, 2)<4> r[a0.2, 113]<16;8,2>:ub |
||
655 | mov (8) mubMSGPAYLOAD3(3, 3)<4> r2.31:ub |
||
656 | // send buffer 3: two sends with message base registers r46 and r55, using the same 0x0A0A8018 descriptor as every other send in this kernel |
||
657 | send (16) null<1>:d r46 0x5 0x0A0A8018:ud |
||
658 | send (16) null<1>:d r55 0x5 0x0A0A8018:ud |
||
659 |