Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5361 | serge | 1 | /* |
2 | * Copyright 2000-2011 Intel Corporation All Rights Reserved |
||
3 | * |
||
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
||
5 | * you may not use this file except in compliance with the License. |
||
6 | * You may obtain a copy of the License at |
||
7 | * |
||
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
||
9 | * |
||
10 | * Unless required by applicable law or agreed to in writing, software |
||
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
||
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||
13 | * See the License for the specific language governing permissions and |
||
14 | * limitations under the License. |
||
15 | */ |
||
16 | /* |
||
17 | * Copyright 2000-2011 Intel Corporation All Rights Reserved |
||
18 | * |
||
19 | * Licensed under the Apache License, Version 2.0 (the "License"); |
||
20 | * you may not use this file except in compliance with the License. |
||
21 | * You may obtain a copy of the License at |
||
22 | * |
||
23 | * http://www.apache.org/licenses/LICENSE-2.0 |
||
24 | * |
||
25 | * Unless required by applicable law or agreed to in writing, software |
||
26 | * distributed under the License is distributed on an "AS IS" BASIS, |
||
27 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||
28 | * See the License for the specific language governing permissions and |
||
29 | * limitations under the License. |
||
30 | */ |
||
31 | |||
32 | // 326 // Total instruction count |
||
33 | // 1 // Total kernel count |
||
34 | |||
35 | |||
36 | |||
37 | // Module name: common.inc |
||
38 | // |
||
39 | // Common header file for all Video-Processing kernels |
||
40 | // |
||
41 | |||
42 | .default_execution_size (16) |
||
43 | .default_register_type :ub |
||
44 | |||
45 | .reg_count_total 128 |
||
46 | .reg_count_payload 7 |
||
47 | |||
48 | //========== Common constants ========== |
||
49 | |||
50 | |||
51 | //========== Macros ========== |
||
52 | |||
53 | |||
54 | //Fast Jump, For more details see "Set_Layer_N.asm" |
||
55 | |||
56 | |||
57 | //========== Defines ==================== |
||
58 | |||
59 | //========== Static Parameters (Common To All) ========== |
||
60 | //r1 |
||
61 | |||
62 | |||
63 | //r2 |
||
64 | |||
65 | // e.g. byte0 byte1 byte2 |
||
66 | // YUYV 0 1 3 |
||
67 | // YVYU 0 3 1 |
||
68 | |||
69 | //Color Pipe (IECP) parameters |
||
70 | |||
71 | |||
72 | //ByteCopy |
||
73 | |||
74 | |||
75 | //r4 |
||
76 | |||
77 | // e.g. byte0 byte1 byte2 |
||
78 | // YUYV 0 1 3 |
||
79 | // YVYU 0 3 1 |
||
80 | |||
81 | |||
82 | //========== Inline parameters (Common To All) =========== |
||
83 | |||
84 | |||
85 | //============== Binding Index Table=========== |
||
86 | //Common between DNDI and DNUV |
||
87 | |||
88 | |||
89 | //================= Common Message Descriptor ===== |
||
90 | // Message descriptor for thread spawning |
||
91 | // Message Descriptors |
||
92 | // = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later) |
||
93 | // 0000,0000,0000 |
||
94 | // 0001(Spawn a root thread),0001 (Root thread spawn thread) |
||
95 | // = 0x02000011 |
||
96 | // Thread Spawner Message Descriptor |
||
97 | |||
98 | |||
99 | // Message descriptor for atomic operation add |
||
100 | // Message Descriptors |
||
101 | // = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later) |
||
102 | // 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add) |
||
103 | // 0000,0000 (Binding table index, added later) |
||
104 | // = 0x0C098700 |
||
105 | |||
106 | // Atomic Operation Add Message Descriptor |
||
107 | |||
108 | |||
109 | // Message descriptor for dataport media write |
||
110 | // Message Descriptors |
||
111 | // = 000 0001 (min message len 1 - add later) 00000 (resp len 0) |
||
112 | // 1 (header present 1) 0 1010 (media block write) 000000 |
||
113 | // 00000000 (binding table index - set later) |
||
114 | // = 0x020A8000 |
||
115 | |||
116 | |||
117 | // Message Length defines |
||
118 | |||
119 | |||
120 | // Response Length defines |
||
121 | |||
122 | |||
123 | // Block Width and Height Size defines |
||
124 | |||
125 | |||
126 | // Extended Message Descriptors |
||
127 | |||
128 | |||
129 | // Common message descriptors: |
||
130 | |||
131 | |||
132 | //===================== Math Function Control =================================== |
||
133 | |||
134 | |||
135 | //============ Message Registers =============== |
||
136 | // buf4 starts from r28 |
||
137 | |||
138 | |||
139 | //#define mMSGHDR_EOT r43 // Dummy Message Register for EOT |
||
140 | |||
141 | |||
142 | .declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub |
||
143 | .declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw |
||
144 | .declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud |
||
145 | .declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f |
||
146 | |||
147 | //=================== End of thread instruction =========================== |
||
148 | |||
149 | |||
150 | //=====================Pointers Used===================================== |
||
151 | |||
152 | |||
153 | //======================================================================= |
||
154 | |||
155 | |||
156 | //r9-r17 |
||
157 | // Define temp space for any usages |
||
158 | |||
159 | |||
160 | // Common Buffers |
||
161 | |||
162 | |||
163 | // temp space for rotation |
||
164 | |||
165 | .declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f |
||
166 | |||
167 | .declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
168 | |||
169 | .declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
170 | |||
171 | .declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub |
||
172 | |||
173 | .declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub |
||
174 | |||
175 | |||
176 | // End of common.inc |
||
177 | |||
178 | |||
179 | // FileName: VP_Setup.asm |
||
180 | // Author: Vivek Kumar |
||
181 | // Description: Sets up all parameters for the Video Processing Kernel |
||
182 | |||
183 | |||
184 | |||
185 | |||
186 | // Description: Includes all definitions explicit to Fast Composite. |
||
187 | |||
188 | |||
189 | |||
190 | |||
191 | // End of common.inc |
||
192 | |||
193 | |||
194 | //========== GRF partition ========== |
||
195 | // r0 header : r0 (1 GRF) |
||
196 | // Static parameters : r1 - r6 (6 GRFS) |
||
197 | // Inline parameters : r7 - r8 (2 GRFs) |
||
198 | // MSGSRC : r27 (1 GRF) |
||
199 | //=================================== |
||
200 | |||
201 | //Interface: |
||
202 | //========== Static Parameters (Explicit To Fast Composite) ========== |
||
203 | //r1 |
||
204 | //CSC Set 0 |
||
205 | |||
206 | |||
207 | .declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud |
||
208 | |||
209 | //Constant alpha |
||
210 | |||
211 | |||
212 | //r2 |
||
213 | |||
214 | |||
215 | // Gen7 AVS WA |
||
216 | |||
217 | |||
218 | // WiDi Definitions |
||
219 | |||
220 | |||
221 | //Colorfill |
||
222 | |||
223 | |||
224 | // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise. |
||
225 | |||
226 | .declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub |
||
227 | |||
228 | //r3 |
||
229 | //Normalised Ratio of Horizontal step size with main video for all layers |
||
230 | |||
231 | |||
232 | //Normalised Ratio of Horizontal step size with main video for all layers becomes |
||
233 | //Normalised Horizontal step size for all layers in VP_Setup.asm |
||
234 | |||
235 | |||
236 | //r4 |
||
237 | //Normalised Vertical step size for all layers |
||
238 | |||
239 | |||
240 | //r5 |
||
241 | //Normalised Vertical Frame Origin for all layers |
||
242 | |||
243 | |||
244 | //r6 |
||
245 | //Normalised Horizontal Frame Origin for all layers |
||
246 | |||
247 | |||
248 | //========== Inline Parameters (Explicit To Fast Composite) ========== |
||
249 | |||
250 | |||
251 | //Main video Step X |
||
252 | |||
253 | |||
254 | //====================== Binding table (Explicit To Fast Composite)========================================= |
||
255 | |||
256 | |||
257 | //Used by Interlaced Scaling Kernels |
||
258 | |||
259 | |||
260 | //========== Sampler State Table Index (Explicit To Fast Composite)========== |
||
261 | //Sampler Index for AVS/IEF messages |
||
262 | |||
263 | |||
264 | //Sampler Index for SIMD16 sampler messages |
||
265 | |||
266 | |||
267 | //============================================================================= |
||
268 | |||
269 | .declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f |
||
270 | .declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f |
||
271 | .declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f |
||
272 | .declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f |
||
273 | .declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f |
||
274 | .declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f |
||
275 | |||
276 | .declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
277 | .declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
278 | .declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
279 | .declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
280 | .declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
281 | .declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud |
||
282 | |||
283 | .declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
284 | .declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
285 | .declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
286 | .declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
287 | .declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
288 | .declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw |
||
289 | |||
290 | .declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub |
||
291 | .declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub |
||
292 | .declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub |
||
293 | .declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub |
||
294 | .declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub |
||
295 | .declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub |
||
296 | |||
297 | .declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub |
||
298 | .declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub |
||
299 | .declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub |
||
300 | .declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub |
||
301 | .declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub |
||
302 | .declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub |
||
303 | |||
304 | //Pointer to mask reg |
||
305 | |||
306 | |||
307 | //r18 |
||
308 | |||
309 | |||
310 | //Always keep Channel Pointers and Offsets in same GRF, so that we can use |
||
311 | // NODDCLR, NODDCHK flags. -rT |
||
312 | |||
313 | |||
314 | .declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF |
||
315 | |||
316 | //r19 |
||
317 | |||
318 | |||
319 | .declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF |
||
320 | |||
321 | |||
322 | //r20 |
||
323 | |||
324 | .declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF |
||
325 | |||
326 | //r21 |
||
327 | |||
328 | .declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF |
||
329 | |||
330 | //r22 |
||
331 | |||
332 | |||
333 | //Always keep Channel Pointers and Offsets in same GRF, so that we can use |
||
334 | // NODDCLR, NODDCHK flags. -rT |
||
335 | |||
336 | |||
337 | //Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as |
||
338 | //sub registers of same GRF to enable using NODDCLR NODDCHK. -rT |
||
339 | |||
340 | //r23 |
||
341 | |||
342 | |||
343 | //Lumakey |
||
344 | |||
345 | |||
346 | //r24 |
||
347 | |||
348 | |||
349 | //r25 |
||
350 | |||
351 | |||
352 | //r26 |
||
353 | |||
354 | |||
355 | //defines to generate LABELS during compile time. |
||
356 | |||
357 | |||
358 | //Setup pointer to the inline parameter |
||
359 | |||
360 | // Copy MSG HDR |
||
361 | mov (8) r27.0<1>:ud r0.0<8;8,1>:ud // Initialize message payload header with R0 |
||
362 | |||
363 | |||
364 | //temp; remove it once unread msg warnings are resolved -vK |
||
365 | mov (8) r25:ud r0.0<8;8,1>:ud |
||
366 | mov (8) r26:ud r0.0<8;8,1>:ud |
||
367 | |||
368 | // Calculate StepX for all layers and overwrite it on the ratio |
||
369 | mul (8) r3.0<1>:f r3.0<8;8,1>:f r7.4<0;1,0>:f //StepX = StepX_ratio * VideoStepX (r7.4), since StepX_ratio = StepX / VideoStepX |
||
370 | |||
371 | //Normalised Ratio of Horizontal step size with main video for all layers now becomes |
||
372 | //Normalised Horizontal step size for all layers |
||
373 | |||
374 | // Calculate block origin for all layers and overwrite it on the frame origin |
||
375 | mov (2) r8.5<1>:f r7.0<2;2,1>:w //Convert origin from word to float: 2 words at r7.0 -> floats in r8.5 and r8.6 |
||
376 | |||
377 | cmp.e.f0.0 (8) null<1>:d r2.26:ub 1:uw // f0.0 = (r2.26:ub == 1); instructions predicated with (-f0.0) below run only when it is clear |
||
378 | |||
379 | |||
380 | shr (1) r17.0<1>:uw r2.2<0;1,0>:uw 0:uw // bring layer 0's 2-bit rotation code (bits 1:0 of r2.2:uw) down to bit 0 |
||
381 | and (1) r17.0<1>:uw r17.0<0;1,0>:uw 3:uw // keep only the 2-bit code: 0 = 0, 1 = 90, 2 = 180, 3 = 270 degrees |
||
382 | cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 1:uw |
||
383 | (f0.1) jmpi (1) ROTATE_90_L0 |
||
384 | cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 2:uw |
||
385 | (f0.1) jmpi (1) ROTATE_180_L0 |
||
386 | cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 3:uw |
||
387 | (f0.1) jmpi (1) ROTATE_270_L0 |
||
388 | |||
389 | // rotate 0 degree: r6.0 += r3.0 * r8.5 (skipped when f0.0 is set), then r5.0 += r4.0 * r8.6 |
||
390 | ROTATE_0_L0: |
||
391 | (-f0.0)mov (1) acc0.0:f r6.0<0;1,0>:f |
||
392 | (-f0.0)mac (1) r6.0<1>:f r3.0<0;1,0>:f r8.5<0;1,0>:f |
||
393 | |||
394 | mov (1) acc0.0:f r5.0<0;1,0>:f |
||
395 | mac (1) r5.0<1>:f r4.0<0;1,0>:f r8.6<0;1,0>:f |
||
396 | jmpi (1) END_SRC_BLOCK_ORIG_COMP_L0 |
||
397 | |||
398 | // rotate 90 degree: r6.0 += r3.0 * r8.6 (skipped when f0.0 is set), then r5.0 += r4.0 * (r2.0:uw - r8.5 - 16) |
||
399 | ROTATE_90_L0: |
||
400 | (-f0.0)mov (1) acc0.0:f r6.0<0;1,0>:f |
||
401 | (-f0.0)mac (1) r6.0<1>:f r3.0<0;1,0>:f r8.6<0;1,0>:f |
||
402 | |||
403 | mov (1) r16.0<1>:f r2.0<0;1,0>:uw |
||
404 | add (1) r17.0<1>:f -r8.5<0;1,0>:f r16.0<0;1,0>:f |
||
405 | add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f |
||
406 | |||
407 | mov (1) acc0.0:f r5.0<0;1,0>:f |
||
408 | mac (1) r5.0<1>:f r4.0<0;1,0>:f r17.0<0;1,0>:f |
||
409 | jmpi (1) END_SRC_BLOCK_ORIG_COMP_L0 |
||
410 | |||
411 | // rotate 180 degree: r6.0 += r3.0 * (r2.0:uw - r8.5 - 16) (skipped when f0.0 is set), then r5.0 += r4.0 * (r2.1:uw - r8.6 - 16) |
||
412 | ROTATE_180_L0: |
||
413 | (-f0.0)mov (1) r16.0<1>:f r2.0<0;1,0>:uw |
||
414 | (-f0.0)add (1) r17.0<1>:f -r8.5<0;1,0>:f r16.0<0;1,0>:f |
||
415 | (-f0.0)add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f |
||
416 | (-f0.0)mov (1) acc0.0:f r6.0<0;1,0>:f |
||
417 | (-f0.0)mac (1) r6.0<1>:f r3.0<0;1,0>:f r17.0<0;1,0>:f |
||
418 | |||
419 | mov (1) r16.0<1>:f r2.1<0;1,0>:uw |
||
420 | add (1) r17.0<1>:f -r8.6<0;1,0>:f r16.0<0;1,0>:f |
||
421 | add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f |
||
422 | mov (1) acc0.0:f r5.0<0;1,0>:f |
||
423 | mac (1) r5.0<1>:f r4.0<0;1,0>:f r17.0<0;1,0>:f |
||
424 | jmpi (1) END_SRC_BLOCK_ORIG_COMP_L0 |
||
425 | |||
426 | // rotate 270 degree: r6.0 += r3.0 * (r2.1:uw - r8.6 - 16) (skipped when f0.0 is set), then r5.0 += r4.0 * r8.5 |
||
427 | ROTATE_270_L0: |
||
428 | (-f0.0)mov (1) r16.0<1>:f r2.1<0;1,0>:uw |
||
429 | (-f0.0)add (1) r17.0<1>:f -r8.6<0;1,0>:f r16.0<0;1,0>:f |
||
430 | (-f0.0)add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f |
||
431 | (-f0.0)mov (1) acc0.0:f r6.0<0;1,0>:f |
||
432 | (-f0.0)mac (1) r6.0<1>:f r3.0<0;1,0>:f r17.0<0;1,0>:f |
||
433 | |||
434 | mov (1) acc0.0:f r5.0<0;1,0>:f |
||
435 | mac (1) r5.0<1>:f r4.0<0;1,0>:f r8.5<0;1,0>:f |
||
436 | |||
437 | END_SRC_BLOCK_ORIG_COMP_L0: |
||
438 | nop |
||
439 | shr (1) r17.0<1>:uw r2.2<0;1,0>:uw 2:uw // bring layer 1's 2-bit rotation code (bits 3:2 of r2.2:uw) down to bit 0 |
||
440 | and (1) r17.0<1>:uw r17.0<0;1,0>:uw 3:uw // keep only the 2-bit code: 0 = 0, 1 = 90, 2 = 180, 3 = 270 degrees |
||
441 | cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 1:uw |
||
442 | (f0.1) jmpi (1) ROTATE_90_L1 |
||
443 | cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 2:uw |
||
444 | (f0.1) jmpi (1) ROTATE_180_L1 |
||
445 | cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 3:uw |
||
446 | (f0.1) jmpi (1) ROTATE_270_L1 |
||
447 | |||
448 | // rotate 0 degree: r6.1 += r3.1 * r8.5 (skipped when f0.0 is set), then r5.1 += r4.1 * r8.6 |
||
449 | ROTATE_0_L1: |
||
450 | (-f0.0)mov (1) acc0.1:f r6.1<0;1,0>:f |
||
451 | (-f0.0)mac (1) r6.1<1>:f r3.1<0;1,0>:f r8.5<0;1,0>:f |
||
452 | |||
453 | mov (1) acc0.1:f r5.1<0;1,0>:f |
||
454 | mac (1) r5.1<1>:f r4.1<0;1,0>:f r8.6<0;1,0>:f |
||
455 | jmpi (1) END_SRC_BLOCK_ORIG_COMP_L1 |
||
456 | |||
457 | // rotate 90 degree: r6.1 += r3.1 * r8.6 (skipped when f0.0 is set), then r5.1 += r4.1 * (r2.0:uw - r8.5 - 16) |
||
458 | ROTATE_90_L1: |
||
459 | (-f0.0)mov (1) acc0.1:f r6.1<0;1,0>:f |
||
460 | (-f0.0)mac (1) r6.1<1>:f r3.1<0;1,0>:f r8.6<0;1,0>:f |
||
461 | |||
462 | mov (1) r16.0<1>:f r2.0<0;1,0>:uw |
||
463 | add (1) r17.0<1>:f -r8.5<0;1,0>:f r16.0<0;1,0>:f |
||
464 | add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f |
||
465 | |||
466 | mov (1) acc0.1:f r5.1<0;1,0>:f |
||
467 | mac (1) r5.1<1>:f r4.1<0;1,0>:f r17.0<0;1,0>:f |
||
468 | jmpi (1) END_SRC_BLOCK_ORIG_COMP_L1 |
||
469 | |||
470 | // rotate 180 degree: r6.1 += r3.1 * (r2.0:uw - r8.5 - 16) (skipped when f0.0 is set), then r5.1 += r4.1 * (r2.1:uw - r8.6 - 16) |
||
471 | ROTATE_180_L1: |
||
472 | (-f0.0)mov (1) r16.0<1>:f r2.0<0;1,0>:uw |
||
473 | (-f0.0)add (1) r17.0<1>:f -r8.5<0;1,0>:f r16.0<0;1,0>:f |
||
474 | (-f0.0)add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f |
||
475 | (-f0.0)mov (1) acc0.1:f r6.1<0;1,0>:f |
||
476 | (-f0.0)mac (1) r6.1<1>:f r3.1<0;1,0>:f r17.0<0;1,0>:f |
||
477 | |||
478 | mov (1) r16.0<1>:f r2.1<0;1,0>:uw |
||
479 | add (1) r17.0<1>:f -r8.6<0;1,0>:f r16.0<0;1,0>:f |
||
480 | add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f |
||
481 | mov (1) acc0.1:f r5.1<0;1,0>:f |
||
482 | mac (1) r5.1<1>:f r4.1<0;1,0>:f r17.0<0;1,0>:f |
||
483 | jmpi (1) END_SRC_BLOCK_ORIG_COMP_L1 |
||
484 | |||
485 | // rotate 270 degree: r6.1 += r3.1 * (r2.1:uw - r8.6 - 16) (skipped when f0.0 is set), then r5.1 += r4.1 * r8.5 |
||
486 | ROTATE_270_L1: |
||
487 | (-f0.0)mov (1) r16.0<1>:f r2.1<0;1,0>:uw |
||
488 | (-f0.0)add (1) r17.0<1>:f -r8.6<0;1,0>:f r16.0<0;1,0>:f |
||
489 | (-f0.0)add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f |
||
490 | (-f0.0)mov (1) acc0.1:f r6.1<0;1,0>:f |
||
491 | (-f0.0)mac (1) r6.1<1>:f r3.1<0;1,0>:f r17.0<0;1,0>:f |
||
492 | |||
493 | mov (1) acc0.1:f r5.1<0;1,0>:f |
||
494 | mac (1) r5.1<1>:f r4.1<0;1,0>:f r8.5<0;1,0>:f |
||
495 | |||
496 | END_SRC_BLOCK_ORIG_COMP_L1: |
||
497 | nop |
||
498 | shr (1) r17.0<1>:uw r2.2<0;1,0>:uw 4:uw // bring layer 2's 2-bit rotation code (bits 5:4 of r2.2:uw) down to bit 0 |
||
499 | and (1) r17.0<1>:uw r17.0<0;1,0>:uw 3:uw // keep only the 2-bit code: 0 = 0, 1 = 90, 2 = 180, 3 = 270 degrees |
||
500 | cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 1:uw |
||
501 | (f0.1) jmpi (1) ROTATE_90_L2 |
||
502 | cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 2:uw |
||
503 | (f0.1) jmpi (1) ROTATE_180_L2 |
||
504 | cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 3:uw |
||
505 | (f0.1) jmpi (1) ROTATE_270_L2 |
||
506 | |||
507 | // rotate 0 degree: r6.2 += r3.2 * r8.5 (skipped when f0.0 is set), then r5.2 += r4.2 * r8.6 |
||
508 | ROTATE_0_L2: |
||
509 | (-f0.0)mov (1) acc0.2:f r6.2<0;1,0>:f |
||
510 | (-f0.0)mac (1) r6.2<1>:f r3.2<0;1,0>:f r8.5<0;1,0>:f |
||
511 | |||
512 | mov (1) acc0.2:f r5.2<0;1,0>:f |
||
513 | mac (1) r5.2<1>:f r4.2<0;1,0>:f r8.6<0;1,0>:f |
||
514 | jmpi (1) END_SRC_BLOCK_ORIG_COMP_L2 |
||
515 | |||
516 | // rotate 90 degree: r6.2 += r3.2 * r8.6 (skipped when f0.0 is set), then r5.2 += r4.2 * (r2.0:uw - r8.5 - 16) |
||
517 | ROTATE_90_L2: |
||
518 | (-f0.0)mov (1) acc0.2:f r6.2<0;1,0>:f |
||
519 | (-f0.0)mac (1) r6.2<1>:f r3.2<0;1,0>:f r8.6<0;1,0>:f |
||
520 | |||
521 | mov (1) r16.0<1>:f r2.0<0;1,0>:uw |
||
522 | add (1) r17.0<1>:f -r8.5<0;1,0>:f r16.0<0;1,0>:f |
||
523 | add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f |
||
524 | |||
525 | mov (1) acc0.2:f r5.2<0;1,0>:f |
||
526 | mac (1) r5.2<1>:f r4.2<0;1,0>:f r17.0<0;1,0>:f |
||
527 | jmpi (1) END_SRC_BLOCK_ORIG_COMP_L2 |
||
528 | |||
529 | // rotate 180 degree: r6.2 += r3.2 * (r2.0:uw - r8.5 - 16) (skipped when f0.0 is set), then r5.2 += r4.2 * (r2.1:uw - r8.6 - 16) |
||
530 | ROTATE_180_L2: |
||
531 | (-f0.0)mov (1) r16.0<1>:f r2.0<0;1,0>:uw |
||
532 | (-f0.0)add (1) r17.0<1>:f -r8.5<0;1,0>:f r16.0<0;1,0>:f |
||
533 | (-f0.0)add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f |
||
534 | (-f0.0)mov (1) acc0.2:f r6.2<0;1,0>:f |
||
535 | (-f0.0)mac (1) r6.2<1>:f r3.2<0;1,0>:f r17.0<0;1,0>:f |
||
536 | |||
537 | mov (1) r16.0<1>:f r2.1<0;1,0>:uw |
||
538 | add (1) r17.0<1>:f -r8.6<0;1,0>:f r16.0<0;1,0>:f |
||
539 | add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f |
||
540 | mov (1) acc0.2:f r5.2<0;1,0>:f |
||
541 | mac (1) r5.2<1>:f r4.2<0;1,0>:f r17.0<0;1,0>:f |
||
542 | jmpi (1) END_SRC_BLOCK_ORIG_COMP_L2 |
||
543 | |||
544 | // rotate 270 degree: r6.2 += r3.2 * (r2.1:uw - r8.6 - 16) (skipped when f0.0 is set), then r5.2 += r4.2 * r8.5 |
||
545 | ROTATE_270_L2: |
||
546 | (-f0.0)mov (1) r16.0<1>:f r2.1<0;1,0>:uw |
||
547 | (-f0.0)add (1) r17.0<1>:f -r8.6<0;1,0>:f r16.0<0;1,0>:f |
||
548 | (-f0.0)add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f |
||
549 | (-f0.0)mov (1) acc0.2:f r6.2<0;1,0>:f |
||
550 | (-f0.0)mac (1) r6.2<1>:f r3.2<0;1,0>:f r17.0<0;1,0>:f |
||
551 | |||
552 | mov (1) acc0.2:f r5.2<0;1,0>:f |
||
553 | mac (1) r5.2<1>:f r4.2<0;1,0>:f r8.5<0;1,0>:f |
||
554 | |||
555 | END_SRC_BLOCK_ORIG_COMP_L2: |
||
556 | nop |
||
557 | shr (1) r17.0<1>:uw r2.2<0;1,0>:uw 6:uw // bring layer 3's 2-bit rotation code (bits 7:6 of r2.2:uw) down to bit 0 |
||
558 | and (1) r17.0<1>:uw r17.0<0;1,0>:uw 3:uw // keep only the 2-bit code: 0 = 0, 1 = 90, 2 = 180, 3 = 270 degrees |
||
559 | cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 1:uw |
||
560 | (f0.1) jmpi (1) ROTATE_90_L3 |
||
561 | cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 2:uw |
||
562 | (f0.1) jmpi (1) ROTATE_180_L3 |
||
563 | cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 3:uw |
||
564 | (f0.1) jmpi (1) ROTATE_270_L3 |
||
565 | |||
566 | // rotate 0 degree: r6.3 += r3.3 * r8.5 (skipped when f0.0 is set), then r5.3 += r4.3 * r8.6 |
||
567 | ROTATE_0_L3: |
||
568 | (-f0.0)mov (1) acc0.3:f r6.3<0;1,0>:f |
||
569 | (-f0.0)mac (1) r6.3<1>:f r3.3<0;1,0>:f r8.5<0;1,0>:f |
||
570 | |||
571 | mov (1) acc0.3:f r5.3<0;1,0>:f |
||
572 | mac (1) r5.3<1>:f r4.3<0;1,0>:f r8.6<0;1,0>:f |
||
573 | jmpi (1) END_SRC_BLOCK_ORIG_COMP_L3 |
||
574 | |||
575 | // rotate 90 degree: r6.3 += r3.3 * r8.6 (skipped when f0.0 is set), then r5.3 += r4.3 * (r2.0:uw - r8.5 - 16) |
||
576 | ROTATE_90_L3: |
||
577 | (-f0.0)mov (1) acc0.3:f r6.3<0;1,0>:f |
||
578 | (-f0.0)mac (1) r6.3<1>:f r3.3<0;1,0>:f r8.6<0;1,0>:f |
||
579 | |||
580 | mov (1) r16.0<1>:f r2.0<0;1,0>:uw |
||
581 | add (1) r17.0<1>:f -r8.5<0;1,0>:f r16.0<0;1,0>:f |
||
582 | add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f |
||
583 | |||
584 | mov (1) acc0.3:f r5.3<0;1,0>:f |
||
585 | mac (1) r5.3<1>:f r4.3<0;1,0>:f r17.0<0;1,0>:f |
||
586 | jmpi (1) END_SRC_BLOCK_ORIG_COMP_L3 |
||
587 | |||
588 | // rotate 180 degree: r6.3 += r3.3 * (r2.0:uw - r8.5 - 16) (skipped when f0.0 is set), then r5.3 += r4.3 * (r2.1:uw - r8.6 - 16) |
||
589 | ROTATE_180_L3: |
||
590 | (-f0.0)mov (1) r16.0<1>:f r2.0<0;1,0>:uw |
||
591 | (-f0.0)add (1) r17.0<1>:f -r8.5<0;1,0>:f r16.0<0;1,0>:f |
||
592 | (-f0.0)add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f |
||
593 | (-f0.0)mov (1) acc0.3:f r6.3<0;1,0>:f |
||
594 | (-f0.0)mac (1) r6.3<1>:f r3.3<0;1,0>:f r17.0<0;1,0>:f |
||
595 | |||
596 | mov (1) r16.0<1>:f r2.1<0;1,0>:uw |
||
597 | add (1) r17.0<1>:f -r8.6<0;1,0>:f r16.0<0;1,0>:f |
||
598 | add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f |
||
599 | mov (1) acc0.3:f r5.3<0;1,0>:f |
||
600 | mac (1) r5.3<1>:f r4.3<0;1,0>:f r17.0<0;1,0>:f |
||
601 | jmpi (1) END_SRC_BLOCK_ORIG_COMP_L3 |
||
602 | |||
603 | // rotate 270 degree: r6.3 += r3.3 * (r2.1:uw - r8.6 - 16) (skipped when f0.0 is set), then r5.3 += r4.3 * r8.5 |
||
604 | ROTATE_270_L3: |
||
605 | (-f0.0)mov (1) r16.0<1>:f r2.1<0;1,0>:uw |
||
606 | (-f0.0)add (1) r17.0<1>:f -r8.6<0;1,0>:f r16.0<0;1,0>:f |
||
607 | (-f0.0)add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f |
||
608 | (-f0.0)mov (1) acc0.3:f r6.3<0;1,0>:f |
||
609 | (-f0.0)mac (1) r6.3<1>:f r3.3<0;1,0>:f r17.0<0;1,0>:f |
||
610 | |||
611 | mov (1) acc0.3:f r5.3<0;1,0>:f |
||
612 | mac (1) r5.3<1>:f r4.3<0;1,0>:f r8.5<0;1,0>:f |
||
613 | |||
614 | END_SRC_BLOCK_ORIG_COMP_L3: |
||
615 | nop |
||
616 | shr (1) r17.0<1>:uw r2.2<0;1,0>:uw 8:uw // bring layer 4's 2-bit rotation code (bits 9:8 of r2.2:uw) down to bit 0 |
||
617 | and (1) r17.0<1>:uw r17.0<0;1,0>:uw 3:uw // keep only the 2-bit code: 0 = 0, 1 = 90, 2 = 180, 3 = 270 degrees |
||
618 | cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 1:uw |
||
619 | (f0.1) jmpi (1) ROTATE_90_L4 |
||
620 | cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 2:uw |
||
621 | (f0.1) jmpi (1) ROTATE_180_L4 |
||
622 | cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 3:uw |
||
623 | (f0.1) jmpi (1) ROTATE_270_L4 |
||
624 | |||
625 | // rotate 0 degree: r6.4 += r3.4 * r8.5 (skipped when f0.0 is set), then r5.4 += r4.4 * r8.6 |
||
626 | ROTATE_0_L4: |
||
627 | (-f0.0)mov (1) acc0.4:f r6.4<0;1,0>:f |
||
628 | (-f0.0)mac (1) r6.4<1>:f r3.4<0;1,0>:f r8.5<0;1,0>:f |
||
629 | |||
630 | mov (1) acc0.4:f r5.4<0;1,0>:f |
||
631 | mac (1) r5.4<1>:f r4.4<0;1,0>:f r8.6<0;1,0>:f |
||
632 | jmpi (1) END_SRC_BLOCK_ORIG_COMP_L4 |
||
633 | |||
634 | // rotate 90 degree: r6.4 += r3.4 * r8.6 (skipped when f0.0 is set), then r5.4 += r4.4 * (r2.0:uw - r8.5 - 16) |
||
635 | ROTATE_90_L4: |
||
636 | (-f0.0)mov (1) acc0.4:f r6.4<0;1,0>:f |
||
637 | (-f0.0)mac (1) r6.4<1>:f r3.4<0;1,0>:f r8.6<0;1,0>:f |
||
638 | |||
639 | mov (1) r16.0<1>:f r2.0<0;1,0>:uw |
||
640 | add (1) r17.0<1>:f -r8.5<0;1,0>:f r16.0<0;1,0>:f |
||
641 | add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f |
||
642 | |||
643 | mov (1) acc0.4:f r5.4<0;1,0>:f |
||
644 | mac (1) r5.4<1>:f r4.4<0;1,0>:f r17.0<0;1,0>:f |
||
645 | jmpi (1) END_SRC_BLOCK_ORIG_COMP_L4 |
||
646 | |||
647 | // rotate 180 degree: r6.4 += r3.4 * (r2.0:uw - r8.5 - 16) (skipped when f0.0 is set), then r5.4 += r4.4 * (r2.1:uw - r8.6 - 16) |
||
648 | ROTATE_180_L4: |
||
649 | (-f0.0)mov (1) r16.0<1>:f r2.0<0;1,0>:uw |
||
650 | (-f0.0)add (1) r17.0<1>:f -r8.5<0;1,0>:f r16.0<0;1,0>:f |
||
651 | (-f0.0)add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f |
||
652 | (-f0.0)mov (1) acc0.4:f r6.4<0;1,0>:f |
||
653 | (-f0.0)mac (1) r6.4<1>:f r3.4<0;1,0>:f r17.0<0;1,0>:f |
||
654 | |||
655 | mov (1) r16.0<1>:f r2.1<0;1,0>:uw |
||
656 | add (1) r17.0<1>:f -r8.6<0;1,0>:f r16.0<0;1,0>:f |
||
657 | add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f |
||
658 | mov (1) acc0.4:f r5.4<0;1,0>:f |
||
659 | mac (1) r5.4<1>:f r4.4<0;1,0>:f r17.0<0;1,0>:f |
||
660 | jmpi (1) END_SRC_BLOCK_ORIG_COMP_L4 |
||
661 | |||
662 | // rotate 270 degree: r6.4 += r3.4 * (r2.1:uw - r8.6 - 16) (skipped when f0.0 is set), then r5.4 += r4.4 * r8.5 |
||
663 | ROTATE_270_L4: |
||
664 | (-f0.0)mov (1) r16.0<1>:f r2.1<0;1,0>:uw |
||
665 | (-f0.0)add (1) r17.0<1>:f -r8.6<0;1,0>:f r16.0<0;1,0>:f |
||
666 | (-f0.0)add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f |
||
667 | (-f0.0)mov (1) acc0.4:f r6.4<0;1,0>:f |
||
668 | (-f0.0)mac (1) r6.4<1>:f r3.4<0;1,0>:f r17.0<0;1,0>:f |
||
669 | |||
670 | mov (1) acc0.4:f r5.4<0;1,0>:f |
||
671 | mac (1) r5.4<1>:f r4.4<0;1,0>:f r8.5<0;1,0>:f |
||
672 | |||
673 | END_SRC_BLOCK_ORIG_COMP_L4: |
||
674 | nop |
||
675 | shr (1) r17.0<1>:uw r2.2<0;1,0>:uw 10:uw // bring layer 5's 2-bit rotation code (bits 11:10 of r2.2:uw) down to bit 0 |
||
676 | and (1) r17.0<1>:uw r17.0<0;1,0>:uw 3:uw // keep only the 2-bit code: 0 = 0, 1 = 90, 2 = 180, 3 = 270 degrees |
||
677 | cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 1:uw |
||
678 | (f0.1) jmpi (1) ROTATE_90_L5 |
||
679 | cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 2:uw |
||
680 | (f0.1) jmpi (1) ROTATE_180_L5 |
||
681 | cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 3:uw |
||
682 | (f0.1) jmpi (1) ROTATE_270_L5 |
||
683 | |||
684 | // rotate 0 degree: r6.5 += r3.5 * r8.5 (skipped when f0.0 is set), then r5.5 += r4.5 * r8.6 |
||
685 | ROTATE_0_L5: |
||
686 | (-f0.0)mov (1) acc0.5:f r6.5<0;1,0>:f |
||
687 | (-f0.0)mac (1) r6.5<1>:f r3.5<0;1,0>:f r8.5<0;1,0>:f |
||
688 | |||
689 | mov (1) acc0.5:f r5.5<0;1,0>:f |
||
690 | mac (1) r5.5<1>:f r4.5<0;1,0>:f r8.6<0;1,0>:f |
||
691 | jmpi (1) END_SRC_BLOCK_ORIG_COMP_L5 |
||
692 | |||
693 | // rotate 90 degree: r6.5 += r3.5 * r8.6 (skipped when f0.0 is set), then r5.5 += r4.5 * (r2.0:uw - r8.5 - 16) |
||
694 | ROTATE_90_L5: |
||
695 | (-f0.0)mov (1) acc0.5:f r6.5<0;1,0>:f |
||
696 | (-f0.0)mac (1) r6.5<1>:f r3.5<0;1,0>:f r8.6<0;1,0>:f |
||
697 | |||
698 | mov (1) r16.0<1>:f r2.0<0;1,0>:uw |
||
699 | add (1) r17.0<1>:f -r8.5<0;1,0>:f r16.0<0;1,0>:f |
||
700 | add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f |
||
701 | |||
702 | mov (1) acc0.5:f r5.5<0;1,0>:f |
||
703 | mac (1) r5.5<1>:f r4.5<0;1,0>:f r17.0<0;1,0>:f |
||
704 | jmpi (1) END_SRC_BLOCK_ORIG_COMP_L5 |
||
705 | |||
706 | // rotate 180 degree: r6.5 += r3.5 * (r2.0:uw - r8.5 - 16) (skipped when f0.0 is set), then r5.5 += r4.5 * (r2.1:uw - r8.6 - 16) |
||
707 | ROTATE_180_L5: |
||
708 | (-f0.0)mov (1) r16.0<1>:f r2.0<0;1,0>:uw |
||
709 | (-f0.0)add (1) r17.0<1>:f -r8.5<0;1,0>:f r16.0<0;1,0>:f |
||
710 | (-f0.0)add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f |
||
711 | (-f0.0)mov (1) acc0.5:f r6.5<0;1,0>:f |
||
712 | (-f0.0)mac (1) r6.5<1>:f r3.5<0;1,0>:f r17.0<0;1,0>:f |
||
713 | |||
714 | mov (1) r16.0<1>:f r2.1<0;1,0>:uw |
||
715 | add (1) r17.0<1>:f -r8.6<0;1,0>:f r16.0<0;1,0>:f |
||
716 | add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f |
||
717 | mov (1) acc0.5:f r5.5<0;1,0>:f |
||
718 | mac (1) r5.5<1>:f r4.5<0;1,0>:f r17.0<0;1,0>:f |
||
719 | jmpi (1) END_SRC_BLOCK_ORIG_COMP_L5 |
||
720 | |||
721 | // rotate 270 degree: r6.5 += r3.5 * (r2.1:uw - r8.6 - 16) (skipped when f0.0 is set), then r5.5 += r4.5 * r8.5 |
||
722 | ROTATE_270_L5: |
||
723 | (-f0.0)mov (1) r16.0<1>:f r2.1<0;1,0>:uw |
||
724 | (-f0.0)add (1) r17.0<1>:f -r8.6<0;1,0>:f r16.0<0;1,0>:f |
||
725 | (-f0.0)add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f |
||
726 | (-f0.0)mov (1) acc0.5:f r6.5<0;1,0>:f |
||
727 | (-f0.0)mac (1) r6.5<1>:f r3.5<0;1,0>:f r17.0<0;1,0>:f |
||
728 | |||
729 | mov (1) acc0.5:f r5.5<0;1,0>:f |
||
730 | mac (1) r5.5<1>:f r4.5<0;1,0>:f r8.5<0;1,0>:f |
||
731 | |||
732 | END_SRC_BLOCK_ORIG_COMP_L5: |
||
733 | nop |
||
734 | shr (1) r17.0<1>:uw r2.2<0;1,0>:uw 12:uw // bring layer 6's 2-bit rotation code (bits 13:12 of r2.2:uw) down to bit 0 |
||
735 | and (1) r17.0<1>:uw r17.0<0;1,0>:uw 3:uw // keep only the 2-bit code: 0 = 0, 1 = 90, 2 = 180, 3 = 270 degrees |
||
736 | cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 1:uw |
||
737 | (f0.1) jmpi (1) ROTATE_90_L6 |
||
738 | cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 2:uw |
||
739 | (f0.1) jmpi (1) ROTATE_180_L6 |
||
740 | cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 3:uw |
||
741 | (f0.1) jmpi (1) ROTATE_270_L6 |
||
742 | |||
743 | // rotate 0 degree: r6.6 += r3.6 * r8.5 (skipped when f0.0 is set), then r5.6 += r4.6 * r8.6 |
||
744 | ROTATE_0_L6: |
||
745 | (-f0.0)mov (1) acc0.6:f r6.6<0;1,0>:f |
||
746 | (-f0.0)mac (1) r6.6<1>:f r3.6<0;1,0>:f r8.5<0;1,0>:f |
||
747 | |||
748 | mov (1) acc0.6:f r5.6<0;1,0>:f |
||
749 | mac (1) r5.6<1>:f r4.6<0;1,0>:f r8.6<0;1,0>:f |
||
750 | jmpi (1) END_SRC_BLOCK_ORIG_COMP_L6 |
||
751 | |||
752 | // rotate 90 degree: r6.6 += r3.6 * r8.6 (skipped when f0.0 is set), then r5.6 += r4.6 * (r2.0:uw - r8.5 - 16) |
||
753 | ROTATE_90_L6: |
||
754 | (-f0.0)mov (1) acc0.6:f r6.6<0;1,0>:f |
||
755 | (-f0.0)mac (1) r6.6<1>:f r3.6<0;1,0>:f r8.6<0;1,0>:f |
||
756 | |||
757 | mov (1) r16.0<1>:f r2.0<0;1,0>:uw |
||
758 | add (1) r17.0<1>:f -r8.5<0;1,0>:f r16.0<0;1,0>:f |
||
759 | add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f |
||
760 | |||
761 | mov (1) acc0.6:f r5.6<0;1,0>:f |
||
762 | mac (1) r5.6<1>:f r4.6<0;1,0>:f r17.0<0;1,0>:f |
||
763 | jmpi (1) END_SRC_BLOCK_ORIG_COMP_L6 |
||
764 | |||
765 | // rotate 180 degree: r6.6 += r3.6 * (r2.0:uw - r8.5 - 16) (skipped when f0.0 is set), then r5.6 += r4.6 * (r2.1:uw - r8.6 - 16) |
||
766 | ROTATE_180_L6: |
||
767 | (-f0.0)mov (1) r16.0<1>:f r2.0<0;1,0>:uw |
||
768 | (-f0.0)add (1) r17.0<1>:f -r8.5<0;1,0>:f r16.0<0;1,0>:f |
||
769 | (-f0.0)add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f |
||
770 | (-f0.0)mov (1) acc0.6:f r6.6<0;1,0>:f |
||
771 | (-f0.0)mac (1) r6.6<1>:f r3.6<0;1,0>:f r17.0<0;1,0>:f |
||
772 | |||
773 | mov (1) r16.0<1>:f r2.1<0;1,0>:uw |
||
774 | add (1) r17.0<1>:f -r8.6<0;1,0>:f r16.0<0;1,0>:f |
||
775 | add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f |
||
776 | mov (1) acc0.6:f r5.6<0;1,0>:f |
||
777 | mac (1) r5.6<1>:f r4.6<0;1,0>:f r17.0<0;1,0>:f |
||
778 | jmpi (1) END_SRC_BLOCK_ORIG_COMP_L6 |
||
779 | |||
780 | // rotate 270 degree: r6.6 += r3.6 * (r2.1:uw - r8.6 - 16) (skipped when f0.0 is set), then r5.6 += r4.6 * r8.5 |
||
781 | ROTATE_270_L6: |
||
782 | (-f0.0)mov (1) r16.0<1>:f r2.1<0;1,0>:uw |
||
783 | (-f0.0)add (1) r17.0<1>:f -r8.6<0;1,0>:f r16.0<0;1,0>:f |
||
784 | (-f0.0)add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f |
||
785 | (-f0.0)mov (1) acc0.6:f r6.6<0;1,0>:f |
||
786 | (-f0.0)mac (1) r6.6<1>:f r3.6<0;1,0>:f r17.0<0;1,0>:f |
||
787 | |||
788 | mov (1) acc0.6:f r5.6<0;1,0>:f |
||
789 | mac (1) r5.6<1>:f r4.6<0;1,0>:f r8.5<0;1,0>:f |
||
790 | |||
791 | END_SRC_BLOCK_ORIG_COMP_L6: |
||
792 | nop |
||
793 | shr (1) r17.0<1>:uw r2.2<0;1,0>:uw 14:uw |
||
794 | and (1) r17.0<1>:uw r17.0<0;1,0>:uw 3:uw |
||
795 | cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 1:uw |
||
796 | (f0.1) jmpi (1) ROTATE_90_L7 |
||
797 | cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 2:uw |
||
798 | (f0.1) jmpi (1) ROTATE_180_L7 |
||
799 | cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 3:uw |
||
800 | (f0.1) jmpi (1) ROTATE_270_L7 |
||
801 | |||
802 | // rotate 0 degree |
||
803 | ROTATE_0_L7: |
||
804 | (-f0.0)mov (1) acc0.7:f r6.7<0;1,0>:f |
||
805 | (-f0.0)mac (1) r6.7<1>:f r3.7<0;1,0>:f r8.5<0;1,0>:f |
||
806 | |||
807 | mov (1) acc0.7:f r5.7<0;1,0>:f |
||
808 | mac (1) r5.7<1>:f r4.7<0;1,0>:f r8.6<0;1,0>:f |
||
809 | jmpi (1) END_SRC_BLOCK_ORIG_COMP_L7 |
||
810 | |||
811 | // rotate 90 degree |
||
// ROTATE_90_L7: 90-degree arm of the _L7 rotation switch.
// The predicated half uses r8.6 directly; the unpredicated half uses the mirrored offset (r2.0 - r8.5 - 16.0).
// NOTE(review): r2.0 looks like a frame dimension and 16.0 like the block size -- confirm.
812 | ROTATE_90_L7: |
||
// Executed only when flag f0.0 is clear: r6.7 = r3.7 * r8.6 + r6.7 (accumulator seeded from r6.7).
813 | (-f0.0)mov (1) acc0.7:f r6.7<0;1,0>:f |
||
814 | (-f0.0)mac (1) r6.7<1>:f r3.7<0;1,0>:f r8.6<0;1,0>:f |
||
815 | |||
// r16.0 = (float) r2.0; r17.0 = r16.0 - r8.5 - 16.0
816 | mov (1) r16.0<1>:f r2.0<0;1,0>:uw |
||
817 | add (1) r17.0<1>:f -r8.5<0;1,0>:f r16.0<0;1,0>:f |
||
818 | add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f |
||
819 | |||
// r5.7 = r4.7 * r17.0 + r5.7 (accumulator seeded from r5.7).
820 | mov (1) acc0.7:f r5.7<0;1,0>:f |
||
821 | mac (1) r5.7<1>:f r4.7<0;1,0>:f r17.0<0;1,0>:f |
||
// Skip the remaining rotation arms.
822 | jmpi (1) END_SRC_BLOCK_ORIG_COMP_L7 |
||
823 | |||
824 | // rotate 180 degree |
||
// ROTATE_180_L7: 180-degree arm of the _L7 rotation switch.
// Both halves use mirrored offsets: (r2.0 - r8.5 - 16.0) and (r2.1 - r8.6 - 16.0).
// NOTE(review): r2.0/r2.1 look like frame width/height and 16.0 like the block size -- confirm.
825 | ROTATE_180_L7: |
||
// The next five instructions execute only when flag f0.0 is clear (inverted predicate).
// r16.0 = (float) r2.0; r17.0 = r16.0 - r8.5 - 16.0
826 | (-f0.0)mov (1) r16.0<1>:f r2.0<0;1,0>:uw |
||
827 | (-f0.0)add (1) r17.0<1>:f -r8.5<0;1,0>:f r16.0<0;1,0>:f |
||
828 | (-f0.0)add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f |
||
// r6.7 = r3.7 * r17.0 + r6.7 (accumulator seeded from r6.7).
829 | (-f0.0)mov (1) acc0.7:f r6.7<0;1,0>:f |
||
830 | (-f0.0)mac (1) r6.7<1>:f r3.7<0;1,0>:f r17.0<0;1,0>:f |
||
831 | |||
// Unpredicated half; scratch r16.0/r17.0 are reused.
// r16.0 = (float) r2.1; r17.0 = r16.0 - r8.6 - 16.0
832 | mov (1) r16.0<1>:f r2.1<0;1,0>:uw |
||
833 | add (1) r17.0<1>:f -r8.6<0;1,0>:f r16.0<0;1,0>:f |
||
834 | add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f |
||
// r5.7 = r4.7 * r17.0 + r5.7 (accumulator seeded from r5.7).
835 | mov (1) acc0.7:f r5.7<0;1,0>:f |
||
836 | mac (1) r5.7<1>:f r4.7<0;1,0>:f r17.0<0;1,0>:f |
||
// Skip the remaining rotation arm.
837 | jmpi (1) END_SRC_BLOCK_ORIG_COMP_L7 |
||
838 | |||
839 | // rotate 270 degree |
||
// ROTATE_270_L7: last arm of the _L7 rotation switch; falls through to END_SRC_BLOCK_ORIG_COMP_L7.
// The predicated half uses the mirrored offset (r2.1 - r8.6 - 16.0); the unpredicated half uses r8.5 directly.
// NOTE(review): r2.1 looks like a frame dimension and 16.0 like the block size -- confirm.
840 | ROTATE_270_L7: |
||
// The next five instructions execute only when flag f0.0 is clear (inverted predicate).
// r16.0 = (float) r2.1; r17.0 = r16.0 - r8.6 - 16.0
841 | (-f0.0)mov (1) r16.0<1>:f r2.1<0;1,0>:uw |
||
842 | (-f0.0)add (1) r17.0<1>:f -r8.6<0;1,0>:f r16.0<0;1,0>:f |
||
843 | (-f0.0)add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f |
||
// r6.7 = r3.7 * r17.0 + r6.7 (accumulator seeded from r6.7).
844 | (-f0.0)mov (1) acc0.7:f r6.7<0;1,0>:f |
||
845 | (-f0.0)mac (1) r6.7<1>:f r3.7<0;1,0>:f r17.0<0;1,0>:f |
||
846 | |||
// Always executed: r5.7 = r4.7 * r8.5 + r5.7 (accumulator seeded from r5.7).
847 | mov (1) acc0.7:f r5.7<0;1,0>:f |
||
848 | mac (1) r5.7<1>:f r4.7<0;1,0>:f r8.5<0;1,0>:f |
||
849 | |||
// Join point for all _L7 rotation arms.
850 | END_SRC_BLOCK_ORIG_COMP_L7: |
||
851 | nop |
// NOTE(review): the text that followed this nop was reduced to orphaned region-descriptor fragments during extraction; restore the remaining instructions from the original source file.
||
852 |