Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
6146 serge 1
/*
2
 *  Copyright 2000-2011 Intel Corporation All Rights Reserved
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the
6
 * "Software"), to deal in the Software without restriction, including
7
 * without limitation the rights to use, copy, modify, merge, publish,
8
 * distribute, sub license, and/or sell copies of the Software, and to
9
 * permit persons to whom the Software is furnished to do so, subject to
10
 * the following conditions:
11
 *
12
 * The above copyright notice and this permission notice (including the
13
 * next paragraph) shall be included in all copies or substantial portions
14
 * of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
 *
24
 * This file was originally licensed under the following license
25
 *
26
 *  Licensed under the Apache License, Version 2.0 (the "License");
27
 *  you may not use this file except in compliance with the License.
28
 *  You may obtain a copy of the License at
29
 *
30
 *      http://www.apache.org/licenses/LICENSE-2.0
31
 *
32
 *  Unless required by applicable law or agreed to in writing, software
33
 *  distributed under the License is distributed on an "AS IS" BASIS,
34
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
35
 *  See the License for the specific language governing permissions and
36
 *  limitations under the License.
37
 *  Authors:
38
 *    Zhao Yakui 
39
 */
40
 
41
 
42
 
43
// Module name: common.inc
44
//
45
// Common header file for all Video-Processing kernels
46
//
47
 
48
// Assembler-wide defaults named by the two .default_* directives below:
// execution size 16 and register type :ub (unsigned byte).
.default_execution_size (16)
49
.default_register_type  :ub
50
 
51
// Register budget declared for the kernel: 128 registers in total, 7 of them payload.
// NOTE(review): 7 presumably corresponds to r0 (header) + r1-r6 (static parameters)
// as listed in the "GRF partition" comment further down — confirm.
.reg_count_total        128
52
.reg_count_payload      7
53
 
54
//========== Common constants ==========
55
 
56
 
57
//========== Macros ==========
58
 
59
 
60
//Fast Jump, For more details see "Set_Layer_N.asm"
61
 
62
 
63
//========== Defines ====================
64
 
65
//========== Static Parameters (Common To All) ==========
66
//r1
67
 
68
 
69
//r2
70
 
71
                                    //  e.g.            byte0   byte1  byte2
72
                                    // YUYV               0       1      3
73
                                    // YVYU               0       3      1
74
 
75
//Color Pipe (IECP) parameters
76
 
77
 
78
//ByteCopy
79
 
80
 
81
//r4
82
 
83
                                    //  e.g.              byte0           byte1           byte2
84
                                    // YUYV                 0               1               3
85
                                    // YVYU                 0               3               1
86
 
87
 
88
//========== Inline parameters (Common To All) ===========
89
 
90
 
91
//============== Binding Index Table===========
92
//Common between DNDI and DNUV
93
 
94
 
95
//================= Common Message Descriptor =====
96
// Message descriptor for thread spawning
97
// Message Descriptors
98
//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
99
//                  0000,0000,0000
100
//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
101
//                = 0x02000011
102
// Thread Spawner Message Descriptor
103
 
104
 
105
// Message descriptor for atomic operation add
106
// Message Descriptors
107
//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
108
//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
109
//                  0000,0000 (Binding table index, added later)
110
//                = 0x0C098700
111
 
112
// Atomic Operation Add Message Descriptor
113
 
114
 
115
// Message descriptor for dataport media write
116
        // Message Descriptors
117
                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
118
                //                  1 (header present 1) 0 1010 (media block write) 000000
119
                //                  00000000 (binding table index - set later)
120
                //                = 0x020A8000
121
 
122
 
123
// Message Length defines
124
 
125
 
126
// Response Length defines
127
 
128
 
129
// Block Width and Height Size defines
130
 
131
 
132
// Extended Message Descriptors
133
 
134
 
135
// Common message descriptors:
136
 
137
 
138
//===================== Math Function Control ===================================
139
 
140
 
141
//============ Message Registers ===============
142
                             // buf4 starts from r28
143
 
144
 
145
//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
146
 
147
 
148
// Four typed views of the same storage starting at r30: unsigned byte, unsigned word,
// unsigned dword and float. The byte/word views use region <16;16,1>, the 4-byte
// views use <8;8,1>. None of these names is referenced by the instructions visible
// in this section.
.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
149
.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
150
.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
151
.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
152
 
153
//=================== End of thread instruction ===========================
154
 
155
 
156
//=====================Pointers Used=====================================
157
 
158
 
159
//=======================================================================
160
 
161
 
162
//r11-r17
163
// Define temp space for any usages
164
 
165
 
166
// Common Buffers
167
 
168
 
169
// temp space for rotation
170
 
171
// Typed aliases of the rotation scratch area that starts at r11.0:
// float, unsigned dword, unsigned word and unsigned byte views with unit stride,
// plus ub4ROBUF, a byte view that steps 4 bytes between elements
// (SrcRegion <32;8,4>, DstRegion <4>).
.declare fROBUF		  Base=r11.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f
172
 
173
.declare udROBUF		Base=r11.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud
174
 
175
.declare uwROBUF		Base=r11.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
176
 
177
.declare ubROBUF		Base=r11.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub
178
 
179
// Byte view with a 4-byte step between consecutive elements.
.declare ub4ROBUF 	Base=r11.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub
180
 
181
 
182
// End of common.inc
183
 
184
 
185
// Module name: Save_AVS_RGBX.asm
186
//
187
// Save packed ARGB 444 frame data block of size 16x16
188
//
189
// To save 16x16 block (64x16 byte layout for ARGB8888) we need 8 send instructions with 32x4 in each
190
//  ---------
191
//  | 0 | 1 |
192
//  | 2 | 3 |
//  | 4 | 5 |
//  | 6 | 7 |
193
//  ---------
194
// eight 32x4 block sends are used (block width/height word 0x3001F), two per 16x4 source buffer
195
 
196
 
197
 
198
// Module name: Save.inc
199
 
200
 
201
 
202
 
203
// Description: Includes all definitions explicit to Fast Composite.
204
 
205
 
206
 
207
 
208
// End of common.inc
209
 
210
 
211
//========== GRF partition ==========
212
     // r0 header            :   r0          (1 GRF)
213
     // Static parameters    :   r1 - r6     (6 GRFS)
214
     // Inline parameters    :   r7 - r8     (2 GRFs)
215
     // MSGSRC               :   r27         (1 GRF)
216
//===================================
217
 
218
//Interface:
219
//========== Static Parameters (Explicit To Fast Composite) ==========
220
//r1
221
//CSC Set 0
222
 
223
 
224
// Unsigned-dword view of the static parameters starting at r1.0 (the "CSC Set 0"
// area per the comment above). Not referenced by the instructions visible in this section.
.declare udCSC_CURBE    Base=r1.0      ElementSize=4       Type=ud
225
 
226
//Constant alpha
227
 
228
 
229
//r2
230
 
231
 
232
// Gen7 AVS WA
233
 
234
 
235
// WiDi Definitions
236
 
237
 
238
//Colorfill
239
 
240
 
241
                                      // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
242
 
243
// Byte view starting at r2.20 with a scalar source region (<0;1,0>), so a source read
// through this name repeats one byte for every channel. Not referenced by the
// instructions visible in this section.
.declare ubCOLOR_PIXEL_VAL      Base=r2.20      ElementSize=1       SrcRegion=<0;1,0>       DstRegion=<1>       Type=ub
244
 
245
//r3
246
//Normalised Ratio of Horizontal step size with main video for all layers
247
 
248
 
249
    //Normalised Ratio of Horizontal step size with main video for all layers becomes
250
    //Normalised Horizontal step size for all layers in VP_Setup.asm
251
 
252
 
253
//r4
254
//Normalised Vertical step size for all layers
255
 
256
 
257
//r5
258
//Normalised Vertical Frame Origin for all layers
259
 
260
 
261
//r6
262
//Normalised Horizontal Frame Origin for all layers
263
 
264
 
265
//========== Inline Parameters (Explicit To Fast Composite) ==========
266
 
267
 
268
//Main video Step X
269
 
270
 
271
//====================== Binding table (Explicit To Fast Composite)=========================================
272
 
273
 
274
//Used by Interlaced Scaling Kernels
275
 
276
 
277
//========== Sampler State Table Index (Explicit To Fast Composite)==========
278
//Sampler Index for AVS/IEF messages
279
 
280
 
281
//Sampler Index for SIMD16 sampler messages
282
 
283
 
284
//=============================================================================
285
 
286
// Typed aliases for six working buffers. For every buffer N the same base register is
// exposed as float (fBUFFER_N), unsigned dword (udBUFFER_N), unsigned word (uwBUFFER_N),
// unsigned byte (ubBUFFER_N) and a byte view with a 4-byte element step (ub4BUFFER_N).
//   BUFFER_0..3 start at r64, r80, r96 and r112 (bases 16 registers apart).
//   BUFFER_4 starts at r28 and BUFFER_5 at r46.
// NOTE(review): r28 and r46 are also written as message header registers by the save
// code later in this file, so BUFFER_4/BUFFER_5 share registers with the outgoing
// messages — presumably they are no longer live at that point; confirm.
.declare fBUFFER_0      Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
287
.declare fBUFFER_1      Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
288
.declare fBUFFER_2      Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
289
.declare fBUFFER_3      Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
290
.declare fBUFFER_4      Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
291
.declare fBUFFER_5      Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
292
 
293
// Unsigned dword views.
.declare udBUFFER_0     Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
294
.declare udBUFFER_1     Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
295
.declare udBUFFER_2     Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
296
.declare udBUFFER_3     Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
297
.declare udBUFFER_4     Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
298
.declare udBUFFER_5     Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
299
 
300
// Unsigned word views.
.declare uwBUFFER_0     Base=r64.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
301
.declare uwBUFFER_1     Base=r80.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
302
.declare uwBUFFER_2     Base=r96.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
303
.declare uwBUFFER_3     Base=r112.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
304
.declare uwBUFFER_4     Base=r28.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
305
.declare uwBUFFER_5     Base=r46.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
306
 
307
// Unsigned byte views, unit stride.
.declare ubBUFFER_0     Base=r64.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
308
.declare ubBUFFER_1     Base=r80.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
309
.declare ubBUFFER_2     Base=r96.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
310
.declare ubBUFFER_3     Base=r112.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
311
.declare ubBUFFER_4     Base=r28.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
312
.declare ubBUFFER_5     Base=r46.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
313
 
314
// Unsigned byte views that step 4 bytes between elements (SrcRegion <32;8,4>, DstRegion <4>).
.declare ub4BUFFER_0    Base=r64.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
315
.declare ub4BUFFER_1    Base=r80.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
316
.declare ub4BUFFER_2    Base=r96.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
317
.declare ub4BUFFER_3    Base=r112.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
318
.declare ub4BUFFER_4    Base=r28.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
319
.declare ub4BUFFER_5    Base=r46.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
320
 
321
//Pointer to mask reg
322
 
323
 
324
//r18
325
 
326
 
327
//Always keep Channel Pointers and Offsets in same GRF, so that we can use
328
// NODDCLR, NODDCHK flags. -rT
329
 
330
 
331
// Named views for r18-r21, one register each per the trailing "1 GRF" notes:
// two unsigned-dword CSC coefficient registers (r18, r19) and two unsigned-word
// alpha-mask registers (r20 temporary, r21). None of these names is referenced by the
// instructions visible in this section.
.declare udCSC_COEFF_0  Base=r18.0    ElementSize=4 Type=ud       // 1 GRF
332
 
333
//r19
334
 
335
 
336
.declare udCSC_COEFF_1  Base=r19.0    ElementSize=4 Type=ud       // 1 GRF
337
 
338
 
339
//r20
340
 
341
.declare uwALPHA_MASK_REG_TEMP  Base=r20.0    ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
342
 
343
//r21
344
 
345
.declare uwALPHA_MASK_REG       Base=r21.0         ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
346
 
347
//r22
348
 
349
 
350
//Always keep Channel Pointers and Offsets in same GRF, so that we can use
351
// NODDCLR, NODDCHK flags. -rT
352
 
353
 
354
//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
355
//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
356
 
357
//r23
358
 
359
 
360
//Lumakey
361
 
362
 
363
//r24
364
 
365
 
366
//r25
367
 
368
 
369
//r26
370
 
371
 
372
//defines to generate LABELS during compile time.
373
 
374
 
375
//Msg payload buffers; up to 4 full-size messages can be written
376
 
377
 
378
// Payload views for four outgoing messages. Payloads 0..3 start at r29, r38, r47 and
// r56 (9 registers apart); each base is the register immediately after one of the
// header registers r28/r37/r46/r55 that the save code fills before its sends.
// Dword, word and byte views are declared over the same bases.
.declare    mudMSGPAYLOAD0  Base=r29.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
379
.declare    mudMSGPAYLOAD1  Base=r38.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
380
.declare    mudMSGPAYLOAD2  Base=r47.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
381
.declare    mudMSGPAYLOAD3  Base=r56.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
382
 
383
.declare    muwMSGPAYLOAD0  Base=r29.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
384
.declare    muwMSGPAYLOAD1  Base=r38.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
385
.declare    muwMSGPAYLOAD2  Base=r47.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
386
.declare    muwMSGPAYLOAD3  Base=r56.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
387
 
388
.declare    mubMSGPAYLOAD0  Base=r29.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
389
.declare    mubMSGPAYLOAD1  Base=r38.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
390
.declare    mubMSGPAYLOAD2  Base=r47.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
391
.declare    mubMSGPAYLOAD3  Base=r56.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
392
// Payloads 4..7 start at r32, r41, r50 and r59, i.e. 3 registers after payloads 0..3,
// so each one aliases row 3 of the corresponding payload 0..3. They are not referenced
// by the instructions visible in this section.
.declare    mubMSGPAYLOAD4  Base=r32.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
393
.declare    mubMSGPAYLOAD5  Base=r41.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
394
.declare    mubMSGPAYLOAD6  Base=r50.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
395
.declare    mubMSGPAYLOAD7  Base=r59.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
396
 
397
 
398
	// the r17 register (nTEMP0) is originally defined from "Common.inc"
399
	// instead of re-defining a nTEMP0 here, we use "SAVE_RGB" suffix for its naming
400
 
401
	// Unsigned-word scratch at r17.0; the save code uses it as the temporary when it
	// conditionally exchanges address sub-registers a0.0 and a0.2.
	.declare uwTemp0 Base=r17.0 ElementSize=2 Type=uw
402
 
403
 
404
//_SAVE_INC_
405
 
406
 
407
// At the save module we have all 8 address sub-registers available.
408
// So we will use PING-PONG type of scheme to save the data using
409
// pointers pBUF_CHNL_TOP_8x4 and pBUF_CHNL_BOT_8x4. This will help
410
// reduce dependency. - rT
411
 
412
    //Internal LAYOUT:(RRGGBBAA)
413
        //Assign buffer channel order for Buffer 0123 in the order RGBA a0.3>A, a0.2>B, a0.1>G, a0.0>R
414
        // R = 0, G= 4, B = 8, A = 12.
415
        // Build four per-channel pointers in r22.0-r22.3 (words), then load them into
        // a0.0-a0.3 and optionally exchange the first and third one.
        // NOTE(review): 0x62EA:v is a packed vector immediate; if its nibbles are read as
        // signed 4-bit lanes (-6,-2,2,6), adding 70 gives 64,68,72,76 and the shift by 5
        // (x32) turns those into byte offsets of r64/r68/r72/r76, assuming 32-byte
        // registers. That agrees with fBUFFER_0 at r64 and with the channel spacing of 4
        // in the "R = 0, G= 4, B = 8, A = 12" comment — confirm against the EU ISA.
        mov (4) acc0.0<1>:w                 0x62EA:v
416
        add (4) acc0.0<1>:w                 acc0<4;4,1>:w       70:uw
417
        shl (4) r22.0<1>:w       acc0<4;4,1>:w       5:uw
418
 
419
			// if channel swap?
420
			// This means that it should be BGRA(B is the LSB) or RGBA
421
			// the internal format is always RGBA(MSB-A-B-G-R).
422
			// Sets flag f0.0 when bit 0 of the word at r2.3 is non-zero; the result
			// itself is discarded (null destination).
			and.nz.f0.0	null<1>:w	r2.3<0;1,0>:uw 0x01:w
423
 
424
//wBUFF_CHNL_PTR points to either buffer 0 or buffer 4.
425
//Add appropriate offsets to get pointers for all buffers (1,2,3 or 5).
426
//Offsets are zero for buffer 0 and buffer 4.
427
	// Adding 0 here simply copies the four pointers from r22.0-r22.3 into a0.0-a0.3.
	add   (4)   a0.0:uw   r22.0<4;4,1>:w          0:uw
428
 
429
	// pointer swap
430
	// Predicated on f0.0: exchange a0.0 and a0.2 through uwTemp0, so the buffers that
	// feed payload bytes 0 and 2 trade places. a0.1 and a0.3 are left untouched.
	(f0.0)	mov (1)	uwTemp0<1> a0.0<0;1,0>:uw
431
	(f0.0)	mov (1)	a0.0<1>:uw a0.2<0;1,0>:uw
432
	(f0.0)	mov (1)	a0.2<1>:uw uwTemp0<0;1,0>
433
 
434
    // Build the message header template in r27:
    //   dword 0 = r9.0 (word) shifted left by 2, i.e. the horizontal origin times 4
    //   dword 1 = r9.1 (word), the vertical origin
    //   dword 2 = 0x3001F, the block width/height word (32x4 per the note below)
    // The three writes target different dwords of r27, hence the NoDDClr/NoDDChk flags.
    shl (1) r27.0<1>:d      r9.0<0;1,0>:w            2:w  			{ NoDDClr }       // H. block origin needs to be quadrupled
435
    mov (1) r27.1<1>:d      r9.1<0;1,0>:w                 			{ NoDDClr, NoDDChk }    // Block origin (1st quadrant)
436
    mov (1) r27.2<1>:ud     0x3001F:ud  	{ NoDDChk }       // Block width and height (32x4)
437
 
438
    // Keep a copy of the four channel pointers in a0.4-a0.7; later sections rebuild
    // a0.0-a0.3 from this copy plus a per-buffer byte offset.
    mov (4)   a0.4<1>:uw   a0.0<4;4,1>:uw
439
 
440
    // Replicate the header into the four registers that the sends use as message
    // source: r28, r37, r46 and r55 (each sits directly in front of mubMSGPAYLOAD0..3).
    mov (8) r28<1>:ud      r27<8;8,1>:ud
441
    mov (8) r37<1>:ud      r27<8;8,1>:ud
442
    mov (8) r46<1>:ud      r27<8;8,1>:ud
443
    mov (8) r55<1>:ud      r27<8;8,1>:ud
444
 
    // The header used to be copied into r31/r40/r49/r58 as well. Those registers are
    // row 2 of mubMSGPAYLOAD0..3 and every byte of them is rewritten by the payload
    // moves before the corresponding send, so the copies were dead stores and have
    // been removed.
449
 
450
//Buffer 0/1 are written by using 4 32x4.
451
 
452
	// r37: right half of rows 0-3 (X + 32 bytes).
	add (1) r37.0<1>:d     r27.0<0;1,0>:d        32:d
453
 
454
	// r46: left half of rows 4-7 (Y + 4).
	add (1) r46.1<1>:d     r27.1<0;1,0>:d        4:d
455
 
456
	// r55: right half of rows 4-7 (X + 32 bytes, Y + 4).
	add (1) r55.1<1>:d     r27.1<0;1,0>:d        4:d
457
	add (1) r55.0<1>:d     r27.0<0;1,0>:d        32:d
458
 
459
    // write Buf_0 to 1st quarter of four horizontal output blocks
460
 
461
// Please note the scattered order of NODDCLR, NODDCHK flags. Since the sub-registers
462
// of destination reg are not updated at one place and hence even flags are scattered. -rT
463
 
464
/* for block 0 the left part of buffer 0 and 1 */
465
    // Pack the first source buffer into two 4-row payloads and send them.
    // Every mov writes 8 bytes with a destination step of 4, so the four movs of a
    // group fill bytes 0, 1, 2 and 3 of eight 4-byte pixels in one payload row:
    //   bytes 0/1/2 come from the buffers addressed by a0.0/a0.1/a0.2, taking every
    //   second byte of a 16-byte run that starts at an odd offset (region <16;8,2>);
    //   byte 3 is taken from r2.31.
    // Source offsets 1/33/65/97 feed rows 0-3 of mubMSGPAYLOAD0 and offsets
    // 17/49/81/113 feed rows 0-3 of mubMSGPAYLOAD1.
    // NOTE(review): the odd offsets presumably select the upper byte of 16-bit samples,
    // and r2.31 (no explicit region) presumably acts as a scalar alpha byte — confirm.
    mov (8)    mubMSGPAYLOAD0(0,   0)<4>  r[a0.0,   1]<16;8,2>:ub
466
    mov (8)    mubMSGPAYLOAD0(0,   1)<4>  r[a0.1,   1]<16;8,2>:ub
467
    mov (8)    mubMSGPAYLOAD0(0,   2)<4>  r[a0.2,   1]<16;8,2>:ub
468
    mov (8)    mubMSGPAYLOAD0(0,   3)<4>  r2.31:ub
469
 
470
    mov (8)    mubMSGPAYLOAD0(1,   0)<4>  r[a0.0,   33]<16;8,2>:ub
471
    mov (8)    mubMSGPAYLOAD0(1,   1)<4>  r[a0.1,   33]<16;8,2>:ub
472
    mov (8)    mubMSGPAYLOAD0(1,   2)<4>  r[a0.2,   33]<16;8,2>:ub
473
    mov (8)    mubMSGPAYLOAD0(1,   3)<4>  r2.31:ub
474
 
475
    mov (8)    mubMSGPAYLOAD1(0,   0)<4>  r[a0.0,   17]<16;8,2>:ub
476
    mov (8)    mubMSGPAYLOAD1(0,   1)<4>  r[a0.1,   17]<16;8,2>:ub
477
    mov (8)    mubMSGPAYLOAD1(0,   2)<4>  r[a0.2,   17]<16;8,2>:ub
478
    mov (8)    mubMSGPAYLOAD1(0,   3)<4>  r2.31:ub
479
 
480
    mov (8)    mubMSGPAYLOAD1(1,   0)<4>  r[a0.0,   49]<16;8,2>:ub
481
    mov (8)    mubMSGPAYLOAD1(1,   1)<4>  r[a0.1,   49]<16;8,2>:ub
482
    mov (8)    mubMSGPAYLOAD1(1,   2)<4>  r[a0.2,   49]<16;8,2>:ub
483
    mov (8)    mubMSGPAYLOAD1(1,   3)<4>  r2.31:ub
484
 
485
    mov (8)    mubMSGPAYLOAD0(2,   0)<4>  r[a0.0,   65]<16;8,2>:ub
486
    mov (8)    mubMSGPAYLOAD0(2,   1)<4>  r[a0.1,   65]<16;8,2>:ub
487
    mov (8)    mubMSGPAYLOAD0(2,   2)<4>  r[a0.2,   65]<16;8,2>:ub
488
    mov (8)    mubMSGPAYLOAD0(2,   3)<4>  r2.31:ub
489
 
490
    mov (8)    mubMSGPAYLOAD0(3,   0)<4>  r[a0.0,   97]<16;8,2>:ub
491
    mov (8)    mubMSGPAYLOAD0(3,   1)<4>  r[a0.1,   97]<16;8,2>:ub
492
    mov (8)    mubMSGPAYLOAD0(3,   2)<4>  r[a0.2,   97]<16;8,2>:ub
493
    mov (8)    mubMSGPAYLOAD0(3,   3)<4>  r2.31:ub
494
 
495
    mov (8)    mubMSGPAYLOAD1(2,   0)<4>  r[a0.0,   81]<16;8,2>:ub
496
    mov (8)    mubMSGPAYLOAD1(2,   1)<4>  r[a0.1,   81]<16;8,2>:ub
497
    mov (8)    mubMSGPAYLOAD1(2,   2)<4>  r[a0.2,   81]<16;8,2>:ub
498
    mov (8)    mubMSGPAYLOAD1(2,   3)<4>  r2.31:ub
499
 
500
    mov (8)    mubMSGPAYLOAD1(3,   0)<4>  r[a0.0,   113]<16;8,2>:ub
501
    mov (8)    mubMSGPAYLOAD1(3,   1)<4>  r[a0.1,   113]<16;8,2>:ub
502
    mov (8)    mubMSGPAYLOAD1(3,   2)<4>  r[a0.2,   113]<16;8,2>:ub
503
    mov (8)    mubMSGPAYLOAD1(3,   3)<4>  r2.31:ub
504
 
505
/* For Buffer 0 */
506
    // Two sends, starting at header registers r28 and r37, both with descriptor 0x0A0A8018.
    // NOTE(review): read with the field layout documented for the media-write descriptor
    // (0x020A8000) earlier in this file, 0x0A0A8018 means message length 5 (header plus
    // four payload registers) and binding table index 0x18 — confirm.
    send (16)    null<1>:d    r28   	0x5			0x0A0A8018:ud
507
    send (16)    null<1>:d    r37	0x5			0x0A0A8018:ud
508
 
509
    // Second source buffer: rebuild a0.0-a0.3 from the pointer copy kept in a0.4-a0.7
    // plus 512 bytes, then pack it into mubMSGPAYLOAD2 (offsets 1/33/65/97 -> rows 0-3)
    // and mubMSGPAYLOAD3 (offsets 17/49/81/113 -> rows 0-3). As in the previous section,
    // bytes 0/1/2 of every 4-byte pixel come from a0.0/a0.1/a0.2 and byte 3 from r2.31.
    add (4)    a0.0<1>:uw	a0.4<4;4,1>:uw          512:uw
510
    mov (8)    mubMSGPAYLOAD2(0,   0)<4>  r[a0.0,   1]<16;8,2>:ub
511
    mov (8)    mubMSGPAYLOAD2(0,   1)<4>  r[a0.1,   1]<16;8,2>:ub
512
    mov (8)    mubMSGPAYLOAD2(0,   2)<4>  r[a0.2,   1]<16;8,2>:ub
513
    mov (8)    mubMSGPAYLOAD2(0,   3)<4>  r2.31:ub
514
 
515
    mov (8)    mubMSGPAYLOAD2(1,   0)<4>  r[a0.0,   33]<16;8,2>:ub
516
    mov (8)    mubMSGPAYLOAD2(1,   1)<4>  r[a0.1,   33]<16;8,2>:ub
517
    mov (8)    mubMSGPAYLOAD2(1,   2)<4>  r[a0.2,   33]<16;8,2>:ub
518
    mov (8)    mubMSGPAYLOAD2(1,   3)<4>  r2.31:ub
519
 
520
    mov (8)    mubMSGPAYLOAD3(0,   0)<4>  r[a0.0,   17]<16;8,2>:ub
521
    mov (8)    mubMSGPAYLOAD3(0,   1)<4>  r[a0.1,   17]<16;8,2>:ub
522
    mov (8)    mubMSGPAYLOAD3(0,   2)<4>  r[a0.2,   17]<16;8,2>:ub
523
    mov (8)    mubMSGPAYLOAD3(0,   3)<4>  r2.31:ub
524
 
525
    mov (8)    mubMSGPAYLOAD3(1,   0)<4>  r[a0.0,   49]<16;8,2>:ub
526
    mov (8)    mubMSGPAYLOAD3(1,   1)<4>  r[a0.1,   49]<16;8,2>:ub
527
    mov (8)    mubMSGPAYLOAD3(1,   2)<4>  r[a0.2,   49]<16;8,2>:ub
528
    mov (8)    mubMSGPAYLOAD3(1,   3)<4>  r2.31:ub
529
 
530
    mov (8)    mubMSGPAYLOAD2(2,   0)<4>  r[a0.0,   65]<16;8,2>:ub
531
    mov (8)    mubMSGPAYLOAD2(2,   1)<4>  r[a0.1,   65]<16;8,2>:ub
532
    mov (8)    mubMSGPAYLOAD2(2,   2)<4>  r[a0.2,   65]<16;8,2>:ub
533
    mov (8)    mubMSGPAYLOAD2(2,   3)<4>  r2.31:ub
534
 
535
    mov (8)    mubMSGPAYLOAD2(3,   0)<4>  r[a0.0,   97]<16;8,2>:ub
536
    mov (8)    mubMSGPAYLOAD2(3,   1)<4>  r[a0.1,   97]<16;8,2>:ub
537
    mov (8)    mubMSGPAYLOAD2(3,   2)<4>  r[a0.2,   97]<16;8,2>:ub
538
    mov (8)    mubMSGPAYLOAD2(3,   3)<4>  r2.31:ub
539
 
540
    mov (8)    mubMSGPAYLOAD3(2,   0)<4>  r[a0.0,   81]<16;8,2>:ub
541
    mov (8)    mubMSGPAYLOAD3(2,   1)<4>  r[a0.1,   81]<16;8,2>:ub
542
    mov (8)    mubMSGPAYLOAD3(2,   2)<4>  r[a0.2,   81]<16;8,2>:ub
543
    mov (8)    mubMSGPAYLOAD3(2,   3)<4>  r2.31:ub
544
 
545
    mov (8)    mubMSGPAYLOAD3(3,   0)<4>  r[a0.0,   113]<16;8,2>:ub
546
    mov (8)    mubMSGPAYLOAD3(3,   1)<4>  r[a0.1,   113]<16;8,2>:ub
547
    mov (8)    mubMSGPAYLOAD3(3,   2)<4>  r[a0.2,   113]<16;8,2>:ub
548
    mov (8)    mubMSGPAYLOAD3(3,   3)<4>  r2.31:ub
549
    // send Buffer 1
    // Messages start at header registers r46 and r55 (origin Y + 4; r55 also X + 32).
550
    send (16)    null<1>:d    r46   	0x5			0x0A0A8018:ud
551
    send (16)    null<1>:d    r55	0x5			0x0A0A8018:ud
552
 
553
 
554
/* for Buffer 2/3 */
555
    // Re-initialise the four header registers from the template in r27 for the lower
    // half of the 16x16 block, then adjust the origins:
    //   r28: (X,      Y + 8)     r37: (X + 32, Y + 8)
    //   r46: (X,      Y + 12)    r55: (X + 32, Y + 12)
    // X is dword 0 and Y is dword 1 of the header; the width/height dword is copied as is.
    mov (8) r28<1>:ud      r27<8;8,1>:ud
556
    mov (8) r37<1>:ud      r27<8;8,1>:ud
557
    mov (8) r46<1>:ud      r27<8;8,1>:ud
558
    mov (8) r55<1>:ud      r27<8;8,1>:ud
559
 
560
	add (1) r28.1<1>:d	r27.1<0;1,0>:d		8:d
561
 
562
	add (1) r37.0<1>:d     r27.0<0;1,0>:d        32:d
563
	add (1) r37.1<1>:d     r27.1<0;1,0>:d        8:d
564
 
565
	add (1) r46.1<1>:d     r27.1<0;1,0>:d        12:d
566
 
567
	add (1) r55.1<1>:d     r27.1<0;1,0>:d        12:d
568
	add (1) r55.0<1>:d     r27.0<0;1,0>:d        32:d
569
 
570
    // Third source buffer: a0.0-a0.3 = pointer copy in a0.4-a0.7 plus 1024 bytes.
    // The data is packed into mubMSGPAYLOAD0 (offsets 1/33/65/97 -> rows 0-3) and
    // mubMSGPAYLOAD1 (offsets 17/49/81/113 -> rows 0-3); bytes 0/1/2 of every 4-byte
    // pixel come from a0.0/a0.1/a0.2 and byte 3 from r2.31.
    add (4)    a0.0<1>:uw	a0.4<4;4,1>:uw          1024:uw
571
 
572
    mov (8)    mubMSGPAYLOAD0(0,   0)<4>  r[a0.0,   1]<16;8,2>:ub
573
    mov (8)    mubMSGPAYLOAD0(0,   1)<4>  r[a0.1,   1]<16;8,2>:ub
574
    mov (8)    mubMSGPAYLOAD0(0,   2)<4>  r[a0.2,   1]<16;8,2>:ub
575
    mov (8)    mubMSGPAYLOAD0(0,   3)<4>  r2.31:ub
576
 
577
    mov (8)    mubMSGPAYLOAD0(1,   0)<4>  r[a0.0,   33]<16;8,2>:ub
578
    mov (8)    mubMSGPAYLOAD0(1,   1)<4>  r[a0.1,   33]<16;8,2>:ub
579
    mov (8)    mubMSGPAYLOAD0(1,   2)<4>  r[a0.2,   33]<16;8,2>:ub
580
    mov (8)    mubMSGPAYLOAD0(1,   3)<4>  r2.31:ub
581
 
582
    mov (8)    mubMSGPAYLOAD1(0,   0)<4>  r[a0.0,   17]<16;8,2>:ub
583
    mov (8)    mubMSGPAYLOAD1(0,   1)<4>  r[a0.1,   17]<16;8,2>:ub
584
    mov (8)    mubMSGPAYLOAD1(0,   2)<4>  r[a0.2,   17]<16;8,2>:ub
585
    mov (8)    mubMSGPAYLOAD1(0,   3)<4>  r2.31:ub
586
 
587
    mov (8)    mubMSGPAYLOAD1(1,   0)<4>  r[a0.0,   49]<16;8,2>:ub
588
    mov (8)    mubMSGPAYLOAD1(1,   1)<4>  r[a0.1,   49]<16;8,2>:ub
589
    mov (8)    mubMSGPAYLOAD1(1,   2)<4>  r[a0.2,   49]<16;8,2>:ub
590
    mov (8)    mubMSGPAYLOAD1(1,   3)<4>  r2.31:ub
591
 
592
    mov (8)    mubMSGPAYLOAD0(2,   0)<4>  r[a0.0,   65]<16;8,2>:ub
593
    mov (8)    mubMSGPAYLOAD0(2,   1)<4>  r[a0.1,   65]<16;8,2>:ub
594
    mov (8)    mubMSGPAYLOAD0(2,   2)<4>  r[a0.2,   65]<16;8,2>:ub
595
    mov (8)    mubMSGPAYLOAD0(2,   3)<4>  r2.31:ub
596
 
597
    mov (8)    mubMSGPAYLOAD0(3,   0)<4>  r[a0.0,   97]<16;8,2>:ub
598
    mov (8)    mubMSGPAYLOAD0(3,   1)<4>  r[a0.1,   97]<16;8,2>:ub
599
    mov (8)    mubMSGPAYLOAD0(3,   2)<4>  r[a0.2,   97]<16;8,2>:ub
600
    mov (8)    mubMSGPAYLOAD0(3,   3)<4>  r2.31:ub
601
 
602
    mov (8)    mubMSGPAYLOAD1(2,   0)<4>  r[a0.0,   81]<16;8,2>:ub
603
    mov (8)    mubMSGPAYLOAD1(2,   1)<4>  r[a0.1,   81]<16;8,2>:ub
604
    mov (8)    mubMSGPAYLOAD1(2,   2)<4>  r[a0.2,   81]<16;8,2>:ub
605
    mov (8)    mubMSGPAYLOAD1(2,   3)<4>  r2.31:ub
606
 
607
    mov (8)    mubMSGPAYLOAD1(3,   0)<4>  r[a0.0,   113]<16;8,2>:ub
608
    mov (8)    mubMSGPAYLOAD1(3,   1)<4>  r[a0.1,   113]<16;8,2>:ub
609
    mov (8)    mubMSGPAYLOAD1(3,   2)<4>  r[a0.2,   113]<16;8,2>:ub
610
    mov (8)    mubMSGPAYLOAD1(3,   3)<4>  r2.31:ub
611
 
612
// Send Buffer 2
613
    // Messages start at header registers r28 and r37 (origin Y + 8; r37 also X + 32).
    send (16)    null<1>:d    r28   	0x5			0x0A0A8018:ud
614
    send (16)    null<1>:d    r37	0x5			0x0A0A8018:ud
615
 
616
    // Fourth source buffer: a0.0-a0.3 = pointer copy in a0.4-a0.7 plus 1536 bytes.
    // The data is packed into mubMSGPAYLOAD2 (offsets 1/33/65/97 -> rows 0-3) and
    // mubMSGPAYLOAD3 (offsets 17/49/81/113 -> rows 0-3); bytes 0/1/2 of every 4-byte
    // pixel come from a0.0/a0.1/a0.2 and byte 3 from r2.31.
    add (4)    a0.0<1>:uw	a0.4<4;4,1>:uw          1536:uw
617
    mov (8)    mubMSGPAYLOAD2(0,   0)<4>  r[a0.0,   1]<16;8,2>:ub
618
    mov (8)    mubMSGPAYLOAD2(0,   1)<4>  r[a0.1,   1]<16;8,2>:ub
619
    mov (8)    mubMSGPAYLOAD2(0,   2)<4>  r[a0.2,   1]<16;8,2>:ub
620
    mov (8)    mubMSGPAYLOAD2(0,   3)<4>  r2.31:ub
621
 
622
    mov (8)    mubMSGPAYLOAD2(1,   0)<4>  r[a0.0,   33]<16;8,2>:ub
623
    mov (8)    mubMSGPAYLOAD2(1,   1)<4>  r[a0.1,   33]<16;8,2>:ub
624
    mov (8)    mubMSGPAYLOAD2(1,   2)<4>  r[a0.2,   33]<16;8,2>:ub
625
    mov (8)    mubMSGPAYLOAD2(1,   3)<4>  r2.31:ub
626
 
627
    mov (8)    mubMSGPAYLOAD3(0,   0)<4>  r[a0.0,   17]<16;8,2>:ub
628
    mov (8)    mubMSGPAYLOAD3(0,   1)<4>  r[a0.1,   17]<16;8,2>:ub
629
    mov (8)    mubMSGPAYLOAD3(0,   2)<4>  r[a0.2,   17]<16;8,2>:ub
630
    mov (8)    mubMSGPAYLOAD3(0,   3)<4>  r2.31:ub
631
 
632
    mov (8)    mubMSGPAYLOAD3(1,   0)<4>  r[a0.0,   49]<16;8,2>:ub
633
    mov (8)    mubMSGPAYLOAD3(1,   1)<4>  r[a0.1,   49]<16;8,2>:ub
634
    mov (8)    mubMSGPAYLOAD3(1,   2)<4>  r[a0.2,   49]<16;8,2>:ub
635
    mov (8)    mubMSGPAYLOAD3(1,   3)<4>  r2.31:ub
636
 
637
    mov (8)    mubMSGPAYLOAD2(2,   0)<4>  r[a0.0,   65]<16;8,2>:ub
638
    mov (8)    mubMSGPAYLOAD2(2,   1)<4>  r[a0.1,   65]<16;8,2>:ub
639
    mov (8)    mubMSGPAYLOAD2(2,   2)<4>  r[a0.2,   65]<16;8,2>:ub
640
    mov (8)    mubMSGPAYLOAD2(2,   3)<4>  r2.31:ub
641
 
642
    mov (8)    mubMSGPAYLOAD2(3,   0)<4>  r[a0.0,   97]<16;8,2>:ub
643
    mov (8)    mubMSGPAYLOAD2(3,   1)<4>  r[a0.1,   97]<16;8,2>:ub
644
    mov (8)    mubMSGPAYLOAD2(3,   2)<4>  r[a0.2,   97]<16;8,2>:ub
645
    mov (8)    mubMSGPAYLOAD2(3,   3)<4>  r2.31:ub
646
 
647
    mov (8)    mubMSGPAYLOAD3(2,   0)<4>  r[a0.0,   81]<16;8,2>:ub
648
    mov (8)    mubMSGPAYLOAD3(2,   1)<4>  r[a0.1,   81]<16;8,2>:ub
649
    mov (8)    mubMSGPAYLOAD3(2,   2)<4>  r[a0.2,   81]<16;8,2>:ub
650
    mov (8)    mubMSGPAYLOAD3(2,   3)<4>  r2.31:ub
651
 
652
    mov (8)    mubMSGPAYLOAD3(3,   0)<4>  r[a0.0,   113]<16;8,2>:ub
653
    mov (8)    mubMSGPAYLOAD3(3,   1)<4>  r[a0.1,   113]<16;8,2>:ub
654
    mov (8)    mubMSGPAYLOAD3(3,   2)<4>  r[a0.2,   113]<16;8,2>:ub
655
    mov (8)    mubMSGPAYLOAD3(3,   3)<4>  r2.31:ub
656
    // send buffer 3
    // Messages start at header registers r46 and r55 (origin Y + 12; r55 also X + 32).
657
    send (16)    null<1>:d    r46   	0x5			0x0A0A8018:ud
658
    send (16)    null<1>:d    r55	0x5			0x0A0A8018:ud
659