Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5361 serge 1
/*
2
 *  Copyright 2000-2011 Intel Corporation All Rights Reserved
3
 *
4
 *  Licensed under the Apache License, Version 2.0 (the "License");
5
 *  you may not use this file except in compliance with the License.
6
 *  You may obtain a copy of the License at
7
 *
8
 *      http://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 *  Unless required by applicable law or agreed to in writing, software
11
 *  distributed under the License is distributed on an "AS IS" BASIS,
12
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 *  See the License for the specific language governing permissions and
14
 *  limitations under the License.
15
 */
16
//  114    // Total instruction count
17
//    1    // Total kernel count
18
 
19
.kernel PA_DN_422CP
20
.code
21
 
22
 
23
 
24
// FileName:	DN_PA_Core.asm
25
// Author:		Vivek Kumar
26
// Description:	Tasks for DN only case (16x8 block) for Packed format
27
 
28
 
29
 
30
// FileName:	DN.asm
31
// Author:		Vivek Kumar
32
// Description:	Tasks for DN only case (16x8 block)
33
 
34
 
35
 
36
 
37
// Module name: common.inc
38
//
39
// Common header file for all Video-Processing kernels
40
//
41
 
42
.default_execution_size (16)
43
.default_register_type  :ub
44
 
45
.reg_count_total        128
46
.reg_count_payload      7
47
 
48
//========== Common constants ==========
49
 
50
 
51
//========== Macros ==========
52
 
53
 
54
//Fast Jump, For more details see "Set_Layer_N.asm"
55
 
56
 
57
//========== Defines ====================
58
 
59
//========== Static Parameters (Common To All) ==========
60
//r1
61
 
62
 
63
//r2
64
 
65
                                    //  e.g.            byte0   byte1  byte2
66
                                    // YUYV               0       1      3
67
                                    // YVYU               0       3      1
68
 
69
//Color Pipe (IECP) parameters
70
 
71
 
72
//ByteCopy
73
 
74
 
75
//r4
76
 
77
                                    //  e.g.              byte0           byte1           byte2
78
                                    // YUYV                 0               1               3
79
                                    // YVYU                 0               3               1
80
 
81
 
82
//========== Inline parameters (Common To All) ===========
83
 
84
 
85
//============== Binding Index Table===========
86
//Common between DNDI and DNUV
87
 
88
 
89
//================= Common Message Descriptor =====
90
// Message descriptor for thread spawning
91
// Message Descriptors
92
//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
93
//                  0000,0000,0000
94
//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
95
//                = 0x02000011
96
// Thread Spawner Message Descriptor
97
 
98
 
99
// Message descriptor for atomic operation add
100
// Message Descriptors
101
//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
102
//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
103
//                  0000,0000 (Binding table index, added later)
104
//                = 0x0C098700
105
 
106
// Atomic Operation Add Message Descriptor
107
 
108
 
109
// Message descriptor for dataport media write
110
        // Message Descriptors
111
                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
112
                //                  1 (header present 1) 0 1010 (media block write) 000000
113
                //                  00000000 (binding table index - set later)
114
                //                = 0x020A8000
115
 
116
 
117
// Message Length defines
118
 
119
 
120
// Response Length defines
121
 
122
 
123
// Block Width and Height Size defines
124
 
125
 
126
// Extended Message Descriptors
127
 
128
 
129
// Common message descriptors:
130
 
131
 
132
//===================== Math Function Control ===================================
133
 
134
 
135
//============ Message Registers ===============
136
                             // buf4 starts from r28
137
 
138
 
139
//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
140
 
141
 
142
// Four typed aliases (ub / uw / ud / f) of the same storage starting at r30.
// None of them is referenced by the instructions visible in this kernel.
.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
143
.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
144
.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
145
.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
146
 
147
//=================== End of thread instruction ===========================
148
 
149
 
150
//=====================Pointers Used=====================================
151
 
152
 
153
//=======================================================================
154
 
155
 
156
//r9-r17
157
// Define temp space for any usages
158
 
159
 
160
// Common Buffers
161
 
162
 
163
// temp space for rotation
164
 
165
// Typed aliases (f / ud / uw / ub) of the scratch area that starts at r9.0.
// ub4ROBUF views the same area with a <32;8,4> source region and a <4> destination stride, i.e. every 4th byte.
// None of these names is referenced by the instructions visible in this kernel.
.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f
166
 
167
.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud
168
 
169
.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
170
 
171
.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub
172
 
173
.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub
174
 
175
 
176
// End of common.inc
177
 
178
 
179
// FileName:    DNDI.inc
180
// Author:      Vivek Kumar
181
// Description: Include file for DN, DI and DNDI
182
// Inputs:      DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
183
 
184
 
185
 
186
 
187
// End of common.inc
188
 
189
 
190
//Interface:
191
//Static Parameters:
192
//r1
193
 
194
 
195
//====================== Binding table (Explicit To DNDI)=========================================
196
 
197
 
198
// Message-header register aliases. Several names share a base register, so they must not be live at the same time:
//   r18 : *MSGHDR_DNDI and *MSGHDR_DI_OUT1
//   r36 : *MSGHDR_UVCOPY and mudMSGHDR_UCOPY
//   r23 : *MSGHDR_DI_OUT2, which is also register 1 of the HIST header based at r22
// Of these, only the DNDI, HIST, ENC_STATS and DN_OUT names are used by the instructions visible in this kernel.

// DNDI request header: ud / d / w views of r18.
.declare mudMSGHDR_DNDI     Base=r18      ElementSize=4    Type=ud
199
.declare mdMSGHDR_DNDI      Base=r18      ElementSize=4    Type=d
200
.declare mwMSGHDR_DNDI      Base=r18      ElementSize=2    Type=w
201
 
202
 
203
// STMM header at r20 (not used in the visible code).
.declare mudMSGHDR_STMM     Base=r20      ElementSize=4    Type=ud
204
 
205
 
206
// Denoise-history write: header at r22, payload addressed as register 1 (r23).
.declare mudMSGHDR_HIST     Base=r22      ElementSize=4    Type=ud
207
 
208
 
209
// Encoder-statistics write: ud / uw / ub views of r24 (payload addressed as register 1, r25).
.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4   Type=ud
210
.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2   Type=uw
211
.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1   Type=ub
212
 
213
 
214
// Denoised-output write: ud / d / ub views of r31; pixel data is addressed as registers 1.. (r32 onwards).
.declare mudMSGHDR_DN_OUT   Base=r31.0      ElementSize=4  Type=ud
215
.declare mdMSGHDR_DN_OUT    Base=r31.0      ElementSize=4  Type=d
216
.declare mubMSGHDR_DN_OUT   Base=r31.0      ElementSize=1  Type=ub
217
 
218
 
219
// UV-copy headers at r36 / r38 (not used in the visible code).
.declare mudMSGHDR_UVCOPY   Base=r36      ElementSize=4  Type=ud
220
.declare mdMSGHDR_UVCOPY    Base=r36      ElementSize=4  Type=d
221
.declare mudMSGHDR_UCOPY    Base=r36       ElementSize=4  Type=ud
222
.declare mudMSGHDR_VCOPY    Base=r38       ElementSize=4  Type=ud
223
 
224
 
225
// DI output headers (not used in the visible code); DI_OUT1 aliases the DNDI header at r18.
.declare mudMSGHDR_DI_OUT1  Base=r18.0      ElementSize=4     Type=ud
226
.declare mubMSGHDR_DI_OUT1  Base=r18.0      ElementSize=1     Type=ub
227
 
228
 
229
.declare mudMSGHDR_DI_OUT2  Base=r23.0      ElementSize=4     Type=ud
230
.declare mubMSGHDR_DI_OUT2  Base=r23.0      ElementSize=1     Type=ub
231
 
232
//r45
233
//Use r45 as message header, so no need to "mov" the data.
234
 
235
// mudDN_Y_OUT names r45, the register directly in front of the DNDI response buffer (r46); it is not
// referenced by the instructions visible in this kernel.
.declare mudDN_Y_OUT        Base=r45.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud
236
 
237
// Message response (Denoised & DI-ed pixels & statistics); Use buffer 5
// ud / uw / ub views of the response starting at r46. The visible code reads pixel rows from registers 0..3
// (r46-r49), statistics and the returned XY from register 4 (r50), and U/V bytes from register 5 onwards (r51..).
238
.declare udDNDI_RESP        Base=r46.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud
239
.declare uwDNDI_RESP        Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
240
.declare ubDNDI_RESP        Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
241
 
242
// Message response (UV Copy); Use buffer 5
// Starts at r58; not referenced by the instructions visible in this kernel.
243
.declare udDNDI_UV_RESP     Base=r58.0 ElementSize=4 SrcRegion=<8;8,1>  DstRegion=<1> Type=ud
244
.declare ubDNDI_UV_RESP     Base=r58.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub
245
 
246
//Temp GRFs: For 42X to 422 Conversion
// Based at r10.0, so this area overlaps the *ROBUF scratch names that start at r9.0.
247
.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1>    DstRegion=<1> Type=uw       //8 GRFs
248
.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub       //8 GRFs
249
//---------------------------------------------------------------------------
250
// Message descriptors
251
//---------------------------------------------------------------------------
252
// Extended message descriptor
253
    // Message descriptor   for sampler read
254
    //                    = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
255
    //                      1 (header present 1) 0 11 (SIMD32/64 mode)
256
    //                      1000 (message type) 0000 (DI state index)
257
    //                      00000000 (binding table index - set later)
258
    //                    = 0x040b8000
259
 
260
 
261
// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
262
 
263
 
264
//---------------------------------------------------------------------------
265
// VDI Return Data format
266
//---------------------------------------------------------------------------
267
// Defines for DI enabled
268
 
269
 
270
// Defines for DI disabled
271
 
272
 
273
 
274
// FileName:	DNDI_Command.asm
275
// Author:		Vivek Kumar
276
// Description:	Sends a message to the VDI to process one DN (16x8) or DNDI (16x4) block
277
 
278
// Prepare the DNDI send command
279
// Build the request in r18/r19 (mudMSGHDR_DNDI is based at r18): r18 is a copy of r0, and words 4 and 12
// of r19 receive the block origin held in r7.0 / r7.1.
mov (8)		mudMSGHDR_DNDI(0)<1>			r0.0<8;8,1>:ud					// message header
280
mov (1)		mwMSGHDR_DNDI(1,4)<1>			r7.0<0;1,0>:w		{ NoDDClr }		// horizontal origin	// Do we need to add offset here? -vK
281
mov (1)		mwMSGHDR_DNDI(1,12)<1>			r7.1<0;1,0>:w		{ NoDDChk }		// vertical origin		// Can these 2 be combined? - vK
282
 
283
// Issue the request from r18; the response is written starting at r46 (udDNDI_RESP).
// NOTE(review): with the descriptor layout used in this file's comments (bits 31:25 length, 24:20 response
// length), 0x49E8003 would mean message length 2 and response length 9 (r46-r54) - confirm against the PRM.
send (8)	udDNDI_RESP(0)<1>	r18	0x2	0x49E8003:ud
284
 
285
// On Gen6, with VDI walker, use the XY pair returned rather than programmed above
286
// VDI_RETURNED_XY is ordered XY in case of walker enables and the same as programmed in case of walker disabled
287
mov (2) 	r7.0<1>:w     uwDNDI_RESP(4,14)<2;2,1>	// horizontal/vertical origin: overwrite r7.0/r7.1 with words 14 and 15 of response register 4 (r50)
288
 
289
 
290
 
291
// FileName:	DN_Hist_Save.asm
292
// Author:		Vivek Kumar
293
// Description:	Saves DN history data to statistics surface
294
 
295
// Write denoise history to memory
296
// Write the denoise history returned in the DNDI response to a surface.
// The header is first assembled in r27, then copied to r22 (mudMSGHDR_HIST); the payload is r23.
mov (8)    r27<1>:ud				r0.0<8;8,1>:ud                   			// message header
297
 
298
 
299
	mov (2)    mudMSGHDR_HIST(1)<1>    	udDNDI_RESP(4,0)<2;2,1>    	// Move denoise history (2 dwords from response register 4, r50) into payload register r23 (4x2)
300
 
301
 
302
shr (2)    r27.0<1>:ud		r7.0<2;2,1>:w				2:w                                 	// X,Y origin / 4
303
add (1)    r27.0<1>:ud		r27.0<0;1,0>:ud			r1.12<0;1,0>:uw		{ NoDDClr }  	// Add pitch to X origin (the value added is the word at r1.12)
304
mov (1)    r27.2<1>:ud		0x10003:ud									{ NoDDChk }  	// block width and height
305
 
306
// Copy the finished header into r22 and send r22/r23.
// By the media-block-write layout documented earlier in this file (base 0x020A8000), 0x40A8021 is
// message length 2 with binding table index 0x21.
mov (8)		mudMSGHDR_HIST(0)<1>		r27.0<8;8,1>:ud
307
send (8)	null<1>:d	r22	0x5		0x40A8021:ud
308
 
309
 
310
 
311
// FileName:	DNDI_Enc_Stats_Save.asm
312
// Author:		Vivek Kumar
313
// Description:	Saves Encoder Statistics data to statistics surface in case of DI enabled (for 16x4 block)
314
 
315
// Write encoder statistics to memory
316
//Currently enable this only on Gen6 validation
317
// Write the encoder statistics returned in the DNDI response to a surface.
// Header is r24 (mudMSGHDR_ENC_STATS), payload is r25; the payload is zeroed first and then only the
// fields listed in the table below are filled in from response register 4 (r50).
mov (8)		mudMSGHDR_ENC_STATS(1)<1>	0x0:ud						// Init payload MRF
318
mov (8)		mudMSGHDR_ENC_STATS(0)<1>	r0.0<8;8,1>:ud				// message header
319
 
320
shr (1)		mudMSGHDR_ENC_STATS(0,0)<1>		r7.0<0;1,0>:w            	1:w  	{ NoDDClr }			// X origin / 2 (original note: enable the flag after testing on si)
321
mul (1)		acc0.1<1>:ud					r7.1<0;1,0>:w				3:w																							// Y origin * 3
322
shr (1)		mudMSGHDR_ENC_STATS(0,1)<1>		acc0.1<0;1,0>:ud			2:w		{ NoDDClr, NoDDChk }		// Y origin * 3/4 (original note: enable the flag after testing on si)
323
// NOTE(review): the other block-size constants in this file pair 0x7001F with "32x8", 0x7000F with "16x8"
// and 0x10003 with "4x2", i.e. (height-1)<<16 | (width-1). Under that encoding 0x50003 is 4 wide x 6 high,
// which matches the 6-row table below but not the "(8x3)" in the comment - confirm.
mov (1)		mudMSGHDR_ENC_STATS(0,2)<1>		0x50003:ud				{ NoDDChk }			// block width and height (8x3) (original note: enable the flag after testing on si)
324
add (2)		mudMSGHDR_ENC_STATS(0,0)<1>		mudMSGHDR_ENC_STATS(0,0)<2;2,1>       	r1.12<2;2,1>:uw					// Add pitch to X,Y origin (adds the words at r1.12 and r1.13)
325
 
326
 
327
	//Data block for Encoder Statistics
328
	//----------------------------------------------------
329
	//|  0  |   1  |   2   |  3  |  4  |  5  |  6  |  7  | Bytes
330
	//----------------------------------------------------
331
	//| BNE |   X  |   X   |  X  |           X           |
332
	//----------------------------------------------------
333
	//|     X      |     SVCM    |           X           |
334
	//----------------------------------------------------
335
	//|   SHCM     |     STAD    |           X           |
336
	//----------------------------------------------------
337
	//|            X             |           X           |
338
	//----------------------------------------------------
339
	//|     X      |     SVCM    |           X           |
340
	//----------------------------------------------------
341
	//|   SHCM     |     STAD    |           X           |
342
	//----------------------------------------------------
343
	// Payload writes below land at byte 0, word 3, words 4-5, word 9 and words 10-11 of r25.
	mov (1)		mubMSGHDR_ENC_STATS(1,0)<1>		ubDNDI_RESP(4,8)<0;1,0>    		{ NoDDClr }				// Move encoder statistics to MRF
344
	mov (1)		muwMSGHDR_ENC_STATS(1,3)<1>		uwDNDI_RESP(4,11)<0;1,0>    	{ NoDDClr, NoDDChk }			// Move encoder statistics to MRF
345
	mov (2)		muwMSGHDR_ENC_STATS(1,4)<1>		uwDNDI_RESP(4,12)<2;2,1>    	{ NoDDClr, NoDDChk }			// Move encoder statistics to MRF
346
	mov (1)		muwMSGHDR_ENC_STATS(1,9)<1>		uwDNDI_RESP(4,8)<0;1,0>    		{ NoDDClr, NoDDChk }			// Move encoder statistics to MRF
347
	mov (2)		muwMSGHDR_ENC_STATS(1,10)<1>	uwDNDI_RESP(4,9)<2;2,1>    		{ NoDDChk }				// Move encoder statistics to MRF
348
 
349
 
350
// Send header r24 + payload r25; same descriptor (0x40A8021) as the denoise-history write.
send (8)   null<1>:d    r24    0x5    0x40A8021:ud
351
 
352
 
353
 
354
// FileName:	DN_Save_PA.asm
355
// Author:		Vivek Kumar
356
// Description:	Save one 16x8 blocks of DN output in Packed format
357
 
358
 
359
// Save the 16x8 denoised block in packed 4:2:2 form: header in r31, pixel data written through a0.4-a0.6.
// a0.4..a0.7 = 1024 + the four bytes at r2.28..r2.31. Below, a0.4 addresses the Y bytes, a0.5 the U bytes
// and a0.6 the V bytes of the packed output; a0.7 is not used in the visible code.
// NOTE(review): 1024 presumably selects r32 (32-byte registers), i.e. the register right after the r31
// header; the "m14" in the original comment looks like a leftover from an MRF-based version - confirm.
add (4)		a0.4<1>:uw   r2.28<4;4,1>:ub   1024:w    // Initial Y,U,V offset in YUV422 block; it starts at m14
360
 
361
mov (8)		mudMSGHDR_DN_OUT(0)<1>		r0<8;8,1>:ud            					// message header
362
shl (1)     mdMSGHDR_DN_OUT(0,0)<1>		r7.0<0;1,0>:w     1:w  		{ NoDDClr }     // X origin * 2 (422 output)
363
mov (1)     mdMSGHDR_DN_OUT(0,1)<1>		r7.1<0;1,0>:w          		{ NoDDClr, NoDDChk }  // Y origin
364
mov (1)     mudMSGHDR_DN_OUT(0,2)<1>	0x7001F:ud	{ NoDDChk }     // block width and height (32x8)
365
 
366
	// Y: eight rows of 16 source bytes (response registers 0..3), each scattered to every 2nd byte of a
	// 32-byte output row (destination offsets 0, 32, ..., 224 from a0.4).
	mov (16)    r[a0.4,0]<2>:ub   ubDNDI_RESP(0,0)<16;16,1>    	{ NoDDClr }   	// copy line of Y directly to memory as optimization
367
	mov (16)    r[a0.4,32]<2>:ub   ubDNDI_RESP(0,16)<16;16,1>    	{ NoDDClr }   	// copy line of Y directly to memory as optimization
368
	mov (16)    r[a0.4,64]<2>:ub   ubDNDI_RESP(0,32)<16;16,1>    	{ NoDDClr }   	// copy line of Y directly to memory as optimization
369
	mov (16)    r[a0.4,96]<2>:ub   ubDNDI_RESP(0,48)<16;16,1>    	{ NoDDClr }   	// copy line of Y directly to memory as optimization
370
	mov (16)    r[a0.4,128]<2>:ub   ubDNDI_RESP(0,64)<16;16,1>    	{ NoDDClr }   	// copy line of Y directly to memory as optimization
371
	mov (16)    r[a0.4,160]<2>:ub   ubDNDI_RESP(0,80)<16;16,1>    	{ NoDDClr }   	// copy line of Y directly to memory as optimization
372
	mov (16)    r[a0.4,192]<2>:ub   ubDNDI_RESP(0,96)<16;16,1>    	{ NoDDClr }   	// copy line of Y directly to memory as optimization
373
	mov (16)    r[a0.4,224]<2>:ub   ubDNDI_RESP(0,112)<16;16,1>    	{ NoDDClr }   	// copy line of Y directly to memory as optimization
374
	// U / V: per row, 8 bytes each taken from the interleaved data at response register 5 onwards
	// (U from the odd source bytes, V from the even ones) and written to every 4th output byte.
	mov (8)     r[a0.5,0]<4>:ub   ubDNDI_RESP(5,1)<16;8,2>  { NoDDClr, NoDDChk }  // copy line of U directly to memory as optimization
375
	mov (8)     r[a0.6,0]<4>:ub   ubDNDI_RESP(5,0)<16;8,2>    { NoDDChk }  	// copy line of V directly to memory as optimization
376
	mov (8)     r[a0.5,32]<4>:ub   ubDNDI_RESP(5,17)<16;8,2>  { NoDDClr, NoDDChk }  // copy line of U directly to memory as optimization
377
	mov (8)     r[a0.6,32]<4>:ub   ubDNDI_RESP(5,16)<16;8,2>    { NoDDChk }  	// copy line of V directly to memory as optimization
378
	mov (8)     r[a0.5,64]<4>:ub   ubDNDI_RESP(5,33)<16;8,2>  { NoDDClr, NoDDChk }  // copy line of U directly to memory as optimization
379
	mov (8)     r[a0.6,64]<4>:ub   ubDNDI_RESP(5,32)<16;8,2>    { NoDDChk }  	// copy line of V directly to memory as optimization
380
	mov (8)     r[a0.5,96]<4>:ub   ubDNDI_RESP(5,49)<16;8,2>  { NoDDClr, NoDDChk }  // copy line of U directly to memory as optimization
381
	mov (8)     r[a0.6,96]<4>:ub   ubDNDI_RESP(5,48)<16;8,2>    { NoDDChk }  	// copy line of V directly to memory as optimization
382
	mov (8)     r[a0.5,128]<4>:ub   ubDNDI_RESP(5,65)<16;8,2>  { NoDDClr, NoDDChk }  // copy line of U directly to memory as optimization
383
	mov (8)     r[a0.6,128]<4>:ub   ubDNDI_RESP(5,64)<16;8,2>    { NoDDChk }  	// copy line of V directly to memory as optimization
384
	mov (8)     r[a0.5,160]<4>:ub   ubDNDI_RESP(5,81)<16;8,2>  { NoDDClr, NoDDChk }  // copy line of U directly to memory as optimization
385
	mov (8)     r[a0.6,160]<4>:ub   ubDNDI_RESP(5,80)<16;8,2>    { NoDDChk }  	// copy line of V directly to memory as optimization
386
	mov (8)     r[a0.5,192]<4>:ub   ubDNDI_RESP(5,97)<16;8,2>  { NoDDClr, NoDDChk }  // copy line of U directly to memory as optimization
387
	mov (8)     r[a0.6,192]<4>:ub   ubDNDI_RESP(5,96)<16;8,2>    { NoDDChk }  	// copy line of V directly to memory as optimization
388
	mov (8)     r[a0.5,224]<4>:ub   ubDNDI_RESP(5,113)<16;8,2>  { NoDDClr, NoDDChk }  // copy line of U directly to memory as optimization
389
	mov (8)     r[a0.6,224]<4>:ub   ubDNDI_RESP(5,112)<16;8,2>    { NoDDChk }  	// copy line of V directly to memory as optimization
390
 
391
//send out data through data port
// By the media-block-write layout documented earlier in this file (base 0x020A8000), 0x120A8018 is
// message length 9 (header + 8 data registers = 8 rows x 32 bytes) with binding table index 0x18.
392
send (8)    null<1>:d    r31.0		0x5    0x120A8018:ud
393
 
394
 
395
 
396
// FileName:	DN_Save_422CP_16x8.asm
397
// Author:		Vivek Kumar
398
// Description:	Save one 16x8 blocks of DN output to the color pipe in 4-2-2 format
399
 
400
 
401
// Byte view of the second output message (header r36, data r37..); r36 is also the base of the
// *MSGHDR_UVCOPY / mudMSGHDR_UCOPY names declared earlier.
.declare mubMSGHDR_DN_OUT_2   Base=r36.0      ElementSize=1  Type=ub
402
 
403
 
404
// Rebuild the r31 header for the color-pipe write: r0 copy, X origin * 2, Y origin, block size, then dword 3.
mov (8)		mudMSGHDR_DN_OUT(0)<1>		r0<8;8,1>:ud            			// message header
405
shl (1)     mdMSGHDR_DN_OUT(0,0)<1>		r7.0<0;1,0>:w     1:w  		{ NoDDClr }            // X origin * 2 (422 output)
406
mov (1)     mdMSGHDR_DN_OUT(0,1)<1>		r7.1<0;1,0>:w          		{ NoDDClr, NoDDChk }   // Y origin
407
mov (1)     mudMSGHDR_DN_OUT(0,2)<1>	0x7000F:ud	{ NoDDClr, NoDDChk }            // block width and height (16x8)
408
 
409
//M0.3	- 0 - CP Enable, 1 - Area of Interest, 3:2 Message Format(TBD), 4:3 - Ignored, 31:5 CP state pointer
410
//Compose area-of-interest bit + color pipe state pointer
// Header dword 3 = r2.4 (dword) OR the byte at r7.26.
411
or (1)		mudMSGHDR_DN_OUT(0,3)<1>		r2.4<0;1,0>:ud 	r7.26<0;1,0>:b		{ NoDDChk }
412
 
413
// First 8 x 8 Block
414
	// Left 8 columns of the block, packed into r32-r35 (registers 1..4 after the r31 header).
	// Each register holds two 16-byte output rows (byte offsets 0 and 16). Per row: Y goes to bytes 0,2,..,14,
	// U to bytes 1,5,9,13 and V to bytes 3,7,11,15. Y comes from response registers 0..3 (source columns 0-7);
	// U is taken from the odd and V from the even bytes of the interleaved data at response register 5 onwards.
	mov (8)		mubMSGHDR_DN_OUT(1)<2>			ubDNDI_RESP(0,0)<8;8,1>				{ NoDDClr } 	// copy line of Y directly to memory as optimization
415
	mov (8)		mubMSGHDR_DN_OUT(1,16)<2>		ubDNDI_RESP(0,16)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
416
	mov (8)		mubMSGHDR_DN_OUT(2)<2>			ubDNDI_RESP(0,32)<8;8,1>				{ NoDDClr } 	// copy line of Y directly to memory as optimization
417
	mov (8)		mubMSGHDR_DN_OUT(2,16)<2>		ubDNDI_RESP(0,48)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
418
	mov (8)		mubMSGHDR_DN_OUT(3)<2>			ubDNDI_RESP(0,64)<8;8,1>				{ NoDDClr } 	// copy line of Y directly to memory as optimization
419
	mov (8)		mubMSGHDR_DN_OUT(3,16)<2>		ubDNDI_RESP(0,80)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
420
	mov (8)		mubMSGHDR_DN_OUT(4)<2>			ubDNDI_RESP(0,96)<8;8,1>				{ NoDDClr } 	// copy line of Y directly to memory as optimization
421
	mov (8)		mubMSGHDR_DN_OUT(4,16)<2>		ubDNDI_RESP(0,112)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
422
 
423
	mov (4)     mubMSGHDR_DN_OUT(1,1)<4>   	ubDNDI_RESP(5,1)<8;4,2>			{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
424
	mov (4)     mubMSGHDR_DN_OUT(1,17)<4>   	ubDNDI_RESP(5,17)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
425
 
426
	mov (4)     mubMSGHDR_DN_OUT(1,3)<4>   	ubDNDI_RESP(5,0)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
427
	mov (4)     mubMSGHDR_DN_OUT(1,19)<4>   	ubDNDI_RESP(5,16)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
428
	mov (4)     mubMSGHDR_DN_OUT(2,1)<4>   	ubDNDI_RESP(5,33)<8;4,2>			{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
429
	mov (4)     mubMSGHDR_DN_OUT(2,17)<4>   	ubDNDI_RESP(5,49)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
430
 
431
	mov (4)     mubMSGHDR_DN_OUT(2,3)<4>   	ubDNDI_RESP(5,32)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
432
	mov (4)     mubMSGHDR_DN_OUT(2,19)<4>   	ubDNDI_RESP(5,48)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
433
	mov (4)     mubMSGHDR_DN_OUT(3,1)<4>   	ubDNDI_RESP(5,65)<8;4,2>			{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
434
	mov (4)     mubMSGHDR_DN_OUT(3,17)<4>   	ubDNDI_RESP(5,81)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
435
 
436
	mov (4)     mubMSGHDR_DN_OUT(3,3)<4>   	ubDNDI_RESP(5,64)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
437
	mov (4)     mubMSGHDR_DN_OUT(3,19)<4>   	ubDNDI_RESP(5,80)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
438
	mov (4)     mubMSGHDR_DN_OUT(4,1)<4>   	ubDNDI_RESP(5,97)<8;4,2>			{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
439
	mov (4)     mubMSGHDR_DN_OUT(4,17)<4>   	ubDNDI_RESP(5,113)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
440
 
441
	mov (4)     mubMSGHDR_DN_OUT(4,3)<4>   	ubDNDI_RESP(5,96)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
442
	mov (4)     mubMSGHDR_DN_OUT(4,19)<4>   	ubDNDI_RESP(5,112)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
443
 
444
// Second 8 x 8 Block
445
// Second message: clone the r31 header into r36, then advance dword 0 (the X origin set up for the first
// message) by 0x10, i.e. 8 pixels x 2 bytes to the right.
mov	(8)	r36.0<1>:ud		r31.0<8;8,1>:ud
446
add	(1)	r36.0<1>:ud		r36.0<0;1,0>:w		0x10:w
447
 
448
	// Right 8 columns of the block, packed into r37-r40 with the same byte layout as the first message:
	// Y from source columns 8-15 of response registers 0..3, U from the odd and V from the even bytes
	// (starting at byte 8 of each row) of the interleaved data at response register 5 onwards.
	mov (8)		mubMSGHDR_DN_OUT_2(1)<2>		ubDNDI_RESP(0,8)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
449
	mov (8)		mubMSGHDR_DN_OUT_2(1,16)<2>	ubDNDI_RESP(0,24)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
450
	mov (8)		mubMSGHDR_DN_OUT_2(2)<2>		ubDNDI_RESP(0,40)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
451
	mov (8)		mubMSGHDR_DN_OUT_2(2,16)<2>	ubDNDI_RESP(0,56)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
452
	mov (8)		mubMSGHDR_DN_OUT_2(3)<2>		ubDNDI_RESP(0,72)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
453
	mov (8)		mubMSGHDR_DN_OUT_2(3,16)<2>	ubDNDI_RESP(0,88)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
454
	mov (8)		mubMSGHDR_DN_OUT_2(4)<2>		ubDNDI_RESP(0,104)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
455
	mov (8)		mubMSGHDR_DN_OUT_2(4,16)<2>	ubDNDI_RESP(0,120)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
456
 
457
	mov (4)     mubMSGHDR_DN_OUT_2(1,1)<4>   	ubDNDI_RESP(5,9)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
458
	mov (4)     mubMSGHDR_DN_OUT_2(1,17)<4>   	ubDNDI_RESP(5,25)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
459
 
460
	mov (4)     mubMSGHDR_DN_OUT_2(1,3)<4>   	ubDNDI_RESP(5,8)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
461
	mov (4)     mubMSGHDR_DN_OUT_2(1,19)<4>   	ubDNDI_RESP(5,24)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
462
	mov (4)     mubMSGHDR_DN_OUT_2(2,1)<4>   	ubDNDI_RESP(5,41)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
463
	mov (4)     mubMSGHDR_DN_OUT_2(2,17)<4>   	ubDNDI_RESP(5,57)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
464
 
465
	mov (4)     mubMSGHDR_DN_OUT_2(2,3)<4>   	ubDNDI_RESP(5,40)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
466
	mov (4)     mubMSGHDR_DN_OUT_2(2,19)<4>   	ubDNDI_RESP(5,56)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
467
	mov (4)     mubMSGHDR_DN_OUT_2(3,1)<4>   	ubDNDI_RESP(5,73)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
468
	mov (4)     mubMSGHDR_DN_OUT_2(3,17)<4>   	ubDNDI_RESP(5,89)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
469
 
470
	mov (4)     mubMSGHDR_DN_OUT_2(3,3)<4>   	ubDNDI_RESP(5,72)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
471
	mov (4)     mubMSGHDR_DN_OUT_2(3,19)<4>   	ubDNDI_RESP(5,88)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
472
	mov (4)     mubMSGHDR_DN_OUT_2(4,1)<4>   	ubDNDI_RESP(5,105)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
473
	mov (4)     mubMSGHDR_DN_OUT_2(4,17)<4>   	ubDNDI_RESP(5,121)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
474
 
475
	mov (4)     mubMSGHDR_DN_OUT_2(4,3)<4>   	ubDNDI_RESP(5,104)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
476
	mov (4)     mubMSGHDR_DN_OUT_2(4,19)<4>   	ubDNDI_RESP(5,120)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
477
 
478
//send out data through data port
479
// Two writes with the same descriptor: the first starts at header r31 (left 8 columns), the second at
// header r36 (right 8 columns). By the media-block-write layout documented earlier in this file
// (base 0x020A8000), 0xA0A801B is message length 5 (header + 4 data registers) with binding table index 0x1B.
send (8)    null<1>:d    r31.0		0x5    0xA0A801B:ud
480
send (8)    null<1>:d    r36.0	0x5    0xA0A801B:ud
481
 
482
 
483
 
484
//End of Thread message
485
 
486
// End of thread: copy r0 into r127 and issue the final single-channel send from r127
// (extended descriptor 0x27, descriptor 0x02000010).
mov (8) r127<1>:ud r0.0<8;8,1>:ud
487
 send (1) null<1>:d r127 0x27 0x02000010
488
 
489
 
490
.end_code
491
.end_kernel