Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
3769 Serge 1
/*
2
 *  Copyright 2000-2011 Intel Corporation All Rights Reserved
3
 *
4
 *  Licensed under the Apache License, Version 2.0 (the "License");
5
 *  you may not use this file except in compliance with the License.
6
 *  You may obtain a copy of the License at
7
 *
8
 *      http://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 *  Unless required by applicable law or agreed to in writing, software
11
 *  distributed under the License is distributed on an "AS IS" BASIS,
12
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 *  See the License for the specific language governing permissions and
14
 *  limitations under the License.
15
 */
16
// 1153    // Total instruction count
17
//    1    // Total kernel count
18
 
19
 
20
.kernel NV12_DNUV_NV12
21
.code
22
 
23
 
24
 
25
//Module		: DN_UV_Setup
26
//Author		: Tatiya, Rupesh
27
//Description	: Initial Set-up for DN_UV
28
 
29
 
30
 
31
 
32
// Module name	: ChromaDenoise.inc
33
// Author		: Tatiya, Rupesh
34
 
35
 
36
 
37
 
38
// Module name: common.inc
39
//
40
// Common header file for all Video-Processing kernels
41
//
42
 
43
.default_execution_size (16)
44
.default_register_type  :ub
45
 
46
.reg_count_total        128
47
.reg_count_payload      7
48
 
49
//========== Common constants ==========
50
 
51
 
52
//========== Macros ==========
53
 
54
 
55
//Fast Jump, For more details see "Set_Layer_N.asm"
56
 
57
 
58
//========== Defines ====================
59
 
60
//========== Static Parameters (Common To All) ==========
61
//r1
62
 
63
 
64
//r2
65
 
66
                                    //  e.g.            byte0   byte1  byte2
67
                                    // YUYV               0       1      3
68
                                    // YVYU               0       3      1
69
 
70
//Color Pipe (IECP) parameters
71
 
72
 
73
//ByteCopy
74
 
75
 
76
//r4
77
 
78
                                    //  e.g.              byte0           byte1           byte2
79
                                    // YUYV                 0               1               3
80
                                    // YVYU                 0               3               1
81
 
82
 
83
//========== Inline parameters (Common To All) ===========
84
 
85
 
86
//============== Binding Index Table===========
87
//Common between DNDI and DNUV
88
 
89
 
90
//================= Common Message Descriptor =====
91
// Message descriptor for thread spawning
92
// Message Descriptors
93
//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
94
//                  0000,0000,0000
95
//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
96
//                = 0x02000011
97
// Thread Spawner Message Descriptor
98
 
99
 
100
// Message descriptor for atomic operation add
101
// Message Descriptors
102
//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
103
//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
104
//                  0000,0000 (Binding table index, added later)
105
//                = 0x0C098700
106
 
107
// Atomic Operation Add Message Descriptor
108
 
109
 
110
// Message descriptor for dataport media write
111
        // Message Descriptors
112
                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
113
                //                  1 (header present 1) 0 1010 (media block write) 000000
114
                //                  00000000 (binding table index - set later)
115
                //                = 0x020A8000
116
 
117
 
118
// Message Length defines
119
 
120
 
121
// Response Length defines
122
 
123
 
124
// Block Width and Height Size defines
125
 
126
 
127
// Extended Message Descriptors
128
 
129
 
130
// Common message descriptors:
131
 
132
 
133
//===================== Math Function Control ===================================
134
 
135
 
136
//============ Message Registers ===============
137
                             // buf4 starts from r28
138
 
139
 
140
//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
141
 
142
 
143
.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
144
.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
145
.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
146
.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
147
 
148
//=================== End of thread instruction ===========================
149
 
150
 
151
//=====================Pointers Used=====================================
152
 
153
 
154
//=======================================================================
155
 
156
 
157
//r9-r17
158
// Define temp space for any usages
159
 
160
 
161
// Common Buffers
162
 
163
 
164
// temp space for rotation
165
 
166
.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f
167
 
168
.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud
169
 
170
.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
171
 
172
.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub
173
 
174
.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub
175
 
176
 
177
// End of common.inc
178
 
179
 
180
//Interface:
181
//Static Parameters:
182
//r1
183
 
184
 
185
//======================================================
186
//Interface for serpent mode Chroma Denoise, added by Le
187
//======================================================
188
//r1
189
 
190
 
191
//noise history thresholds (low and high)
192
 
193
 
194
//temporal difference thresholds (high and low)
195
 
196
 
197
//noise history thresholds (low and high)
198
//#define ubNoiseHistMaxHigh  r1.22
199
//#define ubNoiseHistMaxLow  r1.23
200
//#define ubNoiseHistDeltaHigh  r1.24
201
//#define ubNoiseHistDeltaLow  r1.25
202
 
203
//Gaussian thresholds
204
 
205
 
206
//temporal difference thresholds (default)
207
 
208
 
209
//r2
210
//history thresholds (default)
211
 
212
 
213
//denoise factor  (0-63)
214
 
215
 
216
//====================== Binding table (Explicit To DNUV)=========================================
217
//Used by DN_UV kernels
218
 
219
 
220
	//Pointer to Current Frame UV
221
 
222
 
223
//r1-r6
224
	//CURBE GRFs used as TEMP : Used for max computation and storing max temporarily. : r1-r6
225
 
226
 
227
	.declare	ubCURBE_TEMP	Base=r1.0	ElementSize=1	Type=ub
228
	.declare	uwCURBE_TEMP	Base=r1.0	ElementSize=2	Type=uw
229
	.declare	wCURBE_TEMP		Base=r1.0	ElementSize=2	Type=w
230
	.declare	fCURBE_TEMP		Base=r1.0	ElementSize=4	Type=f
231
	.declare	udCURBE_TEMP		Base=r1.0	ElementSize=4	Type=ud
232
	.declare	uwMAX_ABS_DIFF	Base=r5.0	ElementSize=2	Type=uw
233
 
234
	//r1
235
 
236
 
237
	//r3
238
 
239
 
240
    //r4
241
 
242
//r7
243
	//All of the following has to defined in Same GRF for optimal performance.
244
 
245
 
246
//r8-24
247
    //Previous Frame UV
248
 
249
	.declare	udPREV_UV		Base=r8.0	ElementSize=4	Type=ud
250
	.declare	ubPREV_UV		Base=r8.0	ElementSize=1	Type=ub
251
 
252
 
253
//r25-48
254
	//TEMP Space for any Usage.
255
 
256
 
257
//=========================================================================
258
//Definitions and declarations for serpent mode Chroma Denoise, added by Le
259
//=========================================================================
260
 
261
 
262
	.declare	udGNE_UV		Base=r24.0	ElementSize=4	Type=ud
263
  .declare	fGNE_UV		Base=r24.0	ElementSize=4	Type=f
264
  .declare	ubGNE_UV		Base=r24.0	ElementSize=1	Type=ub
265
 
266
  .declare	udMSGHDR_BNE_SERP	Base=r25.0	ElementSize=4	Type=ud
267
  .declare	udMSGSRC_BNE_SERP	Base=r26.0	ElementSize=4	Type=ud
268
 
269
 
270
  .declare	ubDN_UV_Thresholds Base=r26.0	ElementSize=1	Type=ub
271
  .declare	ubDN_UV_Thresholds_Temp  Base=r27.0	ElementSize=1	Type=ub
272
  .declare	udDN_UV_Thresholds Base=r26.0	ElementSize=4	Type=ud
273
  .declare	udDN_UV_Thresholds_Temp Base=r27.0	ElementSize=4	Type=ud
274
  .declare	fDN_UV_Thresholds Base=r26.0	ElementSize=4	Type=f
275
  .declare	fDN_UV_Thresholds_Temp Base=r27.0	ElementSize=4	Type=f
276
 
277
 
278
//====================================================================================
279
 
280
 
281
	//TEMP23: To hold V data for PL3 surfaces
282
	.declare	udCURR_V_TEMP	Base=r25.0	ElementSize=4	Type=ud
283
	.declare	ubCURR_V_TEMP	Base=r25.0	ElementSize=1	Type=ub
284
 
285
	//GRFs to calculate Median: r25-r42
286
	.declare	ubMEDIAN_TEMP	Base=r25.0	ElementSize=1	Type=ub
287
 
288
	//18 GRFs to hold difference : r25-r42
289
	.declare	wDIFF			Base=r25.0	ElementSize=2	Type=w
290
	.declare	uwDIFF			Base=r25.0	ElementSize=2	Type=uw
291
 
292
	//Temporal Diff
293
	.declare	wDIFF_TEMPORAL			Base=r25.0	ElementSize=2	Type=w
294
	.declare	ubDIFF_TEMPORAL			Base=r25.0	ElementSize=1	Type=ub
295
 
296
	//4 GRFs to hold Sobel Value : r43-46
297
	.declare	wSOBEL_X	Base=r43.0	ElementSize=2	Type=w
298
	.declare	uwSOBEL		Base=r43.0	ElementSize=2	Type=uw
299
 
300
 
301
	//2 GRFs to hold SOAD temporarily: r47-48
302
	.declare	uwSOAD			Base=r47.0	ElementSize=2	Type=uw
303
 
304
	//Temp GRFs to hold extra YUYV pixels: r43-r48
305
	.declare	ubTEMP5			Base=r43.0	ElementSize=1	Type=ub
306
 
307
	//Temp GRFs in Median Calculation: r47-r48
308
	.declare	ubTEMP1			Base=r47.0	ElementSize=1	Type=ub
309
 
310
	.declare	uwTEMP0			Base=r48.0	ElementSize=2	Type=uw
311
	.declare	ubTEMP0			Base=r48.0	ElementSize=1	Type=ub
312
 
313
	//Temp Space to store Median : r49-50
314
 
315
	.declare	ubMEDIAN	Base=r49.0	ElementSize=1	Type=ub
316
 
317
//r49
318
 
319
 
320
//r50
321
    //Message Source
322
 
323
 
324
//r51
325
	//DN_UV History Surface
326
 
327
	.declare	udHIST_UV		Base=r51.0	ElementSize=4	Type=ud
328
	.declare	ubHIST_UV		Base=r51.0	ElementSize=1	Type=ub
329
 
330
//r52 - r91
331
	//r52
332
	//Current Frame UV
333
 
334
 
335
	.declare	udCURR_UV		Base=r52.0	ElementSize=4	Type=ud
336
	.declare	ubCURR_UV		Base=r52.0	ElementSize=1	Type=ub
337
 
338
	//r54
339
	//CURBE COPY
340
 
341
 
342
	//r55
343
 
344
 
345
	.declare 	uwSOAD_MIN_8x4		Base=r56.0	ElementSize=2	Type=uw
346
 
347
	//r61
348
 
349
 
350
	//r62
351
 
352
 
353
	//History Surface Temp Origin
354
 
355
 
356
    //r63
357
    //Current Frame Y Temp Origin
358
 
359
 
360
	//BNE Surface Origin
361
 
362
 
363
    //r70
364
 
365
	.declare	uwDIFF_TEMPORAL_SUM4x4	Base=r70.0	ElementSize=2	Type=uw  //4 GRFs
366
 
367
	//r74-91 : For Saving Dest UV (PL2/PL3)
368
 
369
 
370
	.declare	ubMSGPAYLOAD_UV0	Base=r75.0	ElementSize=1	Type=ub
371
 
372
 
373
	.declare	ubMSGPAYLOAD_U		Base=r75.0	ElementSize=1	Type=ub
374
 
375
 
376
	.declare	ubMSGPAYLOAD_UV1	Base=r84.0	ElementSize=1	Type=ub
377
 
378
 
379
	.declare	ubMSGPAYLOAD_V		Base=r84.0	ElementSize=1	Type=ub
380
 
381
	//r90
382
 
383
	.declare	uwDIFF_TEMPORAL_SUM4x4_FINAL	Base=r90.0	ElementSize=2	Type=uw  //2 GRFs
384
 
385
//r92-127
386
	//Current Frame Y
387
 
388
 
389
	//r92
390
    .declare	uwDIFF_TEMPORAL_SUM4x4_TEMP_0		Base=r92	ElementSize=2	Type=uw
391
	//r101
392
    .declare	uwDIFF_TEMPORAL_SUM4x4_TEMP_1		Base=r101	ElementSize=2	Type=uw
393
	//r110
394
    .declare	uwDIFF_TEMPORAL_SUM4x4_TEMP_2		Base=r110	ElementSize=2	Type=uw
395
	//r119
396
    .declare	uwDIFF_TEMPORAL_SUM4x4_TEMP_3		Base=r119	ElementSize=2	Type=uw
397
 
398
	.declare	udCURR_Y0		Base=r93.0	ElementSize=4	Type=ud
399
    .declare	ubCURR_Y0		Base=r93.0	ElementSize=1	Type=ub
400
    .declare	udCURR_Y1		Base=r102.0	ElementSize=4	Type=ud
401
	.declare	ubCURR_Y1		Base=r102.0	ElementSize=1	Type=ub
402
	.declare	udCURR_Y2		Base=r111.0	ElementSize=4	Type=ud
403
	.declare	ubCURR_Y2		Base=r111.0	ElementSize=1	Type=ub
404
	.declare	udCURR_Y3		Base=r120.0	ElementSize=4	Type=ud
405
	.declare	ubCURR_Y3		Base=r120.0	ElementSize=1	Type=ub
406
 
407
	//r92: To hold U data for PL3 surfaces
408
	.declare	udCURR_U_TEMP		Base=r92.0	ElementSize=4	Type=ud
409
    .declare	ubCURR_U_TEMP		Base=r92.0	ElementSize=1	Type=ub
410
 
411
    //r112: To hold previous-frame U data for PL3 surfaces
412
	.declare	udPREV_U_TEMP		Base=r112.0	ElementSize=4	Type=ud
413
	.declare	ubPREV_U_TEMP		Base=r112.0	ElementSize=1	Type=ub
414
 
415
	//r120: To hold previous-frame V data for PL3 surfaces
416
	.declare	udPREV_V_TEMP		Base=r120.0	ElementSize=4	Type=ud
417
	.declare	ubPREV_V_TEMP		Base=r120.0	ElementSize=1	Type=ub
418
 
419
 
420
	// Initialize message source with r0: copy all 8 dwords of r0 into r50 and into r92/r101/r110/r119.
421
	mov (8)   r50.0<1>:ud		r0.0<8;8,1>:ud		// r50 is the source register of the UV / history sends further down
422
	mov (8)   r92.0<1>:ud		r0.0<8;8,1>:ud		// r92, r101, r110, r119 are the source registers of the four current-Y sends
423
	mov (8)   r101.0<1>:ud		r0.0<8;8,1>:ud
424
	mov (8)   r110.0<1>:ud		r0.0<8;8,1>:ud
425
	mov (8)   r119.0<1>:ud		r0.0<8;8,1>:ud		// NOTE(review): r92/r101/r110/r119 are fully rewritten from acc0 before their sends in the Y-load section; these four copies look redundant - confirm
426
 
427
 
428
 
429
//Module Name 	: 	DN_UV_PL2_Load_Curr_Frame_UV
430
//Author		:	Tatiya, Rupesh
431
//Description	:	Loads Current Frame UV data for PL2 input.
432
 
433
 
434
 
435
//Module name 	:  DN_UV_Load_Curr_Frame_UV
436
//Author		:  Tatiya, Rupesh
437
//Description	:  Loads Current Frame (UV only).
438
//				   We need 4 extra rows (2 per field) and 2 extra pixel (1 each side) for both U and V each.
439
//				   The processing size is 16x16 U and V each. So we need : U size - 18x20, V size - 18x20, UV size - 36x20, YUYV size - 72x20.
440
 
441
 
442
 
443
 
444
//36x20 interleaved UV block is partitioned as follows:
445
//				<------ 18 --------> <--------18 ------->
446
//				-----------------------------------------
447
//				|		20x8   A 	!|    20x8     D    !
448
//				|      (overlapped) !|   (overlapped) 	!
449
//				|-------------------!|------------------!
450
//				|       20x8   B    !|      20x8   E    !
451
//				|      (overlapped) !|     (overlapped) !
452
//				|-------------------!|------------------!
453
//				|		20x4   C    !|		20x4   F    !
454
//				|     (overlapped)	!|    (overlapped)	!
455
//				-----------------------------------------
456
//
457
// Coordinates: (x-2, y-2), (x+14, y-2), (x-2, y+6), (x+14, y+6), (x-2, y+14), (x+14, y+14)
458
 
459
	//UV surface origin: (ORIX, ORIY/2). Stored in r7.4/r7.5 (words); acc0.0/acc0.1 then carry it as dwords for the six sub-block reads A-F below.
460
	add  (2)	r7.4<1>:w		r7.0<2;2,1>:w	 	r4.4<2;2,1>:w	 { AccWrEn } // Source Block origin = r7.0/r7.1 + r4.4/r4.5 (words); AccWrEn also leaves the sums in the accumulator
461
	shr  (1)	r7.5<1>:w		acc0.5<0;1,0>:w		1:w		// Y origin halved for the UV surface
462
	mov  (2)	acc0.0<1>:d							r7.4<2;2,1>:w		// acc0.0/acc0.1 = (X, Y/2) widened from words to dwords
463
 
464
	//A: left column, first 8 rows -> origin (x-2, y-2)
465
	add  (2)   	r50.0<1>:d	acc0.0<2;2,1>:d		-2:d					{ AccWrEn }	// r50.0/r50.1 = origin - 2; acc0 is updated to (x-2, y-2) and reused by B..F
466
 	mov  (1)   	r50.2<1>:ud	0x70013:ud		// block size; presumably (rows-1)<<16 | (bytes-1) = 8 rows x 20 bytes, cf. 0xF000F documented as 16x16 in the prev-frame load
467
 	send (8)	udCURR_UV(0)<1>			r50		0x4	0x2890004:ud		// read into CURR_UV registers 0..7 (descriptor response-length field = 8)
468
 
469
	//B: left column, next 8 rows -> Y = (y-2) + 8
470
 	add  (1)    r50.1<1>:d	acc0.1<0;1,0>:d			8:d
471
 	send (8)	udCURR_UV(8)<1>			r50		0x4	0x2890004:ud		// read into CURR_UV registers 8..15; X and block size unchanged from A
472
 
473
	//C: left column, last 4 rows -> Y = (y-2) + 16
474
	add  (1)    r50.1<1>:d	acc0.1<0;1,0>:d			16:d
475
	mov  (1)   	r50.2<1>:ud	0x30013:ud		// block size switched to 4 rows x 20 bytes (same presumed encoding as above)
476
 	send (8)	udCURR_UV(16)<1>		r50		0x4	0x2490004:ud		// read into CURR_UV registers 16..19 (descriptor response-length field = 4)
477
 
478
	//D: right column, first 8 rows -> X = (x-2) + 16, Y back to y-2
479
	add  (1)    r50.0<1>:d	acc0.0<0;1,0>:d			16:d				{ AccWrEn }	// AccWrEn: acc0.0 now holds the right-column X
480
	mov  (1)    r50.1<1>:d	acc0.1<0;1,0>:d		// restore Y to the value left in acc0.1 by A
481
	mov  (1)   	r50.2<1>:ud	0x70013:ud		// back to the 8-row block size
482
 	send (8)	udCURR_UV(20)<1>		r50		0x4	0x2890004:ud		// read into CURR_UV registers 20..27
483
 
484
	//E: right column, next 8 rows -> Y = (y-2) + 8
485
 	add  (1)    r50.1<1>:d	acc0.1<0;1,0>:d			8:d
486
 	send (8)	udCURR_UV(28)<1>		r50		0x4	0x2890004:ud		// read into CURR_UV registers 28..35
487
 
488
	//F: right column, last 4 rows -> Y = (y-2) + 16
489
 	add  (1)    r50.1<1>:d	acc0.1<0;1,0>:d			16:d
490
	mov  (1)   	r50.2<1>:ud	0x30013:ud		// 4-row block size again
491
 	send (8)	udCURR_UV(36)<1>		r50		0x4	0x2490004:ud		// read into CURR_UV registers 36..39
492
 
493
 	//History Origin, Current Y origin and BNE surface origin - all are in inline GRF. Use , . -rT.
494
 
495
 	//Calculate Origin For History Surface: (ORIX/4, ORIY/8). Uses the unshifted origin in r7.0/r7.1, not the offset-adjusted one in r7.4/r7.5.
496
	mov  (16)   acc0.0<1>:w						r7.0<0;2,1>:w		// region <0;2,1> repeats the (r7.0, r7.1) pair, so even acc words = X, odd acc words = Y
497
	shr  (1)	r7.2<1>:w		acc0.2<0;1,0>:w	2:w		// r7.2 = X >> 2
498
	shr  (1)	r7.3<1>:w		acc0.3<0;1,0>:w	3:w		// r7.3 = Y >> 3
499
 
500
	//Calculate Origin For BNE Surface: (ORIX/8, ORIY/16)
501
	shr  (1)	r7.6<1>:w		acc0.6<0;1,0>:w		3:w		// r7.6 = X >> 3
502
	shr  (1)	r7.7<1>:w		acc0.7<0;1,0>:w		4:w		// r7.7 = Y >> 4
503
 
504
 
505
 
506
//Module Name 	: 	DN_UV_PL2_Load_Prev_Frame_UV
507
//Author		:	Tatiya, Rupesh
508
//Description	:	Loads Previous Frame UV data for PL2 input.
509
 
510
 
511
 
512
//Module Name 	: 	DN_UV_Load_Prev_Frame_UV
513
//Author		:	Tatiya, Rupesh
514
//Description	:   Loads Prev Frame (UV only). U size - 16x16, V size - 16x16, UV size - 32x16, YUYV size - 64x16.
515
 
516
 
517
 
518
 
519
	mov  (2)	r50.0<1>:d		r7.4<2;2,1>:w			{ AccWrEn } 	// Source block origin: r50.0/r50.1 = r7.4/r7.5 (UV origin), also kept in acc0 for the second read
520
	mov  (1)	r50.2<1>:ud		0xF000F:ud  						// U/V block width and height (16x16)
521
	send (8)	udPREV_UV(0)<1>		r50		0x4	0x2890001:ud		// first 16x16 read into PREV_UV registers 0..7
522
 
523
	add  (1)	r50.0<1>:ud 		acc0.0<0;1,0>:d		16:w										// Add 16 to X origin
524
	send (8)	udPREV_UV(8)<1>		r50		0x4	0x2890001:ud		// second 16x16 read (X + 16) into PREV_UV registers 8..15
525
 
526
 
527
	//TODO - See if History loading can be combined with Prev Frame Load. - rT
528
 
529
 
530
//Module name 	:  DN_UV_Load_Hist_UV
531
//Author		:  Tatiya, Rupesh
532
//Description	:  Load DN History for UV denoise. 4x4 for each U & V.
533
 
534
 
535
 
536
 
537
	mov  (2)	r50.0<1>:d	r7.2<2;2,1>:w		// r50.0/r50.1 = history-surface origin from r7.2/r7.3 (ORIX/4, ORIY/8, computed earlier)
538
	mov  (1)	r50.2<1>:ud	0x30007:ud		// block size; presumably 4 rows x 8 bytes with the same (rows-1)<<16 | (bytes-1) encoding as 0xF000F = 16x16
539
	send (8)	udHIST_UV(0)<1>			r50		0x4	0x2190022:ud		// read into HIST_UV register 0 (r51); descriptor response-length field = 1
540
 
541
 
542
 
543
//Module Name 	: DN_UV_420_Load_Curr_Frame_Y
544
//Author		: Tatiya, Rupesh
545
//Description	: Load Curr Frame Y data for 420 Input
546
 
547
 
548
 
549
//Module Name 	: DN_UV_Load_Curr_Frame_Y
550
//Author		: Tatiya, Rupesh
551
//Description	: Loads Y of Current frame.
552
 
553
 
554
 
555
 
556
	//For 16x16 U and 16x16 V for 420, we need to read 32x32 Y. Done as four 16x16 reads whose origins differ by 16 in X and/or Y.
557
 
558
	mov (8)		acc0.0<1>:ud		r0.0<8;8,1>:ud		// build the common message source in the accumulator, starting from r0
559
	mov (1)		acc0.2<1>:ud		0xF000F:ud		// block size 16x16 (same value as the prev-frame load)
560
	add (2)		acc0.0<1>:ud		r7.0<2;2,1>:w		r4.4<2;2,1>:w		// acc0.0/acc0.1 = (X, Y) = r7.0/r7.1 + r4.4/r4.5
561
 
562
	mov (8)     r92.0<1>:ud	acc0.0<8;8,1>:ud		// block 0 source: origin (X, Y)
563
 
564
	mov (8)     r101.0<1>:ud	acc0.0<8;8,1>:ud		// block 1 source, Y adjusted below
565
	mov (8)     r110.0<1>:ud	acc0.0<8;8,1>:ud		// block 2 source, X adjusted below
566
	mov (8)     r119.0<1>:ud	acc0.0<8;8,1>:ud		// block 3 source, X and Y adjusted below
567
 
568
	add (1)		r101.1<1>:d 	acc0.1<0;1,0>:d   		16:d		// block 1 origin: (X, Y+16)
569
 
570
	add (1)		r110.0<1>:d 	acc0.0<0;1,0>:d   		16:d		// block 2 origin: (X+16, Y)
571
 
572
	add (2)		r119.0<1>:d 	acc0.0<2;2,1>:d   		16:d		// block 3 origin: (X+16, Y+16)
573
 
574
	send (8)	udCURR_Y0(0)<1>		r92		0x4	0x2890003:ud		// each read lands in the registers right after its own source register (r93.., r102.., r111.., r120..)
575
	send (8)	udCURR_Y1(0)<1>		r101		0x4	0x2890003:ud
576
	send (8)	udCURR_Y2(0)<1>		r110		0x4	0x2890003:ud
577
	send (8)	udCURR_Y3(0)<1>		r119		0x4	0x2890003:ud
578
 
579
 
580
 
581
//Module Name : DN_UV_Noise_Detection_UV
582
//Author	  : Tatiya, Rupesh
583
//Description : Performs noise detection on 16x16 U and 16x16 V each.
584
 
585
 
586
 
587
//Module Name 	: DN_UV_Move_CURBE_Inline_UV.asm
588
//Author		: Tatiya, Rupesh
589
 
590
 
591
 
592
 
593
	//Mov CURBE data to another space - so that it can be used as Temp Space --> r1 - r6
	//NOTE(review): every destination below is at byte offset >= 20 of r54..r63, inside the udCURR_UV register range; presumably the unused tail of the 20-byte-wide UV rows - confirm.
594
	mov (4)	r54.28<1>:ub		r2.28<4;4,1>:ub		//Dest. YUY2 offset
595
	mov (2) r54.5<1>:ud		r4.0<4;2,2>:ud		//Src YUY2 offset and Origin offset (region <4;2,2> picks dwords r4.0 and r4.2)
596
	mov (4)	r55.28<1>:ub		r1.0<4;4,1>:ub		// r1 bytes 0..3 -> r55 bytes 28..31
597
 
598
	mov (8) r61.20<1>:ub		r1.4<8;8,1>:ub		// r1 bytes 4..11 -> r61 bytes 20..27 (read later as r61.20 / r61.22 / r61.24 in the history compares)
599
	mov (4) r61.28<1>:ub		r1.12<4;4,1>:ub		// r1 bytes 12..15 -> r61 bytes 28..31
600
 
601
	//Move Inline Data to another space - so that it can be used as Temp Space --> r7
602
	mov (4) r62.10<1>:w				r7.0<4;4,1>:w		// r7 words 0..3 (origin, history origin) -> r62 words 10..13
603
	mov (4) r63.10<1>:w		r7.4<4;4,1>:w		// r7 words 4..7 (UV origin, BNE origin) -> r63 words 10..13
604
 
605
 
606
 
607
 
608
 
609
//Module Name	: DN_UV_Noise_Detection_Set_Top_Region_N
610
//Author		: Tatiya, Rupesh
611
//Description	: Sets sub-region region N from Top region.
612
 
613
 
614
	mov (1) a0.0:uw				1664:uw		// a0.0 = 1664 = 52*32: byte offset of r52 (udCURR_UV base) assuming 32-byte GRFs
615
	mov (1)	a0.1:uw	1816:uw		// a0.1 = 1816 = 56*32 + 24: byte 24 of r56 under the same assumption
616
 
617
 
618
 
619
 
620
add (1) r7.7<1>:d ip:ud 32:d { NoCompact }		// "Fast Jump": r7.7:d = ip + 32, presumably the return address (two NoCompact instructions ahead)
621
 jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact }		// jump to the shared noise-detection routine (label not in this part of the file)
622
 
623
 
624
 
625
 
626
 
627
//Module Name	: DN_UV_Noise_Detection_Set_Top_Region_N
628
//Author		: Tatiya, Rupesh
629
//Description	: Sets sub-region region N from Top region.
630
 
631
 
632
	//TODO - remove one instruction here using arithmetic. -rT
633
	mov (1) a0.0:uw				1792:uw		// a0.0 = 1792 = 56*32: byte offset of r56 assuming 32-byte GRFs (4 registers past the previous region)
634
	mov (1)	a0.1:uw	1820:uw		// a0.1 = 1820 = 56*32 + 28: byte 28 of r56 under the same assumption
635
 
636
 
637
 
638
 
639
add (1) r7.7<1>:d ip:ud 32:d { NoCompact }		// "Fast Jump": r7.7:d = ip + 32, presumably the return address (two NoCompact instructions ahead)
640
 jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact }		// jump to the shared noise-detection routine (label not in this part of the file)
641
 
642
 
643
 
644
 
645
 
646
//Module Name	: DN_UV_Noise_Detection_Set_Top_Region_N
647
//Author		: Tatiya, Rupesh
648
//Description	: Sets sub-region region N from Top region.
649
 
650
 
651
	//TODO - remove one instruction here using arithmetic. -rT
652
	mov (1) a0.0:uw				1920:uw		// a0.0 = 1920 = 60*32: byte offset of r60 assuming 32-byte GRFs
653
	mov (1)	a0.1:uw	1848:uw		// a0.1 = 1848 = 57*32 + 24: byte 24 of r57 under the same assumption
654
 
655
 
656
 
657
 
658
add (1) r7.7<1>:d ip:ud 32:d { NoCompact }		// "Fast Jump": r7.7:d = ip + 32, presumably the return address (two NoCompact instructions ahead)
659
 jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact }		// jump to the shared noise-detection routine (label not in this part of the file)
660
 
661
 
662
 
663
 
664
 
665
//Module Name	: DN_UV_Noise_Detection_Set_Top_Region_N
666
//Author		: Tatiya, Rupesh
667
//Description	: Sets sub-region region N from Top region.
668
 
669
 
670
	//TODO - remove one instruction here using arithmetic. -rT
671
	mov (1) a0.0:uw				2048:uw		// a0.0 = 2048 = 64*32: byte offset of r64 assuming 32-byte GRFs
672
	mov (1)	a0.1:uw	1852:uw		// a0.1 = 1852 = 57*32 + 28: byte 28 of r57 under the same assumption
673
 
674
 
675
 
676
 
677
add (1) r7.7<1>:d ip:ud 32:d { NoCompact }		// "Fast Jump": r7.7:d = ip + 32, presumably the return address (two NoCompact instructions ahead)
678
 jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact }		// jump to the shared noise-detection routine (label not in this part of the file)
679
 
680
 
681
 
682
 
683
 
684
//Module Name	: DN_UV_Noise_Detection_Set_Top_Region_N
685
//Author		: Tatiya, Rupesh
686
//Description	: Sets sub-region region N from Top region.
687
 
688
 
689
	mov (1) a0.0:uw				2304:uw		// a0.0 = 2304 = 72*32: byte offset of r72 assuming 32-byte GRFs, i.e. udCURR_UV register 20 (start of sub-block D)
690
	mov (1)	a0.1:uw	1880:uw		// a0.1 = 1880 = 58*32 + 24: byte 24 of r58 under the same assumption
691
 
692
 
693
 
694
 
695
add (1) r7.7<1>:d ip:ud 32:d { NoCompact }		// "Fast Jump": r7.7:d = ip + 32, presumably the return address (two NoCompact instructions ahead)
696
 jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact }		// jump to the shared noise-detection routine (label not in this part of the file)
697
 
698
 
699
 
700
 
701
 
702
//Module Name	: DN_UV_Noise_Detection_Set_Top_Region_N
703
//Author		: Tatiya, Rupesh
704
//Description	: Sets sub-region region N from Top region.
705
 
706
 
707
	//TODO - remove one instruction here using arithmetic. -rT
708
	mov (1) a0.0:uw				2432:uw		// a0.0 = 2432 = 76*32: byte offset of r76 assuming 32-byte GRFs
709
	mov (1)	a0.1:uw	1884:uw		// a0.1 = 1884 = 58*32 + 28: byte 28 of r58 under the same assumption
710
 
711
 
712
 
713
 
714
add (1) r7.7<1>:d ip:ud 32:d { NoCompact }		// "Fast Jump": r7.7:d = ip + 32, presumably the return address (two NoCompact instructions ahead)
715
 jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact }		// jump to the shared noise-detection routine (label not in this part of the file)
716
 
717
 
718
 
719
 
720
 
721
//Module Name	: DN_UV_Noise_Detection_Set_Top_Region_N
722
//Author		: Tatiya, Rupesh
723
//Description	: Sets sub-region region N from Top region.
724
 
725
 
726
	//TODO - remove one instruction here using arithmetic. -rT
727
	mov (1) a0.0:uw				2560:uw		// a0.0 = 2560 = 80*32: byte offset of r80 assuming 32-byte GRFs
728
	mov (1)	a0.1:uw	1912:uw		// a0.1 = 1912 = 59*32 + 24: byte 24 of r59 under the same assumption
729
 
730
 
731
 
732
 
733
add (1) r7.7<1>:d ip:ud 32:d { NoCompact }		// "Fast Jump": r7.7:d = ip + 32, presumably the return address (two NoCompact instructions ahead)
734
 jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact }		// jump to the shared noise-detection routine (label not in this part of the file)
735
 
736
 
737
 
738
 
739
 
740
//Module Name	: DN_UV_Noise_Detection_Set_Top_Region_N
741
//Author		: Tatiya, Rupesh
742
//Description	: Sets sub-region region N from Top region.
743
 
744
 
745
	//TODO - remove one instruction here using arithmetic. -rT
746
	mov (1) a0.0:uw				2688:uw		// a0.0 = 2688 = 84*32: byte offset of r84 assuming 32-byte GRFs
747
	mov (1)	a0.1:uw	1916:uw		// a0.1 = 1916 = 59*32 + 28: byte 28 of r59 under the same assumption
748
 
749
 
750
 
751
 
752
add (1) r7.7<1>:d ip:ud 32:d { NoCompact }		// "Fast Jump": r7.7:d = ip + 32, presumably the return address (two NoCompact instructions ahead)
753
 jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact }		// jump to the shared noise-detection routine (label not in this part of the file)
754
 
755
 
756
 
757
//Module 		: DN_UV_Noise_Reduction_UV
758
//Author		: Tatiya, Rupesh
759
//Description	: Performs Noise Reduction on 16x16 U and 16x16 V.
760
//Tasks			: 1. Update weight history
761
//				  2. Determine whether the block is a motion block
762
//				  3. Compute Denoised Pixel.
763
 
764
 
765
 
766
 
767
//History is 1+1 byte every 4x4 U and 4x4 V.
768
 
769
	cmp.l.f0.0 (16) null<1>:w		ubHIST_UV(0,0)<16;16,1>		r61.20<0;2,1>:ub
770
	cmp.l.f1.0 (16) null<1>:w		ubHIST_UV(0,0)<16;16,1>		r61.22<0;2,1>:ub
771
 
772
	mov (16)	uwCURBE_TEMP(0)<1>	0:w
773
	mov (16)	uwCURBE_TEMP(1)<1>	0:w
774
 
775
	//Compute diff between curr and prev. - First 16 lines
776
	// 8 lines here
777
    add (16)	wDIFF_TEMPORAL(0)<1>			ubCURR_UV(2,2)<16;16,1>		-ubPREV_UV(0,0)<16;16,1>		//Diff UV interleaved
778
    add (16)	wDIFF_TEMPORAL(1)<1>			ubCURR_UV(3,2)<16;16,1>		-ubPREV_UV(0,16)<16;16,1>		//Diff UV interleaved
779
    add (16)	wDIFF_TEMPORAL(2)<1>			ubCURR_UV(4,2)<16;16,1>		-ubPREV_UV(0,32)<16;16,1>		//Diff UV interleaved
780
    add (16)	wDIFF_TEMPORAL(3)<1>			ubCURR_UV(5,2)<16;16,1>		-ubPREV_UV(0,48)<16;16,1>		//Diff UV interleaved
781
    add (16)	wDIFF_TEMPORAL(4)<1>			ubCURR_UV(6,2)<16;16,1>		-ubPREV_UV(0,64)<16;16,1>		//Diff UV interleaved
782
    add (16)	wDIFF_TEMPORAL(5)<1>			ubCURR_UV(7,2)<16;16,1>		-ubPREV_UV(0,80)<16;16,1>		//Diff UV interleaved
783
    add (16)	wDIFF_TEMPORAL(6)<1>			ubCURR_UV(8,2)<16;16,1>		-ubPREV_UV(0,96)<16;16,1>		//Diff UV interleaved
784
    add (16)	wDIFF_TEMPORAL(7)<1>			ubCURR_UV(9,2)<16;16,1>		-ubPREV_UV(0,112)<16;16,1>		//Diff UV interleaved
785
 
786
	//Update WT HIST
787
	(-f0.0) shr 	(16) uwCURBE_TEMP(0)<1>		ubHIST_UV(0,0)<16;16,1>		1:w
788
	(f1.0)  add 	(16) uwCURBE_TEMP(2)<1>		ubHIST_UV(0,0)<16;16,1>		r61.24<0;2,1>:ub
789
	(f0.0)  mov 	(16) uwCURBE_TEMP(2)<1>		r61.20<0;2,1>:ub
790
	(-f0.0.anyv) mov 	(16) uwCURBE_TEMP(2)<1>		ubHIST_UV(0,0)<16;16,1>
791
 
792
	cmp.l.f0.0 (16) null<1>:w		ubHIST_UV(0,16)<16;16,1>	r61.20<0;2,1>:ub
793
	cmp.l.f1.0 (16) null<1>:w		ubHIST_UV(0,16)<16;16,1>	r61.22<0;2,1>:ub
794
 
795
	//Compute diff between curr and prev. - First 16 lines
796
	// 8 more lines here
797
    add (16)	wDIFF_TEMPORAL(8)<1>			ubCURR_UV(10,2)<16;16,1>		-ubPREV_UV(0,128)<16;16,1>		//Diff UV interleaved
798
    add (16)	wDIFF_TEMPORAL(9)<1>			ubCURR_UV(11,2)<16;16,1>		-ubPREV_UV(0,144)<16;16,1>		//Diff UV interleaved
799
    add (16)	wDIFF_TEMPORAL(10)<1>			ubCURR_UV(12,2)<16;16,1>		-ubPREV_UV(0,160)<16;16,1>		//Diff UV interleaved
800
    add (16)	wDIFF_TEMPORAL(11)<1>			ubCURR_UV(13,2)<16;16,1>		-ubPREV_UV(0,176)<16;16,1>		//Diff UV interleaved
801
    add (16)	wDIFF_TEMPORAL(12)<1>			ubCURR_UV(14,2)<16;16,1>		-ubPREV_UV(0,192)<16;16,1>		//Diff UV interleaved
802
    add (16)	wDIFF_TEMPORAL(13)<1>			ubCURR_UV(15,2)<16;16,1>		-ubPREV_UV(0,208)<16;16,1>		//Diff UV interleaved
803
    add (16)	wDIFF_TEMPORAL(14)<1>			ubCURR_UV(16,2)<16;16,1>		-ubPREV_UV(0,224)<16;16,1>		//Diff UV interleaved
804
    add (16)	wDIFF_TEMPORAL(15)<1>			ubCURR_UV(17,2)<16;16,1>		-ubPREV_UV(0,240)<16;16,1>		//Diff UV interleaved
805
 
806
	(-f0.0) shr 	(16) uwCURBE_TEMP(1)<1>		ubHIST_UV(0,16)<16;16,1>	1:w
807
	(f1.0)  add 	(16) uwCURBE_TEMP(3)<1>		ubHIST_UV(0,16)<16;16,1>	r61.24<0;2,1>:ub
808
	(f0.0)  mov 	(16) uwCURBE_TEMP(3)<1>		r61.20<0;2,1>:ub
809
	(-f0.0.anyv) mov(16) uwCURBE_TEMP(3)<1>		ubHIST_UV(0,16)<16;16,1>
810
 
811
	//16x16 to 16x4 - First 16 lines
812
	add (16)	acc0.0<1>:uw					(abs)wDIFF_TEMPORAL(0)<16;16,1>	(abs)wDIFF_TEMPORAL(1)<16;16,1>
813
	add (16)	acc0.0<1>:uw					acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(2)<16;16,1>
814
	add (16)	uwDIFF_TEMPORAL_SUM4x4(0)<1>	acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(3)<16;16,1>
815
	//16x16 to 16x4 - First 16 lines
816
	add (16)	acc0.0<1>:uw					(abs)wDIFF_TEMPORAL(4)<16;16,1>	(abs)wDIFF_TEMPORAL(5)<16;16,1>
817
	add (16)	acc0.0<1>:uw					acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(6)<16;16,1>
818
	add (16)	uwDIFF_TEMPORAL_SUM4x4(1)<1>	acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(7)<16;16,1>
819
	//16x16 to 16x4 - First 16 lines
820
	add (16)	acc0.0<1>:uw					(abs)wDIFF_TEMPORAL(8)<16;16,1>	(abs)wDIFF_TEMPORAL(9)<16;16,1>
821
	add (16)	acc0.0<1>:uw					acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(10)<16;16,1>
822
	add (16)	uwDIFF_TEMPORAL_SUM4x4(2)<1>	acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(11)<16;16,1>
823
	//16x16 to 16x4 - First 16 lines
824
	add (16)	acc0.0<1>:uw					(abs)wDIFF_TEMPORAL(12)<16;16,1>	(abs)wDIFF_TEMPORAL(13)<16;16,1>
825
	add (16)	acc0.0<1>:uw					acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(14)<16;16,1>
826
	add (16)	uwDIFF_TEMPORAL_SUM4x4(3)<1>	acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(15)<16;16,1>
827
 
828
//Compute diff between current and previous frame - Second 16 lines
//Each add (16) processes one row of 16 bytes: the current-frame bytes plus the negated
//previous-frame bytes are written as 16 words, i.e. a signed (current - previous) difference.
829
//13 lines: results go to wDIFF_TEMPORAL(16)..(28). The current rows are ubCURR_UV(22..34, 2)
//and the previous-frame byte offset advances by 16 per row (ubPREV_UV(8,0)..(8,192)).
830
    add (16)	wDIFF_TEMPORAL(16)<1>		ubCURR_UV(22,2)<16;16,1>		-ubPREV_UV(8,0)<16;16,1>		//Diff UV interleaved
831
    add (16)	wDIFF_TEMPORAL(17)<1>		ubCURR_UV(23,2)<16;16,1>		-ubPREV_UV(8,16)<16;16,1>		//Diff UV interleaved
832
    add (16)	wDIFF_TEMPORAL(18)<1>		ubCURR_UV(24,2)<16;16,1>		-ubPREV_UV(8,32)<16;16,1>		//Diff UV interleaved
833
    add (16)	wDIFF_TEMPORAL(19)<1>		ubCURR_UV(25,2)<16;16,1>		-ubPREV_UV(8,48)<16;16,1>		//Diff UV interleaved
834
    add (16)	wDIFF_TEMPORAL(20)<1>		ubCURR_UV(26,2)<16;16,1>		-ubPREV_UV(8,64)<16;16,1>		//Diff UV interleaved
835
    add (16)	wDIFF_TEMPORAL(21)<1>		ubCURR_UV(27,2)<16;16,1>		-ubPREV_UV(8,80)<16;16,1>		//Diff UV interleaved
836
    add (16)	wDIFF_TEMPORAL(22)<1>		ubCURR_UV(28,2)<16;16,1>		-ubPREV_UV(8,96)<16;16,1>		//Diff UV interleaved
837
    add (16)	wDIFF_TEMPORAL(23)<1>		ubCURR_UV(29,2)<16;16,1>		-ubPREV_UV(8,112)<16;16,1>		//Diff UV interleaved
838
    add (16)	wDIFF_TEMPORAL(24)<1>		ubCURR_UV(30,2)<16;16,1>		-ubPREV_UV(8,128)<16;16,1>		//Diff UV interleaved
839
    add (16)	wDIFF_TEMPORAL(25)<1>		ubCURR_UV(31,2)<16;16,1>		-ubPREV_UV(8,144)<16;16,1>		//Diff UV interleaved
840
    add (16)	wDIFF_TEMPORAL(26)<1>		ubCURR_UV(32,2)<16;16,1>		-ubPREV_UV(8,160)<16;16,1>		//Diff UV interleaved
841
    add (16)	wDIFF_TEMPORAL(27)<1>		ubCURR_UV(33,2)<16;16,1>		-ubPREV_UV(8,176)<16;16,1>		//Diff UV interleaved
842
    add (16)	wDIFF_TEMPORAL(28)<1>		ubCURR_UV(34,2)<16;16,1>		-ubPREV_UV(8,192)<16;16,1>		//Diff UV interleaved
843
 
844
//3 more lines: the last three rows are written to wCURBE_TEMP(4)..(6) rather than to
//wDIFF_TEMPORAL(29)..(31); the row sums further down read them back from wCURBE_TEMP.
//NOTE(review): presumably wDIFF_TEMPORAL has no registers left for these rows - confirm
//against the register declarations.
845
    add (16)	wCURBE_TEMP(4)<1>		ubCURR_UV(35,2)<16;16,1>		-ubPREV_UV(8,208)<16;16,1>		//Diff UV interleaved
846
    add (16)	wCURBE_TEMP(5)<1>		ubCURR_UV(36,2)<16;16,1>		-ubPREV_UV(8,224)<16;16,1>		//Diff UV interleaved
847
    add (16)	wCURBE_TEMP(6)<1>		ubCURR_UV(37,2)<16;16,1>		-ubPREV_UV(8,240)<16;16,1>		//Diff UV interleaved
848
 
849
	//16x4 to 8x4 - First 16 lines
	//The <4;2,1> regions select word pairs {0,1},{4,5},... (first source) and {2,3},{6,7},...
	//(second source), so each output pair is the sum of two neighbouring input pairs and the
	//even/odd (interleaved) words are never mixed. Each add consumes two registers of row
	//sums - (0),(1) and (2),(3) - and writes one register back in place.
850
	add (16)	uwDIFF_TEMPORAL_SUM4x4(0)<1>		uwDIFF_TEMPORAL_SUM4x4(0,0)<4;2,1>		uwDIFF_TEMPORAL_SUM4x4(0,2)<4;2,1>
851
	add (16)	uwDIFF_TEMPORAL_SUM4x4(1)<1>		uwDIFF_TEMPORAL_SUM4x4(2,0)<4;2,1>		uwDIFF_TEMPORAL_SUM4x4(2,2)<4;2,1>
852
 
853
	//8x4 to 4x4 - First 16 lines
	//Same pairwise reduction applied once more to the two registers produced above; the result
	//is stored in uwDIFF_TEMPORAL_SUM4x4_FINAL(0). AccWrEn additionally writes the result to
	//the accumulator.
854
	add (16)	uwDIFF_TEMPORAL_SUM4x4_FINAL(0)<1>	uwDIFF_TEMPORAL_SUM4x4(0,0)<4;2,1>		uwDIFF_TEMPORAL_SUM4x4(0,2)<4;2,1>		{ AccWrEn }
855
 
856
	//16x16 to 16x4 - Second 16 lines
	//Four groups of four rows each: acc0 accumulates the absolute differences of the rows and
	//the third add of each group stores the per-channel sum in uwDIFF_TEMPORAL_SUM4x4(0..3).
	//These registers are reused: their first-16-lines contents were already reduced into
	//uwDIFF_TEMPORAL_SUM4x4_FINAL(0) by the preceding adds.
	//Rows 16-19 -> uwDIFF_TEMPORAL_SUM4x4(0)
857
	add (16)	acc0.0<1>:uw					(abs)wDIFF_TEMPORAL(16)<16;16,1>	(abs)wDIFF_TEMPORAL(17)<16;16,1>
858
	add (16)	acc0.0<1>:uw					acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(18)<16;16,1>
859
	add (16)	uwDIFF_TEMPORAL_SUM4x4(0)<1>	acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(19)<16;16,1>
860
	//16x16 to 16x4 - Second 16 lines
	//Rows 20-23 -> uwDIFF_TEMPORAL_SUM4x4(1)
861
	add (16)	acc0.0<1>:uw					(abs)wDIFF_TEMPORAL(20)<16;16,1>	(abs)wDIFF_TEMPORAL(21)<16;16,1>
862
	add (16)	acc0.0<1>:uw					acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(22)<16;16,1>
863
	add (16)	uwDIFF_TEMPORAL_SUM4x4(1)<1>	acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(23)<16;16,1>
864
	//16x16 to 16x4 - Second 16 lines
	//Rows 24-27 -> uwDIFF_TEMPORAL_SUM4x4(2)
865
	add (16)	acc0.0<1>:uw					(abs)wDIFF_TEMPORAL(24)<16;16,1>	(abs)wDIFF_TEMPORAL(25)<16;16,1>
866
	add (16)	acc0.0<1>:uw					acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(26)<16;16,1>
867
	add (16)	uwDIFF_TEMPORAL_SUM4x4(2)<1>	acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(27)<16;16,1>
868
 
869
	//16x16 to 16x4 - Second 16 lines
	//Rows 28-31 -> uwDIFF_TEMPORAL_SUM4x4(3). The differences of the last three rows are read
	//from wCURBE_TEMP(4)..(6), which is where the diff computation stored them.
870
	add (16)	acc0.0<1>:uw					(abs)wDIFF_TEMPORAL(28)<16;16,1>	(abs)wCURBE_TEMP(4)<16;16,1>
871
	add (16)	acc0.0<1>:uw					acc0.0<16;16,1>:uw					(abs)wCURBE_TEMP(5)<16;16,1>
872
	add (16)	uwDIFF_TEMPORAL_SUM4x4(3)<1>	acc0.0<16;16,1>:uw					(abs)wCURBE_TEMP(6)<16;16,1>
873
 
874
	//Find if block is motion block - First 16 lines
	//f0.0 is set per channel where the 4x4 sum in uwDIFF_TEMPORAL_SUM4x4_FINAL(0) is greater
	//than the threshold bytes at r61.26/r61.27 (<0;2,1> repeats that byte pair across channels).
875
	cmp.g.f0.0  (16) null<1>:w				uwDIFF_TEMPORAL_SUM4x4_FINAL(0)<16;16,1> 		r61.26<0;2,1>:ub
876
 
877
	//Move TEMPORAL_SUM4x4 for SIMD16 use later.
	//Each mov (8) replicates one word pair of FINAL(0) over 8 channels (<0;2,1> region), so
	//uwDIFF_TEMPORAL_SUM4x4_TEMP_k holds pair 4k in channels 0-7 and pair 4k+2 in channels 8-15.
	//The per-row denoise code compares these registers against r61.26 with SIMD16 cmp.
878
	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0,0)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(0,0)<0;2,1>
879
	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0,8)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(0,2)<0;2,1>
880
	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0,0)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(0,4)<0;2,1>
881
	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0,8)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(0,6)<0;2,1>
882
	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0,0)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(0,8)<0;2,1>
883
	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0,8)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(0,10)<0;2,1>
884
	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0,0)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(0,12)<0;2,1>
885
	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0,8)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(0,14)<0;2,1>
886
 
887
	//Pick Appropriate Weight History Based on motion. - First 16 lines
	//Channels where f0.0 is clear (sum not above the threshold) take their weight from
	//uwCURBE_TEMP(2); channels where it is set keep the value already in uwCURBE_TEMP(0).
	//NOTE(review): the contents of uwCURBE_TEMP(0) and (2) are set up outside this excerpt.
888
	(-f0.0) mov (16) uwCURBE_TEMP(0)<1>		uwCURBE_TEMP(2)<16;16,1>
889
 
890
	//Actual DN - First 16 lines
	//Row 0. The rows that follow repeat this 12-instruction pattern with the row index, the
	//previous-frame byte offset and (every four rows) the weight / block-sum registers advanced.
891
	//f0.0: per channel, |temporal diff| is less than the byte pair at r61.28/r61.29.
	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(0)<16;16,1>			r61.28<0;2,1>:ub
892
	//f1.0: same comparison against the byte pair at r61.30/r61.31.
	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(0)<16;16,1>			r61.30<0;2,1>:ub
893
	//Build acc0 = CURR*(-w) + CURR*256 + PREV*w + 128, i.e. CURR*(256-w) + PREV*w with a
	//rounding term for the >>8 below. w is the word pair at uwCURBE_TEMP(0,0) for channels
	//0-7 and the pair at uwCURBE_TEMP(0,2) for channels 8-15.
	mul (8)	acc0.0<1>:w								ubCURR_UV(2,2)<8;8,1>					-uwCURBE_TEMP(0,0)<0;2,1>
894
	mul (8)	acc0.8<1>:w								ubCURR_UV(2,10)<8;8,1>					-uwCURBE_TEMP(0,2)<0;2,1>
895
	mac (16)	acc0<1>:w							ubCURR_UV(2,2)<16;16,1>					256:w
896
	mac (8)	acc0.0<1>:w								ubPREV_UV(0,0)<8;8,1>					uwCURBE_TEMP(0,0)<0;2,1>
897
	mac (8)	acc0.8<1>:w								ubPREV_UV(0,8)<8;8,1>					uwCURBE_TEMP(0,2)<0;2,1>
898
	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
899
	//The result overwrites the row's difference in wDIFF_TEMPORAL(0): channels with f0.0 set
	//get the blended value (acc0 >> 8), the remaining channels get the unmodified current pixel.
 	(f0.0) shr (16) wDIFF_TEMPORAL(0)<1> 			acc0<16;16,1>:w								8:w
900
	(-f0.0) mov (16) wDIFF_TEMPORAL(0)<1> 			ubCURR_UV(2,2)<16;16,1>
901
	//f0.0 is recomputed: the 4x4 block sum (TEMP_0 for rows 0-3) is <= the r61.26/r61.27 pair.
	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> 	r61.26<0;2,1>:ub
902
	//Where the inverted .allv predicate is true, the result is averaged with the current pixel.
	//NOTE(review): .allv is understood to combine f0.0 and f1.0 per channel, so the average
	//would apply unless both the block-sum test and the r61.30 test passed - confirm in the EU ISA.
	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(0)<1>	wDIFF_TEMPORAL(0)<16;16,1>					ubCURR_UV(2,2)<16;16,1>
903
 
904
	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(1)<16;16,1>			r61.28<0;2,1>:ub
905
	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(1)<16;16,1>			r61.30<0;2,1>:ub
906
	mul (8)	acc0.0<1>:w								ubCURR_UV(3,2)<8;8,1>					-uwCURBE_TEMP(0,0)<0;2,1>
907
	mul (8)	acc0.8<1>:w								ubCURR_UV(3,10)<8;8,1>					-uwCURBE_TEMP(0,2)<0;2,1>
908
	mac (16)	acc0<1>:w							ubCURR_UV(3,2)<16;16,1>					256:w
909
	mac (8)	acc0.0<1>:w								ubPREV_UV(0,16)<8;8,1>				uwCURBE_TEMP(0,0)<0;2,1>
910
	mac (8)	acc0.8<1>:w								ubPREV_UV(0,24)<8;8,1>				uwCURBE_TEMP(0,2)<0;2,1>
911
	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
912
 	(f0.0) shr  (16) wDIFF_TEMPORAL(1)<1>		acc0<16;16,1>:w								8:w
913
	(-f0.0) mov (16) wDIFF_TEMPORAL(1)<1>		ubCURR_UV(3,2)<16;16,1>
914
	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> 	r61.26<0;2,1>:ub
915
	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(1)<1>	wDIFF_TEMPORAL(1)<16;16,1>				ubCURR_UV(3,2)<16;16,1>
916
 
917
	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(2)<16;16,1>			r61.28<0;2,1>:ub
918
	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(2)<16;16,1>			r61.30<0;2,1>:ub
919
	mul (8)	acc0.0<1>:w								ubCURR_UV(4,2)<8;8,1>					-uwCURBE_TEMP(0,0)<0;2,1>
920
	mul (8)	acc0.8<1>:w								ubCURR_UV(4,10)<8;8,1>					-uwCURBE_TEMP(0,2)<0;2,1>
921
	mac (16)	acc0<1>:w							ubCURR_UV(4,2)<16;16,1>					256:w
922
	mac (8)	acc0.0<1>:w								ubPREV_UV(0,32)<8;8,1>				uwCURBE_TEMP(0,0)<0;2,1>
923
	mac (8)	acc0.8<1>:w								ubPREV_UV(0,40)<8;8,1>				uwCURBE_TEMP(0,2)<0;2,1>
924
	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
925
 	(f0.0) shr  (16) wDIFF_TEMPORAL(2)<1> 		acc0<16;16,1>:w								8:w
926
	(-f0.0) mov (16) wDIFF_TEMPORAL(2)<1>		ubCURR_UV(4,2)<16;16,1>
927
	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> 	r61.26<0;2,1>:ub
928
	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(2)<1>	wDIFF_TEMPORAL(2)<16;16,1>				ubCURR_UV(4,2)<16;16,1>
929
 
930
	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(3)<16;16,1>			r61.28<0;2,1>:ub
931
	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(3)<16;16,1>			r61.30<0;2,1>:ub
932
	mul (8)	acc0.0<1>:w								ubCURR_UV(5,2)<8;8,1>					-uwCURBE_TEMP(0,0)<0;2,1>
933
	mul (8)	acc0.8<1>:w								ubCURR_UV(5,10)<8;8,1>					-uwCURBE_TEMP(0,2)<0;2,1>
934
	mac (16)	acc0<1>:w							ubCURR_UV(5,2)<16;16,1>					256:w
935
	mac (8)	acc0.0<1>:w								ubPREV_UV(0,48)<8;8,1>				uwCURBE_TEMP(0,0)<0;2,1>
936
	mac (8)	acc0.8<1>:w								ubPREV_UV(0,56)<8;8,1>				uwCURBE_TEMP(0,2)<0;2,1>
937
	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
938
 	(f0.0) shr  (16) wDIFF_TEMPORAL(3)<1>		acc0<16;16,1>:w								8:w
939
	(-f0.0) mov (16) wDIFF_TEMPORAL(3)<1>		ubCURR_UV(5,2)<16;16,1>
940
	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> 	r61.26<0;2,1>:ub
941
	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(3)<1>	wDIFF_TEMPORAL(3)<16;16,1>				ubCURR_UV(5,2)<16;16,1>
942
 
943
	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(4)<16;16,1>			r61.28<0;2,1>:ub
944
	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(4)<16;16,1>			r61.30<0;2,1>:ub
945
	mul (8)	acc0.0<1>:w								ubCURR_UV(6,2)<8;8,1>					-uwCURBE_TEMP(0,4)<0;2,1>
946
	mul (8)	acc0.8<1>:w								ubCURR_UV(6,10)<8;8,1>					-uwCURBE_TEMP(0,6)<0;2,1>
947
	mac (16)	acc0<1>:w							ubCURR_UV(6,2)<16;16,1>					256:w
948
	mac (8)	acc0.0<1>:w								ubPREV_UV(0,64)<8;8,1>					uwCURBE_TEMP(0,4)<0;2,1>
949
	mac (8)	acc0.8<1>:w								ubPREV_UV(0,72)<8;8,1>					uwCURBE_TEMP(0,6)<0;2,1>
950
	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
951
 	(f0.0) shr (16) wDIFF_TEMPORAL(4)<1> 			acc0<16;16,1>:w								8:w
952
	(-f0.0) mov (16) wDIFF_TEMPORAL(4)<1> 			ubCURR_UV(6,2)<16;16,1>
953
	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> 	r61.26<0;2,1>:ub
954
	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(4)<1>	wDIFF_TEMPORAL(4)<16;16,1>					ubCURR_UV(6,2)<16;16,1>
955
 
956
	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(5)<16;16,1>			r61.28<0;2,1>:ub
957
	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(5)<16;16,1>			r61.30<0;2,1>:ub
958
	mul (8)	acc0.0<1>:w								ubCURR_UV(7,2)<8;8,1>					-uwCURBE_TEMP(0,4)<0;2,1>
959
	mul (8)	acc0.8<1>:w								ubCURR_UV(7,10)<8;8,1>					-uwCURBE_TEMP(0,6)<0;2,1>
960
	mac (16)	acc0<1>:w							ubCURR_UV(7,2)<16;16,1>					256:w
961
	mac (8)	acc0.0<1>:w								ubPREV_UV(0,80)<8;8,1>				uwCURBE_TEMP(0,4)<0;2,1>
962
	mac (8)	acc0.8<1>:w								ubPREV_UV(0,88)<8;8,1>				uwCURBE_TEMP(0,6)<0;2,1>
963
	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
964
 	(f0.0) shr  (16) wDIFF_TEMPORAL(5)<1>		acc0<16;16,1>:w								8:w
965
	(-f0.0) mov (16) wDIFF_TEMPORAL(5)<1>		ubCURR_UV(7,2)<16;16,1>
966
	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> 	r61.26<0;2,1>:ub
967
	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(5)<1>	wDIFF_TEMPORAL(5)<16;16,1>				ubCURR_UV(7,2)<16;16,1>
968
 
969
	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(6)<16;16,1>			r61.28<0;2,1>:ub
970
	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(6)<16;16,1>			r61.30<0;2,1>:ub
971
	mul (8)	acc0.0<1>:w								ubCURR_UV(8,2)<8;8,1>					-uwCURBE_TEMP(0,4)<0;2,1>
972
	mul (8)	acc0.8<1>:w								ubCURR_UV(8,10)<8;8,1>					-uwCURBE_TEMP(0,6)<0;2,1>
973
	mac (16)	acc0<1>:w							ubCURR_UV(8,2)<16;16,1>					256:w
974
	mac (8)	acc0.0<1>:w								ubPREV_UV(0,96)<8;8,1>				uwCURBE_TEMP(0,4)<0;2,1>
975
	mac (8)	acc0.8<1>:w								ubPREV_UV(0,104)<8;8,1>				uwCURBE_TEMP(0,6)<0;2,1>
976
	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
977
 	(f0.0) shr  (16) wDIFF_TEMPORAL(6)<1> 		acc0<16;16,1>:w								8:w
978
	(-f0.0) mov (16) wDIFF_TEMPORAL(6)<1>		ubCURR_UV(8,2)<16;16,1>
979
	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> 	r61.26<0;2,1>:ub
980
	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(6)<1>	wDIFF_TEMPORAL(6)<16;16,1>				ubCURR_UV(8,2)<16;16,1>
981
 
982
	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(7)<16;16,1>			r61.28<0;2,1>:ub
983
	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(7)<16;16,1>			r61.30<0;2,1>:ub
984
	mul (8)	acc0.0<1>:w								ubCURR_UV(9,2)<8;8,1>					-uwCURBE_TEMP(0,4)<0;2,1>
985
	mul (8)	acc0.8<1>:w								ubCURR_UV(9,10)<8;8,1>					-uwCURBE_TEMP(0,6)<0;2,1>
986
	mac (16)	acc0<1>:w							ubCURR_UV(9,2)<16;16,1>					256:w
987
	mac (8)	acc0.0<1>:w								ubPREV_UV(0,112)<8;8,1>				uwCURBE_TEMP(0,4)<0;2,1>
988
	mac (8)	acc0.8<1>:w								ubPREV_UV(0,120)<8;8,1>				uwCURBE_TEMP(0,6)<0;2,1>
989
	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
990
 	(f0.0) shr  (16) wDIFF_TEMPORAL(7)<1>		acc0<16;16,1>:w								8:w
991
	(-f0.0) mov (16) wDIFF_TEMPORAL(7)<1>		ubCURR_UV(9,2)<16;16,1>
992
	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> 	r61.26<0;2,1>:ub
993
	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(7)<1>	wDIFF_TEMPORAL(7)<16;16,1>				ubCURR_UV(9,2)<16;16,1>
994
 
995
	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(8)<16;16,1>			r61.28<0;2,1>:ub
996
	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(8)<16;16,1>			r61.30<0;2,1>:ub
997
	mul (8)	acc0.0<1>:w								ubCURR_UV(10,2)<8;8,1>					-uwCURBE_TEMP(0,8)<0;2,1>
998
	mul (8)	acc0.8<1>:w								ubCURR_UV(10,10)<8;8,1>					-uwCURBE_TEMP(0,10)<0;2,1>
999
	mac (16)	acc0<1>:w							ubCURR_UV(10,2)<16;16,1>					256:w
1000
	mac (8)	acc0.0<1>:w								ubPREV_UV(0,128)<8;8,1>					uwCURBE_TEMP(0,8)<0;2,1>
1001
	mac (8)	acc0.8<1>:w								ubPREV_UV(0,136)<8;8,1>					uwCURBE_TEMP(0,10)<0;2,1>
1002
	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
1003
 	(f0.0) shr (16) wDIFF_TEMPORAL(8)<1> 			acc0<16;16,1>:w								8:w
1004
	(-f0.0) mov (16) wDIFF_TEMPORAL(8)<1> 			ubCURR_UV(10,2)<16;16,1>
1005
	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> 	r61.26<0;2,1>:ub
1006
	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(8)<1>	wDIFF_TEMPORAL(8)<16;16,1>					ubCURR_UV(10,2)<16;16,1>
1007
 
1008
	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(9)<16;16,1>			r61.28<0;2,1>:ub
1009
	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(9)<16;16,1>			r61.30<0;2,1>:ub
1010
	mul (8)	acc0.0<1>:w								ubCURR_UV(11,2)<8;8,1>					-uwCURBE_TEMP(0,8)<0;2,1>
1011
	mul (8)	acc0.8<1>:w								ubCURR_UV(11,10)<8;8,1>					-uwCURBE_TEMP(0,10)<0;2,1>
1012
	mac (16)	acc0<1>:w							ubCURR_UV(11,2)<16;16,1>					256:w
1013
	mac (8)	acc0.0<1>:w								ubPREV_UV(0,144)<8;8,1>				uwCURBE_TEMP(0,8)<0;2,1>
1014
	mac (8)	acc0.8<1>:w								ubPREV_UV(0,152)<8;8,1>				uwCURBE_TEMP(0,10)<0;2,1>
1015
	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
1016
 	(f0.0) shr  (16) wDIFF_TEMPORAL(9)<1>		acc0<16;16,1>:w								8:w
1017
	(-f0.0) mov (16) wDIFF_TEMPORAL(9)<1>		ubCURR_UV(11,2)<16;16,1>
1018
	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> 	r61.26<0;2,1>:ub
1019
	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(9)<1>	wDIFF_TEMPORAL(9)<16;16,1>				ubCURR_UV(11,2)<16;16,1>
1020
 
1021
	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(10)<16;16,1>			r61.28<0;2,1>:ub
1022
	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(10)<16;16,1>			r61.30<0;2,1>:ub
1023
	mul (8)	acc0.0<1>:w								ubCURR_UV(12,2)<8;8,1>					-uwCURBE_TEMP(0,8)<0;2,1>
1024
	mul (8)	acc0.8<1>:w								ubCURR_UV(12,10)<8;8,1>					-uwCURBE_TEMP(0,10)<0;2,1>
1025
	mac (16)	acc0<1>:w							ubCURR_UV(12,2)<16;16,1>					256:w
1026
	mac (8)	acc0.0<1>:w								ubPREV_UV(0,160)<8;8,1>				uwCURBE_TEMP(0,8)<0;2,1>
1027
	mac (8)	acc0.8<1>:w								ubPREV_UV(0,168)<8;8,1>				uwCURBE_TEMP(0,10)<0;2,1>
1028
	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
1029
 	(f0.0) shr  (16) wDIFF_TEMPORAL(10)<1> 		acc0<16;16,1>:w								8:w
1030
	(-f0.0) mov (16) wDIFF_TEMPORAL(10)<1>		ubCURR_UV(12,2)<16;16,1>
1031
	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> 	r61.26<0;2,1>:ub
1032
	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(10)<1>	wDIFF_TEMPORAL(10)<16;16,1>				ubCURR_UV(12,2)<16;16,1>
1033
 
1034
	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(11)<16;16,1>			r61.28<0;2,1>:ub
1035
	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(11)<16;16,1>			r61.30<0;2,1>:ub
1036
	mul (8)	acc0.0<1>:w								ubCURR_UV(13,2)<8;8,1>					-uwCURBE_TEMP(0,8)<0;2,1>
1037
	mul (8)	acc0.8<1>:w								ubCURR_UV(13,10)<8;8,1>					-uwCURBE_TEMP(0,10)<0;2,1>
1038
	mac (16)	acc0<1>:w							ubCURR_UV(13,2)<16;16,1>					256:w
1039
	mac (8)	acc0.0<1>:w								ubPREV_UV(0,176)<8;8,1>				uwCURBE_TEMP(0,8)<0;2,1>
1040
	mac (8)	acc0.8<1>:w								ubPREV_UV(0,184)<8;8,1>				uwCURBE_TEMP(0,10)<0;2,1>
1041
	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
1042
 	(f0.0) shr  (16) wDIFF_TEMPORAL(11)<1>		acc0<16;16,1>:w								8:w
1043
	(-f0.0) mov (16) wDIFF_TEMPORAL(11)<1>		ubCURR_UV(13,2)<16;16,1>
1044
	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> 	r61.26<0;2,1>:ub
1045
	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(11)<1>	wDIFF_TEMPORAL(11)<16;16,1>				ubCURR_UV(13,2)<16;16,1>
1046
 
1047
	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(12)<16;16,1>			r61.28<0;2,1>:ub
1048
	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(12)<16;16,1>			r61.30<0;2,1>:ub
1049
	mul (8)	acc0.0<1>:w								ubCURR_UV(14,2)<8;8,1>					-uwCURBE_TEMP(0,12)<0;2,1>
1050
	mul (8)	acc0.8<1>:w								ubCURR_UV(14,10)<8;8,1>					-uwCURBE_TEMP(0,14)<0;2,1>
1051
	mac (16)	acc0<1>:w							ubCURR_UV(14,2)<16;16,1>					256:w
1052
	mac (8)	acc0.0<1>:w								ubPREV_UV(0,192)<8;8,1>					uwCURBE_TEMP(0,12)<0;2,1>
1053
	mac (8)	acc0.8<1>:w								ubPREV_UV(0,200)<8;8,1>					uwCURBE_TEMP(0,14)<0;2,1>
1054
	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
1055
 	(f0.0) shr (16) wDIFF_TEMPORAL(12)<1> 			acc0<16;16,1>:w								8:w
1056
	(-f0.0) mov (16) wDIFF_TEMPORAL(12)<1> 			ubCURR_UV(14,2)<16;16,1>
1057
	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> 	r61.26<0;2,1>:ub
1058
	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(12)<1>	wDIFF_TEMPORAL(12)<16;16,1>					ubCURR_UV(14,2)<16;16,1>
1059
 
1060
	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(13)<16;16,1>			r61.28<0;2,1>:ub
1061
	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(13)<16;16,1>			r61.30<0;2,1>:ub
1062
	mul (8)	acc0.0<1>:w								ubCURR_UV(15,2)<8;8,1>					-uwCURBE_TEMP(0,12)<0;2,1>
1063
	mul (8)	acc0.8<1>:w								ubCURR_UV(15,10)<8;8,1>					-uwCURBE_TEMP(0,14)<0;2,1>
1064
	mac (16)	acc0<1>:w							ubCURR_UV(15,2)<16;16,1>					256:w
1065
	mac (8)	acc0.0<1>:w								ubPREV_UV(0,208)<8;8,1>				uwCURBE_TEMP(0,12)<0;2,1>
1066
	mac (8)	acc0.8<1>:w								ubPREV_UV(0,216)<8;8,1>				uwCURBE_TEMP(0,14)<0;2,1>
1067
	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
1068
 	(f0.0) shr  (16) wDIFF_TEMPORAL(13)<1>		acc0<16;16,1>:w								8:w
1069
	(-f0.0) mov (16) wDIFF_TEMPORAL(13)<1>		ubCURR_UV(15,2)<16;16,1>
1070
	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> 	r61.26<0;2,1>:ub
1071
	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(13)<1>	wDIFF_TEMPORAL(13)<16;16,1>				ubCURR_UV(15,2)<16;16,1>
1072
 
1073
	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(14)<16;16,1>			r61.28<0;2,1>:ub
1074
	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(14)<16;16,1>			r61.30<0;2,1>:ub
1075
	mul (8)	acc0.0<1>:w								ubCURR_UV(16,2)<8;8,1>					-uwCURBE_TEMP(0,12)<0;2,1>
1076
	mul (8)	acc0.8<1>:w								ubCURR_UV(16,10)<8;8,1>					-uwCURBE_TEMP(0,14)<0;2,1>
1077
	mac (16)	acc0<1>:w							ubCURR_UV(16,2)<16;16,1>					256:w
1078
	mac (8)	acc0.0<1>:w								ubPREV_UV(0,224)<8;8,1>				uwCURBE_TEMP(0,12)<0;2,1>
1079
	mac (8)	acc0.8<1>:w								ubPREV_UV(0,232)<8;8,1>				uwCURBE_TEMP(0,14)<0;2,1>
1080
	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
1081
 	(f0.0) shr  (16) wDIFF_TEMPORAL(14)<1> 		acc0<16;16,1>:w								8:w
1082
	(-f0.0) mov (16) wDIFF_TEMPORAL(14)<1>		ubCURR_UV(16,2)<16;16,1>
1083
	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> 	r61.26<0;2,1>:ub
1084
	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(14)<1>	wDIFF_TEMPORAL(14)<16;16,1>				ubCURR_UV(16,2)<16;16,1>
1085
 
1086
	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(15)<16;16,1>			r61.28<0;2,1>:ub
1087
	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(15)<16;16,1>			r61.30<0;2,1>:ub
1088
	mul (8)	acc0.0<1>:w								ubCURR_UV(17,2)<8;8,1>					-uwCURBE_TEMP(0,12)<0;2,1>
1089
	mul (8)	acc0.8<1>:w								ubCURR_UV(17,10)<8;8,1>					-uwCURBE_TEMP(0,14)<0;2,1>
1090
	mac (16)	acc0<1>:w							ubCURR_UV(17,2)<16;16,1>					256:w
1091
	mac (8)	acc0.0<1>:w								ubPREV_UV(0,240)<8;8,1>				uwCURBE_TEMP(0,12)<0;2,1>
1092
	mac (8)	acc0.8<1>:w								ubPREV_UV(0,248)<8;8,1>				uwCURBE_TEMP(0,14)<0;2,1>
1093
	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
1094
 	(f0.0) shr  (16) wDIFF_TEMPORAL(15)<1>		acc0<16;16,1>:w								8:w
1095
	(-f0.0) mov (16) wDIFF_TEMPORAL(15)<1>		ubCURR_UV(17,2)<16;16,1>
1096
	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> 	r61.26<0;2,1>:ub
1097
	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(15)<1>	wDIFF_TEMPORAL(15)<16;16,1>				ubCURR_UV(17,2)<16;16,1>
1098
 
1099
 
1100
	//16x4 to 8x4 - Second 16 lines
	//Pairwise reduction of the second-16-lines row sums held in uwDIFF_TEMPORAL_SUM4x4(0..3):
	//the <4;2,1> regions add word pairs {0,1},{4,5},... to pairs {2,3},{6,7},... so the
	//even/odd (interleaved) words stay separate.
1101
	add (16)	uwDIFF_TEMPORAL_SUM4x4(0)<1>	uwDIFF_TEMPORAL_SUM4x4(0,0)<4;2,1>		uwDIFF_TEMPORAL_SUM4x4(0,2)<4;2,1>
1102
	add (16)	uwDIFF_TEMPORAL_SUM4x4(1)<1>	uwDIFF_TEMPORAL_SUM4x4(2,0)<4;2,1>		uwDIFF_TEMPORAL_SUM4x4(2,2)<4;2,1>
1103
 
1104
	//8x4 to 4x4 - Second 16 lines
	//Second reduction step; the 4x4 sums land in uwDIFF_TEMPORAL_SUM4x4_FINAL(1) and, because
	//of AccWrEn, in the accumulator as well.
1105
	add (16)	uwDIFF_TEMPORAL_SUM4x4_FINAL(1)<1>	uwDIFF_TEMPORAL_SUM4x4(0,0)<4;2,1>		uwDIFF_TEMPORAL_SUM4x4(0,2)<4;2,1>     { AccWrEn }
1106
 
1107
	//Find if block is motion block - Second 16 lines
	//f1.0 (not f0.0 as in the first half) is set per channel where the 4x4 sum exceeds the
	//byte pair at r61.26/r61.27.
1108
	cmp.g.f1.0  (16) null<1>:w				uwDIFF_TEMPORAL_SUM4x4_FINAL(1)<16;16,1> 		r61.26<0;2,1>:ub
1109
 
1110
	//Move TEMPORAL_SUM4x4 for SIMD16 use later.
	//Overwrites uwDIFF_TEMPORAL_SUM4x4_TEMP_0..3 with the second-half sums: each mov (8)
	//replicates one word pair of FINAL(1) over 8 channels (<0;2,1> region).
1111
	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0,0)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(1,0)<0;2,1>
1112
	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0,8)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(1,2)<0;2,1>
1113
	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0,0)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(1,4)<0;2,1>
1114
	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0,8)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(1,6)<0;2,1>
1115
	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0,0)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(1,8)<0;2,1>
1116
	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0,8)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(1,10)<0;2,1>
1117
	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0,0)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(1,12)<0;2,1>
1118
	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0,8)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(1,14)<0;2,1>
1119
 
1120
	//Pick Appropriate Weight History Based on motion. - Second 16 lines
	//Channels where f1.0 is clear take their weight from uwCURBE_TEMP(3); the others keep the
	//value already in uwCURBE_TEMP(1).
	//NOTE(review): the contents of uwCURBE_TEMP(1) and (3) are set up outside this excerpt.
1121
	(-f1.0) mov (16) uwCURBE_TEMP(1)<1>		uwCURBE_TEMP(3)<16;16,1>
1122
 
1123
	//Actual DN - Second 16 lines
	//Row 16. Same pattern as the first half, but reading current row ubCURR_UV(22,..), previous
	//data from ubPREV_UV(8,..) and weights from uwCURBE_TEMP(1,..).
1124
	//f0.0 / f1.0: per channel, |temporal diff| is less than the byte pair at r61.28 / r61.30.
	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(16)<16;16,1>			r61.28<0;2,1>:ub
1125
	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(16)<16;16,1>			r61.30<0;2,1>:ub
1126
	//acc0 = CURR*(-w) + CURR*256 + PREV*w + 128, with w taken from the word pair at
	//uwCURBE_TEMP(1,0) for channels 0-7 and uwCURBE_TEMP(1,2) for channels 8-15.
	mul (8)	acc0.0<1>:w									ubCURR_UV(22,2)<8;8,1>					-uwCURBE_TEMP(1,0)<0;2,1>
1127
	mul (8)	acc0.8<1>:w									ubCURR_UV(22,10)<8;8,1>					-uwCURBE_TEMP(1,2)<0;2,1>
1128
	mac (16)	acc0<1>:w								ubCURR_UV(22,2)<16;16,1>					256:w
1129
	mac (8)	acc0.0<1>:w									ubPREV_UV(8,0)<8;8,1>					uwCURBE_TEMP(1,0)<0;2,1>
1130
	mac (8)	acc0.8<1>:w									ubPREV_UV(8,8)<8;8,1>					uwCURBE_TEMP(1,2)<0;2,1>
1131
	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
1132
	//Channels with f0.0 set store the blended value (acc0 >> 8) over the row's difference in
	//wDIFF_TEMPORAL(16); the remaining channels store the unmodified current pixel.
 	(f0.0) shr  (16) wDIFF_TEMPORAL(16)<1> 			acc0<16;16,1>:w								8:w
1133
	(-f0.0) mov (16) wDIFF_TEMPORAL(16)<1>			ubCURR_UV(22,2)<16;16,1>
1134
	//f0.0 is recomputed from the broadcast 4x4 block sum (TEMP_0 for rows 16-19) <= r61.26 pair.
	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> 	r61.26<0;2,1>:ub
1135
	//Where the inverted .allv predicate is true, the result is averaged with the current pixel.
	//NOTE(review): .allv is understood to combine f0.0 and f1.0 per channel - confirm in the EU ISA.
	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(16)<1>	wDIFF_TEMPORAL(16)<16;16,1>			ubCURR_UV(22,2)<16;16,1>
1136
 
1137
	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(17)<16;16,1>			r61.28<0;2,1>:ub
1138
	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(17)<16;16,1>			r61.30<0;2,1>:ub
1139
	mul (8)	acc0.0<1>:w									ubCURR_UV(23,2)<8;8,1>					-uwCURBE_TEMP(1,0)<0;2,1>
1140
	mul (8)	acc0.8<1>:w									ubCURR_UV(23,10)<8;8,1>					-uwCURBE_TEMP(1,2)<0;2,1>
1141
	mac (16)	acc0<1>:w								ubCURR_UV(23,2)<16;16,1>					256:w
1142
	mac (8)	acc0.0<1>:w									ubPREV_UV(8,16)<8;8,1>				uwCURBE_TEMP(1,0)<0;2,1>
1143
	mac (8)	acc0.8<1>:w									ubPREV_UV(8,24)<8;8,1>				uwCURBE_TEMP(1,2)<0;2,1>
1144
	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
1145
 	(f0.0) shr  (16) wDIFF_TEMPORAL(17)<1>			acc0<16;16,1>:w								8:w
1146
	(-f0.0) mov (16) wDIFF_TEMPORAL(17)<1>			ubCURR_UV(23,2)<16;16,1>
1147
	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> 	r61.26<0;2,1>:ub
1148
	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(17)<1>	wDIFF_TEMPORAL(17)<16;16,1>			ubCURR_UV(23,2)<16;16,1>
1149
 
1150
	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(18)<16;16,1>			r61.28<0;2,1>:ub
1151
	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(18)<16;16,1>			r61.30<0;2,1>:ub
1152
	mul (8)	acc0.0<1>:w									ubCURR_UV(24,2)<8;8,1>					-uwCURBE_TEMP(1,0)<0;2,1>
1153
	mul (8)	acc0.8<1>:w									ubCURR_UV(24,10)<8;8,1>					-uwCURBE_TEMP(1,2)<0;2,1>
1154
	mac (16)	acc0<1>:w								ubCURR_UV(24,2)<16;16,1>					256:w
1155
	mac (8)	acc0.0<1>:w									ubPREV_UV(8,32)<8;8,1>				uwCURBE_TEMP(1,0)<0;2,1>
1156
	mac (8)	acc0.8<1>:w									ubPREV_UV(8,40)<8;8,1>				uwCURBE_TEMP(1,2)<0;2,1>
1157
	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
1158
 	(f0.0) shr  (16) wDIFF_TEMPORAL(18)<1> 			acc0<16;16,1>:w								8:w
1159
	(-f0.0) mov (16) wDIFF_TEMPORAL(18)<1>			ubCURR_UV(24,2)<16;16,1>
1160
	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> 	r61.26<0;2,1>:ub
1161
	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(18)<1>	wDIFF_TEMPORAL(18)<16;16,1>			ubCURR_UV(24,2)<16;16,1>
1162
 
1163
	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(19)<16;16,1>			r61.28<0;2,1>:ub
1164
	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(19)<16;16,1>			r61.30<0;2,1>:ub
1165
	mul (8)	acc0.0<1>:w									ubCURR_UV(25,2)<8;8,1>					-uwCURBE_TEMP(1,0)<0;2,1>
1166
	mul (8)	acc0.8<1>:w									ubCURR_UV(25,10)<8;8,1>					-uwCURBE_TEMP(1,2)<0;2,1>
1167
	mac (16)	acc0<1>:w								ubCURR_UV(25,2)<16;16,1>					256:w
1168
	mac (8)	acc0.0<1>:w									ubPREV_UV(8,48)<8;8,1>				uwCURBE_TEMP(1,0)<0;2,1>
1169
	mac (8)	acc0.8<1>:w									ubPREV_UV(8,56)<8;8,1>				uwCURBE_TEMP(1,2)<0;2,1>
1170
	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
1171
 	(f0.0) shr  (16) wDIFF_TEMPORAL(19)<1>			acc0<16;16,1>:w								8:w
1172
	(-f0.0) mov (16) wDIFF_TEMPORAL(19)<1>			ubCURR_UV(25,2)<16;16,1>
1173
	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> 	r61.26<0;2,1>:ub
1174
	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(19)<1>	wDIFF_TEMPORAL(19)<16;16,1>			ubCURR_UV(25,2)<16;16,1>
1175
	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(20)<16;16,1>			r61.28<0;2,1>:ub
1176
	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(20)<16;16,1>			r61.30<0;2,1>:ub
1177
	mul (8)	acc0.0<1>:w									ubCURR_UV(26,2)<8;8,1>					-uwCURBE_TEMP(1,4)<0;2,1>
1178
	mul (8)	acc0.8<1>:w									ubCURR_UV(26,10)<8;8,1>					-uwCURBE_TEMP(1,6)<0;2,1>
1179
	mac (16)	acc0<1>:w								ubCURR_UV(26,2)<16;16,1>					256:w
1180
	mac (8)	acc0.0<1>:w									ubPREV_UV(8,64)<8;8,1>					uwCURBE_TEMP(1,4)<0;2,1>
1181
	mac (8)	acc0.8<1>:w									ubPREV_UV(8,72)<8;8,1>					uwCURBE_TEMP(1,6)<0;2,1>
1182
	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
1183
 	(f0.0) shr  (16) wDIFF_TEMPORAL(20)<1> 			acc0<16;16,1>:w								8:w
1184
	(-f0.0) mov (16) wDIFF_TEMPORAL(20)<1>			ubCURR_UV(26,2)<16;16,1>
1185
	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> 	r61.26<0;2,1>:ub
1186
	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(20)<1>	wDIFF_TEMPORAL(20)<16;16,1>			ubCURR_UV(26,2)<16;16,1>
1187
 
1188
	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(21)<16;16,1>			r61.28<0;2,1>:ub
1189
	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(21)<16;16,1>			r61.30<0;2,1>:ub
1190
	mul (8)	acc0.0<1>:w									ubCURR_UV(27,2)<8;8,1>					-uwCURBE_TEMP(1,4)<0;2,1>
1191
	mul (8)	acc0.8<1>:w									ubCURR_UV(27,10)<8;8,1>					-uwCURBE_TEMP(1,6)<0;2,1>
1192
	mac (16)	acc0<1>:w								ubCURR_UV(27,2)<16;16,1>					256:w
1193
	mac (8)	acc0.0<1>:w									ubPREV_UV(8,80)<8;8,1>				uwCURBE_TEMP(1,4)<0;2,1>
1194
	mac (8)	acc0.8<1>:w									ubPREV_UV(8,88)<8;8,1>				uwCURBE_TEMP(1,6)<0;2,1>
1195
	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
1196
 	(f0.0) shr  (16) wDIFF_TEMPORAL(21)<1>			acc0<16;16,1>:w								8:w
1197
	(-f0.0) mov (16) wDIFF_TEMPORAL(21)<1>			ubCURR_UV(27,2)<16;16,1>
1198
	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> 	r61.26<0;2,1>:ub
1199
	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(21)<1>	wDIFF_TEMPORAL(21)<16;16,1>			ubCURR_UV(27,2)<16;16,1>
1200
 
1201
	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(22)<16;16,1>			r61.28<0;2,1>:ub
1202
	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(22)<16;16,1>			r61.30<0;2,1>:ub
1203
	mul (8)	acc0.0<1>:w									ubCURR_UV(28,2)<8;8,1>					-uwCURBE_TEMP(1,4)<0;2,1>
1204
	mul (8)	acc0.8<1>:w									ubCURR_UV(28,10)<8;8,1>					-uwCURBE_TEMP(1,6)<0;2,1>
1205
	mac (16)	acc0<1>:w								ubCURR_UV(28,2)<16;16,1>					256:w
1206
	mac (8)	acc0.0<1>:w									ubPREV_UV(8,96)<8;8,1>				uwCURBE_TEMP(1,4)<0;2,1>
1207
	mac (8)	acc0.8<1>:w									ubPREV_UV(8,104)<8;8,1>				uwCURBE_TEMP(1,6)<0;2,1>
1208
	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
1209
 	(f0.0) shr  (16) wDIFF_TEMPORAL(22)<1> 			acc0<16;16,1>:w								8:w
1210
	(-f0.0) mov (16) wDIFF_TEMPORAL(22)<1>			ubCURR_UV(28,2)<16;16,1>
1211
	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> 	r61.26<0;2,1>:ub
1212
	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(22)<1>	wDIFF_TEMPORAL(22)<16;16,1>			ubCURR_UV(28,2)<16;16,1>
1213
 
1214
	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(23)<16;16,1>			r61.28<0;2,1>:ub
1215
	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(23)<16;16,1>			r61.30<0;2,1>:ub
1216
	mul (8)	acc0.0<1>:w									ubCURR_UV(29,2)<8;8,1>					-uwCURBE_TEMP(1,4)<0;2,1>
1217
	mul (8)	acc0.8<1>:w									ubCURR_UV(29,10)<8;8,1>					-uwCURBE_TEMP(1,6)<0;2,1>
1218
	mac (16)	acc0<1>:w								ubCURR_UV(29,2)<16;16,1>					256:w
1219
	mac (8)	acc0.0<1>:w									ubPREV_UV(8,112)<8;8,1>				uwCURBE_TEMP(1,4)<0;2,1>
1220
	mac (8)	acc0.8<1>:w									ubPREV_UV(8,120)<8;8,1>				uwCURBE_TEMP(1,6)<0;2,1>
1221
	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
1222
 	(f0.0) shr  (16) wDIFF_TEMPORAL(23)<1>			acc0<16;16,1>:w								8:w
1223
	(-f0.0) mov (16) wDIFF_TEMPORAL(23)<1>			ubCURR_UV(29,2)<16;16,1>
1224
	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> 	r61.26<0;2,1>:ub
1225
	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(23)<1>	wDIFF_TEMPORAL(23)<16;16,1>			ubCURR_UV(29,2)<16;16,1>
1226
	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(24)<16;16,1>			r61.28<0;2,1>:ub
1227
	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(24)<16;16,1>			r61.30<0;2,1>:ub
1228
	mul (8)	acc0.0<1>:w									ubCURR_UV(30,2)<8;8,1>					-uwCURBE_TEMP(1,8)<0;2,1>
1229
	mul (8)	acc0.8<1>:w									ubCURR_UV(30,10)<8;8,1>					-uwCURBE_TEMP(1,10)<0;2,1>
1230
	mac (16)	acc0<1>:w								ubCURR_UV(30,2)<16;16,1>					256:w
1231
	mac (8)	acc0.0<1>:w									ubPREV_UV(8,128)<8;8,1>					uwCURBE_TEMP(1,8)<0;2,1>
1232
	mac (8)	acc0.8<1>:w									ubPREV_UV(8,136)<8;8,1>					uwCURBE_TEMP(1,10)<0;2,1>
1233
	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
1234
 	(f0.0) shr  (16) wDIFF_TEMPORAL(24)<1> 			acc0<16;16,1>:w								8:w
1235
	(-f0.0) mov (16) wDIFF_TEMPORAL(24)<1>			ubCURR_UV(30,2)<16;16,1>
1236
	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> 	r61.26<0;2,1>:ub
1237
	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(24)<1>	wDIFF_TEMPORAL(24)<16;16,1>			ubCURR_UV(30,2)<16;16,1>
1238
 
1239
	// ---- Row 25: temporal blend of one row of 16 chroma bytes ----
	// f0.0 / f1.0: per-channel |wDIFF_TEMPORAL(25)| compared (less-than) against the byte pairs at
	// r61.28/29 and r61.30/31; the <0;2,1> region repeats each 2-byte pair across all 16 channels.
	// NOTE(review): the pairs are presumably per-U / per-V thresholds - confirm against the CURBE layout.
	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(25)<16;16,1>			r61.28<0;2,1>:ub
1240
	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(25)<16;16,1>			r61.30<0;2,1>:ub
1241
	// acc0 = -curr * w  (w = weight word pair at uwCURBE_TEMP(1,8) for channels 0-7, (1,10) for channels 8-15)
	mul (8)	acc0.0<1>:w									ubCURR_UV(31,2)<8;8,1>					-uwCURBE_TEMP(1,8)<0;2,1>
1242
	mul (8)	acc0.8<1>:w									ubCURR_UV(31,10)<8;8,1>					-uwCURBE_TEMP(1,10)<0;2,1>
1243
	// acc0 += curr * 256   ->  curr * (256 - w)
	mac (16)	acc0<1>:w								ubCURR_UV(31,2)<16;16,1>					256:w
1244
	// acc0 += prev * w     ->  curr * (256 - w) + prev * w
	mac (8)	acc0.0<1>:w									ubPREV_UV(8,144)<8;8,1>				uwCURBE_TEMP(1,8)<0;2,1>
1245
	mac (8)	acc0.8<1>:w									ubPREV_UV(8,152)<8;8,1>				uwCURBE_TEMP(1,10)<0;2,1>
1246
	// +128 so that the following >> 8 rounds to nearest
	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
1247
	// Channels with f0.0 set take the blended value; the rest keep the current pixel.
 	(f0.0) shr  (16) wDIFF_TEMPORAL(25)<1>			acc0<16;16,1>:w								8:w
1248
	(-f0.0) mov (16) wDIFF_TEMPORAL(25)<1>			ubCURR_UV(31,2)<16;16,1>
1249
	// f0.0 is recomputed: uwDIFF_TEMPORAL_SUM4x4_TEMP_2 <= byte pair at r61.26/27.
	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> 	r61.26<0;2,1>:ub
1250
	// Predicated on the negated .allv combination of f0.0 and f1.0: selected channels become
	// avg(result so far, curr). NOTE(review): confirm the exact negated-.allv channel semantics in the ISA reference.
	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(25)<1>	wDIFF_TEMPORAL(25)<16;16,1>			ubCURR_UV(31,2)<16;16,1>
1251
 
1252
	// ---- Row 26: same sequence, using ubCURR_UV(32,*) and ubPREV_UV(8,160/168) ----
	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(26)<16;16,1>			r61.28<0;2,1>:ub
1253
	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(26)<16;16,1>			r61.30<0;2,1>:ub
1254
	mul (8)	acc0.0<1>:w									ubCURR_UV(32,2)<8;8,1>					-uwCURBE_TEMP(1,8)<0;2,1>
1255
	mul (8)	acc0.8<1>:w									ubCURR_UV(32,10)<8;8,1>					-uwCURBE_TEMP(1,10)<0;2,1>
1256
	mac (16)	acc0<1>:w								ubCURR_UV(32,2)<16;16,1>					256:w
1257
	mac (8)	acc0.0<1>:w									ubPREV_UV(8,160)<8;8,1>				uwCURBE_TEMP(1,8)<0;2,1>
1258
	mac (8)	acc0.8<1>:w									ubPREV_UV(8,168)<8;8,1>				uwCURBE_TEMP(1,10)<0;2,1>
1259
	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
1260
 	(f0.0) shr  (16) wDIFF_TEMPORAL(26)<1> 			acc0<16;16,1>:w								8:w
1261
	(-f0.0) mov (16) wDIFF_TEMPORAL(26)<1>			ubCURR_UV(32,2)<16;16,1>
1262
	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> 	r61.26<0;2,1>:ub
1263
	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(26)<1>	wDIFF_TEMPORAL(26)<16;16,1>			ubCURR_UV(32,2)<16;16,1>
1264
 
1265
	// ---- Row 27: same sequence, using ubCURR_UV(33,*) and ubPREV_UV(8,176/184) ----
	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(27)<16;16,1>			r61.28<0;2,1>:ub
1266
	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(27)<16;16,1>			r61.30<0;2,1>:ub
1267
	mul (8)	acc0.0<1>:w									ubCURR_UV(33,2)<8;8,1>					-uwCURBE_TEMP(1,8)<0;2,1>
1268
	mul (8)	acc0.8<1>:w									ubCURR_UV(33,10)<8;8,1>					-uwCURBE_TEMP(1,10)<0;2,1>
1269
	mac (16)	acc0<1>:w								ubCURR_UV(33,2)<16;16,1>					256:w
1270
	mac (8)	acc0.0<1>:w									ubPREV_UV(8,176)<8;8,1>				uwCURBE_TEMP(1,8)<0;2,1>
1271
	mac (8)	acc0.8<1>:w									ubPREV_UV(8,184)<8;8,1>				uwCURBE_TEMP(1,10)<0;2,1>
1272
	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
1273
 	(f0.0) shr  (16) wDIFF_TEMPORAL(27)<1>			acc0<16;16,1>:w								8:w
1274
	(-f0.0) mov (16) wDIFF_TEMPORAL(27)<1>			ubCURR_UV(33,2)<16;16,1>
1275
	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> 	r61.26<0;2,1>:ub
1276
	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(27)<1>	wDIFF_TEMPORAL(27)<16;16,1>			ubCURR_UV(33,2)<16;16,1>
1277
 
1278
	// ---- Rows 28-31: temporal blend, four rows of 16 chroma bytes each ----
	// All four rows share the weight word pairs uwCURBE_TEMP(1,12) (channels 0-7) and (1,14) (channels 8-15)
	// and the block sum uwDIFF_TEMPORAL_SUM4x4_TEMP_3. Per row:
	//   f0.0 / f1.0 = |diff| < byte pair at r61.28/29 / r61.30/31 (<0;2,1> repeats the pair across channels)
	//   acc0        = curr*(256 - w) + prev*w + 128
	//   result      = acc0 >> 8 where f0.0 is set, otherwise curr
	//   f0.0 is then recomputed as SUM4x4 <= pair at r61.26/27, and the final avg with curr is predicated
	//   on the negated .allv combination of f0.0 and f1.0.
	//   NOTE(review): confirm the exact negated-.allv channel semantics in the ISA reference.
	//
	// Row 28 (result stays in wDIFF_TEMPORAL(28))
	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(28)<16;16,1>			r61.28<0;2,1>:ub
1279
	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(28)<16;16,1>			r61.30<0;2,1>:ub
1280
	mul (8)	acc0.0<1>:w								ubCURR_UV(34,2)<8;8,1>					-uwCURBE_TEMP(1,12)<0;2,1>
1281
	mul (8)	acc0.8<1>:w								ubCURR_UV(34,10)<8;8,1>					-uwCURBE_TEMP(1,14)<0;2,1>
1282
	mac (16)	acc0<1>:w							ubCURR_UV(34,2)<16;16,1>					256:w
1283
	mac (8)	acc0.0<1>:w								ubPREV_UV(8,192)<8;8,1>					uwCURBE_TEMP(1,12)<0;2,1>
1284
	mac (8)	acc0.8<1>:w								ubPREV_UV(8,200)<8;8,1>					uwCURBE_TEMP(1,14)<0;2,1>
1285
	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
1286
 	(f0.0) shr  (16) wDIFF_TEMPORAL(28)<1> 			acc0<16;16,1>:w								8:w
1287
	(-f0.0) mov (16) wDIFF_TEMPORAL(28)<1>			ubCURR_UV(34,2)<16;16,1>
1288
	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> 	r61.26<0;2,1>:ub
1289
	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(28)<1>	wDIFF_TEMPORAL(28)<16;16,1>				ubCURR_UV(34,2)<16;16,1>
1290
 
1291
	// Row 29: the difference is read from, and the result written to, wCURBE_TEMP(4) instead of a
	// wDIFF_TEMPORAL register. NOTE(review): presumably the wDIFF_TEMPORAL range ends at index 28 - confirm.
	cmp.l.f0.0 (16)	null<1>:w						(abs)wCURBE_TEMP(4)<16;16,1>				r61.28<0;2,1>:ub
1292
	cmp.l.f1.0 (16) null<1>:w						(abs)wCURBE_TEMP(4)<16;16,1>				r61.30<0;2,1>:ub
1293
	mul (8)	acc0.0<1>:w								ubCURR_UV(35,2)<8;8,1>					-uwCURBE_TEMP(1,12)<0;2,1>
1294
	mul (8)	acc0.8<1>:w								ubCURR_UV(35,10)<8;8,1>					-uwCURBE_TEMP(1,14)<0;2,1>
1295
	mac (16)	acc0<1>:w							ubCURR_UV(35,2)<16;16,1>					256:w
1296
	mac (8)	acc0.0<1>:w								ubPREV_UV(8,208)<8;8,1>				uwCURBE_TEMP(1,12)<0;2,1>
1297
	mac (8)	acc0.8<1>:w								ubPREV_UV(8,216)<8;8,1>				uwCURBE_TEMP(1,14)<0;2,1>
1298
	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
1299
 	(f0.0) shr  (16) wCURBE_TEMP(4)<1>				acc0<16;16,1>:w								8:w
1300
	(-f0.0) mov (16) wCURBE_TEMP(4)<1>				ubCURR_UV(35,2)<16;16,1>
1301
	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> 	r61.26<0;2,1>:ub
1302
	(-f0.0.allv) avg (16)	wCURBE_TEMP(4)<1>		wCURBE_TEMP(4)<16;16,1>				ubCURR_UV(35,2)<16;16,1>
1303
 
1304
	// Row 30: same sequence, working register wCURBE_TEMP(5)
	cmp.l.f0.0 (16)	null<1>:w						(abs)wCURBE_TEMP(5)<16;16,1>				r61.28<0;2,1>:ub
1305
	cmp.l.f1.0 (16) null<1>:w						(abs)wCURBE_TEMP(5)<16;16,1>				r61.30<0;2,1>:ub
1306
	mul (8)	acc0.0<1>:w								ubCURR_UV(36,2)<8;8,1>					-uwCURBE_TEMP(1,12)<0;2,1>
1307
	mul (8)	acc0.8<1>:w								ubCURR_UV(36,10)<8;8,1>					-uwCURBE_TEMP(1,14)<0;2,1>
1308
	mac (16)	acc0<1>:w							ubCURR_UV(36,2)<16;16,1>					256:w
1309
	mac (8)	acc0.0<1>:w								ubPREV_UV(8,224)<8;8,1>				uwCURBE_TEMP(1,12)<0;2,1>
1310
	mac (8)	acc0.8<1>:w								ubPREV_UV(8,232)<8;8,1>				uwCURBE_TEMP(1,14)<0;2,1>
1311
	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
1312
 	(f0.0) shr  (16) wCURBE_TEMP(5)<1> 				acc0<16;16,1>:w								8:w
1313
	(-f0.0) mov (16) wCURBE_TEMP(5)<1>				ubCURR_UV(36,2)<16;16,1>
1314
	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> 	r61.26<0;2,1>:ub
1315
	(-f0.0.allv) avg (16)	wCURBE_TEMP(5)<1>		wCURBE_TEMP(5)<16;16,1>				ubCURR_UV(36,2)<16;16,1>
1316
 
1317
	// Row 31: same sequence, working register wCURBE_TEMP(6)
	cmp.l.f0.0 (16)	null<1>:w						(abs)wCURBE_TEMP(6)<16;16,1>				r61.28<0;2,1>:ub
1318
	cmp.l.f1.0 (16) null<1>:w						(abs)wCURBE_TEMP(6)<16;16,1>				r61.30<0;2,1>:ub
1319
	mul (8)	acc0.0<1>:w								ubCURR_UV(37,2)<8;8,1>					-uwCURBE_TEMP(1,12)<0;2,1>
1320
	mul (8)	acc0.8<1>:w								ubCURR_UV(37,10)<8;8,1>					-uwCURBE_TEMP(1,14)<0;2,1>
1321
	mac (16)	acc0<1>:w							ubCURR_UV(37,2)<16;16,1>					256:w
1322
	mac (8)	acc0.0<1>:w								ubPREV_UV(8,240)<8;8,1>				uwCURBE_TEMP(1,12)<0;2,1>
1323
	mac (8)	acc0.8<1>:w								ubPREV_UV(8,248)<8;8,1>				uwCURBE_TEMP(1,14)<0;2,1>
1324
	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
1325
 	(f0.0) shr  (16) wCURBE_TEMP(6)<1>				acc0<16;16,1>:w								8:w
1326
	(-f0.0) mov (16) wCURBE_TEMP(6)<1>				ubCURR_UV(37,2)<16;16,1>
1327
	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> 	r61.26<0;2,1>:ub
1328
	(-f0.0.allv) avg (16)	wCURBE_TEMP(6)<1>		wCURBE_TEMP(6)<16;16,1>				ubCURR_UV(37,2)<16;16,1>
1329
 
1330
	//Pack Weight History WORD -> BYTE
	// The <32;16,2> byte region reads every second byte of a 32-byte register, i.e. the low byte of
	// each of its 16 words. CURBE_TEMP(0) is packed into bytes 0-15 of CURBE_TEMP(3) and CURBE_TEMP(1)
	// into bytes 16-31, giving 32 history bytes in a single register.
1331
	mov (16) ubCURBE_TEMP(3,0)<1>		ubCURBE_TEMP(0)<32;16,2>
1332
	mov (16) ubCURBE_TEMP(3,16)<1>		ubCURBE_TEMP(1)<32;16,2>
1333
 
1334
 
1335
 
1336
//Module Name 	: DN_UV_Compute_BNE_UV
1337
//Author		: Tatiya, Rupesh
1338
//Description	: Computes minimum SOAD for each 16x4 block.
1339
 
1340
	// Element-wise minimum of two sets of 8 SOAD words.
	// The <16;4,1> word region with execution size 8 reads words 12-15 of two consecutive registers,
	// so registers (0),(1) are compared against registers (2),(3) of uwSOAD_MIN_8x4.
	// cmp.l sets f0.0 where src0 < src1; sel then keeps src0 on those channels and src1 elsewhere.
	cmp.l.f0.0  (8) null:w     				uwSOAD_MIN_8x4(0,12)<16;4,1> 	uwSOAD_MIN_8x4(2,12)<16;4,1>
1341
	(f0.0)sel	(8) uwCURBE_TEMP(1,0)<1>	uwSOAD_MIN_8x4(0,12)<16;4,1> 	uwSOAD_MIN_8x4(2,12)<16;4,1>
1342
 
1343
    // Pack the 8 minimum words to bytes in place: the low byte of each word (bytes 0,2,..,14) goes to bytes 0-7.
    // NOTE(review): source and destination overlap within CURBE_TEMP(1); assumed safe for this narrowing move - confirm.
    mov  (8)	ubCURBE_TEMP(1)<1>			ubCURBE_TEMP(1)<16;8,2>
1344
 
1345
 
1346
 
1347
//Module Name 	: DN_UV_PL2_Pack_Denoised_UV
1348
//Author		: Tatiya, Rupesh
1349
//Description	: Pack UV denoised data based on PL2 input.
1350
 
1351
 
1352
 
1353
//Module Name 	: DN_UV_Pack_Denoised_UV
1354
//Author		: Tatiya, Rupesh
1355
//Description	: Pack UV denoised data based on PL2/PL3/PA.
1356
 
1357
 
1358
// Pack the denoised rows from 16-bit words to bytes for the write-back payload.
// Each mov reads the low byte of the 16 words of one row register (byte region <32;16,2>) and writes
// 16 consecutive bytes, so every 32-byte payload register receives two rows (byte offsets 0 and 16).
//First 16 lines.
1359
	mov  (16)	ubMSGPAYLOAD_UV0(0,0)<1>		ubDIFF_TEMPORAL(0)<32;16,2>
1360
	mov  (16)	ubMSGPAYLOAD_UV0(0,16)<1>		ubDIFF_TEMPORAL(1)<32;16,2>
1361
	mov  (16)	ubMSGPAYLOAD_UV0(1,0)<1>		ubDIFF_TEMPORAL(2)<32;16,2>
1362
	mov  (16)	ubMSGPAYLOAD_UV0(1,16)<1>		ubDIFF_TEMPORAL(3)<32;16,2>
1363
	mov  (16)	ubMSGPAYLOAD_UV0(2,0)<1>		ubDIFF_TEMPORAL(4)<32;16,2>
1364
	mov  (16)	ubMSGPAYLOAD_UV0(2,16)<1>		ubDIFF_TEMPORAL(5)<32;16,2>
1365
	mov  (16)	ubMSGPAYLOAD_UV0(3,0)<1>		ubDIFF_TEMPORAL(6)<32;16,2>
1366
	mov  (16)	ubMSGPAYLOAD_UV0(3,16)<1>		ubDIFF_TEMPORAL(7)<32;16,2>
1367
	mov  (16)	ubMSGPAYLOAD_UV0(4,0)<1>		ubDIFF_TEMPORAL(8)<32;16,2>
1368
	mov  (16)	ubMSGPAYLOAD_UV0(4,16)<1>		ubDIFF_TEMPORAL(9)<32;16,2>
1369
	mov  (16)	ubMSGPAYLOAD_UV0(5,0)<1>		ubDIFF_TEMPORAL(10)<32;16,2>
1370
	mov  (16)	ubMSGPAYLOAD_UV0(5,16)<1>		ubDIFF_TEMPORAL(11)<32;16,2>
1371
	mov  (16)	ubMSGPAYLOAD_UV0(6,0)<1>		ubDIFF_TEMPORAL(12)<32;16,2>
1372
	mov  (16)	ubMSGPAYLOAD_UV0(6,16)<1>		ubDIFF_TEMPORAL(13)<32;16,2>
1373
	mov  (16)	ubMSGPAYLOAD_UV0(7,0)<1>		ubDIFF_TEMPORAL(14)<32;16,2>
1374
	mov  (16)	ubMSGPAYLOAD_UV0(7,16)<1>		ubDIFF_TEMPORAL(15)<32;16,2>
1375
 
1376
//Second 16 lines.
1377
//12 lines first
1378
	mov  (16)	ubMSGPAYLOAD_UV1(0,0)<1>		ubDIFF_TEMPORAL(16)<32;16,2>
1379
	mov  (16)	ubMSGPAYLOAD_UV1(0,16)<1>		ubDIFF_TEMPORAL(17)<32;16,2>
1380
	mov  (16)	ubMSGPAYLOAD_UV1(1,0)<1>		ubDIFF_TEMPORAL(18)<32;16,2>
1381
	mov  (16)	ubMSGPAYLOAD_UV1(1,16)<1>		ubDIFF_TEMPORAL(19)<32;16,2>
1382
	mov  (16)	ubMSGPAYLOAD_UV1(2,0)<1>		ubDIFF_TEMPORAL(20)<32;16,2>
1383
	mov  (16)	ubMSGPAYLOAD_UV1(2,16)<1>		ubDIFF_TEMPORAL(21)<32;16,2>
1384
	mov  (16)	ubMSGPAYLOAD_UV1(3,0)<1>		ubDIFF_TEMPORAL(22)<32;16,2>
1385
	mov  (16)	ubMSGPAYLOAD_UV1(3,16)<1>		ubDIFF_TEMPORAL(23)<32;16,2>
1386
	mov  (16)	ubMSGPAYLOAD_UV1(4,0)<1>		ubDIFF_TEMPORAL(24)<32;16,2>
1387
	mov  (16)	ubMSGPAYLOAD_UV1(4,16)<1>		ubDIFF_TEMPORAL(25)<32;16,2>
1388
	mov  (16)	ubMSGPAYLOAD_UV1(5,0)<1>		ubDIFF_TEMPORAL(26)<32;16,2>
1389
	mov  (16)	ubMSGPAYLOAD_UV1(5,16)<1>		ubDIFF_TEMPORAL(27)<32;16,2>
1390
 
1391
	//4 lines next: row 28 comes from DIFF_TEMPORAL(28), the last three rows from CURBE_TEMP(4..6)
1392
	mov  (16)	ubMSGPAYLOAD_UV1(6,0)<1>		ubDIFF_TEMPORAL(28)<32;16,2>
1393
	mov  (16)	ubMSGPAYLOAD_UV1(6,16)<1>		ubCURBE_TEMP(4)<32;16,2>
1394
	mov  (16)	ubMSGPAYLOAD_UV1(7,0)<1>		ubCURBE_TEMP(5)<32;16,2>
1395
	mov  (16)	ubMSGPAYLOAD_UV1(7,16)<1>		ubCURBE_TEMP(6)<32;16,2>
1396
 
1397
 
1398
 
1399
//Module Name 	: DN_UV_420_Save_Curr_Frame_Y
1400
//Author		: Tatiya, Rupesh
1401
//Description	: Save Curr Frame Y data for 420 Input
1402
 
1403
 
1404
 
1405
//Module Name 	: DN_UV_Load_Curr_Frame_Y
1406
//Author		: Tatiya, Rupesh
1407
//Description	: Saves Y or YUY2 of Current frame.
1408
 
1409
 
1410
 
1411
 
1412
	// Build one message header in acc0 and clone it into four header registers (r92, r101, r110, r119),
	// then issue four sends, one per header.
	// Header template: start from a copy of r0, overwrite dwords 0 and 1 with the two words at
	// r62.10 / r62.11 (widened to dwords), and set dword 2 to 0xF000F.
	// NOTE(review): dwords 0/1 are presumably the block X/Y origin and 0xF000F the (width-1, height-1)
	// encoding of a 16x16 block - confirm against the data-port media block message header layout.
	mov (8)		acc0.0<1>:ud		r0.0<8;8,1>:ud
1413
	mov (2)		acc0.0<1>:d			r62.10<2;2,1>:w
1414
 
1415
	mov (1)		acc0.2<1>:d			0xF000F:ud
1416
 
1417
	// The headers are 9 registers apart. NOTE(review): the descriptor 0x120A8018 appears to encode a
	// message length of 9 (header + 8 payload registers), which would match this spacing - confirm.
	mov (8)     r92.0<1>:ud	acc0.0<8;8,1>:ud
1418
 
1419
	mov (8)     r101.0<1>:ud	acc0.0<8;8,1>:ud
1420
	mov (8)     r110.0<1>:ud	acc0.0<8;8,1>:ud
1421
	mov (8)     r119.0<1>:ud	acc0.0<8;8,1>:ud
1422
 
1423
	// Second header: dword 1 + 16
	add (1)		r101.1<1>:d 	acc0.1<0;1,0>:d   		16:d
1424
 
1425
	// Third header: dword 0 + 16
	add (1)		r110.0<1>:d 	acc0.0<0;1,0>:d   		16:d
1426
 
1427
	// Fourth header: dwords 0 and 1 both + 16
	add (2)		r119.0<1>:d 	acc0.0<2;2,1>:d   		16:d
1428
 
1429
	// All four sends discard their response (null destination) and use the same descriptor.
	send (8)	null<1>:d	r92		0x5		0x120A8018:ud
1430
	send (8)	null<1>:d	r101		0x5		0x120A8018:ud
1431
	send (8)	null<1>:d	r110		0x5		0x120A8018:ud
1432
	send (8)	null<1>:d	r119		0x5		0x120A8018:ud
1433
 
1434
 
1435
	//TODO - See if History saving can be combined with Curr Frame Save. - rT
1436
 
1437
 
1438
//Module Name 	: DN_UV_Save_Hist_UV
1439
//Author		: Tatiya, Rupesh
1440
//Description	: Saves DN history for UV data.
1441
 
1442
	// Message header in r3: copy of r0, dwords 0/1 replaced by the two words at r62.12 / r62.13
	// (widened to dwords), dword 2 set to 0x30007.
	// NOTE(review): 0x30007 presumably encodes an 8-byte-wide by 4-row block (width-1 = 7, height-1 = 3),
	// i.e. 32 bytes = one payload register following the header - confirm against the message header layout.
	mov (8)  r3.0<1>:ud	r0.0<8;8,1>:ud
1443
	mov (2)	 r3.0<1>:d	r62.12<2;2,1>:w
1444
	mov (1)	 r3.2<1>:d	0x30007:ud
1445
 
1446
	// Send with header r3; the response is discarded (null destination).
	// NOTE(review): the payload register after r3 is assumed to hold the packed history bytes - confirm register map.
	send (8) null<1>:d	r3		0x5		0x40A8021:ud
1447
 
1448
 
1449
 
1450
//Module Name	: DN_UV_Save_BNE_UV
1451
//Author		: Tatiya, Rupesh
1452
//Description	: Saves BNE values for 16x16 U and 16x16 V.
1453
 
1454
	// Message header in r1: copy of r0, dwords 0/1 replaced by the two words at r63.12 / r63.13
	// (widened to dwords), dword 2 set to 0x10003.
	// NOTE(review): 0x10003 presumably encodes a 4-byte-wide by 2-row block (8 bytes) - confirm
	// against the message header layout.
	mov (8)  r1.0<1>:ud	r0.0<8;8,1>:ud
1455
	mov (2)	 r1.0<1>:d		r63.12<2;2,1>:w
1456
	mov (1)	 r1.2<1>:d		0x10003:ud
1457
 
1458
	// Send with header r1; the response is discarded (null destination).
	// NOTE(review): the payload register after r1 is assumed to hold the 8 packed BNE bytes - confirm register map.
	send (8) null<1>:d	r1		0x5		0x40A8023:ud
1459
 
1460
 
1461
 
1462
//Module Name 	: DN_UV_PL2_Save_Curr_Frame_UV
1463
//Author		: Tatiya, Rupesh
1464
 
1465
 
1466
 
1467
//Module name 	:  DN_UV_Save_Curr_Frame_UV
1468
//Author		:  Tatiya, Rupesh
1469
//Description	:  Saves Current Frame (UV only).
1470
 
1471
 
1472
 
1473
 
1474
	// Two message headers, r74 and r83, both starting as copies of r0.
	// NOTE(review): r74 / r83 presumably sit immediately before the MSGPAYLOAD_UV0 / MSGPAYLOAD_UV1
	// registers so that header and payload are contiguous - confirm register map.
	mov  (8) 		r74<1>:ud		r0.0<8;8,1>:ud
1475
	mov  (8) 		r83<1>:ud		r0.0<8;8,1>:ud
1476
 
1477
	// First header: dword 0 = word r62.10, dword 1 = word r62.11 >> 1, dword 2 = 0xF000F.
	// NOTE(review): the >> 1 presumably converts a luma row coordinate to the half-height chroma plane - confirm.
	mov (1)	 r74.0<1>:d		r62.10<0;1,0>:w
1478
	shr (1)  r74.1<1>:d		r62.11<0;1,0>:w			1:w
1479
	mov (1)	 r74.2<1>:d		0xF000F:ud
1480
 
1481
	// Second header: same as the first except dword 0 is offset by +16.
	add (1)	 r83.0<1>:d		r62.10<0;1,0>:w			16:d
1482
	shr (1)	 r83.1<1>:d		r62.11<0;1,0>:w			 1:w
1483
	mov (1)	 r83.2<1>:d		0xF000F:ud
1484
 
1485
	// Both sends discard their response (null destination) and share one descriptor.
	send (8) null<1>:d	r74		0x5		0x120A8019:ud
1486
	send (8) null<1>:d	r83		0x5		0x120A8019:ud
1487
 
1488
 
1489
 
1490
//End of Thread message
// Copies r0 into r127 and sends it as a single-register message with no response (null destination).
// The subroutines placed after this point are therefore not reached by fall-through.
// NOTE(review): target 0x27 / descriptor 0x02000010 are presumably the thread-spawner end-of-thread
// encoding - confirm against the ISA send-message reference.
1491
 
1492
mov (8) r127<1>:ud r0.0<8;8,1>:ud
1493
 send (1) null<1>:d r127 0x27 0x02000010
1494
 
1495
 
1496
	//All sub-routines here
1497
 
1498
 
1499
// Module Name  : Noise_Detection
1500
// Author		: Tatiya, Rupesh
1501
// Description	: Performs noise detection on 32 pixels of U (8x4) and 32 pixels of V (8x4).
1502
 
1503
DN_UV_NOISE_DETECTION_UV:
1504
 
1505
// Find Field Block Median
1506
//
1507
// Purpose   : Find the median value of the nine pixels in the same field
1508
//             which are centered at current pixel.
1509
//
1510
//             Works on 9 pixels centered at the current pixel
1511
//                NOTE: pixels are within same field.
1512
//                      v4 - current pixel
1513
//
1514
//                  v2 v1 v0
1515
//                   *  *  *     <--- Different field - not used
1516
//                  v5 v4 v3
1517
//                   *  *  *     <--- Different field - not used
1518
//                  v8 v7 v6
1519
 
1520
// Algorithm to find median modifies the data.
1521
// Copy the data needed to calculate median so the original source data stays intact.
1522
//
1523
 
1524
//TODO - Change Interleaved implementation to separated one if - ,  does not work on predication. - rT
1525
 
1526
//Delete Later - rT
1527
//mov (1) pCUR_UV:uw		52*32:uw
1528
 
1529
// v0
1530
mov (16) ubMEDIAN_TEMP(0,0)<1>    	r[a0.0,0]<16;16,1>
1531
// v0
1532
mov (16) ubMEDIAN_TEMP(0,16)<1>   	r[a0.0,32]<16;16,1>
1533
// v1
1534
mov (16) ubMEDIAN_TEMP(1,0)<1>    	r[a0.0,2]<16;16,1>
1535
// v1
1536
mov (16) ubMEDIAN_TEMP(1,16)<1>   	r[a0.0,34]<16;16,1>
1537
// v2
1538
mov (16) ubMEDIAN_TEMP(2,0)<1>    	r[a0.0,4]<16;16,1>
1539
// v2
1540
mov (16) ubMEDIAN_TEMP(2,16)<1>   	r[a0.0,36]<16;16,1>
1541
// v3
1542
mov (16) ubMEDIAN_TEMP(3,0)<1>    	r[a0.0,64]<16;16,1>
1543
// v3
1544
mov (16) ubMEDIAN_TEMP(3,16)<1>   	r[a0.0,96]<16;16,1>
1545
// v4
1546
mov (16) ubMEDIAN_TEMP(4,0)<1>		r[a0.0,66]<16;16,1>
1547
// v4
1548
mov (16) ubMEDIAN_TEMP(4,16)<1>   	r[a0.0,98]<16;16,1>
1549
// v5
1550
mov (16) ubMEDIAN_TEMP(5,0)<1>		r[a0.0,68]<16;16,1>
1551
// v5
1552
mov (16) ubMEDIAN_TEMP(5,16)<1>   	r[a0.0,100]<16;16,1>
1553
// v6
1554
mov (16) ubMEDIAN_TEMP(6,0)<1>    	r[a0.0,128]<16;16,1>
1555
// v6
1556
mov (16) ubMEDIAN_TEMP(6,16)<1>   	r[a0.0,160]<16;16,1>
1557
// v7
1558
mov (16) ubMEDIAN_TEMP(7,0)<1>		r[a0.0,130]<16;16,1>
1559
// v7
1560
mov (16) ubMEDIAN_TEMP(7,16)<1>   	r[a0.0,162]<16;16,1>
1561
// v8
1562
mov (16) ubMEDIAN_TEMP(8,0)<1>		r[a0.0,132]<16;16,1>
1563
// v8
1564
mov (16) ubMEDIAN_TEMP(8,16)<1>   	r[a0.0,164]<16;16,1>
1565
 
1566
//TODO - Optimize one instruction here.
1567
add (1)  a0.0:uw		a0.0<0;1,0>:uw 64:uw
1568
// v0
1569
mov (16) ubMEDIAN_TEMP(9,0)<1>    	r[a0.0,0]<16;16,1>
1570
// v0
1571
mov (16) ubMEDIAN_TEMP(9,16)<1>   	r[a0.0,32]<16;16,1>
1572
// v1
1573
mov (16) ubMEDIAN_TEMP(10,0)<1>    	r[a0.0,2]<16;16,1>
1574
// v1
1575
mov (16) ubMEDIAN_TEMP(10,16)<1>   	r[a0.0,34]<16;16,1>
1576
// v2
1577
mov (16) ubMEDIAN_TEMP(11,0)<1>    	r[a0.0,4]<16;16,1>
1578
// v2
1579
mov (16) ubMEDIAN_TEMP(11,16)<1>   	r[a0.0,36]<16;16,1>
1580
// v3
1581
mov (16) ubMEDIAN_TEMP(12,0)<1>    	r[a0.0,64]<16;16,1>
1582
// v3
1583
mov (16) ubMEDIAN_TEMP(12,16)<1>   	r[a0.0,96]<16;16,1>
1584
// v4
1585
mov (16) ubMEDIAN_TEMP(13,0)<1>		r[a0.0,66]<16;16,1>
1586
// v4
1587
mov (16) ubMEDIAN_TEMP(13,16)<1>   	r[a0.0,98]<16;16,1>
1588
// v5
1589
mov (16) ubMEDIAN_TEMP(14,0)<1>		r[a0.0,68]<16;16,1>
1590
// v5
1591
mov (16) ubMEDIAN_TEMP(14,16)<1>   	r[a0.0,100]<16;16,1>
1592
// v6
1593
mov (16) ubMEDIAN_TEMP(15,0)<1>    	r[a0.0,128]<16;16,1>
1594
// v6
1595
mov (16) ubMEDIAN_TEMP(15,16)<1>   	r[a0.0,160]<16;16,1>
1596
// v7
1597
mov (16) ubMEDIAN_TEMP(16,0)<1>		r[a0.0,130]<16;16,1>
1598
// v7
1599
mov (16) ubMEDIAN_TEMP(16,16)<1>   	r[a0.0,162]<16;16,1>
1600
// v8
1601
mov (16) ubMEDIAN_TEMP(17,0)<1>		r[a0.0,132]<16;16,1>
1602
// v8
1603
mov (16) ubMEDIAN_TEMP(17,16)<1>   	r[a0.0,164]<16;16,1>
1604
 
1605
//TODO - Optimize one instruction here.
1606
add (1)  a0.0:uw		a0.0<0;1,0>:uw 64:uw
1607
 
1608
// MedianSwap
1609
//
1610
//  MedianSwap(inOutLeft, inOutRight)
1611
//  {
1612
//      if (inOutLeft > inOutRight)
1613
//      {
1614
//          temp = inOutLeft
1615
//          inOutLeft = inOutRight
1616
//          inOutRight = temp
1617
//      }
1618
//  }
1619
 
1620
// MedianSwap(v1, v2) - U
1621
// MedianSwap(v4, v5) - U
1622
// MedianSwap(v1, v2) - V
1623
// MedianSwap(v4, v5) - V
1624
 
1625
cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(1,0)<32;16,2>  ubMEDIAN_TEMP(2,0)<32;16,2>
1626
cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(4,0)<32;16,2>  ubMEDIAN_TEMP(5,0)<32;16,2>
1627
cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(1,1)<32;16,2> 	ubMEDIAN_TEMP(2,1)<32;16,2>
1628
cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(4,1)<32;16,2> 	ubMEDIAN_TEMP(5,1)<32;16,2>
1629
 
1630
       	mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(1,0)<32;16,2>
1631
       	mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(4,0)<32;16,2>
1632
       	mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(1,1)<32;16,2>
1633
		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(4,1)<32;16,2>
1634
 
1635
(f0.0) mov (16) ubMEDIAN_TEMP(1,0)<2>     	ubMEDIAN_TEMP(2,0)<32;16,2>
1636
(f0.1) mov (16) ubMEDIAN_TEMP(4,0)<2>		ubMEDIAN_TEMP(5,0)<32;16,2>
1637
(f1.0) mov (16) ubMEDIAN_TEMP(1,1)<2>  		ubMEDIAN_TEMP(2,1)<32;16,2>
1638
(f1.1) mov (16) ubMEDIAN_TEMP(4,1)<2>		ubMEDIAN_TEMP(5,1)<32;16,2>
1639
 
1640
(f0.0) mov (16) ubMEDIAN_TEMP(2,0)<2>     ubTEMP1(0,0)<16;16,1>
1641
(f0.1) mov (16) ubMEDIAN_TEMP(5,0)<2>     ubTEMP1(0,16)<16;16,1>
1642
(f1.0) mov (16) ubMEDIAN_TEMP(2,1)<2>     ubTEMP1(1,0)<16;16,1>
1643
(f1.1) mov (16) ubMEDIAN_TEMP(5,1)<2>     ubTEMP1(1,16)<16;16,1>
1644
 
1645
// MedianSwap(v7, v8) - U
1646
// MedianSwap(v0, v1) - U
1647
// MedianSwap(v7, v8) - V
1648
// MedianSwap(v0, v1) - V
1649
 cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(7,0)<32;16,2>   	ubMEDIAN_TEMP(8,0)<32;16,2>
1650
 cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(0,0)<32;16,2>  	ubMEDIAN_TEMP(1,0)<32;16,2>
1651
 cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(7,1)<32;16,2> 	ubMEDIAN_TEMP(8,1)<32;16,2>
1652
 cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(0,1)<32;16,2> 	ubMEDIAN_TEMP(1,1)<32;16,2>
1653
 
1654
        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(7,0)<32;16,2>
1655
        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(0,0)<32;16,2>
1656
        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(7,1)<32;16,2>
1657
 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(0,1)<32;16,2>
1658
 
1659
 (f0.0) mov (16) ubMEDIAN_TEMP(7,0)<2>   ubMEDIAN_TEMP(8,0)<32;16,2>
1660
 (f0.1) mov (16) ubMEDIAN_TEMP(0,0)<2>	ubMEDIAN_TEMP(1,0)<32;16,2>
1661
 (f1.0) mov (16) ubMEDIAN_TEMP(7,1)<2>  	ubMEDIAN_TEMP(8,1)<32;16,2>
1662
 (f1.1) mov (16) ubMEDIAN_TEMP(0,1)<2>	ubMEDIAN_TEMP(1,1)<32;16,2>
1663
 
1664
 (f0.0) mov (16) ubMEDIAN_TEMP(8,0)<2>     ubTEMP1(0,0)<16;16,1>
1665
 (f0.1) mov (16) ubMEDIAN_TEMP(1,0)<2>     ubTEMP1(0,16)<16;16,1>
1666
 (f1.0) mov (16) ubMEDIAN_TEMP(8,1)<2>     ubTEMP1(1,0)<16;16,1>
1667
 (f1.1) mov (16) ubMEDIAN_TEMP(1,1)<2>     ubTEMP1(1,16)<16;16,1>
1668
 
1669
 // MedianSwap(v3, v4) - U
1670
 // MedianSwap(v6, v7) - U
1671
 // MedianSwap(v3, v4) - V
1672
 // MedianSwap(v6, v7) - V
1673
 cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(3,0)<32;16,2>   	ubMEDIAN_TEMP(4,0)<32;16,2>
1674
 cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(6,0)<32;16,2>  	ubMEDIAN_TEMP(7,0)<32;16,2>
1675
 cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(3,1)<32;16,2> 	ubMEDIAN_TEMP(4,1)<32;16,2>
1676
 cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(6,1)<32;16,2> 	ubMEDIAN_TEMP(7,1)<32;16,2>
1677
 
1678
        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(3,0)<32;16,2>
1679
        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(6,0)<32;16,2>
1680
        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(3,1)<32;16,2>
1681
 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(6,1)<32;16,2>
1682
 
1683
 (f0.0) mov (16) ubMEDIAN_TEMP(3,0)<2>   ubMEDIAN_TEMP(4,0)<32;16,2>
1684
 (f0.1) mov (16) ubMEDIAN_TEMP(6,0)<2>	ubMEDIAN_TEMP(7,0)<32;16,2>
1685
 (f1.0) mov (16) ubMEDIAN_TEMP(3,1)<2>  	ubMEDIAN_TEMP(4,1)<32;16,2>
1686
 (f1.1) mov (16) ubMEDIAN_TEMP(6,1)<2>	ubMEDIAN_TEMP(7,1)<32;16,2>
1687
 
1688
 (f0.0) mov (16) ubMEDIAN_TEMP(4,0)<2>     ubTEMP1(0,0)<16;16,1>
1689
 (f0.1) mov (16) ubMEDIAN_TEMP(7,0)<2>     ubTEMP1(0,16)<16;16,1>
1690
 (f1.0) mov (16) ubMEDIAN_TEMP(4,1)<2>     ubTEMP1(1,0)<16;16,1>
1691
 (f1.1) mov (16) ubMEDIAN_TEMP(7,1)<2>     ubTEMP1(1,16)<16;16,1>
1692
 
1693
 // MedianSwap(v1, v2) - U
1694
 // MedianSwap(v4, v5) - U
1695
 // MedianSwap(v1, v2) - V
1696
 // MedianSwap(v4, v5) - V
1697
 cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(1,0)<32;16,2>   	ubMEDIAN_TEMP(2,0)<32;16,2>
1698
 cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(4,0)<32;16,2>  	ubMEDIAN_TEMP(5,0)<32;16,2>
1699
 cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(1,1)<32;16,2> 	ubMEDIAN_TEMP(2,1)<32;16,2>
1700
 cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(4,1)<32;16,2> 	ubMEDIAN_TEMP(5,1)<32;16,2>
1701
 
1702
        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(1,0)<32;16,2>
1703
        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(4,0)<32;16,2>
1704
        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(1,1)<32;16,2>
1705
 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(4,1)<32;16,2>
1706
 
1707
 (f0.0) mov (16) ubMEDIAN_TEMP(1,0)<2>   ubMEDIAN_TEMP(2,0)<32;16,2>
1708
 (f0.1) mov (16) ubMEDIAN_TEMP(4,0)<2>	ubMEDIAN_TEMP(5,0)<32;16,2>
1709
 (f1.0) mov (16) ubMEDIAN_TEMP(1,1)<2>  	ubMEDIAN_TEMP(2,1)<32;16,2>
1710
 (f1.1) mov (16) ubMEDIAN_TEMP(4,1)<2>	ubMEDIAN_TEMP(5,1)<32;16,2>
1711
 
1712
 (f0.0) mov (16) ubMEDIAN_TEMP(2,0)<2>     ubTEMP1(0,0)<16;16,1>
1713
 (f0.1) mov (16) ubMEDIAN_TEMP(5,0)<2>     ubTEMP1(0,16)<16;16,1>
1714
 (f1.0) mov (16) ubMEDIAN_TEMP(2,1)<2>     ubTEMP1(1,0)<16;16,1>
1715
 (f1.1) mov (16) ubMEDIAN_TEMP(5,1)<2>     ubTEMP1(1,16)<16;16,1>
1716
 
1717
 // MedianSwap(v7, v8) - U
1718
 // MedianSwap(v0, v3) - U
1719
 // MedianSwap(v7, v8) - V
1720
 // MedianSwap(v0, v3) - V
1721
 cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(7,0)<32;16,2>   	ubMEDIAN_TEMP(8,0)<32;16,2>
1722
 cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(0,0)<32;16,2>  	ubMEDIAN_TEMP(3,0)<32;16,2>
1723
 cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(7,1)<32;16,2> 	ubMEDIAN_TEMP(8,1)<32;16,2>
1724
 cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(0,1)<32;16,2> 	ubMEDIAN_TEMP(3,1)<32;16,2>
1725
 
1726
        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(7,0)<32;16,2>
1727
        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(0,0)<32;16,2>
1728
        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(7,1)<32;16,2>
1729
 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(0,1)<32;16,2>
1730
 
1731
 (f0.0) mov (16) ubMEDIAN_TEMP(7,0)<2>   ubMEDIAN_TEMP(8,0)<32;16,2>
1732
 (f0.1) mov (16) ubMEDIAN_TEMP(0,0)<2>	ubMEDIAN_TEMP(3,0)<32;16,2>
1733
 (f1.0) mov (16) ubMEDIAN_TEMP(7,1)<2>  	ubMEDIAN_TEMP(8,1)<32;16,2>
1734
 (f1.1) mov (16) ubMEDIAN_TEMP(0,1)<2>	ubMEDIAN_TEMP(3,1)<32;16,2>
1735
 
1736
 (f0.0) mov (16) ubMEDIAN_TEMP(8,0)<2>     ubTEMP1(0,0)<16;16,1>
1737
 (f0.1) mov (16) ubMEDIAN_TEMP(3,0)<2>     ubTEMP1(0,16)<16;16,1>
1738
 (f1.0) mov (16) ubMEDIAN_TEMP(8,1)<2>     ubTEMP1(1,0)<16;16,1>
1739
 (f1.1) mov (16) ubMEDIAN_TEMP(3,1)<2>     ubTEMP1(1,16)<16;16,1>
1740
 
1741
 // NOTE:
1742
 // Compare v0 to v6 to find the minimum.
1743
 // Store the minimum for future use.
1744
 //TODO - Find if MIN is needed.
1745
 //cmp.l.f0.0  (16) null:w          			ubMEDIAN_TEMP(%1+0,0)<32;16,2> 	ubMEDIAN_TEMP(%1+6,0)<32;16,2>
1746
 //cmp.l.f1.0  (16) null:w          			ubMEDIAN_TEMP(%1+0,1)<32;16,2> 	ubMEDIAN_TEMP(%1+6,1)<32;16,2>
1747
 //(f0.0)  mov (16) ubCURR_MIN(0,%2*16+0)<1>   		ubMEDIAN_TEMP(%1+0,0)<32;16,2>
1748
 //(f1.0)  mov (16) ubCURR_MIN(1,%2*16+0)<1>   		ubMEDIAN_TEMP(%1+0,1)<32;16,2>
1749
 //(-f0.0) mov (16) ubCURR_MIN(0,%2*16+0)<1>   		ubMEDIAN_TEMP(%1+6,0)<32;16,2>
1750
 //(-f1.0) mov (16) ubCURR_MIN(1,%2*16+0)<1>   		ubMEDIAN_TEMP(%1+6,1)<32;16,2>
1751
 
1752
 // MedianSwap(v5, v8) - U
1753
 // MedianSwap(v4, v7) - U
1754
 // MedianSwap(v5, v8) - V
1755
 // MedianSwap(v4, v7) - V
1756
 cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(5,0)<32;16,2>   	ubMEDIAN_TEMP(8,0)<32;16,2>
1757
 cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(4,0)<32;16,2>  	ubMEDIAN_TEMP(7,0)<32;16,2>
1758
 cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(5,1)<32;16,2> 	ubMEDIAN_TEMP(8,1)<32;16,2>
1759
 cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(4,1)<32;16,2> 	ubMEDIAN_TEMP(7,1)<32;16,2>
1760
 
1761
        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(5,0)<32;16,2>
1762
        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(4,0)<32;16,2>
1763
        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(5,1)<32;16,2>
1764
 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(4,1)<32;16,2>
1765
 
1766
 (f0.0) mov (16) ubMEDIAN_TEMP(5,0)<2>    	ubMEDIAN_TEMP(8,0)<32;16,2>
1767
 (f0.1) mov (16) ubMEDIAN_TEMP(4,0)<2>		ubMEDIAN_TEMP(7,0)<32;16,2>
1768
 (f1.0) mov (16) ubMEDIAN_TEMP(5,1)<2>  	ubMEDIAN_TEMP(8,1)<32;16,2>
1769
 (f1.1) mov (16) ubMEDIAN_TEMP(4,1)<2>		ubMEDIAN_TEMP(7,1)<32;16,2>
1770
 
1771
 (f0.0) mov (16) ubMEDIAN_TEMP(8,0)<2>     ubTEMP1(0,0)<16;16,1>
1772
 (f0.1) mov (16) ubMEDIAN_TEMP(7,0)<2>     ubTEMP1(0,16)<16;16,1>
1773
 (f1.0) mov (16) ubMEDIAN_TEMP(8,1)<2>     ubTEMP1(1,0)<16;16,1>
1774
 (f1.1) mov (16) ubMEDIAN_TEMP(7,1)<2>     ubTEMP1(1,16)<16;16,1>
1775
 
1776
// NOTE:
1777
// Compare v2 to v8 to find the maximum.
1778
// Store the maximum for future use.
1779
 //TODO - Find if MAX is needed.
1780
// cmp.g.f0.0  (16) null:w         ubMEDIAN_TEMP(%1+2,0)<32;16,2> ubMEDIAN_TEMP(%1+8,0)<32;16,2>
1781
// cmp.g.f1.0  (16) null:w         ubMEDIAN_TEMP(%1+2,1)<32;16,2> ubMEDIAN_TEMP(%1+8,1)<32;16,2>
1782
//(f0.0)  mov (16) ubCURR_MAX(0,%2*16+0)<1>   	ubMEDIAN_TEMP(%1+2,0)<32;16,2>
1783
//(f1.0)  mov (16) ubCURR_MAX(1,%2*16+0)<1>   	ubMEDIAN_TEMP(%1+2,1)<32;16,2>
1784
//(-f0.0) mov (16) ubCURR_MAX(0,%2*16+0)<1>   	ubMEDIAN_TEMP(%1+8,0)<32;16,2>
1785
//(-f1.0) mov (16) ubCURR_MAX(1,%2*16+0)<1>   	ubMEDIAN_TEMP(%1+8,1)<32;16,2>
1786
 
1787
// MedianSwap(v3, v6) - U
1788
// MedianSwap(v1, v4) - U
1789
// MedianSwap(v3, v6) - V
1790
// MedianSwap(v1, v4) - V
1791
 cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(3,0)<32;16,2>   	ubMEDIAN_TEMP(6,0)<32;16,2>
1792
 cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(1,0)<32;16,2>  	ubMEDIAN_TEMP(4,0)<32;16,2>
1793
 cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(3,1)<32;16,2> 	ubMEDIAN_TEMP(6,1)<32;16,2>
1794
 cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(1,1)<32;16,2> 	ubMEDIAN_TEMP(4,1)<32;16,2>
1795
 
1796
 (f0.0) mov (16) ubMEDIAN_TEMP(6,0)<2>     ubMEDIAN_TEMP(3,0)<32;16,2>
1797
 (f0.1) mov (16) ubMEDIAN_TEMP(4,0)<2>     ubMEDIAN_TEMP(1,0)<32;16,2>
1798
 (f1.0) mov (16) ubMEDIAN_TEMP(6,1)<2>     ubMEDIAN_TEMP(3,1)<32;16,2>
1799
 (f1.1) mov (16) ubMEDIAN_TEMP(4,1)<2>     ubMEDIAN_TEMP(1,1)<32;16,2>
1800
 
1801
 // MedianSwap(v2,v5) - U
1802
 // MedianSwap(v4,v7) - U
1803
 // MedianSwap(v2,v5) - V
1804
 // MedianSwap(v4,v7) - V
1805
 cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(2,0)<32;16,2>   	ubMEDIAN_TEMP(5,0)<32;16,2>
1806
 cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(4,0)<32;16,2>  	ubMEDIAN_TEMP(7,0)<32;16,2>
1807
 cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(2,1)<32;16,2> 	ubMEDIAN_TEMP(5,1)<32;16,2>
1808
 cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(4,1)<32;16,2> 	ubMEDIAN_TEMP(7,1)<32;16,2>
1809
 
1810
 (f0.0) mov (16) ubMEDIAN_TEMP(2,0)<2>     ubMEDIAN_TEMP(5,0)<32;16,2>
1811
 (f0.1) mov (16) ubMEDIAN_TEMP(4,0)<2>     ubMEDIAN_TEMP(7,0)<32;16,2>
1812
 (f1.0) mov (16) ubMEDIAN_TEMP(2,1)<2>     ubMEDIAN_TEMP(5,1)<32;16,2>
1813
 (f1.1) mov (16) ubMEDIAN_TEMP(4,1)<2>     ubMEDIAN_TEMP(7,1)<32;16,2>
1814
 
1815
 // MedianSwap(v4,v2) - U
1816
 // MedianSwap(v4,v2) - V
1817
 cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(4,0)<32;16,2>   	ubMEDIAN_TEMP(2,0)<32;16,2>
1818
 cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(4,1)<32;16,2>  	ubMEDIAN_TEMP(2,1)<32;16,2>
1819
 
1820
        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(4,0)<32;16,2>
1821
        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(4,1)<32;16,2>
1822
 
1823
 (f0.0) mov (16) ubMEDIAN_TEMP(4,0)<2>    	ubMEDIAN_TEMP(2,0)<32;16,2>
1824
 (f0.1) mov (16) ubMEDIAN_TEMP(4,1)<2>		ubMEDIAN_TEMP(2,1)<32;16,2>
1825
 
1826
 (f0.0) mov (16) ubMEDIAN_TEMP(2,0)<2>     ubTEMP1(0,0)<16;16,1>
1827
 (f0.1) mov (16) ubMEDIAN_TEMP(2,1)<2>     ubTEMP1(0,16)<16;16,1>
1828
 
1829
 // MedianSwap(v6,v4) - U
1830
 // MedianSwap(v6,v4) - V
1831
 cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(6,0)<32;16,2>   ubMEDIAN_TEMP(4,0)<32;16,2>
1832
 cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(6,1)<32;16,2>   ubMEDIAN_TEMP(4,1)<32;16,2>
1833
 
1834
 (f0.0) mov (16) ubMEDIAN_TEMP(4,0)<2>    	ubMEDIAN_TEMP(6,0)<32;16,2>
1835
 (f0.1) mov (16) ubMEDIAN_TEMP(4,1)<2>		ubMEDIAN_TEMP(6,1)<32;16,2>
1836
 
1837
 // MedianSwap(v4,v2) - U
1838
 // MedianSwap(v4,v2) - V
1839
 cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(4,0)<32;16,2>   	ubMEDIAN_TEMP(2,0)<32;16,2>
1840
 cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(4,1)<32;16,2>  	ubMEDIAN_TEMP(2,1)<32;16,2>
1841
 
1842
 (f0.0) mov (16) ubMEDIAN_TEMP(4,0)<2>    	ubMEDIAN_TEMP(2,0)<32;16,2>
1843
 (f0.1) mov (16) ubMEDIAN_TEMP(4,1)<2>		ubMEDIAN_TEMP(2,1)<32;16,2>
1844
cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(10,0)<32;16,2>  ubMEDIAN_TEMP(11,0)<32;16,2>
1845
cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(13,0)<32;16,2>  ubMEDIAN_TEMP(14,0)<32;16,2>
1846
cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(10,1)<32;16,2> 	ubMEDIAN_TEMP(11,1)<32;16,2>
1847
cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(13,1)<32;16,2> 	ubMEDIAN_TEMP(14,1)<32;16,2>
1848
 
1849
       	mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(10,0)<32;16,2>
1850
       	mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(13,0)<32;16,2>
1851
       	mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(10,1)<32;16,2>
1852
		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(13,1)<32;16,2>
1853
 
1854
(f0.0) mov (16) ubMEDIAN_TEMP(10,0)<2>     	ubMEDIAN_TEMP(11,0)<32;16,2>
1855
(f0.1) mov (16) ubMEDIAN_TEMP(13,0)<2>		ubMEDIAN_TEMP(14,0)<32;16,2>
1856
(f1.0) mov (16) ubMEDIAN_TEMP(10,1)<2>  		ubMEDIAN_TEMP(11,1)<32;16,2>
1857
(f1.1) mov (16) ubMEDIAN_TEMP(13,1)<2>		ubMEDIAN_TEMP(14,1)<32;16,2>
1858
 
1859
(f0.0) mov (16) ubMEDIAN_TEMP(11,0)<2>     ubTEMP1(0,0)<16;16,1>
1860
(f0.1) mov (16) ubMEDIAN_TEMP(14,0)<2>     ubTEMP1(0,16)<16;16,1>
1861
(f1.0) mov (16) ubMEDIAN_TEMP(11,1)<2>     ubTEMP1(1,0)<16;16,1>
1862
(f1.1) mov (16) ubMEDIAN_TEMP(14,1)<2>     ubTEMP1(1,16)<16;16,1>
1863
 
1864
// MedianSwap(v7, v8) - U
1865
// MedianSwap(v0, v1) - U
1866
// MedianSwap(v7, v8) - V
1867
// MedianSwap(v0, v1) - V
1868
 cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(16,0)<32;16,2>   	ubMEDIAN_TEMP(17,0)<32;16,2>
1869
 cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(9,0)<32;16,2>  	ubMEDIAN_TEMP(10,0)<32;16,2>
1870
 cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(16,1)<32;16,2> 	ubMEDIAN_TEMP(17,1)<32;16,2>
1871
 cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(9,1)<32;16,2> 	ubMEDIAN_TEMP(10,1)<32;16,2>
1872
 
1873
        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(16,0)<32;16,2>
1874
        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(9,0)<32;16,2>
1875
        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(16,1)<32;16,2>
1876
 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(9,1)<32;16,2>
1877
 
1878
 (f0.0) mov (16) ubMEDIAN_TEMP(16,0)<2>   ubMEDIAN_TEMP(17,0)<32;16,2>
1879
 (f0.1) mov (16) ubMEDIAN_TEMP(9,0)<2>	ubMEDIAN_TEMP(10,0)<32;16,2>
1880
 (f1.0) mov (16) ubMEDIAN_TEMP(16,1)<2>  	ubMEDIAN_TEMP(17,1)<32;16,2>
1881
 (f1.1) mov (16) ubMEDIAN_TEMP(9,1)<2>	ubMEDIAN_TEMP(10,1)<32;16,2>
1882
 
1883
 (f0.0) mov (16) ubMEDIAN_TEMP(17,0)<2>     ubTEMP1(0,0)<16;16,1>
1884
 (f0.1) mov (16) ubMEDIAN_TEMP(10,0)<2>     ubTEMP1(0,16)<16;16,1>
1885
 (f1.0) mov (16) ubMEDIAN_TEMP(17,1)<2>     ubTEMP1(1,0)<16;16,1>
1886
 (f1.1) mov (16) ubMEDIAN_TEMP(10,1)<2>     ubTEMP1(1,16)<16;16,1>
1887
 
1888
 // MedianSwap(v3, v4) - U
1889
 // MedianSwap(v6, v7) - U
1890
 // MedianSwap(v3, v4) - V
1891
 // MedianSwap(v6, v7) - V
1892
 cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(12,0)<32;16,2>   	ubMEDIAN_TEMP(13,0)<32;16,2>
1893
 cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(15,0)<32;16,2>  	ubMEDIAN_TEMP(16,0)<32;16,2>
1894
 cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(12,1)<32;16,2> 	ubMEDIAN_TEMP(13,1)<32;16,2>
1895
 cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(15,1)<32;16,2> 	ubMEDIAN_TEMP(16,1)<32;16,2>
1896
 
1897
        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(12,0)<32;16,2>
1898
        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(15,0)<32;16,2>
1899
        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(12,1)<32;16,2>
1900
 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(15,1)<32;16,2>
1901
 
1902
 (f0.0) mov (16) ubMEDIAN_TEMP(12,0)<2>   ubMEDIAN_TEMP(13,0)<32;16,2>
1903
 (f0.1) mov (16) ubMEDIAN_TEMP(15,0)<2>	ubMEDIAN_TEMP(16,0)<32;16,2>
1904
 (f1.0) mov (16) ubMEDIAN_TEMP(12,1)<2>  	ubMEDIAN_TEMP(13,1)<32;16,2>
1905
 (f1.1) mov (16) ubMEDIAN_TEMP(15,1)<2>	ubMEDIAN_TEMP(16,1)<32;16,2>
1906
 
1907
 (f0.0) mov (16) ubMEDIAN_TEMP(13,0)<2>     ubTEMP1(0,0)<16;16,1>
1908
 (f0.1) mov (16) ubMEDIAN_TEMP(16,0)<2>     ubTEMP1(0,16)<16;16,1>
1909
 (f1.0) mov (16) ubMEDIAN_TEMP(13,1)<2>     ubTEMP1(1,0)<16;16,1>
1910
 (f1.1) mov (16) ubMEDIAN_TEMP(16,1)<2>     ubTEMP1(1,16)<16;16,1>
1911
 
1912
 // MedianSwap(v1, v2) - U
1913
 // MedianSwap(v4, v5) - U
1914
 // MedianSwap(v1, v2) - V
1915
 // MedianSwap(v4, v5) - V
1916
 cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(10,0)<32;16,2>   	ubMEDIAN_TEMP(11,0)<32;16,2>
1917
 cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(13,0)<32;16,2>  	ubMEDIAN_TEMP(14,0)<32;16,2>
1918
 cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(10,1)<32;16,2> 	ubMEDIAN_TEMP(11,1)<32;16,2>
1919
 cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(13,1)<32;16,2> 	ubMEDIAN_TEMP(14,1)<32;16,2>
1920
 
1921
        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(10,0)<32;16,2>
1922
        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(13,0)<32;16,2>
1923
        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(10,1)<32;16,2>
1924
 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(13,1)<32;16,2>
1925
 
1926
 (f0.0) mov (16) ubMEDIAN_TEMP(10,0)<2>   ubMEDIAN_TEMP(11,0)<32;16,2>
1927
 (f0.1) mov (16) ubMEDIAN_TEMP(13,0)<2>	ubMEDIAN_TEMP(14,0)<32;16,2>
1928
 (f1.0) mov (16) ubMEDIAN_TEMP(10,1)<2>  	ubMEDIAN_TEMP(11,1)<32;16,2>
1929
 (f1.1) mov (16) ubMEDIAN_TEMP(13,1)<2>	ubMEDIAN_TEMP(14,1)<32;16,2>
1930
 
1931
 (f0.0) mov (16) ubMEDIAN_TEMP(11,0)<2>     ubTEMP1(0,0)<16;16,1>
1932
 (f0.1) mov (16) ubMEDIAN_TEMP(14,0)<2>     ubTEMP1(0,16)<16;16,1>
1933
 (f1.0) mov (16) ubMEDIAN_TEMP(11,1)<2>     ubTEMP1(1,0)<16;16,1>
1934
 (f1.1) mov (16) ubMEDIAN_TEMP(14,1)<2>     ubTEMP1(1,16)<16;16,1>
1935
 
1936
 // MedianSwap(v7, v8) - U
1937
 // MedianSwap(v0, v3) - U
1938
 // MedianSwap(v7, v8) - V
1939
 // MedianSwap(v0, v3) - V
1940
 cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(16,0)<32;16,2>   	ubMEDIAN_TEMP(17,0)<32;16,2>
1941
 cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(9,0)<32;16,2>  	ubMEDIAN_TEMP(12,0)<32;16,2>
1942
 cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(16,1)<32;16,2> 	ubMEDIAN_TEMP(17,1)<32;16,2>
1943
 cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(9,1)<32;16,2> 	ubMEDIAN_TEMP(12,1)<32;16,2>
1944
 
1945
        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(16,0)<32;16,2>
1946
        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(9,0)<32;16,2>
1947
        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(16,1)<32;16,2>
1948
 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(9,1)<32;16,2>
1949
 
1950
 (f0.0) mov (16) ubMEDIAN_TEMP(16,0)<2>   ubMEDIAN_TEMP(17,0)<32;16,2>
1951
 (f0.1) mov (16) ubMEDIAN_TEMP(9,0)<2>	ubMEDIAN_TEMP(12,0)<32;16,2>
1952
 (f1.0) mov (16) ubMEDIAN_TEMP(16,1)<2>  	ubMEDIAN_TEMP(17,1)<32;16,2>
1953
 (f1.1) mov (16) ubMEDIAN_TEMP(9,1)<2>	ubMEDIAN_TEMP(12,1)<32;16,2>
1954
 
1955
 (f0.0) mov (16) ubMEDIAN_TEMP(17,0)<2>     ubTEMP1(0,0)<16;16,1>
1956
 (f0.1) mov (16) ubMEDIAN_TEMP(12,0)<2>     ubTEMP1(0,16)<16;16,1>
1957
 (f1.0) mov (16) ubMEDIAN_TEMP(17,1)<2>     ubTEMP1(1,0)<16;16,1>
1958
 (f1.1) mov (16) ubMEDIAN_TEMP(12,1)<2>     ubTEMP1(1,16)<16;16,1>
1959
 
1960
 // NOTE:
1961
 // Compare v0 to v6 to find the minimum.
1962
 // Store the minimum for future use.
1963
 //TODO - Find if MIN is needed.
1964
 //cmp.l.f0.0  (16) null:w          			ubMEDIAN_TEMP(%1+0,0)<32;16,2> 	ubMEDIAN_TEMP(%1+6,0)<32;16,2>
1965
 //cmp.l.f1.0  (16) null:w          			ubMEDIAN_TEMP(%1+0,1)<32;16,2> 	ubMEDIAN_TEMP(%1+6,1)<32;16,2>
1966
 //(f0.0)  mov (16) ubCURR_MIN(0,%2*16+0)<1>   		ubMEDIAN_TEMP(%1+0,0)<32;16,2>
1967
 //(f1.0)  mov (16) ubCURR_MIN(1,%2*16+0)<1>   		ubMEDIAN_TEMP(%1+0,1)<32;16,2>
1968
 //(-f0.0) mov (16) ubCURR_MIN(0,%2*16+0)<1>   		ubMEDIAN_TEMP(%1+6,0)<32;16,2>
1969
 //(-f1.0) mov (16) ubCURR_MIN(1,%2*16+0)<1>   		ubMEDIAN_TEMP(%1+6,1)<32;16,2>
1970
 
1971
 // MedianSwap(v5, v8) - U
1972
 // MedianSwap(v4, v7) - U
1973
 // MedianSwap(v5, v8) - V
1974
 // MedianSwap(v4, v7) - V
1975
 cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(14,0)<32;16,2>   	ubMEDIAN_TEMP(17,0)<32;16,2>
1976
 cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(13,0)<32;16,2>  	ubMEDIAN_TEMP(16,0)<32;16,2>
1977
 cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(14,1)<32;16,2> 	ubMEDIAN_TEMP(17,1)<32;16,2>
1978
 cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(13,1)<32;16,2> 	ubMEDIAN_TEMP(16,1)<32;16,2>
1979
 
1980
        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(14,0)<32;16,2>
1981
        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(13,0)<32;16,2>
1982
        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(14,1)<32;16,2>
1983
 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(13,1)<32;16,2>
1984
 
1985
 (f0.0) mov (16) ubMEDIAN_TEMP(14,0)<2>    	ubMEDIAN_TEMP(17,0)<32;16,2>
1986
 (f0.1) mov (16) ubMEDIAN_TEMP(13,0)<2>		ubMEDIAN_TEMP(16,0)<32;16,2>
1987
 (f1.0) mov (16) ubMEDIAN_TEMP(14,1)<2>  	ubMEDIAN_TEMP(17,1)<32;16,2>
1988
 (f1.1) mov (16) ubMEDIAN_TEMP(13,1)<2>		ubMEDIAN_TEMP(16,1)<32;16,2>
1989
 
1990
 (f0.0) mov (16) ubMEDIAN_TEMP(17,0)<2>     ubTEMP1(0,0)<16;16,1>
1991
 (f0.1) mov (16) ubMEDIAN_TEMP(16,0)<2>     ubTEMP1(0,16)<16;16,1>
1992
 (f1.0) mov (16) ubMEDIAN_TEMP(17,1)<2>     ubTEMP1(1,0)<16;16,1>
1993
 (f1.1) mov (16) ubMEDIAN_TEMP(16,1)<2>     ubTEMP1(1,16)<16;16,1>
1994
 
1995
// NOTE:
1996
// Compare v2 to v8 to find the maximum.
1997
// Store the maximum for future use.
1998
 //TODO - Find if MAX is needed.
1999
// cmp.g.f0.0  (16) null:w         ubMEDIAN_TEMP(%1+2,0)<32;16,2> ubMEDIAN_TEMP(%1+8,0)<32;16,2>
2000
// cmp.g.f1.0  (16) null:w         ubMEDIAN_TEMP(%1+2,1)<32;16,2> ubMEDIAN_TEMP(%1+8,1)<32;16,2>
2001
//(f0.0)  mov (16) ubCURR_MAX(0,%2*16+0)<1>   	ubMEDIAN_TEMP(%1+2,0)<32;16,2>
2002
//(f1.0)  mov (16) ubCURR_MAX(1,%2*16+0)<1>   	ubMEDIAN_TEMP(%1+2,1)<32;16,2>
2003
//(-f0.0) mov (16) ubCURR_MAX(0,%2*16+0)<1>   	ubMEDIAN_TEMP(%1+8,0)<32;16,2>
2004
//(-f1.0) mov (16) ubCURR_MAX(1,%2*16+0)<1>   	ubMEDIAN_TEMP(%1+8,1)<32;16,2>
2005
 
2006
// MedianSwap(v3, v6) - U
2007
// MedianSwap(v1, v4) - U
2008
// MedianSwap(v3, v6) - V
2009
// MedianSwap(v1, v4) - V
2010
 cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(12,0)<32;16,2>   	ubMEDIAN_TEMP(15,0)<32;16,2>
2011
 cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(10,0)<32;16,2>  	ubMEDIAN_TEMP(13,0)<32;16,2>
2012
 cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(12,1)<32;16,2> 	ubMEDIAN_TEMP(15,1)<32;16,2>
2013
 cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(10,1)<32;16,2> 	ubMEDIAN_TEMP(13,1)<32;16,2>
2014
 
2015
 (f0.0) mov (16) ubMEDIAN_TEMP(15,0)<2>     ubMEDIAN_TEMP(12,0)<32;16,2>
2016
 (f0.1) mov (16) ubMEDIAN_TEMP(13,0)<2>     ubMEDIAN_TEMP(10,0)<32;16,2>
2017
 (f1.0) mov (16) ubMEDIAN_TEMP(15,1)<2>     ubMEDIAN_TEMP(12,1)<32;16,2>
2018
 (f1.1) mov (16) ubMEDIAN_TEMP(13,1)<2>     ubMEDIAN_TEMP(10,1)<32;16,2>
2019
 
2020
 // MedianSwap(v2,v5) - U
2021
 // MedianSwap(v4,v7) - U
2022
 // MedianSwap(v2,v5) - V
2023
 // MedianSwap(v4,v7) - V
2024
 cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(11,0)<32;16,2>   	ubMEDIAN_TEMP(14,0)<32;16,2>
2025
 cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(13,0)<32;16,2>  	ubMEDIAN_TEMP(16,0)<32;16,2>
2026
 cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(11,1)<32;16,2> 	ubMEDIAN_TEMP(14,1)<32;16,2>
2027
 cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(13,1)<32;16,2> 	ubMEDIAN_TEMP(16,1)<32;16,2>
2028
 
2029
 (f0.0) mov (16) ubMEDIAN_TEMP(11,0)<2>     ubMEDIAN_TEMP(14,0)<32;16,2>
2030
 (f0.1) mov (16) ubMEDIAN_TEMP(13,0)<2>     ubMEDIAN_TEMP(16,0)<32;16,2>
2031
 (f1.0) mov (16) ubMEDIAN_TEMP(11,1)<2>     ubMEDIAN_TEMP(14,1)<32;16,2>
2032
 (f1.1) mov (16) ubMEDIAN_TEMP(13,1)<2>     ubMEDIAN_TEMP(16,1)<32;16,2>
2033
 
2034
 // Last three exchanges of the network on rows 9..17
 // (v2 = row 11, v4 = row 13, v6 = row 15).
 // Only row 13 (v4) is read afterwards as the median, so the first exchange is
 // a full swap while the remaining two update row 13 only.
 // MedianSwap(v4,v2) - U
2035
 // MedianSwap(v4,v2) - V
2036
 cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(13,0)<32;16,2>   	ubMEDIAN_TEMP(11,0)<32;16,2>
2037
 cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(13,1)<32;16,2>  	ubMEDIAN_TEMP(11,1)<32;16,2>
2038
 
2039
 // Keep the old v4 so it can be written to v2 after v4 has been overwritten.
        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(13,0)<32;16,2>
2040
        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(13,1)<32;16,2>
2041
 
2042
 // Where v4 > v2: v4 <- v2 ...
 (f0.0) mov (16) ubMEDIAN_TEMP(13,0)<2>    	ubMEDIAN_TEMP(11,0)<32;16,2>
2043
 (f0.1) mov (16) ubMEDIAN_TEMP(13,1)<2>		ubMEDIAN_TEMP(11,1)<32;16,2>
2044
 
2045
 // ... and v2 <- old v4, giving v4 = min(v4,v2), v2 = max(v4,v2).
 (f0.0) mov (16) ubMEDIAN_TEMP(11,0)<2>     ubTEMP1(0,0)<16;16,1>
2046
 (f0.1) mov (16) ubMEDIAN_TEMP(11,1)<2>     ubTEMP1(0,16)<16;16,1>
2047
 
2048
 // MedianSwap(v6,v4) - U
2049
 // MedianSwap(v6,v4) - V
2050
 // Half exchange: only the larger value is kept, in v4 (v4 = max(v4,v6));
 // v6 is left untouched because it is not used again.
 cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(15,0)<32;16,2>   ubMEDIAN_TEMP(13,0)<32;16,2>
2051
 cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(15,1)<32;16,2>   ubMEDIAN_TEMP(13,1)<32;16,2>
2052
 
2053
 (f0.0) mov (16) ubMEDIAN_TEMP(13,0)<2>    	ubMEDIAN_TEMP(15,0)<32;16,2>
2054
 (f0.1) mov (16) ubMEDIAN_TEMP(13,1)<2>		ubMEDIAN_TEMP(15,1)<32;16,2>
2055
 
2056
 // MedianSwap(v4,v2) - U
2057
 // MedianSwap(v4,v2) - V
2058
 // Half exchange: v4 = min(v4,v2). After this row 13 holds the median.
 cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(13,0)<32;16,2>   	ubMEDIAN_TEMP(11,0)<32;16,2>
2059
 cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(13,1)<32;16,2>  	ubMEDIAN_TEMP(11,1)<32;16,2>
2060
 
2061
 (f0.0) mov (16) ubMEDIAN_TEMP(13,0)<2>    	ubMEDIAN_TEMP(11,0)<32;16,2>
2062
 (f0.1) mov (16) ubMEDIAN_TEMP(13,1)<2>		ubMEDIAN_TEMP(11,1)<32;16,2>
2063
 
2064
// Sobel Value calculation for the current pixel v4
2065
//          v2 v1 v0
2066
//           *  *  *     <--- Different field - not used
2067
//          v5 v4 v3
2068
//           *  *  *     <--- Different field - not used
2069
//          v8 v7 v6
2070
//
2071
//    Gx = -v0 - 2*v3 - v6 + v2 + 2*v5 + v8
2072
//    Gy =  v0 + 2*v1 + v2 - v6 - 2*v7 - v8
2073
//
2074
//  Sobel = (|Gx| + |Gy|) >> 3
2075
//
// Addressing used by the code below (byte offsets from a0.0, operands read as :ub):
//    v0,v1,v2 at +0,+2,+4     v3,v4,v5 at +64,+66,+68     v6,v7,v8 at +128,+130,+132
// Horizontal neighbours are 2 bytes apart (presumably because U and V bytes
// are interleaved - confirm against the load code). Each further output row
// uses the same pattern with every offset increased by 32.
 
2076
//TODO - Change Later - rT
2077
// Move the indirect base a0.0 back by 128 bytes before the Sobel reads.
add (1) a0.0:uw  a0.0<0;1,0>:uw -128:uw
2078
 
2079
// Gx for output row 0: mul starts the sum in acc0, every mac adds one more
// weighted term, and the final mac writes the finished sum to wSOBEL_X(0).
// - 2 * v3
2080
mul (16) acc0.0<1>:w  		r[a0.0,64]<16;16,1>:ub  		-2:w
2081
// + v8
2082
mac (16) acc0.0<1>:w  		r[a0.0,132]<16;16,1>:ub   		1:w
2083
// - v0
2084
mac (16) acc0.0<1>:w  		r[a0.0,0]<16;16,1>:ub  		-1:w
2085
// - v6
2086
mac (16) acc0.0<1>:w  		r[a0.0,128]<16;16,1>:ub  		-1:w
2087
// + v2
2088
mac (16) acc0.0<1>:w  		r[a0.0,4]<16;16,1>:ub   		1:w
2089
// + 2 * v5
2090
mac (16) wSOBEL_X(0)<1> 	r[a0.0,68]<16;16,1>:ub   		2:w
2091
// - 2 * v3
2092
mul (16) acc0.0<1>:w  		r[a0.0,96]<16;16,1>:ub  		-2:w
2093
// + v8
2094
mac (16) acc0.0<1>:w  		r[a0.0,164]<16;16,1>:ub   		1:w
2095
// - v0
2096
mac (16) acc0.0<1>:w  		r[a0.0,32]<16;16,1>:ub  		-1:w
2097
// - v6
2098
mac (16) acc0.0<1>:w  		r[a0.0,160]<16;16,1>:ub  		-1:w
2099
// + v2
2100
mac (16) acc0.0<1>:w  		r[a0.0,36]<16;16,1>:ub   		1:w
2101
// + 2 * v5
2102
mac (16) wSOBEL_X(1)<1> 	r[a0.0,100]<16;16,1>:ub   		2:w
2103
// - 2 * v3
2104
mul (16) acc0.0<1>:w  		r[a0.0,128]<16;16,1>:ub  		-2:w
2105
// + v8
2106
mac (16) acc0.0<1>:w  		r[a0.0,196]<16;16,1>:ub   		1:w
2107
// - v0
2108
mac (16) acc0.0<1>:w  		r[a0.0,64]<16;16,1>:ub  		-1:w
2109
// - v6
2110
mac (16) acc0.0<1>:w  		r[a0.0,192]<16;16,1>:ub  		-1:w
2111
// + v2
2112
mac (16) acc0.0<1>:w  		r[a0.0,68]<16;16,1>:ub   		1:w
2113
// + 2 * v5
2114
mac (16) wSOBEL_X(2)<1> 	r[a0.0,132]<16;16,1>:ub   		2:w
2115
// - 2 * v3
2116
mul (16) acc0.0<1>:w  		r[a0.0,160]<16;16,1>:ub  		-2:w
2117
// + v8
2118
mac (16) acc0.0<1>:w  		r[a0.0,228]<16;16,1>:ub   		1:w
2119
// - v0
2120
mac (16) acc0.0<1>:w  		r[a0.0,96]<16;16,1>:ub  		-1:w
2121
// - v6
2122
mac (16) acc0.0<1>:w  		r[a0.0,224]<16;16,1>:ub  		-1:w
2123
// + v2
2124
mac (16) acc0.0<1>:w  		r[a0.0,100]<16;16,1>:ub   		1:w
2125
// + 2 * v5
2126
mac (16) wSOBEL_X(3)<1> 	r[a0.0,164]<16;16,1>:ub   		2:w
2127
 
2128
// + 2 * v1
2129
mul (16) acc0.0<1>:w  r[a0.0,2]<16;16,1>:ub   	2:w
2130
// + v0
2131
mac (16) acc0.0<1>:w  r[a0.0,0]<16;16,1>:ub   	1:w
2132
// - v8
2133
mac (16) acc0.0<1>:w  r[a0.0,132]<16;16,1>:ub  -1:w
2134
// + v2
2135
mac (16) acc0.0<1>:w  r[a0.0,4]<16;16,1>:ub   	1:w
2136
// - v6
2137
mac (16) acc0.0<1>:w  r[a0.0,128]<16;16,1>:ub  -1:w
2138
// - 2 * v7
2139
mac (16) acc0.0<1>:w  r[a0.0,130]<16;16,1>:ub  -2:w
2140
 
2141
add (16) acc0.0<1>:uw (abs)acc0.0<16;16,1>:w		(abs)wSOBEL_X(0)<16;16,1>
2142
 
2143
shr (16) uwSOBEL(0)<1>	acc0.0<16;16,1>:uw   3:uw
2144
// + 2 * v1
2145
mul (16) acc0.0<1>:w  r[a0.0,34]<16;16,1>:ub   	2:w
2146
// + v0
2147
mac (16) acc0.0<1>:w  r[a0.0,32]<16;16,1>:ub   	1:w
2148
// - v8
2149
mac (16) acc0.0<1>:w  r[a0.0,164]<16;16,1>:ub  -1:w
2150
// + v2
2151
mac (16) acc0.0<1>:w  r[a0.0,36]<16;16,1>:ub   	1:w
2152
// - v6
2153
mac (16) acc0.0<1>:w  r[a0.0,160]<16;16,1>:ub  -1:w
2154
// - 2 * v7
2155
mac (16) acc0.0<1>:w  r[a0.0,162]<16;16,1>:ub  -2:w
2156
 
2157
add (16) acc0.0<1>:uw (abs)acc0.0<16;16,1>:w		(abs)wSOBEL_X(1)<16;16,1>
2158
 
2159
shr (16) uwSOBEL(1)<1>	acc0.0<16;16,1>:uw   3:uw
2160
// + 2 * v1
2161
mul (16) acc0.0<1>:w  r[a0.0,66]<16;16,1>:ub   	2:w
2162
// + v0
2163
mac (16) acc0.0<1>:w  r[a0.0,64]<16;16,1>:ub   	1:w
2164
// - v8
2165
mac (16) acc0.0<1>:w  r[a0.0,196]<16;16,1>:ub  -1:w
2166
// + v2
2167
mac (16) acc0.0<1>:w  r[a0.0,68]<16;16,1>:ub   	1:w
2168
// - v6
2169
mac (16) acc0.0<1>:w  r[a0.0,192]<16;16,1>:ub  -1:w
2170
// - 2 * v7
2171
mac (16) acc0.0<1>:w  r[a0.0,194]<16;16,1>:ub  -2:w
2172
 
2173
add (16) acc0.0<1>:uw (abs)acc0.0<16;16,1>:w		(abs)wSOBEL_X(2)<16;16,1>
2174
 
2175
shr (16) uwSOBEL(2)<1>	acc0.0<16;16,1>:uw   3:uw
2176
// + 2 * v1
2177
mul (16) acc0.0<1>:w  r[a0.0,98]<16;16,1>:ub   	2:w
2178
// + v0
2179
mac (16) acc0.0<1>:w  r[a0.0,96]<16;16,1>:ub   	1:w
2180
// - v8
2181
mac (16) acc0.0<1>:w  r[a0.0,228]<16;16,1>:ub  -1:w
2182
// + v2
2183
mac (16) acc0.0<1>:w  r[a0.0,100]<16;16,1>:ub   	1:w
2184
// - v6
2185
mac (16) acc0.0<1>:w  r[a0.0,224]<16;16,1>:ub  -1:w
2186
// - 2 * v7
2187
mac (16) acc0.0<1>:w  r[a0.0,226]<16;16,1>:ub  -2:w
2188
 
2189
add (16) acc0.0<1>:uw (abs)acc0.0<16;16,1>:w		(abs)wSOBEL_X(3)<16;16,1>
2190
 
2191
shr (16) uwSOBEL(3)<1>	acc0.0<16;16,1>:uw   3:uw
2192
 
2193
//Mov Median in CURBE_TEMP to free up temp space.
// Copies the two median results out of ubMEDIAN_TEMP into ubMEDIAN, 16 bytes
// per instruction:
//   row 4  (v4 of the network on rows 0..8)   -> ubMEDIAN bytes 0..31
//   row 13 (v4 of the network on rows 9..17)  -> ubMEDIAN bytes 32..63
// NOTE(review): the second half is read back later as ubMEDIAN(1,0)/(1,16),
// which assumes offsets 32/48 of register 0 alias the next 32-byte register.
2194
mov (16)	ubMEDIAN(0,0)<1>  	ubMEDIAN_TEMP(4,0)<16;16,1>
2195
mov (16)	ubMEDIAN(0,16)<1> ubMEDIAN_TEMP(4,16)<16;16,1>
2196
mov (16)	ubMEDIAN(0,32)<1>  	ubMEDIAN_TEMP(13,0)<16;16,1>
2197
mov (16)	ubMEDIAN(0,48)<1> ubMEDIAN_TEMP(13,16)<16;16,1>
2198
 
2199
// Find:
2200
//      absDiff = abs(ubCurY - ubMedian)
2201
// Find the difference between pixel and median value.
2202
 
2203
//Median is interleaved. So difference is also interleaved.
2204
 
2205
//------------------------------------------------------------------------------------------
2206
//Process 16 U and 16 V pixels here and rest later.
2207
// first row - v0,v1,v2
2208
add (16) wDIFF(0)<1>   r[a0.0,0]<16;16,1>:ub  -ubMEDIAN(0,0)<16;16,1>
2209
add (16) wDIFF(1)<1>   r[a0.0,2]<16;16,1>:ub  -ubMEDIAN(0,0)<16;16,1>
2210
add (16) wDIFF(2)<1>   r[a0.0,4]<16;16,1>:ub  -ubMEDIAN(0,0)<16;16,1>
2211
 
2212
// second row - v3,v4,v5
2213
add (16) wDIFF(3)<1>   r[a0.0,64]<16;16,1>:ub  -ubMEDIAN(0,0)<16;16,1>
2214
add (16) wDIFF(4)<1>   r[a0.0,66]<16;16,1>:ub  -ubMEDIAN(0,0)<16;16,1>
2215
add (16) wDIFF(5)<1>   r[a0.0,68]<16;16,1>:ub  -ubMEDIAN(0,0)<16;16,1>
2216
 
2217
// third row - v6,v7,v8
2218
add (16) wDIFF(6)<1>   r[a0.0,128]<16;16,1>:ub  -ubMEDIAN(0,0)<16;16,1>
2219
add (16) wDIFF(7)<1>   r[a0.0,130]<16;16,1>:ub  -ubMEDIAN(0,0)<16;16,1>
2220
add (16) wDIFF(8)<1>   r[a0.0,132]<16;16,1>:ub  -ubMEDIAN(0,0)<16;16,1>
2221
// first row - v0,v1,v2
2222
add (16) wDIFF(9)<1>   r[a0.0,32]<16;16,1>:ub  -ubMEDIAN(0,16)<16;16,1>
2223
add (16) wDIFF(10)<1>   r[a0.0,34]<16;16,1>:ub  -ubMEDIAN(0,16)<16;16,1>
2224
add (16) wDIFF(11)<1>   r[a0.0,36]<16;16,1>:ub  -ubMEDIAN(0,16)<16;16,1>
2225
 
2226
// second row - v3,v4,v5
2227
add (16) wDIFF(12)<1>   r[a0.0,96]<16;16,1>:ub  -ubMEDIAN(0,16)<16;16,1>
2228
add (16) wDIFF(13)<1>   r[a0.0,98]<16;16,1>:ub  -ubMEDIAN(0,16)<16;16,1>
2229
add (16) wDIFF(14)<1>   r[a0.0,100]<16;16,1>:ub  -ubMEDIAN(0,16)<16;16,1>
2230
 
2231
// third row - v6,v7,v8
2232
add (16) wDIFF(15)<1>   r[a0.0,160]<16;16,1>:ub  -ubMEDIAN(0,16)<16;16,1>
2233
add (16) wDIFF(16)<1>   r[a0.0,162]<16;16,1>:ub  -ubMEDIAN(0,16)<16;16,1>
2234
add (16) wDIFF(17)<1>   r[a0.0,164]<16;16,1>:ub  -ubMEDIAN(0,16)<16;16,1>
2235
 
2236
//TODO - Change Later - rT
2237
add (1) a0.0:uw  a0.0<0;1,0>:uw 64:uw
2238
 
2239
// Find sum of all absolute differences AND
2240
// maximum absolute difference for 16 U and 16 V here.
2241
//First 2 rows of 8x4
2242
// The pairwise compares are issued first; their four flag registers are
// consumed by the sel instructions after the SAD chain. The adds in between
// carry no condition modifier, so they leave those flags as set here.
//Compare 0-1, 2-3, 4-5, 6-7
2243
cmp.g.f0.0 (16) null:uw         (abs)wDIFF(0)<16;16,1>   (abs)wDIFF(1)<16;16,1>
2244
cmp.g.f0.1 (16) null:uw         (abs)wDIFF(2)<16;16,1>   (abs)wDIFF(3)<16;16,1>
2245
cmp.g.f1.0 (16) null:uw         (abs)wDIFF(4)<16;16,1>   (abs)wDIFF(5)<16;16,1>
2246
cmp.g.f1.1 (16) null:uw         (abs)wDIFF(6)<16;16,1>   (abs)wDIFF(7)<16;16,1>
2247
 
2248
//Calculate SAD
// Accumulate |wDIFF(0)| .. |wDIFF(8)| in acc0; the last add stores the total
// in uwSOAD(0).
2249
	add        (16) acc0.0<1>:uw     (abs)wDIFF(0)<16;16,1>  (abs)wDIFF(1)<16;16,1>
2250
	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(2)<16;16,1>
2251
	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(3)<16;16,1>
2252
	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(4)<16;16,1>
2253
	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(5)<16;16,1>
2254
	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(6)<16;16,1>
2255
	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(7)<16;16,1>
2256
	add        (16) uwSOAD(0)<1>  	 acc0.0<16;16,1>:uw 		(abs)wDIFF(8)<16;16,1>
2257
 
2258
// First stage of the maximum: per channel, take the first operand where the
// matching compare found it greater, otherwise the second, so that
// uwCURBE_TEMP(0..3) = max of each |wDIFF| pair.
(f0.0) sel (16) uwCURBE_TEMP(0)<1> (abs)wDIFF(0)<16;16,1>   (abs)wDIFF(1)<16;16,1>
2259
(f0.1) sel (16) uwCURBE_TEMP(1)<1> (abs)wDIFF(2)<16;16,1>   (abs)wDIFF(3)<16;16,1>
2260
(f1.0) sel (16) uwCURBE_TEMP(2)<1> (abs)wDIFF(4)<16;16,1>   (abs)wDIFF(5)<16;16,1>
2261
(f1.1) sel (16) uwCURBE_TEMP(3)<1> (abs)wDIFF(6)<16;16,1>   (abs)wDIFF(7)<16;16,1>
2262
 
2263
//------------
2264
	//DIFF(0-7) is not needed here. Populate it.
2265
	// first row - v0,v1,v2
2266
	add (16) wDIFF(0)<1>   r[a0.0,0]<16;16,1>:ub  -ubMEDIAN(1,0)<16;16,1>
2267
	add (16) wDIFF(1)<1>   r[a0.0,2]<16;16,1>:ub  -ubMEDIAN(1,0)<16;16,1>
2268
	add (16) wDIFF(2)<1>   r[a0.0,4]<16;16,1>:ub  -ubMEDIAN(1,0)<16;16,1>
2269
 
2270
	// second row - v3,v4,v5
2271
	add (16) wDIFF(3)<1>   r[a0.0,64]<16;16,1>:ub  -ubMEDIAN(1,0)<16;16,1>
2272
	add (16) wDIFF(4)<1>   r[a0.0,66]<16;16,1>:ub  -ubMEDIAN(1,0)<16;16,1>
2273
	add (16) wDIFF(5)<1>   r[a0.0,68]<16;16,1>:ub  -ubMEDIAN(1,0)<16;16,1>
2274
 
2275
	// third row - v6,v7
2276
	add (16) wDIFF(6)<1>   r[a0.0,128]<16;16,1>:ub  -ubMEDIAN(1,0)<16;16,1>
2277
	add (16) wDIFF(7)<1>   r[a0.0,130]<16;16,1>:ub  -ubMEDIAN(1,0)<16;16,1>
2278
//------------
2279
 
2280
//Compare Max(0,1) - Max(2,3), Max(4,5) - Max(6,7)
2281
cmp.g.f0.0 (16) null:uw      uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(1)<16;16,1>
2282
cmp.g.f0.1 (16) null:uw      uwCURBE_TEMP(2)<16;16,1>   uwCURBE_TEMP(3)<16;16,1>
2283
 
2284
(f0.0)sel (16) uwCURBE_TEMP(0)<1>  uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(1)<16;16,1>
2285
(f0.1)sel (16) uwCURBE_TEMP(2)<1>  uwCURBE_TEMP(2)<16;16,1>   uwCURBE_TEMP(3)<16;16,1>
2286
 
2287
//Compare Max(0,1,2,3) - Max(4,5,6,7)
2288
cmp.g.f0.0 (16) null:uw      		uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(2)<16;16,1>
2289
(f0.0)sel  (16) uwCURBE_TEMP(0)<1> 	uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(2)<16;16,1>
2290
 
2291
//Compare Max(0,1,2,3,4,5,6,7) - 8
2292
cmp.g.f0.0 (16) null:uw      			uwCURBE_TEMP(0)<16;16,1>   (abs)wDIFF(8)<16;16,1>
2293
(f0.0)sel  (16) uwMAX_ABS_DIFF(0)<1> uwCURBE_TEMP(0)<16;16,1>   (abs)wDIFF(8)<16;16,1>
2294
 
2295
//------------
2296
	//Load v8 - DIFF(8)
2297
	add (16) wDIFF(8)<1>   			r[a0.0,132]<16;16,1>:ub  -ubMEDIAN(1,0)<16;16,1>
2298
//------------
2299
//Compare 9-10, 11-12, 13-14, 15-16 (wDIFF registers of the second row)
2300
cmp.g.f0.0 (16) null:uw         (abs)wDIFF(9)<16;16,1>   (abs)wDIFF(10)<16;16,1>
2301
cmp.g.f0.1 (16) null:uw         (abs)wDIFF(11)<16;16,1>   (abs)wDIFF(12)<16;16,1>
2302
cmp.g.f1.0 (16) null:uw         (abs)wDIFF(13)<16;16,1>   (abs)wDIFF(14)<16;16,1>
2303
cmp.g.f1.1 (16) null:uw         (abs)wDIFF(15)<16;16,1>   (abs)wDIFF(16)<16;16,1>
2304
 
2305
//Calculate SAD
2306
	add        (16) acc0.0<1>:uw     (abs)wDIFF(9)<16;16,1>  (abs)wDIFF(10)<16;16,1>
2307
	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(11)<16;16,1>
2308
	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(12)<16;16,1>
2309
	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(13)<16;16,1>
2310
	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(14)<16;16,1>
2311
	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(15)<16;16,1>
2312
	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(16)<16;16,1>
2313
	add        (16) uwSOAD(1)<1>  	 acc0.0<16;16,1>:uw 		(abs)wDIFF(17)<16;16,1>
2314
 
2315
(f0.0) sel (16) uwCURBE_TEMP(0)<1> (abs)wDIFF(9)<16;16,1>   (abs)wDIFF(10)<16;16,1>
2316
(f0.1) sel (16) uwCURBE_TEMP(1)<1> (abs)wDIFF(11)<16;16,1>   (abs)wDIFF(12)<16;16,1>
2317
(f1.0) sel (16) uwCURBE_TEMP(2)<1> (abs)wDIFF(13)<16;16,1>   (abs)wDIFF(14)<16;16,1>
2318
(f1.1) sel (16) uwCURBE_TEMP(3)<1> (abs)wDIFF(15)<16;16,1>   (abs)wDIFF(16)<16;16,1>
2319
 
2320
//------------
2321
	//The old wDIFF(9-16) values are not needed any more here. Repopulate them.
2322
	// first row - v0,v1,v2
2323
	add (16) wDIFF(9)<1>   r[a0.0,32]<16;16,1>:ub  -ubMEDIAN(1,16)<16;16,1>
2324
	add (16) wDIFF(10)<1>   r[a0.0,34]<16;16,1>:ub  -ubMEDIAN(1,16)<16;16,1>
2325
	add (16) wDIFF(11)<1>   r[a0.0,36]<16;16,1>:ub  -ubMEDIAN(1,16)<16;16,1>
2326
 
2327
	// second row - v3,v4,v5
2328
	add (16) wDIFF(12)<1>   r[a0.0,96]<16;16,1>:ub  -ubMEDIAN(1,16)<16;16,1>
2329
	add (16) wDIFF(13)<1>   r[a0.0,98]<16;16,1>:ub  -ubMEDIAN(1,16)<16;16,1>
2330
	add (16) wDIFF(14)<1>   r[a0.0,100]<16;16,1>:ub  -ubMEDIAN(1,16)<16;16,1>
2331
 
2332
	// third row - v6,v7
2333
	add (16) wDIFF(15)<1>   r[a0.0,160]<16;16,1>:ub  -ubMEDIAN(1,16)<16;16,1>
2334
	add (16) wDIFF(16)<1>   r[a0.0,162]<16;16,1>:ub  -ubMEDIAN(1,16)<16;16,1>
2335
//------------
2336
 
2337
//Compare Max(0,1) - Max(2,3), Max(4,5) - Max(6,7)
2338
cmp.g.f0.0 (16) null:uw      uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(1)<16;16,1>
2339
cmp.g.f0.1 (16) null:uw      uwCURBE_TEMP(2)<16;16,1>   uwCURBE_TEMP(3)<16;16,1>
2340
 
2341
(f0.0)sel (16) uwCURBE_TEMP(0)<1>  uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(1)<16;16,1>
2342
(f0.1)sel (16) uwCURBE_TEMP(2)<1>  uwCURBE_TEMP(2)<16;16,1>   uwCURBE_TEMP(3)<16;16,1>
2343
 
2344
//Compare Max(0,1,2,3) - Max(4,5,6,7)
2345
cmp.g.f0.0 (16) null:uw      		uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(2)<16;16,1>
2346
(f0.0)sel  (16) uwCURBE_TEMP(0)<1> 	uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(2)<16;16,1>
2347
 
2348
//Compare Max(0,1,2,3,4,5,6,7) - 8
2349
cmp.g.f0.0 (16) null:uw      			uwCURBE_TEMP(0)<16;16,1>   (abs)wDIFF(17)<16;16,1>
2350
(f0.0)sel  (16) uwMAX_ABS_DIFF(1)<1> uwCURBE_TEMP(0)<16;16,1>   (abs)wDIFF(17)<16;16,1>
2351
 
2352
//------------
2353
	//Load v8 - DIFF(17)
2354
	add (16) wDIFF(17)<1>   			r[a0.0,164]<16;16,1>:ub  -ubMEDIAN(1,16)<16;16,1>
2355
//------------
2356
 
2357
//if ((sobel_edge_measure < m_SobelEdgeThreshold) && ((block_max-block_min) < m_LocalDiffThreshold))
2358
//						if (sigma_mb_min > sigma)
2359
//							sigma_mb_min = sigma;
2360
 
2361
//NOTE: block_min is always zero as the median is one of the values in the 3x3 block. So there is no need to calculate it.
2362
//		So just do -
2363
//if ((sobel_edge_measure < m_SobelEdgeThreshold) && ((block_max) < m_LocalDiffThreshold) && ( sigma < sigma_mb_min))
2364
//							sigma_mb_min = sigma;
2365
 
2366
//We are processing 32 bytes of U and 32 bytes of V - each of size 8x4.
2367
//Compare first 8 bytes with max possible (255).
2368
//Start above condition from second 8 bytes.
2369
 
2370
//TODO - Change Later - rT
2371
//	mov (1)	pCUR_MIN_SOAD_8x4:uw	1752:uw		//r54.24:ub
2372
 
2373
// Implementation notes for the two rows below:
//  * The three conditions are evaluated as a chain of compares on f0.0. The
//    second and third compare are predicated on f0.0, so a channel can only
//    stay set if it passed the previous test (intended as a logical AND).
//  * r55.30 / r55.28 are read as byte pairs with a <0;2,1> region, i.e. the
//    same two bytes repeated across the channels. Presumably these are the
//    per-plane Sobel-edge and local-diff thresholds - confirm against the
//    static parameter layout.
//  * uwSOBEL(0) is reused as the running minimum SOAD once its Sobel value
//    has been tested.
//First row of 8x4
2374
        cmp.l.f0.0 	(16) null:uw     		uwSOBEL(0)<16;16,1>         r55.30<0;2,1>:ub
2375
(f0.0)  cmp.l.f0.0 	(16) null:uw     		uwMAX_ABS_DIFF(0)<16;16,1>  r55.28<0;2,1>:ub
2376
(f0.0)  cmp.l.f0.0 	(16) null:uw     		uwSOAD(0)<16;16,1>			255:uw
2377
// Initial minimum: SOAD of row 0 where every test passed, otherwise 255.
(f0.0)  sel 		(16) uwSOBEL(0)<1>   uwSOAD(0)<16;16,1>			255:uw
2378
 
2379
//Second row of 8x4
2380
		cmp.l.f0.0 	(16) null:uw     		uwSOBEL(1)<16;16,1>         r55.30<0;2,1>:ub
2381
(f0.0)  cmp.l.f0.0 	(16) null:uw     		uwMAX_ABS_DIFF(1)<16;16,1>  r55.28<0;2,1>:ub
2382
(f0.0)  cmp.l.f0.0 	(16) null:uw     		uwSOAD(1)<16;16,1>			uwSOBEL(0)<16;16,1>
2383
// Lower the running minimum where row 1 qualifies and has a smaller SOAD.
(f0.0)  mov 		(16) uwSOBEL(0)<1>   uwSOAD(1)<16;16,1>
2384
 
2385
// Find sum of all absolute differences AND
2386
// maximum absolute difference for 16 U and 16 V here.
2387
//Second 2 rows of 8x4
2388
//Compare 0-1, 2-3, 4-5, 6-7
2389
cmp.g.f0.0 (16) null:uw         (abs)wDIFF(0)<16;16,1>   (abs)wDIFF(1)<16;16,1>
2390
cmp.g.f0.1 (16) null:uw         (abs)wDIFF(2)<16;16,1>   (abs)wDIFF(3)<16;16,1>
2391
cmp.g.f1.0 (16) null:uw         (abs)wDIFF(4)<16;16,1>   (abs)wDIFF(5)<16;16,1>
2392
cmp.g.f1.1 (16) null:uw         (abs)wDIFF(6)<16;16,1>   (abs)wDIFF(7)<16;16,1>
2393
 
2394
//Calculate SAD
2395
	add        (16) acc0.0<1>:uw     (abs)wDIFF(0)<16;16,1>  (abs)wDIFF(1)<16;16,1>
2396
	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(2)<16;16,1>
2397
	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(3)<16;16,1>
2398
	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(4)<16;16,1>
2399
	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(5)<16;16,1>
2400
	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(6)<16;16,1>
2401
	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(7)<16;16,1>
2402
	add        (16) uwSOAD(0)<1> 	 acc0.0<16;16,1>:uw 		(abs)wDIFF(8)<16;16,1>
2403
 
2404
(f0.0) sel (16) uwCURBE_TEMP(0)<1> (abs)wDIFF(0)<16;16,1>   (abs)wDIFF(1)<16;16,1>
2405
(f0.1) sel (16) uwCURBE_TEMP(1)<1> (abs)wDIFF(2)<16;16,1>   (abs)wDIFF(3)<16;16,1>
2406
(f1.0) sel (16) uwCURBE_TEMP(2)<1> (abs)wDIFF(4)<16;16,1>   (abs)wDIFF(5)<16;16,1>
2407
(f1.1) sel (16) uwCURBE_TEMP(3)<1> (abs)wDIFF(6)<16;16,1>   (abs)wDIFF(7)<16;16,1>
2408
 
2409
//Compare Max(0,1) - Max(2,3), Max(4,5) - Max(6,7)
2410
cmp.g.f0.0 (16) null:uw      uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(1)<16;16,1>
2411
cmp.g.f0.1 (16) null:uw      uwCURBE_TEMP(2)<16;16,1>   uwCURBE_TEMP(3)<16;16,1>
2412
 
2413
(f0.0)sel (16) uwCURBE_TEMP(0)<1>  uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(1)<16;16,1>
2414
(f0.1)sel (16) uwCURBE_TEMP(2)<1>  uwCURBE_TEMP(2)<16;16,1>   uwCURBE_TEMP(3)<16;16,1>
2415
 
2416
//Compare Max(0,1,2,3) - Max(4,5,6,7)
2417
cmp.g.f0.0 (16) null:uw      		uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(2)<16;16,1>
2418
(f0.0)sel  (16) uwCURBE_TEMP(0)<1> 	uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(2)<16;16,1>
2419
 
2420
//Compare Max(0,1,2,3,4,5,6,7) - 8
2421
cmp.g.f0.0 (16) null:uw      			uwCURBE_TEMP(0)<16;16,1>   (abs)wDIFF(8)<16;16,1>
2422
(f0.0)sel  (16) uwMAX_ABS_DIFF(0)<1> 	uwCURBE_TEMP(0)<16;16,1>   (abs)wDIFF(8)<16;16,1>
2423
//Compare 0-1, 2-3, 4-5, 6-7
2424
cmp.g.f0.0 (16) null:uw         (abs)wDIFF(9)<16;16,1>   (abs)wDIFF(10)<16;16,1>
2425
cmp.g.f0.1 (16) null:uw         (abs)wDIFF(11)<16;16,1>   (abs)wDIFF(12)<16;16,1>
2426
cmp.g.f1.0 (16) null:uw         (abs)wDIFF(13)<16;16,1>   (abs)wDIFF(14)<16;16,1>
2427
cmp.g.f1.1 (16) null:uw         (abs)wDIFF(15)<16;16,1>   (abs)wDIFF(16)<16;16,1>
2428
 
2429
//Calculate SAD
2430
	add        (16) acc0.0<1>:uw     (abs)wDIFF(9)<16;16,1>  (abs)wDIFF(10)<16;16,1>
2431
	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(11)<16;16,1>
2432
	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(12)<16;16,1>
2433
	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(13)<16;16,1>
2434
	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(14)<16;16,1>
2435
	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(15)<16;16,1>
2436
	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(16)<16;16,1>
2437
	add        (16) uwSOAD(1)<1> 	 acc0.0<16;16,1>:uw 		(abs)wDIFF(17)<16;16,1>
2438
 
2439
(f0.0) sel (16) uwCURBE_TEMP(0)<1> (abs)wDIFF(9)<16;16,1>   (abs)wDIFF(10)<16;16,1>
2440
(f0.1) sel (16) uwCURBE_TEMP(1)<1> (abs)wDIFF(11)<16;16,1>   (abs)wDIFF(12)<16;16,1>
2441
(f1.0) sel (16) uwCURBE_TEMP(2)<1> (abs)wDIFF(13)<16;16,1>   (abs)wDIFF(14)<16;16,1>
2442
(f1.1) sel (16) uwCURBE_TEMP(3)<1> (abs)wDIFF(15)<16;16,1>   (abs)wDIFF(16)<16;16,1>
2443
 
2444
//Compare Max(0,1) - Max(2,3), Max(4,5) - Max(6,7)
2445
cmp.g.f0.0 (16) null:uw      uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(1)<16;16,1>
2446
cmp.g.f0.1 (16) null:uw      uwCURBE_TEMP(2)<16;16,1>   uwCURBE_TEMP(3)<16;16,1>
2447
 
2448
(f0.0)sel (16) uwCURBE_TEMP(0)<1>  uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(1)<16;16,1>
2449
(f0.1)sel (16) uwCURBE_TEMP(2)<1>  uwCURBE_TEMP(2)<16;16,1>   uwCURBE_TEMP(3)<16;16,1>
2450
 
2451
//Compare Max(0,1,2,3) - Max(4,5,6,7)
2452
cmp.g.f0.0 (16) null:uw      		uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(2)<16;16,1>
2453
(f0.0)sel  (16) uwCURBE_TEMP(0)<1> 	uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(2)<16;16,1>
2454
 
2455
//Compare Max(0,1,2,3,4,5,6,7) - 8
2456
cmp.g.f0.0 (16) null:uw      			uwCURBE_TEMP(0)<16;16,1>   (abs)wDIFF(17)<16;16,1>
2457
(f0.0)sel  (16) uwMAX_ABS_DIFF(1)<1> 	uwCURBE_TEMP(0)<16;16,1>   (abs)wDIFF(17)<16;16,1>
2458
 
2459
//Third row of 8x4
2460
        cmp.l.f0.0 	(16) null:uw     		uwSOBEL(2)<16;16,1>     	r55.30<0;2,1>:ub
2461
(f0.0)  cmp.l.f0.0 	(16) null:uw     		uwMAX_ABS_DIFF(0)<16;16,1>  r55.28<0;2,1>:ub
2462
(f0.0)  cmp.l.f0.0 	(16) null:uw     		uwSOAD(0)<16;16,1>			uwSOBEL(0)<16;16,1>
2463
(f0.0)  mov 		(16) uwSOBEL(0)<1>   uwSOAD(0)<16;16,1>
2464
 
2465
//Fourth row of 8x4
2466
		cmp.l.f0.0 	(16) null:uw     		uwSOBEL(3)<16;16,1>     	r55.30<0;2,1>:ub
2467
(f0.0)  cmp.l.f0.0 	(16) null:uw     		uwMAX_ABS_DIFF(1)<16;16,1>  r55.28<0;2,1>:ub
2468
(f0.0)  cmp.l.f0.0 	(16) null:uw     		uwSOAD(1)<16;16,1>			uwSOBEL(0)<16;16,1>
2469
(f0.0)  mov 		(16) uwSOBEL(0)<1>   uwSOAD(1)<16;16,1>
2470
 
2471
		// Fold the 16 per-channel minima held in uwSOBEL(0) down to 2 words by
		// repeatedly keeping the smaller of the lower and upper half:
		// 16 -> 8 -> 4 -> 2. Elements are always paired with one an even
		// distance away (8, 4, 2), so even and odd words never mix
		// (presumably one result per chroma plane - confirm).
		cmp.l.f0.0 	(8) null:uw     		uwSOBEL(0,0)<8;8,1>  	uwSOBEL(0,8)<8;8,1>
2472
(f0.0)  sel 		(8) uwSOBEL(0)<1>   	uwSOBEL(0,0)<8;8,1>  	uwSOBEL(0,8)<8;8,1>
2473
 
2474
		cmp.l.f0.0 	(4) null:uw     		uwSOBEL(0,0)<4;4,1>  	uwSOBEL(0,4)<4;4,1>
2475
(f0.0)  sel 		(4) uwSOBEL(0)<1>   	uwSOBEL(0,0)<4;4,1>  	uwSOBEL(0,4)<4;4,1>
2476
 
2477
		// Last step writes the two surviving words through the indirect
		// address a0.1 instead of back into uwSOBEL(0).
		cmp.l.f0.0 	(2) null:uw     					uwSOBEL(0,0)<2;2,1>  uwSOBEL(0,2)<2;2,1>
2478
(f0.0)  sel 		(2) r[a0.1,0]<1>:uw   	uwSOBEL(0,0)<2;2,1>  uwSOBEL(0,2)<2;2,1>
2479
 
2480
 
2481
 
2482
 
2483
 
2484
 
2485
// End of common.inc
2486
 
2487
// Leave this module by loading the instruction pointer from r7.7
// (presumably the continuation address prepared by the caller for the
// "Fast Jump" mechanism mentioned at the top of the file - confirm).
mov (1) ip:ud r7.7<0;1,0>:d
2488
 
2489
 
2490
.end_code
2491
.end_kernel