Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5361 | serge | 1 | /* |
2 | * Copyright © <2010>, Intel Corporation. |
||
3 | * |
||
4 | * This program is licensed under the terms and conditions of the |
||
5 | * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at |
||
6 | * http://www.opensource.org/licenses/eclipse-1.0.php. |
||
7 | * |
||
8 | */ |
||
9 | ////////////////////////////////////////////////////////////////////////////////////////////////////////////// |
||
10 | // AVC Child Kernel (Vertical and horizontal de-block a 4:2:0 MB UV comp) |
||
11 | // |
||
12 | // First de-block vertical edges from left to right. |
||
13 | // Second de-block horizontal edge from top to bottom. |
||
14 | // |
||
15 | // For 4:2:0, chroma is always de-blocked at 8x8. |
||
16 | // NV12 format allows U and V to be filtered together. |
||
17 | // |
||
18 | ////////////////////////////////////////////////////////////////////////////////////////////////////////////// |
||
19 | #define AVC_ILDB |
||
20 | |||
21 | .kernel AVC_ILDB_CHILD_UV |
||
22 | #if defined(COMBINED_KERNEL) |
||
23 | ILDB_LABEL(AVC_ILDB_CHILD_UV): |
||
24 | #endif |
||
25 | |||
26 | #include "SetupVPKernel.asm" |
||
27 | #include "AVC_ILDB.inc" |
||
28 | |||
29 | #if defined(_DEBUG) |
||
30 | mov (1) EntrySignatureC:w 0x9997:w |
||
31 | #endif |
||
32 | |||
33 | // Init local variables: ORIX_CUR = ORIX (x,y) pair shifted left by 4 |
||
34 | shl (8) ORIX_CUR<1>:w ORIX<0;2,1>:w 4:w // Expand MB addr to bytes (x16), repeat (x,y) 4 times |
||
35 | |||
36 | // Init addr register for vertical control data |
||
37 | mov (1) ECM_AddrReg<1>:w CNTRL_DATA_BASE:w // ECM_AddrReg = CNTRL_DATA_BASE |
||
38 | |||
39 | //=== Null Kernel =============================================================== |
||
40 | // jmpi ILDB_LABEL(POST_ILDB_UV) // (disabled) would jump straight to the notify / end-of-thread path |
||
41 | //=============================================================================== |
||
42 | |||
43 | #if defined(DEV_CL) |
||
44 | mov (1) acc0.0:w 240:w |
||
45 | #else |
||
46 | //==================================================================================== |
||
47 | // For BearLake-C, 64 bytes are stored in memory and dataport expands to 256 bytes. Need to use a special read command on BL-C. |
||
48 | // MB_offset = MBsCntX * CurRow + CurCol (CurRow = ORIY, CurCol = ORIX) |
||
49 | // MBCntrlDataOffsetY = global_byte_offset = MB_offset * 64 |
||
50 | mul (1) CntrlDataOffsetY:ud MBsCntX:w ORIY:w |
||
51 | add (1) CntrlDataOffsetY:ud CntrlDataOffsetY:ud ORIX:w |
||
52 | |||
53 | // Assign the byte offset (MB_offset * 64) to MSGSRC.2:ud for memory access; CntrlDataOffsetY keeps MB_offset |
||
54 | // mul (1) CntrlDataOffsetY:ud CntrlDataOffsetY:ud 64:uw |
||
55 | mul (1) MSGSRC.2:ud CntrlDataOffsetY:ud 64:uw |
||
56 | |||
57 | mov (1) acc0.0:w 320:w |
||
58 | #endif |
||
59 | mac (1) URBOffsetC:w ORIY:w 4:w // URBOffsetC = acc0 (240 or 320, set above) + ORIY * 4; UV URB entries are right after Y entries |
||
60 | |||
61 | |||
62 | // Init local variables: ORIX_LEFT -= 4, ORIY_TOP -= 4 |
||
63 | // shl (8) ORIX_CUR<1>:w ORIX<0;2,1>:w 4:w // (disabled) same shift is already done at the top of the kernel |
||
64 | add (1) ORIX_LEFT:w ORIX_LEFT:w -4:w |
||
65 | add (1) ORIY_TOP:w ORIY_TOP:w -4:w |
||
66 | |||
67 | // Build a ramp from 0 to 15: 8 byte constants replicated into 16 words, then 8 added to the upper 8 words |
||
68 | mov (16) RRampW(0)<1> RampConstC<0;8,1>:ub |
||
69 | add (8) RRampW(0,8)<1> RRampW(0,8) 8:w // RRampW = ramp 15-0 |
||
70 | |||
71 | // Load current MB control data (include file selected by DEV_CL / _APPLE) |
||
72 | #if defined(DEV_CL) |
||
73 | #if defined(_APPLE) |
||
74 | #include "Load_ILDB_Cntrl_Data_22DW.asm" // Crestline for Apple, progressive only |
||
75 | #else |
||
76 | #include "Load_ILDB_Cntrl_Data_64DW.asm" // Crestline |
||
77 | #endif |
||
78 | #else |
||
79 | #include "Load_ILDB_Cntrl_Data_16DW.asm" // Cantiga and beyond |
||
80 | #endif |
||
81 | |||
82 | // Check loaded control data: f0.1 tracks all-zero edge control words, f0.0 tracks DISABLE_ILDB_FLAG |
||
83 | #if defined(_APPLE) |
||
84 | and.z.f0.1 (8) null<1>:uw r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]<8;8,1>:uw 0xFFFF:uw // f0.1 set per channel where the edge control word is 0 |
||
85 | (f0.1) and.z.f0.1 (2) null<1>:uw r[ECM_AddrReg, wEdgeCntlMapA_ExtTopHorz0]<2;2,1>:uw 0xFFFF:uw // Predicated on f0.1: also test the 2 words at wEdgeCntlMapA_ExtTopHorz0 for 0 |
||
86 | #else |
||
87 | and.z.f0.1 (16) null<1>:uw r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]<16;16,1>:uw 0xFFFF:uw // f0.1 set per channel where the edge control word is 0 |
||
88 | #endif |
||
89 | |||
90 | and.nz.f0.0 (1) null:w r[ECM_AddrReg, ExtBitFlags]:ub DISABLE_ILDB_FLAG:w // f0.0 set if DISABLE_ILDB_FLAG is set in ExtBitFlags |
||
91 | |||
92 | mov (1) GateWayOffsetC:uw ORIY:uw // Use row # as Gateway offset |
||
93 | |||
94 | #if defined(_APPLE) |
||
95 | (f0.1.all8h) jmpi ILDB_LABEL(READ_FOR_URB_UV) // Skip ILDB when all 8 f0.1 channels are set |
||
96 | #else |
||
97 | (f0.1.all16h) jmpi ILDB_LABEL(READ_FOR_URB_UV) // Skip ILDB when all 16 f0.1 channels are set |
||
98 | #endif |
||
99 | |||
100 | (f0.0) jmpi ILDB_LABEL(READ_FOR_URB_UV) // Skip ILDB when DISABLE_ILDB_FLAG is set |
||
101 | |||
102 | |||
103 | |||
104 | #include "load_Cur_UV_8x8T.asm" // Load transposed data 8x8 |
||
105 | // #include "load_Left_UV_2x8T.asm" |
||
106 | #include "load_Top_UV_8x2.asm" // Load top MB (8x2) UV data from memory if exists |
||
107 | |||
108 | #include "Transpose_Cur_UV_8x8.asm" |
||
109 | // #include "Transpose_Left_UV_2x8.asm" |
||
110 | |||
111 | |||
112 | //---------- Perform vertical ILDB filtering on UV ---------- |
||
113 | #include "AVC_ILDB_Filter_UV_v.asm" |
||
114 | //--------------------------------------------------------- |
||
115 | |||
116 | #include "save_Left_UV_8x2T.asm" // Write left MB (2x8) UV data to memory if exists |
||
117 | #include "Transpose_Cur_UV_8x8.asm" // Transpose a MB for horizontal edge de-blocking |
||
118 | |||
119 | //---------- Perform horizontal ILDB filtering on UV ---------- |
||
120 | #include "AVC_ILDB_Filter_UV_h.asm" |
||
121 | //----------------------------------------------------------- |
||
122 | |||
123 | #include "save_Cur_UV_8x8.asm" // Write 8x8 |
||
124 | #include "save_Top_UV_8x2.asm" // Write top MB (8x2) if not the top row |
||
125 | |||
126 | //---------- Write right most 4 columns of cur MB to URB ---------- |
||
127 | // Transpose the right most 2 cols 2x8 (word) in GRF to 8x2 in BUF_D. NOTE(review): "It is 2 left most cols in cur MB" in the original is unclear; presumably these become the left-neighbor columns for the next MB - confirm. |
||
128 | #include "Transpose_Cur_UV_2x8.asm" |
||
129 | |||
130 | ILDB_LABEL(WRITE_URB_UV): |
||
131 | mov (8) m1<1>:ud LEFT_TEMP_D(1)<8;8,1> // Copy 1 GRF (LEFT_TEMP_D(1)) into m1: 1 URB entry (U+V) |
||
132 | |||
133 | #include "writeURB_UV_Child.asm" |
||
134 | //----------------------------------------------------------------- |
||
135 | |||
136 | //=========== Check write commit of the last write (self-move on WritebackResponse; presumably stalls until the response has arrived - confirm) ============ |
||
137 | mov (8) WritebackResponse(0)<1> WritebackResponse(0) |
||
138 | |||
139 | ILDB_LABEL(POST_ILDB_UV): |
||
140 | //--------------------------------- |
||
141 | |||
142 | // Send notification through Gateway to root thread, update chroma Status[CurRow] |
||
143 | #include "AVC_ILDB_ForwardMsg.asm" |
||
144 | |||
145 | #if !defined(GW_DCN) // For non-ILK chipsets |
||
146 | // Child sends EOT : Request type = 1 |
||
147 | END_CHILD_THREAD |
||
148 | #endif // !defined(GW_DCN) |
||
149 | |||
150 | // The thread finishes here |
||
151 | //------------------------------------------------------------------------------ |
||
152 | |||
153 | ILDB_LABEL(READ_FOR_URB_UV): |
||
154 | // ILDB was skipped for this MB, but we still need to prepare URB data for the right neighbor MB |
||
155 | #include "load_Cur_UV_Right_Most_2x8.asm" // Load cur MB (right most 2x8) UV data from memory |
||
156 | #include "Transpose_Cur_UV_Right_Most_2x8.asm" |
||
157 | // jmpi ILDB_LABEL(WRITE_URB_UV) // (disabled) the URB write and thread end are duplicated inline below instead |
||
158 | |||
159 | mov (8) m1<1>:ud LEFT_TEMP_D(1)<8;8,1> // Copy 1 GRF (LEFT_TEMP_D(1)) into m1: 1 URB entry (U+V) |
||
160 | |||
161 | #include "writeURB_UV_Child.asm" |
||
162 | //----------------------------------------------------------------- |
||
163 | |||
164 | // Send notification through Gateway to root thread, update chroma Status[CurRow] |
||
165 | #include "AVC_ILDB_ForwardMsg.asm" |
||
166 | |||
167 | #if !defined(GW_DCN) // For non-ILK chipsets |
||
168 | // Child sends EOT : Request type = 1 |
||
169 | END_CHILD_THREAD |
||
170 | #endif // !defined(GW_DCN) |
||
171 | |||
172 | // The thread finishes here |
||
173 | //------------------------------------------------------------------------------ |
||
174 | |||
175 | |||
176 | //////////////////////////////////////////////////////////////////////////////// |
||
177 | // Include other subroutines being called (chroma core only; the luma core is not used by this kernel) |
||
178 | // #include "AVC_ILDB_Luma_Core.asm" |
||
179 | #include "AVC_ILDB_Chroma_Core.asm" |
||
180 | |||
181 | |||
182 | #if !defined(COMBINED_KERNEL) // For standalone kernel only |
||
183 | .end_code |
||
184 | |||
185 | .end_kernel |
||
186 | #endif |