Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5361 | serge | 1 | /* |
2 | * Copyright © <2010>, Intel Corporation. |
||
3 | * |
||
4 | * This program is licensed under the terms and conditions of the |
||
5 | * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at |
||
6 | * http://www.opensource.org/licenses/eclipse-1.0.php. |
||
7 | * Author : Zhao Yakui |
||
8 | */ |
||
9 | // Module name: mpeg2_inter_haswell.asm |
||
10 | // |
||
11 | // Make MPEG2 inter prediction estimation for Inter-frame on Haswell |
||
12 | // |
||
13 | |||
14 | // |
||
15 | // Now, begin source code.... |
||
16 | // |
||
17 | |||
18 | #define SAVE_RET add (1) RETURN_REG<1>:ud ip:ud 32:ud |
||
19 | #define RETURN mov (1) ip:ud RETURN_REG<0,1,0>:ud |
||
20 | |||
21 | /* |
||
22 | * __START |
||
23 | */ |
||
24 | __INTER_START: |
||
25 | mov (16) tmp_reg0.0<1>:UD 0x0:UD {align1}; |
||
26 | mov (16) tmp_reg2.0<1>:UD 0x0:UD {align1}; |
||
27 | mov (16) tmp_reg4.0<1>:UD 0x0:UD {align1} ; |
||
28 | mov (16) tmp_reg6.0<1>:UD 0x0:UD {align1} ; |
||
29 | |||
30 | shl (2) vme_m0.8<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */ |
||
31 | mov (1) vme_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ |
||
32 | |||
33 | mul (1) obw_m0.8<1>:UD w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1}; |
||
34 | add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1}; |
||
35 | mul (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 24:UD {align1}; |
||
36 | mov (1) obw_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ |
||
37 | |||
38 | shl (2) pic_ref.0<1>:uw r4.24<2,2,1>:uw 4:uw {align1}; |
||
39 | mov (2) pic_ref.16<1>:uw r4.20<2,2,1>:uw {align1}; |
||
40 | mov (8) mb_mvp_ref.0<1>:ud 0:ud {align1}; |
||
41 | mov (8) mb_ref_win.0<1>:ud 0:ud {align1}; |
||
42 | mov (8) mba_result.0<1>:ud 0x0:ud {align1}; |
||
43 | mov (8) mbb_result.0<1>:ud 0x0:ud {align1}; |
||
44 | mov (8) mbc_result.0<1>:ud 0x0:ud {align1}; |
||
45 | |||
46 | and.z.f0.0 (1) null:uw mb_hwdep<0,1,0>:uw 0x04:uw {align1}; |
||
47 | (f0.0) jmpi (1) __mb_hwdep_end; |
||
48 | /* read back the data for MB A */ |
||
49 | /* the layout of MB result is: rx.0(Available). rx.4(MVa), rX.8(MVb), rX.16(Pred_L0 flag), |
||
50 | * rX.18 (Pred_L1 flag), rX.20(Forward reference ID), rX.22(Backward reference ID) |
||
51 | */ |
||
52 | mba_start: |
||
53 | mov (8) mb_msg0.0<1>:ud 0:ud {align1}; |
||
54 | and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1}; |
||
55 | /* MB A doesn't exist. Zero MV. mba_flag is zero and ref ID = -1 */ |
||
56 | (f0.0) mov (2) mba_result.20<1>:w -1:w {align1}; |
||
57 | (f0.0) jmpi (1) mbb_start; |
||
58 | mov (1) mba_result.0<1>:d MB_AVAIL {align1}; |
||
59 | mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; |
||
60 | add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w -1:w {align1}; |
||
61 | mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; |
||
62 | add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; |
||
63 | mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; |
||
64 | mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ |
||
65 | |||
66 | /* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ |
||
67 | send (16) |
||
68 | mb_ind |
||
69 | mb_wb.0<1>:ud |
||
70 | NULL |
||
71 | data_port( |
||
72 | OBR_CACHE_TYPE, |
||
73 | OBR_MESSAGE_TYPE, |
||
74 | OBR_CONTROL_4, |
||
75 | OBR_BIND_IDX, |
||
76 | OBR_WRITE_COMMIT_CATEGORY, |
||
77 | OBR_HEADER_PRESENT |
||
78 | ) |
||
79 | mlen 1 |
||
80 | rlen 2 |
||
81 | {align1}; |
||
82 | |||
83 | /* TODO: RefID is required after multi-references are added */ |
||
84 | cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; |
||
85 | (f0.0) mov (2) mba_result.20<1>:w -1:w {align1}; |
||
86 | (f0.0) jmpi (1) mbb_start; |
||
87 | |||
88 | add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; |
||
89 | /* Read MV for MB A */ |
||
90 | /* bind index 3, read 2 oword (32bytes), msg type: 0(OWord Block Read) */ |
||
91 | send (16) |
||
92 | mb_ind |
||
93 | mb_mv0.0<1>:ud |
||
94 | NULL |
||
95 | data_port( |
||
96 | OBR_CACHE_TYPE, |
||
97 | OBR_MESSAGE_TYPE, |
||
98 | OBR_CONTROL_2, |
||
99 | OBR_BIND_IDX, |
||
100 | OBR_WRITE_COMMIT_CATEGORY, |
||
101 | OBR_HEADER_PRESENT |
||
102 | ) |
||
103 | mlen 1 |
||
104 | rlen 1 |
||
105 | {align1}; |
||
106 | /* TODO: RefID is required after multi-references are added */ |
||
107 | /* MV */ |
||
108 | mov (2) mba_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1}; |
||
109 | mov (1) mba_result.16<1>:w MB_PRED_FLAG {align1}; |
||
110 | |||
111 | mbb_start: |
||
112 | mov (8) mb_msg0.0<1>:ud 0:ud {align1}; |
||
113 | and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1}; |
||
114 | /* MB B doesn't exist. Zero MV. The MB B available flag is zero and ref ID = -1 */ |
||
115 | /* If MB B doesn't exist, neither MB C nor D exists */ |
||
116 | (f0.0) mov (2) mbb_result.20<1>:w -1:w {align1}; |
||
117 | (f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; |
||
118 | (f0.0) jmpi (1) mb_mvp_start; |
||
119 | mov (1) mbb_result.0<1>:d MB_AVAIL {align1}; |
||
120 | mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; |
||
121 | add (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1}; |
||
122 | mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; |
||
123 | add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; |
||
124 | mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; |
||
125 | mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ |
||
126 | |||
127 | /* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ |
||
128 | send (16) |
||
129 | mb_ind |
||
130 | mb_wb.0<1>:ud |
||
131 | NULL |
||
132 | data_port( |
||
133 | OBR_CACHE_TYPE, |
||
134 | OBR_MESSAGE_TYPE, |
||
135 | OBR_CONTROL_4, |
||
136 | OBR_BIND_IDX, |
||
137 | OBR_WRITE_COMMIT_CATEGORY, |
||
138 | OBR_HEADER_PRESENT |
||
139 | ) |
||
140 | mlen 1 |
||
141 | rlen 2 |
||
142 | {align1}; |
||
143 | |||
144 | /* TODO: RefID is required after multi-references are added */ |
||
145 | cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; |
||
146 | (f0.0) mov (2) mbb_result.20<1>:w -1:w {align1}; |
||
147 | (f0.0) jmpi (1) mbc_start; |
||
148 | add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; |
||
149 | /* Read MV for MB B */ |
||
150 | /* bind index 3, read 2 oword (32bytes), msg type: 0(OWord Block Read) */ |
||
151 | send (16) |
||
152 | mb_ind |
||
153 | mb_mv0.0<1>:ud |
||
154 | NULL |
||
155 | data_port( |
||
156 | OBR_CACHE_TYPE, |
||
157 | OBR_MESSAGE_TYPE, |
||
158 | OBR_CONTROL_2, |
||
159 | OBR_BIND_IDX, |
||
160 | OBR_WRITE_COMMIT_CATEGORY, |
||
161 | OBR_HEADER_PRESENT |
||
162 | ) |
||
163 | mlen 1 |
||
164 | rlen 1 |
||
165 | {align1}; |
||
166 | /* TODO: RefID is required after multi-references are added */ |
||
167 | mov (2) mbb_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1}; |
||
168 | mov (1) mbb_result.16<1>:w MB_PRED_FLAG {align1}; |
||
169 | |||
170 | mbc_start: |
||
171 | mov (8) mb_msg0.0<1>:ud 0:ud {align1}; |
||
172 | and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_C:uw {align1}; |
||
173 | /* MB C doesn't exist. Zero MV. The MB C available flag is zero */ |
||
174 | /* Based on h264 spec the MB D will be replaced if MB C doesn't exist */ |
||
175 | (f0.0) jmpi (1) mbd_start; |
||
176 | mov (1) mbc_result.0<1>:d MB_AVAIL {align1}; |
||
177 | mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; |
||
178 | add (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1}; |
||
179 | add (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1}; |
||
180 | mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; |
||
181 | add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; |
||
182 | mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; |
||
183 | mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ |
||
184 | |||
185 | /* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ |
||
186 | send (16) |
||
187 | mb_ind |
||
188 | mb_wb.0<1>:ud |
||
189 | NULL |
||
190 | data_port( |
||
191 | OBR_CACHE_TYPE, |
||
192 | OBR_MESSAGE_TYPE, |
||
193 | OBR_CONTROL_4, |
||
194 | OBR_BIND_IDX, |
||
195 | OBR_WRITE_COMMIT_CATEGORY, |
||
196 | OBR_HEADER_PRESENT |
||
197 | ) |
||
198 | mlen 1 |
||
199 | rlen 2 |
||
200 | {align1}; |
||
201 | |||
202 | /* TODO: RefID is required after multi-references are added */ |
||
203 | cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; |
||
204 | (f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; |
||
205 | (f0.0) jmpi (1) mb_mvp_start; |
||
206 | add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; |
||
207 | /* Read MV for MB C */ |
||
208 | /* bind index 3, read 2 oword (32bytes), msg type: 0(OWord Block Read) */ |
||
209 | send (16) |
||
210 | mb_ind |
||
211 | mb_mv0.0<1>:ud |
||
212 | NULL |
||
213 | data_port( |
||
214 | OBR_CACHE_TYPE, |
||
215 | OBR_MESSAGE_TYPE, |
||
216 | OBR_CONTROL_2, |
||
217 | OBR_BIND_IDX, |
||
218 | OBR_WRITE_COMMIT_CATEGORY, |
||
219 | OBR_HEADER_PRESENT |
||
220 | ) |
||
221 | mlen 1 |
||
222 | rlen 1 |
||
223 | {align1}; |
||
224 | /* TODO: RefID is required after multi-references are added */ |
||
225 | /* Forward MV */ |
||
226 | mov (2) mbc_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1}; |
||
227 | mov (1) mbc_result.16<1>:w MB_PRED_FLAG {align1}; |
||
228 | |||
229 | jmpi (1) mb_mvp_start; |
||
230 | mbd_start: /* Reached from mbc_start when MB C is not available: MB D (x-1, y-1) is used in its place and its data is stored in mbc_result */ |
||
231 | mov (8) mb_msg0.0<1>:ud 0:ud {align1}; |
||
232 | and.z.f0.0 (1) null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_D:uw {align1}; |
||
| (f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; /* Neither MB C nor MB D exists: ref ID = -1, as on every other missing-neighbour path, so mb_median_start does not count it as a ref ID 0 match */ |
||
233 | (f0.0) jmpi (1) mb_mvp_start; |
||
234 | mov (1) mbc_result.0<1>:d MB_AVAIL {align1}; |
||
235 | mov (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB {align1}; |
||
236 | add (2) tmp_reg0.0<1>:w tmp_reg0.0<2,2,1>:w -1:w {align1}; /* (x - 1, y - 1) */ |
||
237 | mul (1) mb_msg0.8<1>:UD w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1}; |
||
238 | add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD tmp_reg0.0<0,1,0>:uw {align1}; |
||
239 | mul (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:UD 24:UD {align1}; /* offset = (w_in_mb * (y - 1) + (x - 1)) * 24, same per-MB stride as obw_m0.8 */ |
||
240 | mov (1) mb_msg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ |
||
241 |||
242 | /* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */ |
||
243 | send (16) |
||
244 | mb_ind |
||
245 | mb_wb.0<1>:ud |
||
246 | NULL |
||
247 | data_port( |
||
248 | OBR_CACHE_TYPE, |
||
249 | OBR_MESSAGE_TYPE, |
||
250 | OBR_CONTROL_4, |
||
251 | OBR_BIND_IDX, |
||
252 | OBR_WRITE_COMMIT_CATEGORY, |
||
253 | OBR_HEADER_PRESENT |
||
254 | ) |
||
255 | mlen 1 |
||
256 | rlen 2 |
||
257 | {align1}; |
||
258 |||
259 | cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1}; /* intra value < inter value: presumably MB D was coded as intra (TODO confirm), so it has no usable MV */ |
||
260 | (f0.0) mov (2) mbc_result.20<1>:w -1:w {align1}; |
||
261 | (f0.0) jmpi (1) mb_mvp_start; |
||
262 |||
263 | add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1}; /* advance the read offset by 3 to the MV data of the same MB */ |
||
264 | /* Read MV for MB D */ |
||
265 | /* bind index 3, read 2 oword (32bytes), msg type: 0(OWord Block Read) */ |
||
266 | send (16) |
||
267 | mb_ind |
||
268 | mb_mv0.0<1>:ud |
||
269 | NULL |
||
270 | data_port( |
||
271 | OBR_CACHE_TYPE, |
||
272 | OBR_MESSAGE_TYPE, |
||
273 | OBR_CONTROL_2, |
||
274 | OBR_BIND_IDX, |
||
275 | OBR_WRITE_COMMIT_CATEGORY, |
||
276 | OBR_HEADER_PRESENT |
||
277 | ) |
||
278 | mlen 1 |
||
279 | rlen 1 |
||
280 | {align1}; |
||
281 |||
282 | /* TODO: RefID is required after multi-references are added */ |
||
283 |||
284 | /* Forward MV */ |
||
285 | mov (2) mbc_result.4<1>:ud mb_mv0.0<2,2,1>:ud {align1}; |
||
286 | mov (1) mbc_result.16<1>:w MB_PRED_FLAG {align1}; |
||
287 | |||
288 | mb_mvp_start: /* Derive the MV predictor (mb_mvp_ref) from the neighbour results gathered in mba_result / mbb_result / mbc_result */ |
||
289 | /*TODO: Add the skip prediction */ |
||
290 | /* Check whether both MB B and C are unavailable */ |
||
291 | add (1) tmp_reg0.0<1>:d mbb_result.0<0,1,0>:d mbc_result.0<0,1,0>:d {align1}; |
||
292 | cmp.z.f0.0 (1) null:d tmp_reg0.0<0,1,0>:d 0:d {align1}; |
||
293 | (-f0.0) jmpi (1) mb_median_start; /* sum of the two available flags is non-zero: go to the median path */ |
||
294 | cmp.nz.f0.0 (1) null:d mba_result.0<0,1,0>:d 0:d {align1}; /* B and C both missing: is MB A available? */ |
||
295 | (f0.0) mov (1) mbb_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; /* A available: copy A's MV into B ... */ |
||
296 | (f0.0) mov (1) mbc_result.4<1>:ud mba_result.4<0,1,0>:ud {align1}; /* ... and into C */ |
||
297 | (f0.0) mov (1) mbb_result.20<1>:uw mba_result.20<0,1,0>:uw {align1}; /* copy A's forward ref ID into B ... */ |
||
298 | (f0.0) mov (1) mbc_result.20<1>:uw mba_result.20<0,1,0>:uw {align1}; /* ... and into C */ |
||
299 | (f0.0) mov (1) mb_mvp_ref.0<1>:ud mba_result.4<0,1,0>:ud {align1}; /* predictor = MV of MB A */ |
||
300 | (-f0.0) mov (1) mb_mvp_ref.0<1>:ud 0:ud {align1}; /* no neighbour available at all: zero predictor */ |
||
301 | jmpi (1) __mb_hwdep_end; |
||
302 | |||
303 | mb_median_start: /* Entered from mb_mvp_start when mbb_result.0 + mbc_result.0 is non-zero */ |
||
304 | /* check whether only one neighbour MB has the same ref ID with the current MB */ |
||
305 | mov (8) tmp_reg0.0<1>:ud 0:ud {align1}; /* tmp_reg0.0 = count of neighbours whose ref ID word is 0, tmp_reg0.4 = MV of the last such neighbour */ |
||
306 | cmp.z.f0.0 (1) null:d mba_result.20<1>:w 0:w {align1}; /* MB A: ref ID == 0 ? */ |
||
307 | (f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<1>:w 1:w {align1}; |
||
308 | (f0.0) mov (1) tmp_reg0.4<1>:ud mba_result.4<0,1,0>:ud {align1}; |
||
309 | cmp.z.f0.0 (1) null:d mbb_result.20<1>:w 0:w {align1}; /* MB B: ref ID == 0 ? */ |
||
310 | (f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<1>:w 1:w {align1}; |
||
311 | (f0.0) mov (1) tmp_reg0.4<1>:ud mbb_result.4<0,1,0>:ud {align1}; |
||
312 | cmp.z.f0.0 (1) null:d mbc_result.20<1>:w 0:w {align1}; /* MB C (or D stored in its place): ref ID == 0 ? */ |
||
313 | (f0.0) add (1) tmp_reg0.0<1>:w tmp_reg0.0<1>:w 1:w {align1}; |
||
314 | (f0.0) mov (1) tmp_reg0.4<1>:ud mbc_result.4<0,1,0>:ud {align1}; |
||
315 | cmp.e.f0.0 (1) null:d tmp_reg0.0<1>:w 1:w {align1}; /* exactly one matching neighbour? */ |
||
316 | (f0.0) mov (1) mb_mvp_ref.0<1>:ud tmp_reg0.4<0,1,0>:ud {align1}; /* then its MV is the predictor */ |
||
317 | (f0.0) jmpi (1) __mb_hwdep_end; |
||
318 |||
319 | mov (1) INPUT_ARG0.0<1>:w mba_result.4<0,1,0>:w {align1}; /* otherwise take the median per component: first MV word (presumably x) of A, B, C */ |
||
320 | mov (1) INPUT_ARG0.4<1>:w mbb_result.4<0,1,0>:w {align1}; |
||
321 | mov (1) INPUT_ARG0.8<1>:w mbc_result.4<0,1,0>:w {align1}; |
||
322 | SAVE_RET {align1}; /* call word_imedian */ |
||
323 | jmpi (1) word_imedian; |
||
324 | mov (1) mb_mvp_ref.0<1>:w RET_ARG<0,1,0>:w {align1}; /* store the median left in RET_ARG */ |
||
325 | mov (1) INPUT_ARG0.0<1>:w mba_result.6<0,1,0>:w {align1}; /* second MV word (presumably y) of A, B, C */ |
||
326 | mov (1) INPUT_ARG0.4<1>:w mbb_result.6<0,1,0>:w {align1}; |
||
327 | mov (1) INPUT_ARG0.8<1>:w mbc_result.6<0,1,0>:w {align1}; |
||
328 | SAVE_RET {align1}; /* call word_imedian */ |
||
329 | jmpi (1) word_imedian; |
||
330 | mov (1) mb_mvp_ref.2<1>:w RET_ARG<0,1,0>:w {align1}; /* store the median left in RET_ARG */ |
||
331 | |||
332 | __mb_hwdep_end: |
||
333 | |||
334 | mov (2) mv_cc_ref.0<1>:w mba_result.4<2,2,1>:w {align1}; |
||
335 | |||
336 | /* Calibrate the ref window for MPEG2 */ |
||
337 | mov (1) vme_m0.0<1>:W -16:W {align1}; |
||
338 | mov (1) vme_m0.2<1>:W -12:W {align1}; |
||
339 | |||
340 | mov (1) INPUT_ARG0.0<1>:ud vme_m0.0<0,1,0>:ud {align1}; |
||
341 | mov (1) INPUT_ARG0.8<1>:ud vme_m0.8<0,1,0>:ud {align1}; |
||
342 | mov (8) INPUT_ARG1.0<1>:ud pic_ref.0<8,8,1>:ud {align1}; |
||
343 | |||
344 | SAVE_RET {align1}; |
||
345 | jmpi (1) ref_boundary_check; |
||
346 | mov (2) vme_m0.0<1>:w RET_ARG<2,2,1>:w {align1}; |
||
347 | |||
348 | /* m2, get the MV/Mb cost passed from constant buffer when |
||
349 | spawning thread by MEDIA_OBJECT */ |
||
350 | mov (8) vme_m2<1>:UD r1.0<8,8,1>:UD {align1}; |
||
351 | |||
352 | mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1}; |
||
353 | |||
354 | /* m3 */ |
||
355 | mov (8) vme_msg_3<1>:UD 0x0:UD {align1}; |
||
356 | |||
357 | /* the neighbour pixel is zero for MPEG2 Intra-prediction */ |
||
358 | |||
359 | /* m4 */ |
||
360 | mov (8) vme_msg_4<1>:UD 0:UD {align1}; |
||
361 | mov (1) tmp_reg0.0<1>:UW LUMA_INTRA_MODE:UW {align1}; |
||
362 | /* Use the Luma mode */ |
||
363 | mov (1) vme_msg_4.5<1>:UB tmp_reg0.0<0,1,0>:UB {align1}; |
||
364 | mov (1) tmp_reg0.0<1>:UW INTRA16_DC_PRED:UW {align1}; |
||
365 | mov (1) vme_msg_4.4<1>:ub tmp_reg0.0<0,1,0>:UB {align1}; |
||
366 | |||
367 | /* m5 */ |
||
368 | mov (8) vme_msg_5<1>:UD 0x0:UD {align1}; |
||
369 | mov (1) vme_msg_5.16<1>:UD INTRA_PREDICTORE_MODE {align1}; |
||
370 | |||
371 | /* the penalty for Intra mode */ |
||
372 | mov (1) vme_msg_5.28<1>:UD 0x010101:UD {align1}; |
||
373 | |||
374 | |||
375 | /* m6 */ |
||
376 | mov (8) vme_msg_6.0<1>:UD 0:Ud {align1}; |
||
377 | |||
378 | /* |
||
379 | * SIC VME message |
||
380 | */ |
||
381 | /* m0 */ |
||
382 | mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; |
||
383 | |||
384 | /* Disable Intra8x8/Intra4x4 Intra-prediction */ |
||
385 | /* m1 */ |
||
386 | mov (8) vme_m1.0<1>:ud 0x0:UD {align1}; |
||
387 | |||
388 | mov (1) intra_flag<1>:UW 0x0:UW {align1} ; |
||
389 | mov (1) tmp_reg0.0<1>:uw LUMA_INTRA_8x8_DISABLE:uw {align1}; |
||
390 | add (1) tmp_reg0.0<1>:uw tmp_reg0.0<0,1,0>:uw LUMA_INTRA_4x4_DISABLE:uw {align1}; |
||
391 | mov (1) intra_part_mask_ub<1>:UB tmp_reg0.0<0,1,0>:ub {align1}; |
||
392 | |||
393 | /* assign MB intra struct from the thread payload*/ |
||
394 | mov (1) mb_intra_struct_ub<1>:UB input_mb_intra_ub<0,1,0>:UB {align1}; |
||
395 | |||
396 | /* Enable DC HAAR component when calculating HAAR SATD block */ |
||
397 | mov (1) tmp_reg0.0<1>:UW DC_HARR_ENABLE:UW {align1}; |
||
398 | mov (1) vme_m1.30<1>:UB tmp_reg0.0<0,1,0>:UB {align1}; |
||
399 | |||
400 | mov (1) vme_m0.12<1>:UD INTRA_SAD_HAAR:UD {align1}; /* 16x16 Source, Intra_harr */ |
||
401 | /* m0 */ |
||
402 | mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; |
||
403 | mov (8) vme_msg_1<1>:UD vme_m1.0<8,8,1>:UD {align1}; |
||
404 | |||
405 | /* after verification it will be passed by using payload */ |
||
406 | send (8) |
||
407 | vme_msg_ind |
||
408 | vme_wb<1>:UD |
||
409 | null |
||
410 | cre( |
||
411 | BIND_IDX_VME, |
||
412 | VME_SIC_MESSAGE_TYPE |
||
413 | ) |
||
414 | mlen sic_vme_msg_length |
||
415 | rlen vme_wb_length |
||
416 | {align1}; |
||
417 | |||
418 | /* |
||
419 | * Oword Block Write message |
||
420 | */ |
||
421 | mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1}; |
||
422 | |||
423 | mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1}; |
||
424 | mov (1) msg_reg1.4<1>:UD vme_wb.16<0,1,0>:UD {align1}; |
||
425 | mov (1) msg_reg1.8<1>:UD vme_wb.20<0,1,0>:UD {align1}; |
||
426 | mov (1) msg_reg1.12<1>:UD vme_wb.24<0,1,0>:UD {align1}; |
||
427 | |||
428 | /* Distortion, Intra (17-16), */ |
||
429 | mov (1) msg_reg1.16<1>:UW vme_wb.12<0,1,0>:UW {align1}; |
||
430 | |||
431 | mov (1) msg_reg1.20<1>:UD vme_wb.8<0,1,0>:UD {align1}; |
||
432 | /* VME clock counts */ |
||
433 | mov (1) msg_reg1.24<1>:UD vme_wb.28<0,1,0>:UD {align1}; |
||
434 | |||
435 | mov (1) msg_reg1.28<1>:UD obw_m0.8<0,1,0>:UD {align1}; |
||
436 | |||
437 | /* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */ |
||
438 | send (16) |
||
439 | msg_ind |
||
440 | obw_wb |
||
441 | null |
||
442 | data_port( |
||
443 | OBW_CACHE_TYPE, |
||
444 | OBW_MESSAGE_TYPE, |
||
445 | OBW_CONTROL_2, |
||
446 | OBW_BIND_IDX, |
||
447 | OBW_WRITE_COMMIT_CATEGORY, |
||
448 | OBW_HEADER_PRESENT |
||
449 | ) |
||
450 | mlen 2 |
||
451 | rlen obw_wb_length |
||
452 | {align1}; |
||
453 | |||
454 | /* IME search */ |
||
455 | mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR:UD {align1}; /* 16x16 Source, harr */ |
||
456 | mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */ |
||
457 | |||
458 | mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1}; |
||
459 | |||
460 | mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; |
||
461 | |||
462 | mov (1) vme_m1.0<1>:UD ADAPTIVE_SEARCH_ENABLE:ud {align1} ; |
||
463 | /* the Max MV number is passed by constant buffer */ |
||
464 | mov (1) vme_m1.4<1>:UB r4.28<0,1,0>:UB {align1}; |
||
465 | mov (1) vme_m1.8<1>:UD START_CENTER + SEARCH_PATH_LEN:UD {align1}; |
||
466 | /* Set the MV cost center */ |
||
467 | mov (1) vme_m1.16<1>:ud mv_cc_ref.0<0,1,0>:ud {align1}; |
||
468 | mov (1) vme_m1.20<1>:ud mv_cc_ref.0<0,1,0>:ud {align1}; |
||
469 | |||
470 | mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; |
||
471 | |||
472 | mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1}; |
||
473 | /* M3/M4 search path */ |
||
474 | |||
475 | mov (1) vme_msg_3.0<1>:UD 0x01010101:UD {align1}; |
||
476 | mov (1) vme_msg_3.4<1>:UD 0x10010101:UD {align1}; |
||
477 | mov (1) vme_msg_3.8<1>:UD 0x0F0F0F0F:UD {align1}; |
||
478 | mov (1) vme_msg_3.12<1>:UD 0x100F0F0F:UD {align1}; |
||
479 | mov (1) vme_msg_3.16<1>:UD 0x01010101:UD {align1}; |
||
480 | mov (1) vme_msg_3.20<1>:UD 0x10010101:UD {align1}; |
||
481 | mov (1) vme_msg_3.24<1>:UD 0x0F0F0F0F:UD {align1}; |
||
482 | mov (1) vme_msg_3.28<1>:UD 0x100F0F0F:UD {align1}; |
||
483 | |||
484 | mov (1) vme_msg_4.0<1>:UD 0x01010101:UD {align1}; |
||
485 | mov (1) vme_msg_4.4<1>:UD 0x10010101:UD {align1}; |
||
486 | mov (1) vme_msg_4.8<1>:UD 0x0F0F0F0F:UD {align1}; |
||
487 | mov (1) vme_msg_4.12<1>:UD 0x000F0F0F:UD {align1}; |
||
488 | |||
489 | mov (4) vme_msg_4.16<1>:UD 0x0:UD {align1}; |
||
490 | |||
491 | send (8) |
||
492 | vme_msg_ind |
||
493 | vme_wb<1>:UD |
||
494 | null |
||
495 | vme( |
||
496 | BIND_IDX_VME, |
||
497 | 0, |
||
498 | 0, |
||
499 | VME_IME_MESSAGE_TYPE |
||
500 | ) |
||
501 | mlen ime_vme_msg_length |
||
502 | rlen vme_wb_length {align1}; |
||
503 | |||
504 | /* Set Macroblock-shape/mode for FBR */ |
||
505 | |||
506 | mov (1) vme_m2.20<1>:UD 0x0:UD {align1}; |
||
507 | mov (1) vme_m2.21<1>:UB vme_wb.25<0,1,0>:UB {align1}; |
||
508 | mov (1) vme_m2.22<1>:UB vme_wb.26<0,1,0>:UB {align1}; |
||
509 | |||
510 | and (1) tmp_reg0.0<1>:UW vme_wb.0<0,1,0>:UW 0x03:UW {align1}; |
||
511 | mov (1) vme_m2.20<1>:UB tmp_reg0.0<0,1,0>:UB {align1}; |
||
512 | |||
513 | /* Send FBR message into CRE */ |
||
514 | |||
515 | mov (8) vme_msg_3.0<1>:UD vme_wb1.0<8,8,1>:UD {align1}; |
||
516 | mov (8) vme_msg_4.0<1>:ud vme_wb2.0<8,8,1>:ud {align1}; |
||
517 | mov (8) vme_msg_5.0<1>:ud vme_wb3.0<8,8,1>:ud {align1}; |
||
518 | mov (8) vme_msg_6.0<1>:ud vme_wb4.0<8,8,1>:ud {align1}; |
||
519 | |||
520 | mov (1) vme_m0.12<1>:UD INTER_SAD_HAAR + SUB_PEL_MODE_HALF + FBR_BME_DISABLE:UD {align1}; /* 16x16 Source, 1/2 pixel, harr, BME disable */ |
||
521 | |||
522 | /* Bilinear filter */ |
||
523 | mov (1) tmp_reg0.0<1>:uw 0x04:uw {align1}; |
||
524 | add (1) vme_m1.30<1>:ub vme_m1.30<0,1,0>:ub tmp_reg0.0<0,1,0>:ub {align1}; |
||
525 | |||
526 | mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; |
||
527 | mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; |
||
528 | |||
529 | mov (8) vme_msg_2.0<1>:UD vme_m2.0<8,8,1>:UD {align1}; |
||
530 | |||
531 | /* after verification it will be passed by using payload */ |
||
532 | send (8) |
||
533 | vme_msg_ind |
||
534 | vme_wb<1>:UD |
||
535 | null |
||
536 | cre( |
||
537 | BIND_IDX_VME, |
||
538 | VME_FBR_MESSAGE_TYPE |
||
539 | ) |
||
540 | mlen fbr_vme_msg_length |
||
541 | rlen vme_wb_length |
||
542 | {align1}; |
||
543 | |||
544 | and.z.f0.0 (1) null:uw mb_hwdep<0,1,0>:uw 0x04:uw {align1}; |
||
545 | (-f0.0) jmpi (1) vme_run_again; |
||
546 | nop; |
||
547 | vme_mv_output: |
||
548 | |||
549 | add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x02:UD {align1}; |
||
550 | mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1}; |
||
551 | /* write FME info */ |
||
552 | mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1}; |
||
553 | |||
554 | mov (1) msg_reg1.4<1>:UD vme_wb.24<0,1,0>:UD {align1}; |
||
555 | /* Inter distortion of FME */ |
||
556 | mov (1) msg_reg1.8<1>:UD vme_wb.8<0,1,0>:UD {align1}; |
||
557 | |||
558 | mov (1) msg_reg1.12<1>:UD vme_m2.20<0,1,0>:UD {align1}; |
||
559 | |||
560 | /* bind index 3, write oword (16bytes), msg type: 8(OWord Block Write) */ |
||
561 | send (16) |
||
562 | msg_ind |
||
563 | obw_wb |
||
564 | null |
||
565 | data_port( |
||
566 | OBW_CACHE_TYPE, |
||
567 | OBW_MESSAGE_TYPE, |
||
568 | OBW_CONTROL_0, |
||
569 | OBW_BIND_IDX, |
||
570 | OBW_WRITE_COMMIT_CATEGORY, |
||
571 | OBW_HEADER_PRESENT |
||
572 | ) |
||
573 | mlen 2 |
||
574 | rlen obw_wb_length |
||
575 | {align1}; |
||
576 | |||
577 | |||
578 | /* Write FME/BME MV */ |
||
579 | add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x01:UD {align1}; |
||
580 | mov (8) msg_reg0.0<1>:UD obw_m0.0<8,8,1>:UD {align1}; |
||
581 | |||
582 | |||
583 | mov (8) msg_reg1.0<1>:UD vme_wb1.0<8,8,1>:UD {align1}; |
||
584 | mov (8) msg_reg2.0<1>:ud vme_wb2.0<8,8,1>:ud {align1}; |
||
585 | mov (8) msg_reg3.0<1>:ud vme_wb3.0<8,8,1>:ud {align1}; |
||
586 | mov (8) msg_reg4.0<1>:ud vme_wb4.0<8,8,1>:ud {align1}; |
||
587 | /* bind index 3, write 2 oword (32 bytes), msg type: 8(OWord Block Write) */ |
||
588 | send (16) |
||
589 | msg_ind |
||
590 | obw_wb |
||
591 | null |
||
592 | data_port( |
||
593 | OBW_CACHE_TYPE, |
||
594 | OBW_MESSAGE_TYPE, |
||
595 | OBW_CONTROL_2, |
||
596 | OBW_BIND_IDX, |
||
597 | OBW_WRITE_COMMIT_CATEGORY, |
||
598 | OBW_HEADER_PRESENT |
||
599 | ) |
||
600 | mlen 2 |
||
601 | rlen obw_wb_length |
||
602 | {align1}; |
||
603 | |||
604 | /* Write FME/BME RefID */ |
||
605 | add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x08:UD {align1}; |
||
606 | mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1}; |
||
607 | |||
608 | mov (8) msg_reg1.0<1>:UD vme_wb6.0<8,8,1>:UD {align1}; |
||
609 | |||
610 | /* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */ |
||
611 | send (16) |
||
612 | msg_ind |
||
613 | obw_wb |
||
614 | null |
||
615 | data_port( |
||
616 | OBW_CACHE_TYPE, |
||
617 | OBW_MESSAGE_TYPE, |
||
618 | OBW_CONTROL_2, |
||
619 | OBW_BIND_IDX, |
||
620 | OBW_WRITE_COMMIT_CATEGORY, |
||
621 | OBW_HEADER_PRESENT |
||
622 | ) |
||
623 | mlen 2 |
||
624 | rlen obw_wb_length |
||
625 | {align1}; |
||
626 | |||
627 | |||
628 | /* Issue message fence so that the previous write message is committed */ |
||
629 | send (16) |
||
630 | mb_ind |
||
631 | mb_wb.0<1>:ud |
||
632 | NULL |
||
633 | data_port( |
||
634 | OBR_CACHE_TYPE, |
||
635 | OBR_MESSAGE_FENCE, |
||
636 | OBR_MF_COMMIT, |
||
637 | OBR_BIND_IDX, |
||
638 | OBR_WRITE_COMMIT_CATEGORY, |
||
639 | OBR_HEADER_PRESENT |
||
640 | ) |
||
641 | mlen 1 |
||
642 | rlen 1 |
||
643 | {align1}; |
||
644 | |||
645 | __EXIT: |
||
646 | /* |
||
647 | * kill thread: copy r0 into ts_msg_reg0 and send it to the thread spawner with EOT set |
||
648 | */ |
||
649 | mov (8) ts_msg_reg0<1>:UD r0<8,8,1>:UD {align1}; |
||
650 | send (16) ts_msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT}; |
||
651 |||
652 | nop ; |
||
653 | nop ; |
||
654 | |||
655 | word_imedian: /* Subroutine: RET_ARG = median of the signed words a = INPUT_ARG0.0, b = INPUT_ARG0.4, c = INPUT_ARG0.8. Overwrites f0.0. Leaves through RETURN, so the caller must run SAVE_RET before jumping here. */ |
||
656 | cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.4<0,1,0>:w {align1}; /* a >= b ? */ |
||
657 | (f0.0) jmpi (1) cmp_a_ge_b; |
||
658 | cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* here a < b; a >= c ? */ |
||
659 | (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; /* c <= a < b: median is a */ |
||
660 | (f0.0) jmpi (1) cmp_end; |
||
661 | cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* here a < b and a < c; b >= c ? */ |
||
662 | (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* a < c <= b: median is c */ |
||
663 | (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; /* a < b < c: median is b */ |
||
664 | jmpi (1) cmp_end; |
||
665 | cmp_a_ge_b: /* here a >= b */ |
||
666 | cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* b >= c ? */ |
||
667 | (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1}; /* a >= b >= c: median is b */ |
||
668 | (f0.0) jmpi (1) cmp_end; |
||
669 | cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* here b < c and b <= a; a >= c ? */ |
||
670 | (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1}; /* b < c <= a: median is c */ |
||
671 | (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1}; /* b <= a < c: median is a */ |
||
672 | cmp_end: |
||
673 | RETURN {align1}; |
||
674 | |||
675 | nop; |
||
676 | nop; |
||
677 | |||
678 | ref_boundary_check: /* Subroutine: clamps the origin of a 48x40 reference window. In: INPUT_ARG0.0/.2 = window offset (x, y) relative to the MB position, INPUT_ARG0.8/.10 = MB position (x, y), INPUT_ARG1.0/.2 = right/bottom limit, INPUT_ARG1.16/.18 = allowed distance from the MB position. Out: RET_ARG (two words) = clamped window origin minus the MB position. Uses TEMP_VAR0, TEMP_VAR1 and f0.0; leaves through RETURN. */ |
||
679 |||
680 | /* The left/up coordinate of reference window */ |
||
681 | add (2) TEMP_VAR0.0<1>:w INPUT_ARG0.8<2,2,1>:w INPUT_ARG0.0<2,2,1>:w {align1}; |
||
682 | /* The right/bottom coordinate of reference window */ |
||
683 | add (1) TEMP_VAR0.16<1>:w TEMP_VAR0.0<0,1,0>:w 48:w {align1}; |
||
684 | add (1) TEMP_VAR0.18<1>:w TEMP_VAR0.2<0,1,0>:w 40:w {align1}; |
||
685 |||
686 | /* Firstly the MV range is checked */ |
||
687 | mul (2) TEMP_VAR1.16<1>:w INPUT_ARG1.16<2,2,1>:w -1:w {align1}; /* negated range */ |
||
688 | add (2) TEMP_VAR1.0<1>:w INPUT_ARG0.8<2,2,1>:w TEMP_VAR1.16<2,2,1>:w {align1}; /* TEMP_VAR1.0/.2 = MB position - range (lower bound) */ |
||
689 | add (2) TEMP_VAR1.4<1>:w INPUT_ARG0.8<2,2,1>:w INPUT_ARG1.16<2,2,1>:w {align1}; /* TEMP_VAR1.4/.6 = MB position + range (upper bound) */ |
||
690 |||
691 | cmp.l.f0.0 (1) null:w TEMP_VAR0.0<0,1,0>:w TEMP_VAR1.0<0,1,0>:w {align1}; /* left below the lower bound? */ |
||
692 | (f0.0) mov (1) TEMP_VAR0.0<1>:w TEMP_VAR1.0<0,1,0>:w {align1}; |
||
693 | cmp.g.f0.0 (1) null:w TEMP_VAR0.16<0,1,0>:w TEMP_VAR1.4<0,1,0>:w {align1}; /* right above the upper bound? */ |
||
694 | (f0.0) add (1) TEMP_VAR0.0<1>:w TEMP_VAR1.4<0,1,0>:w -48:w {align1}; |
||
695 | cmp.l.f0.0 (1) null:w TEMP_VAR0.2<0,1,0>:w TEMP_VAR1.2<0,1,0>:w {align1}; /* top below the lower bound? */ |
||
696 | (f0.0) mov (1) TEMP_VAR0.2<1>:w TEMP_VAR1.2<0,1,0>:w {align1}; |
||
697 | cmp.g.f0.0 (1) null:w TEMP_VAR0.18<0,1,0>:w TEMP_VAR1.6<0,1,0>:w {align1}; /* bottom above the upper bound? */ |
||
698 | (f0.0) add (1) TEMP_VAR0.2<1>:w TEMP_VAR1.6<0,1,0>:w -40:w {align1}; |
||
699 |||
700 | x_left_cmp: /* NOTE(review): TEMP_VAR0.16/.18 (right/bottom) are not recomputed after the adjustments above, so the right/bottom tests below use the pre-adjustment edges - confirm this is intended */ |
||
701 | cmp.l.f0.0 (1) null:w TEMP_VAR0.0<0,1,0>:w 0:w {align1}; |
||
702 | (-f0.0) jmpi (1) x_right_cmp; |
||
703 | (f0.0) mov (1) TEMP_VAR0.0<1>:w 0:w {align1}; /* left < 0: clamp to 0 and skip the right-edge test */ |
||
704 | jmpi (1) y_top_cmp; |
||
705 | x_right_cmp: |
||
706 | cmp.g.f0.0 (1) null:w TEMP_VAR0.16<0,1,0>:w INPUT_ARG1.0<0,1,0>:w {align1}; |
||
707 | (-f0.0) jmpi (1) y_top_cmp; |
||
708 | (f0.0) add (1) TEMP_VAR0.0<1>:w INPUT_ARG1.0<0,1,0>:w -48:w {align1}; /* right past the limit: left = limit - 48 */ |
||
709 | y_top_cmp: |
||
710 | cmp.l.f0.0 (1) null:w TEMP_VAR0.2<0,1,0>:w 0:w {align1}; |
||
711 | (-f0.0) jmpi (1) y_bottom_cmp; |
||
712 | (f0.0) mov (1) TEMP_VAR0.2<1>:w 0:w {align1}; /* top < 0: clamp to 0 and skip the bottom-edge test */ |
||
713 | jmpi (1) y_bottom_end; |
||
714 | y_bottom_cmp: |
||
715 | cmp.g.f0.0 (1) null:w TEMP_VAR0.18<0,1,0>:w INPUT_ARG1.2<0,1,0>:w {align1}; |
||
716 | (f0.0) add (1) TEMP_VAR0.2<1>:w INPUT_ARG1.2<0,1,0>:w -40:w {align1}; /* bottom past the limit: top = limit - 40 */ |
||
717 |||
718 | y_bottom_end: |
||
719 | mul (2) TEMP_VAR1.0<1>:w INPUT_ARG0.8<2,2,1>:w -1:w {align1}; /* negated MB position */ |
||
720 | add (2) RET_ARG<1>:w TEMP_VAR0.0<2,2,1>:w TEMP_VAR1.0<2,2,1>:w {align1}; /* result = clamped origin - MB position */ |
||
721 | RETURN {align1}; |
||
722 | nop; |
||
723 | nop; |
||
724 | |||
725 | vme_run_again: /* Reached after the FBR message when bit 0x04 of mb_hwdep is set. Decides from the MV predictor whether to continue at vme_start (defined below) or go to vme_done (defined outside this excerpt). */ |
||
726 |||
727 | asr (2) mb_ref_win.0<1>:w mb_mvp_ref.0<2,2,1>:w 2:w {align1}; /* predictor (x, y) >> 2 (arithmetic); presumably quarter-pel to integer pixels - TODO confirm */ |
||
728 | mov (2) tmp_reg0.0<1>:w mb_ref_win.0<2,2,1>:w {align1}; /* working copy for the magnitude test below */ |
||
729 | add (2) mb_ref_win.8<1>:w mb_ref_win.0<2,2,1>:w 3:w {align1}; |
||
730 | and (2) mb_ref_win.16<1>:uw mb_ref_win.8<2,2,1>:uw 0xFFFC:uw {align1}; /* (value + 3) & 0xFFFC: round up to a multiple of 4 */ |
||
731 |||
732 | cmp.l.f0.0 (1) null:w tmp_reg0.0<0,1,0>:w 0:w {align1}; /* take the absolute value of each component */ |
||
733 | (f0.0) mul (1) tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w -1:w {align1}; |
||
734 | cmp.l.f0.0 (1) null:w tmp_reg0.2<0,1,0>:w 0:w {align1}; |
||
735 | (f0.0) mul (1) tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1}; |
||
736 |||
737 | cmp.ge.f0.0 (1) null:w tmp_reg0.0<0,1,0>:w 4:w {align1}; /* go to vme_start when either component has magnitude >= 4 */ |
||
738 | (f0.0) jmpi (1) vme_start; |
||
739 | cmp.ge.f0.0 (1) null:w tmp_reg0.2<0,1,0>:w 4:w {align1}; |
||
740 | (f0.0) jmpi (1) vme_start; |
||
741 |||
742 | jmpi (1) vme_done; /* both components have magnitude < 4: skip vme_start */ |
||
743 | |||
744 | vme_start: /* Extra search pass: save the current vme_wb0/vme_wb1, run an IME search followed by an FBR refinement on a window offset by mb_ref_win.16, then keep the new result only if its word at vme_wb0.8 is lower than the saved one. */ |
||
745 | mov (8) tmp_vme_wb0.0<1>:ud vme_wb0.0<8,8,1>:ud {align1}; /* back up the current vme_wb0 (8 dwords); restored at the end if the new result is not better */ |
||
746 | mov (8) tmp_vme_wb1.0<1>:ud vme_wb1.0<8,8,1>:ud {align1}; /* back up the current vme_wb1 (8 dwords) */ |
||
747 | |||
748 | /* Calibrate the ref window for MPEG2 */ |
||
749 | mov (1) vme_m0.0<1>:W -16:W {align1}; /* first word of vme_m0 = -16; presumably -(48 - 16) / 2, the x origin of a 48-wide window centred on the macroblock - confirm */ |
||
750 | mov (1) vme_m0.2<1>:W -12:W {align1}; /* second word of vme_m0 = -12; presumably -(40 - 16) / 2, the y origin of a 40-high window - confirm */ |
||
751 | mov (4) INPUT_ARG0.0<1>:ud vme_m0.0<4,4,1>:ud {align1}; /* first argument of ref_boundary_check: the low 4 dwords of vme_m0 */ |
||
752 | add (2) INPUT_ARG0.0<1>:w INPUT_ARG0.0<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1}; /* shift the two origin words by the pair stored at mb_ref_win.16 */ |
||
753 | mov (8) INPUT_ARG1.0<1>:ud pic_ref.0<8,8,1>:ud {align1}; /* second argument: pic_ref (8 dwords) */ |
||
754 | |||
755 | SAVE_RET {align1}; /* macro from the top of the file: stores ip + 32 in RETURN_REG as the return address */ |
||
756 | jmpi (1) ref_boundary_check; /* call; ref_boundary_check is not visible here and presumably returns through the RETURN macro to the next instruction - confirm */ |
||
757 | mov (2) vme_m0.0<1>:w RET_ARG<2,2,1>:w {align1}; /* take the two result words from RET_ARG as the final window origin in vme_m0 */ |
||
758 | |||
759 | /* IME search */ |
||
760 | mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR:UD {align1}; /* 16x16 Source, Haar */ |
||
761 | mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */ |
||
762 | |||
763 | mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1}; /* duplicate dword 0 (the window origin) into dword 1; presumably the second reference uses the same origin - confirm */ |
||
764 | |||
765 | mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; /* message register 0 = vme_m0 */ |
||
766 | |||
767 | mov (8) vme_m1.0<1>:ud 0x0:UD {align1}; /* start vme_m1 from all zeroes */ |
||
768 | |||
769 | mov (1) vme_m1.0<1>:UD ADAPTIVE_SEARCH_ENABLE:ud {align1} ; |
||
770 | /* the Max MV number is passed by constant buffer */ |
||
771 | mov (1) vme_m1.4<1>:UB r4.28<0,1,0>:UB {align1}; |
||
772 | mov (1) vme_m1.8<1>:UD START_CENTER + SEARCH_PATH_LEN:UD {align1}; |
||
773 | /* Set the MV cost center */ |
||
774 | mov (1) vme_m1.16<1>:ud mv_cc_ref.0<0,1,0>:ud {align1}; /* the same dword from mv_cc_ref.0 goes into offset 16 ... */ |
||
775 | mov (1) vme_m1.20<1>:ud mv_cc_ref.0<0,1,0>:ud {align1}; /* ... and into offset 20 */ |
||
776 | mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; /* message register 1 = vme_m1 */ |
||
777 | |||
778 | mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1}; /* message register 2 = vme_m2 as prepared earlier in the kernel */ |
||
779 | /* M3/M4 search path: constants for all of vme_msg_3 and the first four dwords of vme_msg_4; the remaining four dwords of vme_msg_4 are zeroed */ |
||
780 | |||
781 | mov (1) vme_msg_3.0<1>:UD 0x01010101:UD {align1}; |
||
782 | mov (1) vme_msg_3.4<1>:UD 0x10010101:UD {align1}; |
||
783 | mov (1) vme_msg_3.8<1>:UD 0x0F0F0F0F:UD {align1}; |
||
784 | mov (1) vme_msg_3.12<1>:UD 0x100F0F0F:UD {align1}; |
||
785 | mov (1) vme_msg_3.16<1>:UD 0x01010101:UD {align1}; |
||
786 | mov (1) vme_msg_3.20<1>:UD 0x10010101:UD {align1}; |
||
787 | mov (1) vme_msg_3.24<1>:UD 0x0F0F0F0F:UD {align1}; |
||
788 | mov (1) vme_msg_3.28<1>:UD 0x100F0F0F:UD {align1}; |
||
789 | |||
790 | mov (1) vme_msg_4.0<1>:UD 0x01010101:UD {align1}; |
||
791 | mov (1) vme_msg_4.4<1>:UD 0x10010101:UD {align1}; |
||
792 | mov (1) vme_msg_4.8<1>:UD 0x0F0F0F0F:UD {align1}; |
||
793 | mov (1) vme_msg_4.12<1>:UD 0x000F0F0F:UD {align1}; |
||
794 | |||
795 | mov (4) vme_msg_4.16<1>:UD 0x0:UD {align1}; |
||
796 | |||
797 | send (8) |
||
798 | vme_msg_ind |
||
799 | vme_wb<1>:UD |
||
800 | null |
||
801 | vme( |
||
802 | BIND_IDX_VME, |
||
803 | 0, |
||
804 | 0, |
||
805 | VME_IME_MESSAGE_TYPE |
||
806 | ) |
||
807 | mlen ime_vme_msg_length |
||
808 | rlen vme_wb_length {align1}; /* IME message: payload starts at message register vme_msg_ind, the reply (vme_wb_length registers) is written back starting at vme_wb */ |
||
809 | |||
810 | /* Set Macroblock-shape/mode for FBR */ |
||
811 | |||
812 | mov (1) vme_m2.20<1>:UD 0x0:UD {align1}; /* clear the whole dword at offset 20 before filling its bytes from the IME reply */ |
||
813 | mov (1) vme_m2.21<1>:UB vme_wb.25<0,1,0>:UB {align1}; /* byte 21 = reply byte 25 */ |
||
814 | mov (1) vme_m2.22<1>:UB vme_wb.26<0,1,0>:UB {align1}; /* byte 22 = reply byte 26 */ |
||
815 | |||
816 | and (1) tmp_reg0.0<1>:UW vme_wb.0<0,1,0>:UW 0x03:UW {align1}; /* keep only the low two bits of the first reply word */ |
||
817 | mov (1) vme_m2.20<1>:UB tmp_reg0.0<0,1,0>:UB {align1}; /* byte 20 = those two bits */ |
||
818 | |||
819 | /* Send FBR message into CRE */ |
||
820 | |||
821 | mov (8) vme_msg_3.0<1>:UD vme_wb1.0<8,8,1>:UD {align1}; /* message registers 3..6 are overwritten with the reply registers vme_wb1..vme_wb4 */ |
||
822 | mov (8) vme_msg_4.0<1>:ud vme_wb2.0<8,8,1>:ud {align1}; |
||
823 | mov (8) vme_msg_5.0<1>:ud vme_wb3.0<8,8,1>:ud {align1}; |
||
824 | mov (8) vme_msg_6.0<1>:ud vme_wb4.0<8,8,1>:ud {align1}; |
||
825 | |||
826 | mov (1) vme_m0.12<1>:UD INTER_SAD_HAAR + SUB_PEL_MODE_HALF + FBR_BME_DISABLE:UD {align1}; /* 16x16 Source, 1/2 pixel, Haar, BME disable */ |
||
827 | |||
828 | /* Bilinear filter */ |
||
829 | mov (1) tmp_reg0.0<1>:uw 0x04:uw {align1}; /* scratch word = 4 */ |
||
830 | add (1) vme_m1.30<1>:ub vme_m1.30<0,1,0>:ub tmp_reg0.0<0,1,0>:ub {align1}; /* add 4 to byte 30 of vme_m1 (low byte of the scratch word) */ |
||
831 | |||
832 | mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; /* refresh message registers 0..2 from the updated vme_m0, vme_m1 and vme_m2 */ |
||
833 | mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; |
||
834 | |||
835 | mov (8) vme_msg_2.0<1>:UD vme_m2.0<8,8,1>:UD {align1}; |
||
836 | |||
837 | /* after verification it will be passed by using payload */ |
||
838 | send (8) |
||
839 | vme_msg_ind |
||
840 | vme_wb<1>:UD |
||
841 | null |
||
842 | cre( |
||
843 | BIND_IDX_VME, |
||
844 | VME_FBR_MESSAGE_TYPE |
||
845 | ) |
||
846 | mlen fbr_vme_msg_length |
||
847 | rlen vme_wb_length |
||
848 | {align1}; /* FBR message: same payload start and writeback destination as the IME send above, so the earlier reply is overwritten */ |
||
849 | |||
850 | cmp.l.f0.0 (1) null:uw vme_wb0.8<0,1,0>:uw tmp_vme_wb0.8<0,1,0>:uw {align1}; /* unsigned compare of the word at byte offset 8: new result against the value saved on entry (presumably the best distortion - confirm against the writeback layout) */ |
||
851 | (f0.0) jmpi (1) vme_done; /* new value is strictly lower: keep the new results */ |
||
852 | mov (8) vme_wb0.0<1>:ud tmp_vme_wb0.0<8,8,1>:ud {align1}; /* otherwise (greater or equal) restore the saved vme_wb0 ... */ |
||
853 | mov (8) vme_wb1.0<1>:ud tmp_vme_wb1.0<8,8,1>:ud {align1}; /* ... and the saved vme_wb1, then fall through to vme_done */ |
||
854 | |||
855 | vme_done: /* Common exit of the extra search: reached directly when both magnitudes were below 4, by jump when the new search result won, or by fall-through after the saved results were restored. */ |
||
856 | jmpi (1) vme_mv_output; /* unconditional jump to the output stage (label defined elsewhere in the file) */ |
||
857 | nop; /* padding after the jump; its purpose is not visible in this part of the file */ |
||
858 | nop; |
||
859 | nop; |
||
860 |