Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4349 | Serge | 1 | /* |
2 | * H.26L/H.264/AVC/JVT/14496-10/... decoder |
||
3 | * Copyright (c) 2003 Michael Niedermayer |
||
4 | * |
||
5 | * This file is part of FFmpeg. |
||
6 | * |
||
7 | * FFmpeg is free software; you can redistribute it and/or |
||
8 | * modify it under the terms of the GNU Lesser General Public |
||
9 | * License as published by the Free Software Foundation; either |
||
10 | * version 2.1 of the License, or (at your option) any later version. |
||
11 | * |
||
12 | * FFmpeg is distributed in the hope that it will be useful, |
||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||
15 | * Lesser General Public License for more details. |
||
16 | * |
||
17 | * You should have received a copy of the GNU Lesser General Public |
||
18 | * License along with FFmpeg; if not, write to the Free Software |
||
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||
20 | */ |
||
21 | |||
22 | /** |
||
23 | * @file |
||
24 | * H.264 / AVC / MPEG4 part10 codec. |
||
25 | * @author Michael Niedermayer |
||
26 | */ |
||
27 | |||
28 | #define UNCHECKED_BITSTREAM_READER 1 |
||
29 | |||
30 | #include "libavutil/avassert.h" |
||
31 | #include "libavutil/imgutils.h" |
||
32 | #include "libavutil/opt.h" |
||
33 | #include "internal.h" |
||
34 | #include "cabac.h" |
||
35 | #include "cabac_functions.h" |
||
36 | #include "dsputil.h" |
||
37 | #include "error_resilience.h" |
||
38 | #include "avcodec.h" |
||
39 | #include "mpegvideo.h" |
||
40 | #include "h264.h" |
||
41 | #include "h264data.h" |
||
42 | #include "h264chroma.h" |
||
43 | #include "h264_mvpred.h" |
||
44 | #include "golomb.h" |
||
45 | #include "mathops.h" |
||
46 | #include "rectangle.h" |
||
47 | #include "svq3.h" |
||
48 | #include "thread.h" |
||
49 | #include "vdpau_internal.h" |
||
50 | |||
51 | #include |
||
52 | |||
53 | static void flush_change(H264Context *h); |
||
54 | |||
/* Four per-macroblock sizes shared with other H.264 files.
 * NOTE(review): presumably indexed by chroma_format_idc -- confirm at the
 * users of this table. */
const uint16_t ff_h264_mb_sizes[4] = {
    256,
    384,
    512,
    768,
};
||
56 | |||
57 | static const uint8_t rem6[QP_MAX_NUM + 1] = { |
||
58 | 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, |
||
59 | 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, |
||
60 | 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, |
||
61 | 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, |
||
62 | 0, 1, 2, 3, |
||
63 | }; |
||
64 | |||
65 | static const uint8_t div6[QP_MAX_NUM + 1] = { |
||
66 | 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, |
||
67 | 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, |
||
68 | 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, |
||
69 | 10,10,10,11,11,11,11,11,11,12,12,12,12,12,12,13,13,13, 13, 13, 13, |
||
70 | 14,14,14,14, |
||
71 | }; |
||
72 | |||
/* Field scan order for 4x4 blocks; each entry is written as x + y * 4.
 * The first 16 entries form a permutation of 0..15. The declared size is
 * one larger than that, so the final element stays zero-initialised.
 * The first two rows had been lost from this copy of the file and are
 * restored here. */
static const uint8_t field_scan[16+1] = {
    0 + 0 * 4, 0 + 1 * 4, 1 + 0 * 4, 0 + 2 * 4,
    0 + 3 * 4, 1 + 1 * 4, 1 + 2 * 4, 1 + 3 * 4,
    2 + 0 * 4, 2 + 1 * 4, 2 + 2 * 4, 2 + 3 * 4,
    3 + 0 * 4, 3 + 1 * 4, 3 + 2 * 4, 3 + 3 * 4,
};
||
79 | |||
/* Field scan order for 8x8 blocks; each entry is written as x + y * 8.
 * The first 64 entries form a permutation of 0..63. The declared size is
 * one larger than that, so the final element stays zero-initialised.
 * Rows 0 and 3 had been lost from this copy of the file and are restored
 * here. */
static const uint8_t field_scan8x8[64+1] = {
    0 + 0 * 8, 0 + 1 * 8, 0 + 2 * 8, 1 + 0 * 8,
    1 + 1 * 8, 0 + 3 * 8, 0 + 4 * 8, 1 + 2 * 8,
    2 + 0 * 8, 1 + 3 * 8, 0 + 5 * 8, 0 + 6 * 8,
    0 + 7 * 8, 1 + 4 * 8, 2 + 1 * 8, 3 + 0 * 8,
    2 + 2 * 8, 1 + 5 * 8, 1 + 6 * 8, 1 + 7 * 8,
    2 + 3 * 8, 3 + 1 * 8, 4 + 0 * 8, 3 + 2 * 8,
    2 + 4 * 8, 2 + 5 * 8, 2 + 6 * 8, 2 + 7 * 8,
    3 + 3 * 8, 4 + 1 * 8, 5 + 0 * 8, 4 + 2 * 8,
    3 + 4 * 8, 3 + 5 * 8, 3 + 6 * 8, 3 + 7 * 8,
    4 + 3 * 8, 5 + 1 * 8, 6 + 0 * 8, 5 + 2 * 8,
    4 + 4 * 8, 4 + 5 * 8, 4 + 6 * 8, 4 + 7 * 8,
    5 + 3 * 8, 6 + 1 * 8, 6 + 2 * 8, 5 + 4 * 8,
    5 + 5 * 8, 5 + 6 * 8, 5 + 7 * 8, 6 + 3 * 8,
    7 + 0 * 8, 7 + 1 * 8, 6 + 4 * 8, 6 + 5 * 8,
    6 + 6 * 8, 6 + 7 * 8, 7 + 2 * 8, 7 + 3 * 8,
    7 + 4 * 8, 7 + 5 * 8, 7 + 6 * 8, 7 + 7 * 8,
};
||
98 | |||
/* 8x8 field scan rearranged for CAVLC; each entry is written as x + y * 8.
 * The first 64 entries form a permutation of 0..63. The declared size is
 * one larger than that, so the final element stays zero-initialised.
 * Rows 0, 4 and 8 had been lost from this copy of the file and are
 * restored here. */
static const uint8_t field_scan8x8_cavlc[64+1] = {
    0 + 0 * 8, 1 + 1 * 8, 2 + 0 * 8, 0 + 7 * 8,
    2 + 2 * 8, 2 + 3 * 8, 2 + 4 * 8, 3 + 3 * 8,
    3 + 4 * 8, 4 + 3 * 8, 4 + 4 * 8, 5 + 3 * 8,
    5 + 5 * 8, 7 + 0 * 8, 6 + 6 * 8, 7 + 4 * 8,
    0 + 1 * 8, 0 + 3 * 8, 1 + 3 * 8, 1 + 4 * 8,
    1 + 5 * 8, 3 + 1 * 8, 2 + 5 * 8, 4 + 1 * 8,
    3 + 5 * 8, 5 + 1 * 8, 4 + 5 * 8, 6 + 1 * 8,
    5 + 6 * 8, 7 + 1 * 8, 6 + 7 * 8, 7 + 5 * 8,
    0 + 2 * 8, 0 + 4 * 8, 0 + 5 * 8, 2 + 1 * 8,
    1 + 6 * 8, 4 + 0 * 8, 2 + 6 * 8, 5 + 0 * 8,
    3 + 6 * 8, 6 + 0 * 8, 4 + 6 * 8, 6 + 2 * 8,
    5 + 7 * 8, 6 + 4 * 8, 7 + 2 * 8, 7 + 6 * 8,
    1 + 0 * 8, 1 + 2 * 8, 0 + 6 * 8, 3 + 0 * 8,
    1 + 7 * 8, 3 + 2 * 8, 2 + 7 * 8, 4 + 2 * 8,
    3 + 7 * 8, 5 + 2 * 8, 4 + 7 * 8, 5 + 4 * 8,
    6 + 3 * 8, 6 + 5 * 8, 7 + 3 * 8, 7 + 7 * 8,
};
||
117 | |||
// zigzag_scan8x8_cavlc[i] = zigzag_scan8x8[(i / 16) + 4 * (i % 16)]
// (equivalently: zigzag_scan8x8[i] = zigzag_scan8x8_cavlc[(i / 4) + 16 * (i % 4)])
//
// Each entry is written as x + y * 8. The first 64 entries form a
// permutation of 0..63. The declared size is one larger than that, so the
// final element stays zero-initialised. Rows 0, 8, 12 and 14 had been lost
// from this copy of the file and are restored here.
static const uint8_t zigzag_scan8x8_cavlc[64+1] = {
    0 + 0 * 8, 1 + 1 * 8, 1 + 2 * 8, 2 + 2 * 8,
    4 + 1 * 8, 0 + 5 * 8, 3 + 3 * 8, 7 + 0 * 8,
    3 + 4 * 8, 1 + 7 * 8, 5 + 3 * 8, 6 + 3 * 8,
    2 + 7 * 8, 6 + 4 * 8, 5 + 6 * 8, 7 + 5 * 8,
    1 + 0 * 8, 2 + 0 * 8, 0 + 3 * 8, 3 + 1 * 8,
    3 + 2 * 8, 0 + 6 * 8, 4 + 2 * 8, 6 + 1 * 8,
    2 + 5 * 8, 2 + 6 * 8, 6 + 2 * 8, 5 + 4 * 8,
    3 + 7 * 8, 7 + 3 * 8, 4 + 7 * 8, 7 + 6 * 8,
    0 + 1 * 8, 3 + 0 * 8, 0 + 4 * 8, 4 + 0 * 8,
    2 + 3 * 8, 1 + 5 * 8, 5 + 1 * 8, 5 + 2 * 8,
    1 + 6 * 8, 3 + 5 * 8, 7 + 1 * 8, 4 + 5 * 8,
    4 + 6 * 8, 7 + 4 * 8, 5 + 7 * 8, 6 + 7 * 8,
    0 + 2 * 8, 2 + 1 * 8, 1 + 3 * 8, 5 + 0 * 8,
    1 + 4 * 8, 2 + 4 * 8, 6 + 0 * 8, 4 + 3 * 8,
    0 + 7 * 8, 4 + 4 * 8, 7 + 2 * 8, 3 + 6 * 8,
    5 + 5 * 8, 6 + 5 * 8, 6 + 6 * 8, 7 + 7 * 8,
};
||
137 | |||
/* Seed values for the 4x4 dequantisation tables: six rows of three
 * coefficients each.
 * NOTE(review): presumably one row per (qp % 6) -- confirm at the user. */
static const uint8_t dequant4_coeff_init[6][3] = {
    [0] = { 10, 13, 16 },
    [1] = { 11, 14, 18 },
    [2] = { 13, 16, 20 },
    [3] = { 14, 18, 23 },
    [4] = { 16, 20, 25 },
    [5] = { 18, 23, 29 },
};
||
146 | |||
/* Sixteen column indices (values 0..5), laid out here as a 4x4 pattern.
 * NOTE(review): presumably selects a column of dequant8_coeff_init for each
 * position of a 4x4 tile -- confirm at the user. */
static const uint8_t dequant8_coeff_init_scan[16] = {
    0, 3, 4, 3,
    3, 1, 5, 1,
    4, 5, 2, 5,
    3, 1, 5, 1,
};
||
150 | |||
/* Seed values for the 8x8 dequantisation tables: six rows of six
 * coefficients each.
 * NOTE(review): presumably one row per (qp % 6) -- confirm at the user. */
static const uint8_t dequant8_coeff_init[6][6] = {
    [0] = { 20, 18, 32, 19, 25, 24 },
    [1] = { 22, 19, 35, 21, 28, 26 },
    [2] = { 26, 23, 42, 24, 33, 31 },
    [3] = { 28, 25, 45, 26, 35, 33 },
    [4] = { 32, 28, 51, 30, 40, 38 },
    [5] = { 36, 32, 58, 34, 46, 43 },
};
||
159 | |||
/* Candidate pixel formats: every hardware-acceleration format whose
 * CONFIG_H264_*_HWACCEL switch is enabled at build time, followed by
 * AV_PIX_FMT_YUV420P, terminated by AV_PIX_FMT_NONE.
 * NOTE(review): presumably the list offered for non-JPEG-range 4:2:0
 * streams -- confirm at the user. */
static const enum AVPixelFormat h264_hwaccel_pixfmt_list_420[] = {
#if CONFIG_H264_DXVA2_HWACCEL
    AV_PIX_FMT_DXVA2_VLD,
#endif
#if CONFIG_H264_VAAPI_HWACCEL
    AV_PIX_FMT_VAAPI_VLD,
#endif
#if CONFIG_H264_VDA_HWACCEL
    AV_PIX_FMT_VDA_VLD,
#endif
#if CONFIG_H264_VDPAU_HWACCEL
    AV_PIX_FMT_VDPAU,
#endif
    AV_PIX_FMT_YUV420P,
    AV_PIX_FMT_NONE
};
||
176 | |||
/* Same layout as the non-JPEG list: every hardware-acceleration format whose
 * CONFIG_H264_*_HWACCEL switch is enabled at build time, followed by
 * AV_PIX_FMT_YUVJ420P, terminated by AV_PIX_FMT_NONE.
 * NOTE(review): presumably the list offered for JPEG-range 4:2:0 streams --
 * confirm at the user. */
static const enum AVPixelFormat h264_hwaccel_pixfmt_list_jpeg_420[] = {
#if CONFIG_H264_DXVA2_HWACCEL
    AV_PIX_FMT_DXVA2_VLD,
#endif
#if CONFIG_H264_VAAPI_HWACCEL
    AV_PIX_FMT_VAAPI_VLD,
#endif
#if CONFIG_H264_VDA_HWACCEL
    AV_PIX_FMT_VDA_VLD,
#endif
#if CONFIG_H264_VDPAU_HWACCEL
    AV_PIX_FMT_VDPAU,
#endif
    AV_PIX_FMT_YUVJ420P,
    AV_PIX_FMT_NONE
};
||
193 | |||
194 | int avpriv_h264_has_num_reorder_frames(AVCodecContext *avctx) |
||
195 | { |
||
196 | H264Context *h = avctx->priv_data; |
||
197 | return h ? h->sps.num_reorder_frames : 0; |
||
198 | } |
||
199 | |||
200 | static void h264_er_decode_mb(void *opaque, int ref, int mv_dir, int mv_type, |
||
201 | int (*mv)[2][4][2], |
||
202 | int mb_x, int mb_y, int mb_intra, int mb_skipped) |
||
203 | { |
||
204 | H264Context *h = opaque; |
||
205 | |||
206 | h->mb_x = mb_x; |
||
207 | h->mb_y = mb_y; |
||
208 | h->mb_xy = mb_x + mb_y * h->mb_stride; |
||
209 | memset(h->non_zero_count_cache, 0, sizeof(h->non_zero_count_cache)); |
||
210 | av_assert1(ref >= 0); |
||
211 | /* FIXME: It is possible albeit uncommon that slice references |
||
212 | * differ between slices. We take the easy approach and ignore |
||
213 | * it for now. If this turns out to have any relevance in |
||
214 | * practice then correct remapping should be added. */ |
||
215 | if (ref >= h->ref_count[0]) |
||
216 | ref = 0; |
||
217 | if (!h->ref_list[0][ref].f.data[0]) { |
||
218 | av_log(h->avctx, AV_LOG_DEBUG, "Reference not available for error concealing\n"); |
||
219 | ref = 0; |
||
220 | } |
||
221 | if ((h->ref_list[0][ref].reference&3) != 3) { |
||
222 | av_log(h->avctx, AV_LOG_DEBUG, "Reference invalid\n"); |
||
223 | return; |
||
224 | } |
||
225 | fill_rectangle(&h->cur_pic.ref_index[0][4 * h->mb_xy], |
||
226 | 2, 2, 2, ref, 1); |
||
227 | fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1); |
||
228 | fill_rectangle(h->mv_cache[0][scan8[0]], 4, 4, 8, |
||
229 | pack16to32((*mv)[0][0][0], (*mv)[0][0][1]), 4); |
||
230 | h->mb_mbaff = |
||
231 | h->mb_field_decoding_flag = 0; |
||
232 | ff_h264_hl_decode_mb(h); |
||
233 | } |
||
234 | |||
235 | void ff_h264_draw_horiz_band(H264Context *h, int y, int height) |
||
236 | { |
||
237 | AVCodecContext *avctx = h->avctx; |
||
238 | Picture *cur = &h->cur_pic; |
||
239 | Picture *last = h->ref_list[0][0].f.data[0] ? &h->ref_list[0][0] : NULL; |
||
240 | const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt); |
||
241 | int vshift = desc->log2_chroma_h; |
||
242 | const int field_pic = h->picture_structure != PICT_FRAME; |
||
243 | if (field_pic) { |
||
244 | height <<= 1; |
||
245 | y <<= 1; |
||
246 | } |
||
247 | |||
248 | height = FFMIN(height, avctx->height - y); |
||
249 | |||
250 | if (field_pic && h->first_field && !(avctx->slice_flags & SLICE_FLAG_ALLOW_FIELD)) |
||
251 | return; |
||
252 | |||
253 | if (avctx->draw_horiz_band) { |
||
254 | AVFrame *src; |
||
255 | int offset[AV_NUM_DATA_POINTERS]; |
||
256 | int i; |
||
257 | |||
258 | if (cur->f.pict_type == AV_PICTURE_TYPE_B || h->low_delay || |
||
259 | (avctx->slice_flags & SLICE_FLAG_CODED_ORDER)) |
||
260 | src = &cur->f; |
||
261 | else if (last) |
||
262 | src = &last->f; |
||
263 | else |
||
264 | return; |
||
265 | |||
266 | offset[0] = y * src->linesize[0]; |
||
267 | offset[1] = |
||
268 | offset[2] = (y >> vshift) * src->linesize[1]; |
||
269 | for (i = 3; i < AV_NUM_DATA_POINTERS; i++) |
||
270 | offset[i] = 0; |
||
271 | |||
272 | emms_c(); |
||
273 | |||
274 | avctx->draw_horiz_band(avctx, src, offset, |
||
275 | y, h->picture_structure, height); |
||
276 | } |
||
277 | } |
||
278 | |||
279 | static void unref_picture(H264Context *h, Picture *pic) |
||
280 | { |
||
281 | int off = offsetof(Picture, tf) + sizeof(pic->tf); |
||
282 | int i; |
||
283 | |||
284 | if (!pic->f.data[0]) |
||
285 | return; |
||
286 | |||
287 | ff_thread_release_buffer(h->avctx, &pic->tf); |
||
288 | av_buffer_unref(&pic->hwaccel_priv_buf); |
||
289 | |||
290 | av_buffer_unref(&pic->qscale_table_buf); |
||
291 | av_buffer_unref(&pic->mb_type_buf); |
||
292 | for (i = 0; i < 2; i++) { |
||
293 | av_buffer_unref(&pic->motion_val_buf[i]); |
||
294 | av_buffer_unref(&pic->ref_index_buf[i]); |
||
295 | } |
||
296 | |||
297 | memset((uint8_t*)pic + off, 0, sizeof(*pic) - off); |
||
298 | } |
||
299 | |||
300 | static void release_unused_pictures(H264Context *h, int remove_current) |
||
301 | { |
||
302 | int i; |
||
303 | |||
304 | /* release non reference frames */ |
||
305 | for (i = 0; i < MAX_PICTURE_COUNT; i++) { |
||
306 | if (h->DPB[i].f.data[0] && !h->DPB[i].reference && |
||
307 | (remove_current || &h->DPB[i] != h->cur_pic_ptr)) { |
||
308 | unref_picture(h, &h->DPB[i]); |
||
309 | } |
||
310 | } |
||
311 | } |
||
312 | |||
313 | static int ref_picture(H264Context *h, Picture *dst, Picture *src) |
||
314 | { |
||
315 | int ret, i; |
||
316 | |||
317 | av_assert0(!dst->f.buf[0]); |
||
318 | av_assert0(src->f.buf[0]); |
||
319 | |||
320 | src->tf.f = &src->f; |
||
321 | dst->tf.f = &dst->f; |
||
322 | ret = ff_thread_ref_frame(&dst->tf, &src->tf); |
||
323 | if (ret < 0) |
||
324 | goto fail; |
||
325 | |||
326 | dst->qscale_table_buf = av_buffer_ref(src->qscale_table_buf); |
||
327 | dst->mb_type_buf = av_buffer_ref(src->mb_type_buf); |
||
328 | if (!dst->qscale_table_buf || !dst->mb_type_buf) |
||
329 | goto fail; |
||
330 | dst->qscale_table = src->qscale_table; |
||
331 | dst->mb_type = src->mb_type; |
||
332 | |||
333 | for (i = 0; i < 2; i++) { |
||
334 | dst->motion_val_buf[i] = av_buffer_ref(src->motion_val_buf[i]); |
||
335 | dst->ref_index_buf[i] = av_buffer_ref(src->ref_index_buf[i]); |
||
336 | if (!dst->motion_val_buf[i] || !dst->ref_index_buf[i]) |
||
337 | goto fail; |
||
338 | dst->motion_val[i] = src->motion_val[i]; |
||
339 | dst->ref_index[i] = src->ref_index[i]; |
||
340 | } |
||
341 | |||
342 | if (src->hwaccel_picture_private) { |
||
343 | dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf); |
||
344 | if (!dst->hwaccel_priv_buf) |
||
345 | goto fail; |
||
346 | dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data; |
||
347 | } |
||
348 | |||
349 | for (i = 0; i < 2; i++) |
||
350 | dst->field_poc[i] = src->field_poc[i]; |
||
351 | |||
352 | memcpy(dst->ref_poc, src->ref_poc, sizeof(src->ref_poc)); |
||
353 | memcpy(dst->ref_count, src->ref_count, sizeof(src->ref_count)); |
||
354 | |||
355 | dst->poc = src->poc; |
||
356 | dst->frame_num = src->frame_num; |
||
357 | dst->mmco_reset = src->mmco_reset; |
||
358 | dst->pic_id = src->pic_id; |
||
359 | dst->long_ref = src->long_ref; |
||
360 | dst->mbaff = src->mbaff; |
||
361 | dst->field_picture = src->field_picture; |
||
362 | dst->needs_realloc = src->needs_realloc; |
||
363 | dst->reference = src->reference; |
||
364 | dst->sync = src->sync; |
||
365 | dst->crop = src->crop; |
||
366 | dst->crop_left = src->crop_left; |
||
367 | dst->crop_top = src->crop_top; |
||
368 | |||
369 | return 0; |
||
370 | fail: |
||
371 | unref_picture(h, dst); |
||
372 | return ret; |
||
373 | } |
||
374 | |||
375 | static int alloc_scratch_buffers(H264Context *h, int linesize) |
||
376 | { |
||
377 | int alloc_size = FFALIGN(FFABS(linesize) + 32, 32); |
||
378 | |||
379 | if (h->bipred_scratchpad) |
||
380 | return 0; |
||
381 | |||
382 | h->bipred_scratchpad = av_malloc(16 * 6 * alloc_size); |
||
383 | // edge emu needs blocksize + filter length - 1 |
||
384 | // (= 21x21 for h264) |
||
385 | h->edge_emu_buffer = av_mallocz(alloc_size * 2 * 21); |
||
386 | h->me.scratchpad = av_mallocz(alloc_size * 2 * 16 * 2); |
||
387 | |||
388 | if (!h->bipred_scratchpad || !h->edge_emu_buffer || !h->me.scratchpad) { |
||
389 | av_freep(&h->bipred_scratchpad); |
||
390 | av_freep(&h->edge_emu_buffer); |
||
391 | av_freep(&h->me.scratchpad); |
||
392 | return AVERROR(ENOMEM); |
||
393 | } |
||
394 | |||
395 | h->me.temp = h->me.scratchpad; |
||
396 | |||
397 | return 0; |
||
398 | } |
||
399 | |||
400 | static int init_table_pools(H264Context *h) |
||
401 | { |
||
402 | const int big_mb_num = h->mb_stride * (h->mb_height + 1) + 1; |
||
403 | const int mb_array_size = h->mb_stride * h->mb_height; |
||
404 | const int b4_stride = h->mb_width * 4 + 1; |
||
405 | const int b4_array_size = b4_stride * h->mb_height * 4; |
||
406 | |||
407 | h->qscale_table_pool = av_buffer_pool_init(big_mb_num + h->mb_stride, |
||
408 | av_buffer_allocz); |
||
409 | h->mb_type_pool = av_buffer_pool_init((big_mb_num + h->mb_stride) * |
||
410 | sizeof(uint32_t), av_buffer_allocz); |
||
411 | h->motion_val_pool = av_buffer_pool_init(2 * (b4_array_size + 4) * |
||
412 | sizeof(int16_t), av_buffer_allocz); |
||
413 | h->ref_index_pool = av_buffer_pool_init(4 * mb_array_size, av_buffer_allocz); |
||
414 | |||
415 | if (!h->qscale_table_pool || !h->mb_type_pool || !h->motion_val_pool || |
||
416 | !h->ref_index_pool) { |
||
417 | av_buffer_pool_uninit(&h->qscale_table_pool); |
||
418 | av_buffer_pool_uninit(&h->mb_type_pool); |
||
419 | av_buffer_pool_uninit(&h->motion_val_pool); |
||
420 | av_buffer_pool_uninit(&h->ref_index_pool); |
||
421 | return AVERROR(ENOMEM); |
||
422 | } |
||
423 | |||
424 | return 0; |
||
425 | } |
||
426 | |||
427 | static int alloc_picture(H264Context *h, Picture *pic) |
||
428 | { |
||
429 | int i, ret = 0; |
||
430 | |||
431 | av_assert0(!pic->f.data[0]); |
||
432 | |||
433 | pic->tf.f = &pic->f; |
||
434 | ret = ff_thread_get_buffer(h->avctx, &pic->tf, pic->reference ? |
||
435 | AV_GET_BUFFER_FLAG_REF : 0); |
||
436 | if (ret < 0) |
||
437 | goto fail; |
||
438 | |||
439 | h->linesize = pic->f.linesize[0]; |
||
440 | h->uvlinesize = pic->f.linesize[1]; |
||
441 | pic->crop = h->sps.crop; |
||
442 | pic->crop_top = h->sps.crop_top; |
||
443 | pic->crop_left= h->sps.crop_left; |
||
444 | |||
445 | if (h->avctx->hwaccel) { |
||
446 | const AVHWAccel *hwaccel = h->avctx->hwaccel; |
||
447 | av_assert0(!pic->hwaccel_picture_private); |
||
448 | if (hwaccel->priv_data_size) { |
||
449 | pic->hwaccel_priv_buf = av_buffer_allocz(hwaccel->priv_data_size); |
||
450 | if (!pic->hwaccel_priv_buf) |
||
451 | return AVERROR(ENOMEM); |
||
452 | pic->hwaccel_picture_private = pic->hwaccel_priv_buf->data; |
||
453 | } |
||
454 | } |
||
455 | |||
456 | if (!h->qscale_table_pool) { |
||
457 | ret = init_table_pools(h); |
||
458 | if (ret < 0) |
||
459 | goto fail; |
||
460 | } |
||
461 | |||
462 | pic->qscale_table_buf = av_buffer_pool_get(h->qscale_table_pool); |
||
463 | pic->mb_type_buf = av_buffer_pool_get(h->mb_type_pool); |
||
464 | if (!pic->qscale_table_buf || !pic->mb_type_buf) |
||
465 | goto fail; |
||
466 | |||
467 | pic->mb_type = (uint32_t*)pic->mb_type_buf->data + 2 * h->mb_stride + 1; |
||
468 | pic->qscale_table = pic->qscale_table_buf->data + 2 * h->mb_stride + 1; |
||
469 | |||
470 | for (i = 0; i < 2; i++) { |
||
471 | pic->motion_val_buf[i] = av_buffer_pool_get(h->motion_val_pool); |
||
472 | pic->ref_index_buf[i] = av_buffer_pool_get(h->ref_index_pool); |
||
473 | if (!pic->motion_val_buf[i] || !pic->ref_index_buf[i]) |
||
474 | goto fail; |
||
475 | |||
476 | pic->motion_val[i] = (int16_t (*)[2])pic->motion_val_buf[i]->data + 4; |
||
477 | pic->ref_index[i] = pic->ref_index_buf[i]->data; |
||
478 | } |
||
479 | |||
480 | return 0; |
||
481 | fail: |
||
482 | unref_picture(h, pic); |
||
483 | return (ret < 0) ? ret : AVERROR(ENOMEM); |
||
484 | } |
||
485 | |||
486 | static inline int pic_is_unused(H264Context *h, Picture *pic) |
||
487 | { |
||
488 | if (pic->f.data[0] == NULL) |
||
489 | return 1; |
||
490 | if (pic->needs_realloc && !(pic->reference & DELAYED_PIC_REF)) |
||
491 | return 1; |
||
492 | return 0; |
||
493 | } |
||
494 | |||
495 | static int find_unused_picture(H264Context *h) |
||
496 | { |
||
497 | int i; |
||
498 | |||
499 | for (i = 0; i < MAX_PICTURE_COUNT; i++) { |
||
500 | if (pic_is_unused(h, &h->DPB[i])) |
||
501 | break; |
||
502 | } |
||
503 | if (i == MAX_PICTURE_COUNT) |
||
504 | return AVERROR_INVALIDDATA; |
||
505 | |||
506 | if (h->DPB[i].needs_realloc) { |
||
507 | h->DPB[i].needs_realloc = 0; |
||
508 | unref_picture(h, &h->DPB[i]); |
||
509 | } |
||
510 | |||
511 | return i; |
||
512 | } |
||
513 | |||
514 | /** |
||
515 | * Check if the top & left blocks are available if needed and |
||
516 | * change the dc mode so it only uses the available blocks. |
||
517 | */ |
||
518 | int ff_h264_check_intra4x4_pred_mode(H264Context *h) |
||
519 | { |
||
520 | static const int8_t top[12] = { |
||
521 | -1, 0, LEFT_DC_PRED, -1, -1, -1, -1, -1, 0 |
||
522 | }; |
||
523 | static const int8_t left[12] = { |
||
524 | 0, -1, TOP_DC_PRED, 0, -1, -1, -1, 0, -1, DC_128_PRED |
||
525 | }; |
||
526 | int i; |
||
527 | |||
528 | if (!(h->top_samples_available & 0x8000)) { |
||
529 | for (i = 0; i < 4; i++) { |
||
530 | int status = top[h->intra4x4_pred_mode_cache[scan8[0] + i]]; |
||
531 | if (status < 0) { |
||
532 | av_log(h->avctx, AV_LOG_ERROR, |
||
533 | "top block unavailable for requested intra4x4 mode %d at %d %d\n", |
||
534 | status, h->mb_x, h->mb_y); |
||
535 | return AVERROR_INVALIDDATA; |
||
536 | } else if (status) { |
||
537 | h->intra4x4_pred_mode_cache[scan8[0] + i] = status; |
||
538 | } |
||
539 | } |
||
540 | } |
||
541 | |||
542 | if ((h->left_samples_available & 0x8888) != 0x8888) { |
||
543 | static const int mask[4] = { 0x8000, 0x2000, 0x80, 0x20 }; |
||
544 | for (i = 0; i < 4; i++) |
||
545 | if (!(h->left_samples_available & mask[i])) { |
||
546 | int status = left[h->intra4x4_pred_mode_cache[scan8[0] + 8 * i]]; |
||
547 | if (status < 0) { |
||
548 | av_log(h->avctx, AV_LOG_ERROR, |
||
549 | "left block unavailable for requested intra4x4 mode %d at %d %d\n", |
||
550 | status, h->mb_x, h->mb_y); |
||
551 | return AVERROR_INVALIDDATA; |
||
552 | } else if (status) { |
||
553 | h->intra4x4_pred_mode_cache[scan8[0] + 8 * i] = status; |
||
554 | } |
||
555 | } |
||
556 | } |
||
557 | |||
558 | return 0; |
||
559 | } // FIXME cleanup like ff_h264_check_intra_pred_mode |
||
560 | |||
561 | /** |
||
562 | * Check if the top & left blocks are available if needed and |
||
563 | * change the dc mode so it only uses the available blocks. |
||
564 | */ |
||
565 | int ff_h264_check_intra_pred_mode(H264Context *h, int mode, int is_chroma) |
||
566 | { |
||
567 | static const int8_t top[4] = { LEFT_DC_PRED8x8, 1, -1, -1 }; |
||
568 | static const int8_t left[5] = { TOP_DC_PRED8x8, -1, 2, -1, DC_128_PRED8x8 }; |
||
569 | |||
570 | if (mode > 3U) { |
||
571 | av_log(h->avctx, AV_LOG_ERROR, |
||
572 | "out of range intra chroma pred mode at %d %d\n", |
||
573 | h->mb_x, h->mb_y); |
||
574 | return AVERROR_INVALIDDATA; |
||
575 | } |
||
576 | |||
577 | if (!(h->top_samples_available & 0x8000)) { |
||
578 | mode = top[mode]; |
||
579 | if (mode < 0) { |
||
580 | av_log(h->avctx, AV_LOG_ERROR, |
||
581 | "top block unavailable for requested intra mode at %d %d\n", |
||
582 | h->mb_x, h->mb_y); |
||
583 | return AVERROR_INVALIDDATA; |
||
584 | } |
||
585 | } |
||
586 | |||
587 | if ((h->left_samples_available & 0x8080) != 0x8080) { |
||
588 | mode = left[mode]; |
||
589 | if (is_chroma && (h->left_samples_available & 0x8080)) { |
||
590 | // mad cow disease mode, aka MBAFF + constrained_intra_pred |
||
591 | mode = ALZHEIMER_DC_L0T_PRED8x8 + |
||
592 | (!(h->left_samples_available & 0x8000)) + |
||
593 | 2 * (mode == DC_128_PRED8x8); |
||
594 | } |
||
595 | if (mode < 0) { |
||
596 | av_log(h->avctx, AV_LOG_ERROR, |
||
597 | "left block unavailable for requested intra mode at %d %d\n", |
||
598 | h->mb_x, h->mb_y); |
||
599 | return AVERROR_INVALIDDATA; |
||
600 | } |
||
601 | } |
||
602 | |||
603 | return mode; |
||
604 | } |
||
605 | |||
/**
 * Parse a NAL unit header and return the payload with escape bytes removed.
 *
 * The first byte of src is split into h->nal_ref_idc and h->nal_unit_type.
 * The remaining bytes are scanned for the pattern 00 00 0x with x <= 3:
 * x == 3 marks an escape, any other x is treated as the next start code and
 * ends the unit.
 *
 * @param h          decoder context; owns the rbsp buffers used for output
 * @param src        NAL unit, starting at the header byte
 * @param dst_length receives the number of payload bytes returned
 * @param consumed   receives the number of input bytes used, header included
 * @param length     number of bytes available at src
 * @return pointer to the payload, or NULL if the buffer could not be
 *         allocated. When no escape is present and CODEC_FLAG2_FAST is set,
 *         the pointer refers to the input itself rather than to a copy.
 */
const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src,
                                  int *dst_length, int *consumed, int length)
{
    int i, si, di;
    uint8_t *dst;
    int bufidx;

    // src[0]&0x80; // forbidden bit
    h->nal_ref_idc   = src[0] >> 5;
    h->nal_unit_type = src[0] & 0x1F;

    /* skip the header byte */
    src++;
    length--;

/* Runs with src[i] == 0: if the next two bytes are 00 and <= 3, the scan
 * loop is left; for anything but an escape (03), length is cut to i. */
#define STARTCODE_TEST                                                  \
    if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {         \
        if (src[i + 2] != 3) {                                          \
            /* startcode, so we must be past the end */                 \
            length = i;                                                 \
        }                                                               \
        break;                                                          \
    }

#if HAVE_FAST_UNALIGNED
/* Moves i to the first zero byte at or after i (stepping back one byte
 * first when src[i] is itself zero and i > 0). */
#define FIND_FIRST_ZERO                                                 \
    if (i > 0 && !src[i])                                               \
        i--;                                                            \
    while (src[i])                                                      \
        i++

#if HAVE_FAST_64BIT
    /* Word-at-a-time scan: the mask test skips words in which none of the
     * tested byte positions is zero. */
    for (i = 0; i + 1 < length; i += 9) {
        if (!((~AV_RN64A(src + i) &
               (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
              0x8000800080008080ULL))
            continue;
        FIND_FIRST_ZERO;
        STARTCODE_TEST;
        i -= 7;
    }
#else
    for (i = 0; i + 1 < length; i += 5) {
        if (!((~AV_RN32A(src + i) &
               (AV_RN32A(src + i) - 0x01000101U)) &
              0x80008080U))
            continue;
        FIND_FIRST_ZERO;
        STARTCODE_TEST;
        i -= 3;
    }
#endif
#else
    /* Byte scan, two bytes per step; steps back one when the previous byte
     * was zero as well. */
    for (i = 0; i + 1 < length; i += 2) {
        if (src[i])
            continue;
        if (i > 0 && src[i - 1] == 0)
            i--;
        STARTCODE_TEST;
    }
#endif

    // use second escape buffer for inter data
    bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0;

    /* NOTE(review): this value of si is overwritten below before it is
     * ever read. */
    si = h->rbsp_buffer_size[bufidx];
    av_fast_padded_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+MAX_MBPAIR_SIZE);
    dst = h->rbsp_buffer[bufidx];

    if (dst == NULL)
        return NULL;

    if(i>=length-1){ //no escaped 0
        *dst_length= length;
        *consumed= length+1; //+1 for the header
        if(h->avctx->flags2 & CODEC_FLAG2_FAST){
            return src;
        }else{
            memcpy(dst, src, length);
            return dst;
        }
    }

    /* Everything before the first candidate position is copied verbatim. */
    memcpy(dst, src, i);
    si = di = i;
    while (si + 2 < length) {
        // remove escapes (very rare 1:2^22)
        if (src[si + 2] > 3) {
            dst[di++] = src[si++];
            dst[di++] = src[si++];
        } else if (src[si] == 0 && src[si + 1] == 0) {
            if (src[si + 2] == 3) { // escape
                dst[di++] = 0;
                dst[di++] = 0;
                si       += 3;
                continue;
            } else // next start code
                goto nsc;
        }

        dst[di++] = src[si++];
    }
    /* copy the last bytes, which are too few to hold an escape */
    while (si < length)
        dst[di++] = src[si++];

nsc:
    memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);

    *dst_length = di;
    *consumed   = si + 1; // +1 for the header
    /* FIXME store exact number of bits in the getbitcontext
     * (it is needed for decoding) */
    return dst;
}
||
719 | |||
720 | /** |
||
721 | * Identify the exact end of the bitstream |
||
722 | * @return the length of the trailing, or 0 if damaged |
||
723 | */ |
||
724 | static int decode_rbsp_trailing(H264Context *h, const uint8_t *src) |
||
725 | { |
||
726 | int v = *src; |
||
727 | int r; |
||
728 | |||
729 | tprintf(h->avctx, "rbsp trailing %X\n", v); |
||
730 | |||
731 | for (r = 1; r < 9; r++) { |
||
732 | if (v & 1) |
||
733 | return r; |
||
734 | v >>= 1; |
||
735 | } |
||
736 | return 0; |
||
737 | } |
||
738 | |||
739 | static inline int get_lowest_part_list_y(H264Context *h, Picture *pic, int n, |
||
740 | int height, int y_offset, int list) |
||
741 | { |
||
742 | int raw_my = h->mv_cache[list][scan8[n]][1]; |
||
743 | int filter_height_down = (raw_my & 3) ? 3 : 0; |
||
744 | int full_my = (raw_my >> 2) + y_offset; |
||
745 | int bottom = full_my + filter_height_down + height; |
||
746 | |||
747 | av_assert2(height >= 0); |
||
748 | |||
749 | return FFMAX(0, bottom); |
||
750 | } |
||
751 | |||
752 | static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n, |
||
753 | int height, int y_offset, int list0, |
||
754 | int list1, int *nrefs) |
||
755 | { |
||
756 | int my; |
||
757 | |||
758 | y_offset += 16 * (h->mb_y >> MB_FIELD(h)); |
||
759 | |||
760 | if (list0) { |
||
761 | int ref_n = h->ref_cache[0][scan8[n]]; |
||
762 | Picture *ref = &h->ref_list[0][ref_n]; |
||
763 | |||
764 | // Error resilience puts the current picture in the ref list. |
||
765 | // Don't try to wait on these as it will cause a deadlock. |
||
766 | // Fields can wait on each other, though. |
||
767 | if (ref->tf.progress->data != h->cur_pic.tf.progress->data || |
||
768 | (ref->reference & 3) != h->picture_structure) { |
||
769 | my = get_lowest_part_list_y(h, ref, n, height, y_offset, 0); |
||
770 | if (refs[0][ref_n] < 0) |
||
771 | nrefs[0] += 1; |
||
772 | refs[0][ref_n] = FFMAX(refs[0][ref_n], my); |
||
773 | } |
||
774 | } |
||
775 | |||
776 | if (list1) { |
||
777 | int ref_n = h->ref_cache[1][scan8[n]]; |
||
778 | Picture *ref = &h->ref_list[1][ref_n]; |
||
779 | |||
780 | if (ref->tf.progress->data != h->cur_pic.tf.progress->data || |
||
781 | (ref->reference & 3) != h->picture_structure) { |
||
782 | my = get_lowest_part_list_y(h, ref, n, height, y_offset, 1); |
||
783 | if (refs[1][ref_n] < 0) |
||
784 | nrefs[1] += 1; |
||
785 | refs[1][ref_n] = FFMAX(refs[1][ref_n], my); |
||
786 | } |
||
787 | } |
||
788 | } |
||
789 | |||
/**
 * Wait until all reference frames are available for MC operations.
 *
 * For every partition of the current macroblock the lowest reference row it
 * needs is collected per list and reference index. The function then waits
 * on each referenced picture until that row (clipped to the picture height)
 * has been reported as decoded.
 *
 * @param h the H264 context
 */
static void await_references(H264Context *h)
{
    const int mb_xy   = h->mb_xy;
    const int mb_type = h->cur_pic.mb_type[mb_xy];
    int refs[2][48];        /* lowest row needed per list/reference, -1 = unused */
    int nrefs[2] = { 0 };   /* number of references in use per list */
    int ref, list;

    memset(refs, -1, sizeof(refs));

    /* Collect the rows needed by each partition of the macroblock. */
    if (IS_16X16(mb_type)) {
        get_lowest_part_y(h, refs, 0, 16, 0,
                          IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
    } else if (IS_16X8(mb_type)) {
        get_lowest_part_y(h, refs, 0, 8, 0,
                          IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
        get_lowest_part_y(h, refs, 8, 8, 8,
                          IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
    } else if (IS_8X16(mb_type)) {
        get_lowest_part_y(h, refs, 0, 16, 0,
                          IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
        get_lowest_part_y(h, refs, 4, 16, 0,
                          IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
    } else {
        int i;

        av_assert2(IS_8X8(mb_type));

        /* Four 8x8 sub-macroblocks, each with its own sub-partitioning. */
        for (i = 0; i < 4; i++) {
            const int sub_mb_type = h->sub_mb_type[i];
            const int n           = 4 * i;
            int y_offset          = (i & 2) << 2; /* 0 for i = 0,1; 8 for i = 2,3 */

            if (IS_SUB_8X8(sub_mb_type)) {
                get_lowest_part_y(h, refs, n, 8, y_offset,
                                  IS_DIR(sub_mb_type, 0, 0),
                                  IS_DIR(sub_mb_type, 0, 1),
                                  nrefs);
            } else if (IS_SUB_8X4(sub_mb_type)) {
                get_lowest_part_y(h, refs, n, 4, y_offset,
                                  IS_DIR(sub_mb_type, 0, 0),
                                  IS_DIR(sub_mb_type, 0, 1),
                                  nrefs);
                get_lowest_part_y(h, refs, n + 2, 4, y_offset + 4,
                                  IS_DIR(sub_mb_type, 0, 0),
                                  IS_DIR(sub_mb_type, 0, 1),
                                  nrefs);
            } else if (IS_SUB_4X8(sub_mb_type)) {
                get_lowest_part_y(h, refs, n, 8, y_offset,
                                  IS_DIR(sub_mb_type, 0, 0),
                                  IS_DIR(sub_mb_type, 0, 1),
                                  nrefs);
                get_lowest_part_y(h, refs, n + 1, 8, y_offset,
                                  IS_DIR(sub_mb_type, 0, 0),
                                  IS_DIR(sub_mb_type, 0, 1),
                                  nrefs);
            } else {
                int j;
                av_assert2(IS_SUB_4X4(sub_mb_type));
                for (j = 0; j < 4; j++) {
                    int sub_y_offset = y_offset + 2 * (j & 2); /* +4 for j = 2,3 */
                    get_lowest_part_y(h, refs, n + j, 4, sub_y_offset,
                                      IS_DIR(sub_mb_type, 0, 0),
                                      IS_DIR(sub_mb_type, 0, 1),
                                      nrefs);
                }
            }
        }
    }

    /* Wait on every reference that was recorded above; the inner loop stops
     * as soon as all references counted for the list have been handled. */
    for (list = h->list_count - 1; list >= 0; list--)
        for (ref = 0; ref < 48 && nrefs[list]; ref++) {
            int row = refs[list][ref];
            if (row >= 0) {
                Picture *ref_pic      = &h->ref_list[list][ref];
                int ref_field         = ref_pic->reference - 1;
                int ref_field_picture = ref_pic->field_picture;
                int pic_height        = 16 * h->mb_height >> ref_field_picture;

                row <<= MB_MBAFF(h);
                nrefs[list]--;

                if (!FIELD_PICTURE(h) && ref_field_picture) { // frame referencing two fields
                    ff_thread_await_progress(&ref_pic->tf,
                                             FFMIN((row >> 1) - !(row & 1),
                                                   pic_height - 1),
                                             1);
                    ff_thread_await_progress(&ref_pic->tf,
                                             FFMIN((row >> 1), pic_height - 1),
                                             0);
                } else if (FIELD_PICTURE(h) && !ref_field_picture) { // field referencing one field of a frame
                    ff_thread_await_progress(&ref_pic->tf,
                                             FFMIN(row * 2 + ref_field,
                                                   pic_height - 1),
                                             0);
                } else if (FIELD_PICTURE(h)) {
                    /* field referencing a field */
                    ff_thread_await_progress(&ref_pic->tf,
                                             FFMIN(row, pic_height - 1),
                                             ref_field);
                } else {
                    /* frame referencing a frame */
                    ff_thread_await_progress(&ref_pic->tf,
                                             FFMIN(row, pic_height - 1),
                                             0);
                }
            }
        }
}
||
902 | |||
903 | static av_always_inline void mc_dir_part(H264Context *h, Picture *pic, |
||
904 | int n, int square, int height, |
||
905 | int delta, int list, |
||
906 | uint8_t *dest_y, uint8_t *dest_cb, |
||
907 | uint8_t *dest_cr, |
||
908 | int src_x_offset, int src_y_offset, |
||
909 | qpel_mc_func *qpix_op, |
||
910 | h264_chroma_mc_func chroma_op, |
||
911 | int pixel_shift, int chroma_idc) |
||
912 | { |
||
913 | const int mx = h->mv_cache[list][scan8[n]][0] + src_x_offset * 8; |
||
914 | int my = h->mv_cache[list][scan8[n]][1] + src_y_offset * 8; |
||
915 | const int luma_xy = (mx & 3) + ((my & 3) << 2); |
||
916 | ptrdiff_t offset = ((mx >> 2) << pixel_shift) + (my >> 2) * h->mb_linesize; |
||
917 | uint8_t *src_y = pic->f.data[0] + offset; |
||
918 | uint8_t *src_cb, *src_cr; |
||
919 | int extra_width = 0; |
||
920 | int extra_height = 0; |
||
921 | int emu = 0; |
||
922 | const int full_mx = mx >> 2; |
||
923 | const int full_my = my >> 2; |
||
924 | const int pic_width = 16 * h->mb_width; |
||
925 | const int pic_height = 16 * h->mb_height >> MB_FIELD(h); |
||
926 | int ysh; |
||
927 | |||
928 | if (mx & 7) |
||
929 | extra_width -= 3; |
||
930 | if (my & 7) |
||
931 | extra_height -= 3; |
||
932 | |||
933 | if (full_mx < 0 - extra_width || |
||
934 | full_my < 0 - extra_height || |
||
935 | full_mx + 16 /*FIXME*/ > pic_width + extra_width || |
||
936 | full_my + 16 /*FIXME*/ > pic_height + extra_height) { |
||
937 | h->vdsp.emulated_edge_mc(h->edge_emu_buffer, h->mb_linesize, |
||
938 | src_y - (2 << pixel_shift) - 2 * h->mb_linesize, |
||
939 | h->mb_linesize, |
||
940 | 16 + 5, 16 + 5 /*FIXME*/, full_mx - 2, |
||
941 | full_my - 2, pic_width, pic_height); |
||
942 | src_y = h->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize; |
||
943 | emu = 1; |
||
944 | } |
||
945 | |||
946 | qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); // FIXME try variable height perhaps? |
||
947 | if (!square) |
||
948 | qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize); |
||
949 | |||
950 | if (CONFIG_GRAY && h->flags & CODEC_FLAG_GRAY) |
||
951 | return; |
||
952 | |||
953 | if (chroma_idc == 3 /* yuv444 */) { |
||
954 | src_cb = pic->f.data[1] + offset; |
||
955 | if (emu) { |
||
956 | h->vdsp.emulated_edge_mc(h->edge_emu_buffer, h->mb_linesize, |
||
957 | src_cb - (2 << pixel_shift) - 2 * h->mb_linesize, |
||
958 | h->mb_linesize, |
||
959 | 16 + 5, 16 + 5 /*FIXME*/, |
||
960 | full_mx - 2, full_my - 2, |
||
961 | pic_width, pic_height); |
||
962 | src_cb = h->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize; |
||
963 | } |
||
964 | qpix_op[luma_xy](dest_cb, src_cb, h->mb_linesize); // FIXME try variable height perhaps? |
||
965 | if (!square) |
||
966 | qpix_op[luma_xy](dest_cb + delta, src_cb + delta, h->mb_linesize); |
||
967 | |||
968 | src_cr = pic->f.data[2] + offset; |
||
969 | if (emu) { |
||
970 | h->vdsp.emulated_edge_mc(h->edge_emu_buffer, h->mb_linesize, |
||
971 | src_cr - (2 << pixel_shift) - 2 * h->mb_linesize, |
||
972 | h->mb_linesize, |
||
973 | 16 + 5, 16 + 5 /*FIXME*/, |
||
974 | full_mx - 2, full_my - 2, |
||
975 | pic_width, pic_height); |
||
976 | src_cr = h->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize; |
||
977 | } |
||
978 | qpix_op[luma_xy](dest_cr, src_cr, h->mb_linesize); // FIXME try variable height perhaps? |
||
979 | if (!square) |
||
980 | qpix_op[luma_xy](dest_cr + delta, src_cr + delta, h->mb_linesize); |
||
981 | return; |
||
982 | } |
||
983 | |||
984 | ysh = 3 - (chroma_idc == 2 /* yuv422 */); |
||
985 | if (chroma_idc == 1 /* yuv420 */ && MB_FIELD(h)) { |
||
986 | // chroma offset when predicting from a field of opposite parity |
||
987 | my += 2 * ((h->mb_y & 1) - (pic->reference - 1)); |
||
988 | emu |= (my >> 3) < 0 || (my >> 3) + 8 >= (pic_height >> 1); |
||
989 | } |
||
990 | |||
991 | src_cb = pic->f.data[1] + ((mx >> 3) << pixel_shift) + |
||
992 | (my >> ysh) * h->mb_uvlinesize; |
||
993 | src_cr = pic->f.data[2] + ((mx >> 3) << pixel_shift) + |
||
994 | (my >> ysh) * h->mb_uvlinesize; |
||
995 | |||
996 | if (emu) { |
||
997 | h->vdsp.emulated_edge_mc(h->edge_emu_buffer, h->mb_uvlinesize, src_cb, h->mb_uvlinesize, |
||
998 | 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh), |
||
999 | pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */)); |
||
1000 | src_cb = h->edge_emu_buffer; |
||
1001 | } |
||
1002 | chroma_op(dest_cb, src_cb, h->mb_uvlinesize, |
||
1003 | height >> (chroma_idc == 1 /* yuv420 */), |
||
1004 | mx & 7, (my << (chroma_idc == 2 /* yuv422 */)) & 7); |
||
1005 | |||
1006 | if (emu) { |
||
1007 | h->vdsp.emulated_edge_mc(h->edge_emu_buffer, h->mb_uvlinesize, src_cr, h->mb_uvlinesize, |
||
1008 | 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh), |
||
1009 | pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */)); |
||
1010 | src_cr = h->edge_emu_buffer; |
||
1011 | } |
||
1012 | chroma_op(dest_cr, src_cr, h->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */), |
||
1013 | mx & 7, (my << (chroma_idc == 2 /* yuv422 */)) & 7); |
||
1014 | } |
||
1015 | |||
1016 | static av_always_inline void mc_part_std(H264Context *h, int n, int square, |
||
1017 | int height, int delta, |
||
1018 | uint8_t *dest_y, uint8_t *dest_cb, |
||
1019 | uint8_t *dest_cr, |
||
1020 | int x_offset, int y_offset, |
||
1021 | qpel_mc_func *qpix_put, |
||
1022 | h264_chroma_mc_func chroma_put, |
||
1023 | qpel_mc_func *qpix_avg, |
||
1024 | h264_chroma_mc_func chroma_avg, |
||
1025 | int list0, int list1, |
||
1026 | int pixel_shift, int chroma_idc) |
||
1027 | { |
||
1028 | qpel_mc_func *qpix_op = qpix_put; |
||
1029 | h264_chroma_mc_func chroma_op = chroma_put; |
||
1030 | |||
1031 | dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize; |
||
1032 | if (chroma_idc == 3 /* yuv444 */) { |
||
1033 | dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize; |
||
1034 | dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize; |
||
1035 | } else if (chroma_idc == 2 /* yuv422 */) { |
||
1036 | dest_cb += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize; |
||
1037 | dest_cr += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize; |
||
1038 | } else { /* yuv420 */ |
||
1039 | dest_cb += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize; |
||
1040 | dest_cr += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize; |
||
1041 | } |
||
1042 | x_offset += 8 * h->mb_x; |
||
1043 | y_offset += 8 * (h->mb_y >> MB_FIELD(h)); |
||
1044 | |||
1045 | if (list0) { |
||
1046 | Picture *ref = &h->ref_list[0][h->ref_cache[0][scan8[n]]]; |
||
1047 | mc_dir_part(h, ref, n, square, height, delta, 0, |
||
1048 | dest_y, dest_cb, dest_cr, x_offset, y_offset, |
||
1049 | qpix_op, chroma_op, pixel_shift, chroma_idc); |
||
1050 | |||
1051 | qpix_op = qpix_avg; |
||
1052 | chroma_op = chroma_avg; |
||
1053 | } |
||
1054 | |||
1055 | if (list1) { |
||
1056 | Picture *ref = &h->ref_list[1][h->ref_cache[1][scan8[n]]]; |
||
1057 | mc_dir_part(h, ref, n, square, height, delta, 1, |
||
1058 | dest_y, dest_cb, dest_cr, x_offset, y_offset, |
||
1059 | qpix_op, chroma_op, pixel_shift, chroma_idc); |
||
1060 | } |
||
1061 | } |
||
1062 | |||
1063 | static av_always_inline void mc_part_weighted(H264Context *h, int n, int square, |
||
1064 | int height, int delta, |
||
1065 | uint8_t *dest_y, uint8_t *dest_cb, |
||
1066 | uint8_t *dest_cr, |
||
1067 | int x_offset, int y_offset, |
||
1068 | qpel_mc_func *qpix_put, |
||
1069 | h264_chroma_mc_func chroma_put, |
||
1070 | h264_weight_func luma_weight_op, |
||
1071 | h264_weight_func chroma_weight_op, |
||
1072 | h264_biweight_func luma_weight_avg, |
||
1073 | h264_biweight_func chroma_weight_avg, |
||
1074 | int list0, int list1, |
||
1075 | int pixel_shift, int chroma_idc) |
||
1076 | { |
||
1077 | int chroma_height; |
||
1078 | |||
1079 | dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize; |
||
1080 | if (chroma_idc == 3 /* yuv444 */) { |
||
1081 | chroma_height = height; |
||
1082 | chroma_weight_avg = luma_weight_avg; |
||
1083 | chroma_weight_op = luma_weight_op; |
||
1084 | dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize; |
||
1085 | dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize; |
||
1086 | } else if (chroma_idc == 2 /* yuv422 */) { |
||
1087 | chroma_height = height; |
||
1088 | dest_cb += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize; |
||
1089 | dest_cr += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize; |
||
1090 | } else { /* yuv420 */ |
||
1091 | chroma_height = height >> 1; |
||
1092 | dest_cb += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize; |
||
1093 | dest_cr += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize; |
||
1094 | } |
||
1095 | x_offset += 8 * h->mb_x; |
||
1096 | y_offset += 8 * (h->mb_y >> MB_FIELD(h)); |
||
1097 | |||
1098 | if (list0 && list1) { |
||
1099 | /* don't optimize for luma-only case, since B-frames usually |
||
1100 | * use implicit weights => chroma too. */ |
||
1101 | uint8_t *tmp_cb = h->bipred_scratchpad; |
||
1102 | uint8_t *tmp_cr = h->bipred_scratchpad + (16 << pixel_shift); |
||
1103 | uint8_t *tmp_y = h->bipred_scratchpad + 16 * h->mb_uvlinesize; |
||
1104 | int refn0 = h->ref_cache[0][scan8[n]]; |
||
1105 | int refn1 = h->ref_cache[1][scan8[n]]; |
||
1106 | |||
1107 | mc_dir_part(h, &h->ref_list[0][refn0], n, square, height, delta, 0, |
||
1108 | dest_y, dest_cb, dest_cr, |
||
1109 | x_offset, y_offset, qpix_put, chroma_put, |
||
1110 | pixel_shift, chroma_idc); |
||
1111 | mc_dir_part(h, &h->ref_list[1][refn1], n, square, height, delta, 1, |
||
1112 | tmp_y, tmp_cb, tmp_cr, |
||
1113 | x_offset, y_offset, qpix_put, chroma_put, |
||
1114 | pixel_shift, chroma_idc); |
||
1115 | |||
1116 | if (h->use_weight == 2) { |
||
1117 | int weight0 = h->implicit_weight[refn0][refn1][h->mb_y & 1]; |
||
1118 | int weight1 = 64 - weight0; |
||
1119 | luma_weight_avg(dest_y, tmp_y, h->mb_linesize, |
||
1120 | height, 5, weight0, weight1, 0); |
||
1121 | chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, |
||
1122 | chroma_height, 5, weight0, weight1, 0); |
||
1123 | chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, |
||
1124 | chroma_height, 5, weight0, weight1, 0); |
||
1125 | } else { |
||
1126 | luma_weight_avg(dest_y, tmp_y, h->mb_linesize, height, |
||
1127 | h->luma_log2_weight_denom, |
||
1128 | h->luma_weight[refn0][0][0], |
||
1129 | h->luma_weight[refn1][1][0], |
||
1130 | h->luma_weight[refn0][0][1] + |
||
1131 | h->luma_weight[refn1][1][1]); |
||
1132 | chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, chroma_height, |
||
1133 | h->chroma_log2_weight_denom, |
||
1134 | h->chroma_weight[refn0][0][0][0], |
||
1135 | h->chroma_weight[refn1][1][0][0], |
||
1136 | h->chroma_weight[refn0][0][0][1] + |
||
1137 | h->chroma_weight[refn1][1][0][1]); |
||
1138 | chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, chroma_height, |
||
1139 | h->chroma_log2_weight_denom, |
||
1140 | h->chroma_weight[refn0][0][1][0], |
||
1141 | h->chroma_weight[refn1][1][1][0], |
||
1142 | h->chroma_weight[refn0][0][1][1] + |
||
1143 | h->chroma_weight[refn1][1][1][1]); |
||
1144 | } |
||
1145 | } else { |
||
1146 | int list = list1 ? 1 : 0; |
||
1147 | int refn = h->ref_cache[list][scan8[n]]; |
||
1148 | Picture *ref = &h->ref_list[list][refn]; |
||
1149 | mc_dir_part(h, ref, n, square, height, delta, list, |
||
1150 | dest_y, dest_cb, dest_cr, x_offset, y_offset, |
||
1151 | qpix_put, chroma_put, pixel_shift, chroma_idc); |
||
1152 | |||
1153 | luma_weight_op(dest_y, h->mb_linesize, height, |
||
1154 | h->luma_log2_weight_denom, |
||
1155 | h->luma_weight[refn][list][0], |
||
1156 | h->luma_weight[refn][list][1]); |
||
1157 | if (h->use_weight_chroma) { |
||
1158 | chroma_weight_op(dest_cb, h->mb_uvlinesize, chroma_height, |
||
1159 | h->chroma_log2_weight_denom, |
||
1160 | h->chroma_weight[refn][list][0][0], |
||
1161 | h->chroma_weight[refn][list][0][1]); |
||
1162 | chroma_weight_op(dest_cr, h->mb_uvlinesize, chroma_height, |
||
1163 | h->chroma_log2_weight_denom, |
||
1164 | h->chroma_weight[refn][list][1][0], |
||
1165 | h->chroma_weight[refn][list][1][1]); |
||
1166 | } |
||
1167 | } |
||
1168 | } |
||
1169 | |||
1170 | static av_always_inline void prefetch_motion(H264Context *h, int list, |
||
1171 | int pixel_shift, int chroma_idc) |
||
1172 | { |
||
1173 | /* fetch pixels for estimated mv 4 macroblocks ahead |
||
1174 | * optimized for 64byte cache lines */ |
||
1175 | const int refn = h->ref_cache[list][scan8[0]]; |
||
1176 | if (refn >= 0) { |
||
1177 | const int mx = (h->mv_cache[list][scan8[0]][0] >> 2) + 16 * h->mb_x + 8; |
||
1178 | const int my = (h->mv_cache[list][scan8[0]][1] >> 2) + 16 * h->mb_y; |
||
1179 | uint8_t **src = h->ref_list[list][refn].f.data; |
||
1180 | int off = (mx << pixel_shift) + |
||
1181 | (my + (h->mb_x & 3) * 4) * h->mb_linesize + |
||
1182 | (64 << pixel_shift); |
||
1183 | h->vdsp.prefetch(src[0] + off, h->linesize, 4); |
||
1184 | if (chroma_idc == 3 /* yuv444 */) { |
||
1185 | h->vdsp.prefetch(src[1] + off, h->linesize, 4); |
||
1186 | h->vdsp.prefetch(src[2] + off, h->linesize, 4); |
||
1187 | } else { |
||
1188 | off= (((mx>>1)+64)< |
||
1189 | h->vdsp.prefetch(src[1] + off, src[2] - src[1], 2); |
||
1190 | } |
||
1191 | } |
||
1192 | } |
||
1193 | |||
1194 | static void free_tables(H264Context *h, int free_rbsp) |
||
1195 | { |
||
1196 | int i; |
||
1197 | H264Context *hx; |
||
1198 | |||
1199 | av_freep(&h->intra4x4_pred_mode); |
||
1200 | av_freep(&h->chroma_pred_mode_table); |
||
1201 | av_freep(&h->cbp_table); |
||
1202 | av_freep(&h->mvd_table[0]); |
||
1203 | av_freep(&h->mvd_table[1]); |
||
1204 | av_freep(&h->direct_table); |
||
1205 | av_freep(&h->non_zero_count); |
||
1206 | av_freep(&h->slice_table_base); |
||
1207 | h->slice_table = NULL; |
||
1208 | av_freep(&h->list_counts); |
||
1209 | |||
1210 | av_freep(&h->mb2b_xy); |
||
1211 | av_freep(&h->mb2br_xy); |
||
1212 | |||
1213 | for (i = 0; i < 3; i++) |
||
1214 | av_freep(&h->visualization_buffer[i]); |
||
1215 | |||
1216 | av_buffer_pool_uninit(&h->qscale_table_pool); |
||
1217 | av_buffer_pool_uninit(&h->mb_type_pool); |
||
1218 | av_buffer_pool_uninit(&h->motion_val_pool); |
||
1219 | av_buffer_pool_uninit(&h->ref_index_pool); |
||
1220 | |||
1221 | if (free_rbsp && h->DPB) { |
||
1222 | for (i = 0; i < MAX_PICTURE_COUNT; i++) |
||
1223 | unref_picture(h, &h->DPB[i]); |
||
1224 | av_freep(&h->DPB); |
||
1225 | } else if (h->DPB) { |
||
1226 | for (i = 0; i < MAX_PICTURE_COUNT; i++) |
||
1227 | h->DPB[i].needs_realloc = 1; |
||
1228 | } |
||
1229 | |||
1230 | h->cur_pic_ptr = NULL; |
||
1231 | |||
1232 | for (i = 0; i < MAX_THREADS; i++) { |
||
1233 | hx = h->thread_context[i]; |
||
1234 | if (!hx) |
||
1235 | continue; |
||
1236 | av_freep(&hx->top_borders[1]); |
||
1237 | av_freep(&hx->top_borders[0]); |
||
1238 | av_freep(&hx->bipred_scratchpad); |
||
1239 | av_freep(&hx->edge_emu_buffer); |
||
1240 | av_freep(&hx->dc_val_base); |
||
1241 | av_freep(&hx->me.scratchpad); |
||
1242 | av_freep(&hx->er.mb_index2xy); |
||
1243 | av_freep(&hx->er.error_status_table); |
||
1244 | av_freep(&hx->er.er_temp_buffer); |
||
1245 | av_freep(&hx->er.mbintra_table); |
||
1246 | av_freep(&hx->er.mbskip_table); |
||
1247 | |||
1248 | if (free_rbsp) { |
||
1249 | av_freep(&hx->rbsp_buffer[1]); |
||
1250 | av_freep(&hx->rbsp_buffer[0]); |
||
1251 | hx->rbsp_buffer_size[0] = 0; |
||
1252 | hx->rbsp_buffer_size[1] = 0; |
||
1253 | } |
||
1254 | if (i) |
||
1255 | av_freep(&h->thread_context[i]); |
||
1256 | } |
||
1257 | } |
||
1258 | |||
1259 | static void init_dequant8_coeff_table(H264Context *h) |
||
1260 | { |
||
1261 | int i, j, q, x; |
||
1262 | const int max_qp = 51 + 6 * (h->sps.bit_depth_luma - 8); |
||
1263 | |||
1264 | for (i = 0; i < 6; i++) { |
||
1265 | h->dequant8_coeff[i] = h->dequant8_buffer[i]; |
||
1266 | for (j = 0; j < i; j++) |
||
1267 | if (!memcmp(h->pps.scaling_matrix8[j], h->pps.scaling_matrix8[i], |
||
1268 | 64 * sizeof(uint8_t))) { |
||
1269 | h->dequant8_coeff[i] = h->dequant8_buffer[j]; |
||
1270 | break; |
||
1271 | } |
||
1272 | if (j < i) |
||
1273 | continue; |
||
1274 | |||
1275 | for (q = 0; q < max_qp + 1; q++) { |
||
1276 | int shift = div6[q]; |
||
1277 | int idx = rem6[q]; |
||
1278 | for (x = 0; x < 64; x++) |
||
1279 | h->dequant8_coeff[i][q][(x >> 3) | ((x & 7) << 3)] = |
||
1280 | ((uint32_t)dequant8_coeff_init[idx][dequant8_coeff_init_scan[((x >> 1) & 12) | (x & 3)]] * |
||
1281 | h->pps.scaling_matrix8[i][x]) << shift; |
||
1282 | } |
||
1283 | } |
||
1284 | } |
||
1285 | |||
1286 | static void init_dequant4_coeff_table(H264Context *h) |
||
1287 | { |
||
1288 | int i, j, q, x; |
||
1289 | const int max_qp = 51 + 6 * (h->sps.bit_depth_luma - 8); |
||
1290 | for (i = 0; i < 6; i++) { |
||
1291 | h->dequant4_coeff[i] = h->dequant4_buffer[i]; |
||
1292 | for (j = 0; j < i; j++) |
||
1293 | if (!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], |
||
1294 | 16 * sizeof(uint8_t))) { |
||
1295 | h->dequant4_coeff[i] = h->dequant4_buffer[j]; |
||
1296 | break; |
||
1297 | } |
||
1298 | if (j < i) |
||
1299 | continue; |
||
1300 | |||
1301 | for (q = 0; q < max_qp + 1; q++) { |
||
1302 | int shift = div6[q] + 2; |
||
1303 | int idx = rem6[q]; |
||
1304 | for (x = 0; x < 16; x++) |
||
1305 | h->dequant4_coeff[i][q][(x >> 2) | ((x << 2) & 0xF)] = |
||
1306 | ((uint32_t)dequant4_coeff_init[idx][(x & 1) + ((x >> 2) & 1)] * |
||
1307 | h->pps.scaling_matrix4[i][x]) << shift; |
||
1308 | } |
||
1309 | } |
||
1310 | } |
||
1311 | |||
1312 | static void init_dequant_tables(H264Context *h) |
||
1313 | { |
||
1314 | int i, x; |
||
1315 | init_dequant4_coeff_table(h); |
||
1316 | if (h->pps.transform_8x8_mode) |
||
1317 | init_dequant8_coeff_table(h); |
||
1318 | if (h->sps.transform_bypass) { |
||
1319 | for (i = 0; i < 6; i++) |
||
1320 | for (x = 0; x < 16; x++) |
||
1321 | h->dequant4_coeff[i][0][x] = 1 << 6; |
||
1322 | if (h->pps.transform_8x8_mode) |
||
1323 | for (i = 0; i < 6; i++) |
||
1324 | for (x = 0; x < 64; x++) |
||
1325 | h->dequant8_coeff[i][0][x] = 1 << 6; |
||
1326 | } |
||
1327 | } |
||
1328 | |||
1329 | int ff_h264_alloc_tables(H264Context *h) |
||
1330 | { |
||
1331 | const int big_mb_num = h->mb_stride * (h->mb_height + 1); |
||
1332 | const int row_mb_num = 2*h->mb_stride*FFMAX(h->avctx->thread_count, 1); |
||
1333 | int x, y, i; |
||
1334 | |||
1335 | FF_ALLOCZ_OR_GOTO(h->avctx, h->intra4x4_pred_mode, |
||
1336 | row_mb_num * 8 * sizeof(uint8_t), fail) |
||
1337 | FF_ALLOCZ_OR_GOTO(h->avctx, h->non_zero_count, |
||
1338 | big_mb_num * 48 * sizeof(uint8_t), fail) |
||
1339 | FF_ALLOCZ_OR_GOTO(h->avctx, h->slice_table_base, |
||
1340 | (big_mb_num + h->mb_stride) * sizeof(*h->slice_table_base), fail) |
||
1341 | FF_ALLOCZ_OR_GOTO(h->avctx, h->cbp_table, |
||
1342 | big_mb_num * sizeof(uint16_t), fail) |
||
1343 | FF_ALLOCZ_OR_GOTO(h->avctx, h->chroma_pred_mode_table, |
||
1344 | big_mb_num * sizeof(uint8_t), fail) |
||
1345 | FF_ALLOCZ_OR_GOTO(h->avctx, h->mvd_table[0], |
||
1346 | 16 * row_mb_num * sizeof(uint8_t), fail); |
||
1347 | FF_ALLOCZ_OR_GOTO(h->avctx, h->mvd_table[1], |
||
1348 | 16 * row_mb_num * sizeof(uint8_t), fail); |
||
1349 | FF_ALLOCZ_OR_GOTO(h->avctx, h->direct_table, |
||
1350 | 4 * big_mb_num * sizeof(uint8_t), fail); |
||
1351 | FF_ALLOCZ_OR_GOTO(h->avctx, h->list_counts, |
||
1352 | big_mb_num * sizeof(uint8_t), fail) |
||
1353 | |||
1354 | memset(h->slice_table_base, -1, |
||
1355 | (big_mb_num + h->mb_stride) * sizeof(*h->slice_table_base)); |
||
1356 | h->slice_table = h->slice_table_base + h->mb_stride * 2 + 1; |
||
1357 | |||
1358 | FF_ALLOCZ_OR_GOTO(h->avctx, h->mb2b_xy, |
||
1359 | big_mb_num * sizeof(uint32_t), fail); |
||
1360 | FF_ALLOCZ_OR_GOTO(h->avctx, h->mb2br_xy, |
||
1361 | big_mb_num * sizeof(uint32_t), fail); |
||
1362 | for (y = 0; y < h->mb_height; y++) |
||
1363 | for (x = 0; x < h->mb_width; x++) { |
||
1364 | const int mb_xy = x + y * h->mb_stride; |
||
1365 | const int b_xy = 4 * x + 4 * y * h->b_stride; |
||
1366 | |||
1367 | h->mb2b_xy[mb_xy] = b_xy; |
||
1368 | h->mb2br_xy[mb_xy] = 8 * (FMO ? mb_xy : (mb_xy % (2 * h->mb_stride))); |
||
1369 | } |
||
1370 | |||
1371 | if (!h->dequant4_coeff[0]) |
||
1372 | init_dequant_tables(h); |
||
1373 | |||
1374 | if (!h->DPB) { |
||
1375 | h->DPB = av_mallocz_array(MAX_PICTURE_COUNT, sizeof(*h->DPB)); |
||
1376 | if (!h->DPB) |
||
1377 | return AVERROR(ENOMEM); |
||
1378 | for (i = 0; i < MAX_PICTURE_COUNT; i++) |
||
1379 | avcodec_get_frame_defaults(&h->DPB[i].f); |
||
1380 | avcodec_get_frame_defaults(&h->cur_pic.f); |
||
1381 | } |
||
1382 | |||
1383 | return 0; |
||
1384 | |||
1385 | fail: |
||
1386 | free_tables(h, 1); |
||
1387 | return AVERROR(ENOMEM); |
||
1388 | } |
||
1389 | |||
1390 | /** |
||
1391 | * Mimic alloc_tables(), but for every context thread. |
||
1392 | */ |
||
1393 | static void clone_tables(H264Context *dst, H264Context *src, int i) |
||
1394 | { |
||
1395 | dst->intra4x4_pred_mode = src->intra4x4_pred_mode + i * 8 * 2 * src->mb_stride; |
||
1396 | dst->non_zero_count = src->non_zero_count; |
||
1397 | dst->slice_table = src->slice_table; |
||
1398 | dst->cbp_table = src->cbp_table; |
||
1399 | dst->mb2b_xy = src->mb2b_xy; |
||
1400 | dst->mb2br_xy = src->mb2br_xy; |
||
1401 | dst->chroma_pred_mode_table = src->chroma_pred_mode_table; |
||
1402 | dst->mvd_table[0] = src->mvd_table[0] + i * 8 * 2 * src->mb_stride; |
||
1403 | dst->mvd_table[1] = src->mvd_table[1] + i * 8 * 2 * src->mb_stride; |
||
1404 | dst->direct_table = src->direct_table; |
||
1405 | dst->list_counts = src->list_counts; |
||
1406 | dst->DPB = src->DPB; |
||
1407 | dst->cur_pic_ptr = src->cur_pic_ptr; |
||
1408 | dst->cur_pic = src->cur_pic; |
||
1409 | dst->bipred_scratchpad = NULL; |
||
1410 | dst->edge_emu_buffer = NULL; |
||
1411 | dst->me.scratchpad = NULL; |
||
1412 | ff_h264_pred_init(&dst->hpc, src->avctx->codec_id, src->sps.bit_depth_luma, |
||
1413 | src->sps.chroma_format_idc); |
||
1414 | } |
||
1415 | |||
1416 | /** |
||
1417 | * Init context |
||
1418 | * Allocate buffers which are not shared amongst multiple threads. |
||
1419 | */ |
||
1420 | static int context_init(H264Context *h) |
||
1421 | { |
||
1422 | ERContext *er = &h->er; |
||
1423 | int mb_array_size = h->mb_height * h->mb_stride; |
||
1424 | int y_size = (2 * h->mb_width + 1) * (2 * h->mb_height + 1); |
||
1425 | int c_size = h->mb_stride * (h->mb_height + 1); |
||
1426 | int yc_size = y_size + 2 * c_size; |
||
1427 | int x, y, i; |
||
1428 | |||
1429 | FF_ALLOCZ_OR_GOTO(h->avctx, h->top_borders[0], |
||
1430 | h->mb_width * 16 * 3 * sizeof(uint8_t) * 2, fail) |
||
1431 | FF_ALLOCZ_OR_GOTO(h->avctx, h->top_borders[1], |
||
1432 | h->mb_width * 16 * 3 * sizeof(uint8_t) * 2, fail) |
||
1433 | |||
1434 | h->ref_cache[0][scan8[5] + 1] = |
||
1435 | h->ref_cache[0][scan8[7] + 1] = |
||
1436 | h->ref_cache[0][scan8[13] + 1] = |
||
1437 | h->ref_cache[1][scan8[5] + 1] = |
||
1438 | h->ref_cache[1][scan8[7] + 1] = |
||
1439 | h->ref_cache[1][scan8[13] + 1] = PART_NOT_AVAILABLE; |
||
1440 | |||
1441 | if (CONFIG_ERROR_RESILIENCE) { |
||
1442 | /* init ER */ |
||
1443 | er->avctx = h->avctx; |
||
1444 | er->dsp = &h->dsp; |
||
1445 | er->decode_mb = h264_er_decode_mb; |
||
1446 | er->opaque = h; |
||
1447 | er->quarter_sample = 1; |
||
1448 | |||
1449 | er->mb_num = h->mb_num; |
||
1450 | er->mb_width = h->mb_width; |
||
1451 | er->mb_height = h->mb_height; |
||
1452 | er->mb_stride = h->mb_stride; |
||
1453 | er->b8_stride = h->mb_width * 2 + 1; |
||
1454 | |||
1455 | FF_ALLOCZ_OR_GOTO(h->avctx, er->mb_index2xy, (h->mb_num + 1) * sizeof(int), |
||
1456 | fail); // error ressilience code looks cleaner with this |
||
1457 | for (y = 0; y < h->mb_height; y++) |
||
1458 | for (x = 0; x < h->mb_width; x++) |
||
1459 | er->mb_index2xy[x + y * h->mb_width] = x + y * h->mb_stride; |
||
1460 | |||
1461 | er->mb_index2xy[h->mb_height * h->mb_width] = (h->mb_height - 1) * |
||
1462 | h->mb_stride + h->mb_width; |
||
1463 | |||
1464 | FF_ALLOCZ_OR_GOTO(h->avctx, er->error_status_table, |
||
1465 | mb_array_size * sizeof(uint8_t), fail); |
||
1466 | |||
1467 | FF_ALLOC_OR_GOTO(h->avctx, er->mbintra_table, mb_array_size, fail); |
||
1468 | memset(er->mbintra_table, 1, mb_array_size); |
||
1469 | |||
1470 | FF_ALLOCZ_OR_GOTO(h->avctx, er->mbskip_table, mb_array_size + 2, fail); |
||
1471 | |||
1472 | FF_ALLOC_OR_GOTO(h->avctx, er->er_temp_buffer, h->mb_height * h->mb_stride, |
||
1473 | fail); |
||
1474 | |||
1475 | FF_ALLOCZ_OR_GOTO(h->avctx, h->dc_val_base, yc_size * sizeof(int16_t), fail); |
||
1476 | er->dc_val[0] = h->dc_val_base + h->mb_width * 2 + 2; |
||
1477 | er->dc_val[1] = h->dc_val_base + y_size + h->mb_stride + 1; |
||
1478 | er->dc_val[2] = er->dc_val[1] + c_size; |
||
1479 | for (i = 0; i < yc_size; i++) |
||
1480 | h->dc_val_base[i] = 1024; |
||
1481 | } |
||
1482 | |||
1483 | return 0; |
||
1484 | |||
1485 | fail: |
||
1486 | return AVERROR(ENOMEM); // free_tables will clean up for us |
||
1487 | } |
||
1488 | |||
1489 | static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size, |
||
1490 | int parse_extradata); |
||
1491 | |||
1492 | int ff_h264_decode_extradata(H264Context *h, const uint8_t *buf, int size) |
||
1493 | { |
||
1494 | AVCodecContext *avctx = h->avctx; |
||
1495 | int ret; |
||
1496 | |||
1497 | if (!buf || size <= 0) |
||
1498 | return -1; |
||
1499 | |||
1500 | if (buf[0] == 1) { |
||
1501 | int i, cnt, nalsize; |
||
1502 | const unsigned char *p = buf; |
||
1503 | |||
1504 | h->is_avc = 1; |
||
1505 | |||
1506 | if (size < 7) { |
||
1507 | av_log(avctx, AV_LOG_ERROR, "avcC too short\n"); |
||
1508 | return AVERROR_INVALIDDATA; |
||
1509 | } |
||
1510 | /* sps and pps in the avcC always have length coded with 2 bytes, |
||
1511 | * so put a fake nal_length_size = 2 while parsing them */ |
||
1512 | h->nal_length_size = 2; |
||
1513 | // Decode sps from avcC |
||
1514 | cnt = *(p + 5) & 0x1f; // Number of sps |
||
1515 | p += 6; |
||
1516 | for (i = 0; i < cnt; i++) { |
||
1517 | nalsize = AV_RB16(p) + 2; |
||
1518 | if(nalsize > size - (p-buf)) |
||
1519 | return AVERROR_INVALIDDATA; |
||
1520 | ret = decode_nal_units(h, p, nalsize, 1); |
||
1521 | if (ret < 0) { |
||
1522 | av_log(avctx, AV_LOG_ERROR, |
||
1523 | "Decoding sps %d from avcC failed\n", i); |
||
1524 | return ret; |
||
1525 | } |
||
1526 | p += nalsize; |
||
1527 | } |
||
1528 | // Decode pps from avcC |
||
1529 | cnt = *(p++); // Number of pps |
||
1530 | for (i = 0; i < cnt; i++) { |
||
1531 | nalsize = AV_RB16(p) + 2; |
||
1532 | if(nalsize > size - (p-buf)) |
||
1533 | return AVERROR_INVALIDDATA; |
||
1534 | ret = decode_nal_units(h, p, nalsize, 1); |
||
1535 | if (ret < 0) { |
||
1536 | av_log(avctx, AV_LOG_ERROR, |
||
1537 | "Decoding pps %d from avcC failed\n", i); |
||
1538 | return ret; |
||
1539 | } |
||
1540 | p += nalsize; |
||
1541 | } |
||
1542 | // Now store right nal length size, that will be used to parse all other nals |
||
1543 | h->nal_length_size = (buf[4] & 0x03) + 1; |
||
1544 | } else { |
||
1545 | h->is_avc = 0; |
||
1546 | ret = decode_nal_units(h, buf, size, 1); |
||
1547 | if (ret < 0) |
||
1548 | return ret; |
||
1549 | } |
||
1550 | return size; |
||
1551 | } |
||
1552 | |||
1553 | av_cold int ff_h264_decode_init(AVCodecContext *avctx) |
||
1554 | { |
||
1555 | H264Context *h = avctx->priv_data; |
||
1556 | int i; |
||
1557 | int ret; |
||
1558 | |||
1559 | h->avctx = avctx; |
||
1560 | |||
1561 | h->bit_depth_luma = 8; |
||
1562 | h->chroma_format_idc = 1; |
||
1563 | |||
1564 | h->avctx->bits_per_raw_sample = 8; |
||
1565 | h->cur_chroma_format_idc = 1; |
||
1566 | |||
1567 | ff_h264dsp_init(&h->h264dsp, 8, 1); |
||
1568 | av_assert0(h->sps.bit_depth_chroma == 0); |
||
1569 | ff_h264chroma_init(&h->h264chroma, h->sps.bit_depth_chroma); |
||
1570 | ff_h264qpel_init(&h->h264qpel, 8); |
||
1571 | ff_h264_pred_init(&h->hpc, h->avctx->codec_id, 8, 1); |
||
1572 | |||
1573 | h->dequant_coeff_pps = -1; |
||
1574 | h->current_sps_id = -1; |
||
1575 | |||
1576 | /* needed so that IDCT permutation is known early */ |
||
1577 | if (CONFIG_ERROR_RESILIENCE) |
||
1578 | ff_dsputil_init(&h->dsp, h->avctx); |
||
1579 | ff_videodsp_init(&h->vdsp, 8); |
||
1580 | |||
1581 | memset(h->pps.scaling_matrix4, 16, 6 * 16 * sizeof(uint8_t)); |
||
1582 | memset(h->pps.scaling_matrix8, 16, 2 * 64 * sizeof(uint8_t)); |
||
1583 | |||
1584 | h->picture_structure = PICT_FRAME; |
||
1585 | h->slice_context_count = 1; |
||
1586 | h->workaround_bugs = avctx->workaround_bugs; |
||
1587 | h->flags = avctx->flags; |
||
1588 | |||
1589 | /* set defaults */ |
||
1590 | // s->decode_mb = ff_h263_decode_mb; |
||
1591 | if (!avctx->has_b_frames) |
||
1592 | h->low_delay = 1; |
||
1593 | |||
1594 | avctx->chroma_sample_location = AVCHROMA_LOC_LEFT; |
||
1595 | |||
1596 | ff_h264_decode_init_vlc(); |
||
1597 | |||
1598 | ff_init_cabac_states(); |
||
1599 | |||
1600 | h->pixel_shift = 0; |
||
1601 | h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8; |
||
1602 | |||
1603 | h->thread_context[0] = h; |
||
1604 | h->outputed_poc = h->next_outputed_poc = INT_MIN; |
||
1605 | for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++) |
||
1606 | h->last_pocs[i] = INT_MIN; |
||
1607 | h->prev_poc_msb = 1 << 16; |
||
1608 | h->prev_frame_num = -1; |
||
1609 | h->x264_build = -1; |
||
1610 | h->sei_fpa.frame_packing_arrangement_cancel_flag = -1; |
||
1611 | ff_h264_reset_sei(h); |
||
1612 | if (avctx->codec_id == AV_CODEC_ID_H264) { |
||
1613 | if (avctx->ticks_per_frame == 1) { |
||
1614 | if(h->avctx->time_base.den < INT_MAX/2) { |
||
1615 | h->avctx->time_base.den *= 2; |
||
1616 | } else |
||
1617 | h->avctx->time_base.num /= 2; |
||
1618 | } |
||
1619 | avctx->ticks_per_frame = 2; |
||
1620 | } |
||
1621 | |||
1622 | if (avctx->extradata_size > 0 && avctx->extradata) { |
||
1623 | ret = ff_h264_decode_extradata(h, avctx->extradata, avctx->extradata_size); |
||
1624 | if (ret < 0) { |
||
1625 | ff_h264_free_context(h); |
||
1626 | return ret; |
||
1627 | } |
||
1628 | } |
||
1629 | |||
1630 | if (h->sps.bitstream_restriction_flag && |
||
1631 | h->avctx->has_b_frames < h->sps.num_reorder_frames) { |
||
1632 | h->avctx->has_b_frames = h->sps.num_reorder_frames; |
||
1633 | h->low_delay = 0; |
||
1634 | } |
||
1635 | |||
1636 | avctx->internal->allocate_progress = 1; |
||
1637 | |||
1638 | flush_change(h); |
||
1639 | |||
1640 | return 0; |
||
1641 | } |
||
1642 | |||
/* True when a lies in the half-open range [b, b + size). */
#define IN_RANGE(a, b, size) (((a) >= (b)) && ((a) < ((b) + (size))))
#undef REBASE_PICTURE
/*
 * Translate a picture pointer into old_ctx's DPB into the pointer to the
 * entry with the same index in new_ctx's DPB. Yields NULL when pic is NULL
 * or does not point into old_ctx's DPB.
 * All arguments are parenthesised so that expressions (e.g. &ctx) can be
 * passed safely; note that pic and old_ctx are evaluated more than once.
 */
#define REBASE_PICTURE(pic, new_ctx, old_ctx)                   \
    (((pic) && (pic) >= (old_ctx)->DPB &&                       \
      (pic) < (old_ctx)->DPB + MAX_PICTURE_COUNT) ?             \
     &(new_ctx)->DPB[(pic) - (old_ctx)->DPB] : NULL)
||
1649 | |||
1650 | static void copy_picture_range(Picture **to, Picture **from, int count, |
||
1651 | H264Context *new_base, |
||
1652 | H264Context *old_base) |
||
1653 | { |
||
1654 | int i; |
||
1655 | |||
1656 | for (i = 0; i < count; i++) { |
||
1657 | assert((IN_RANGE(from[i], old_base, sizeof(*old_base)) || |
||
1658 | IN_RANGE(from[i], old_base->DPB, |
||
1659 | sizeof(Picture) * MAX_PICTURE_COUNT) || |
||
1660 | !from[i])); |
||
1661 | to[i] = REBASE_PICTURE(from[i], new_base, old_base); |
||
1662 | } |
||
1663 | } |
||
1664 | |||
/**
 * Make the parameter-set table "to" mirror the table "from".
 *
 * For every slot: an entry missing in from is freed in to, an entry present
 * in from is (allocated if necessary and) copied byte-wise into to.
 *
 * @param to    destination table of count pointers, owned by the caller
 * @param from  source table of count pointers
 * @param count number of slots in both tables
 * @param size  size in bytes of one parameter set
 */
static void copy_parameter_set(void **to, void **from, int count, int size)
{
    int i;

    for (i = 0; i < count; i++) {
        if (!from[i]) {
            /* av_freep() on an already-NULL slot is a no-op. */
            av_freep(&to[i]);
            continue;
        }

        if (!to[i]) {
            to[i] = av_malloc(size);
            /* Out of memory: leave the slot empty instead of writing
             * through a NULL pointer. The function has no way to report
             * the failure, so the parameter set is simply absent. */
            if (!to[i])
                continue;
        }

        memcpy(to[i], from[i], size);
    }
}
1679 | |||
1680 | static int decode_init_thread_copy(AVCodecContext *avctx) |
||
1681 | { |
||
1682 | H264Context *h = avctx->priv_data; |
||
1683 | |||
1684 | if (!avctx->internal->is_copy) |
||
1685 | return 0; |
||
1686 | memset(h->sps_buffers, 0, sizeof(h->sps_buffers)); |
||
1687 | memset(h->pps_buffers, 0, sizeof(h->pps_buffers)); |
||
1688 | |||
1689 | h->rbsp_buffer[0] = NULL; |
||
1690 | h->rbsp_buffer[1] = NULL; |
||
1691 | h->rbsp_buffer_size[0] = 0; |
||
1692 | h->rbsp_buffer_size[1] = 0; |
||
1693 | h->context_initialized = 0; |
||
1694 | |||
1695 | return 0; |
||
1696 | } |
||
1697 | |||
/*
 * Copy the run of struct members [start_field, end_field) from *from to *to.
 * end_field itself is NOT copied. Relies on the members being laid out
 * contiguously in declaration order. to and from are parenthesized so that
 * expressions such as &ctx may be passed.
 */
#define copy_fields(to, from, start_field, end_field)                   \
    memcpy(&(to)->start_field, &(from)->start_field,                    \
           (char *)&(to)->end_field - (char *)&(to)->start_field)
1701 | |||
1702 | static int h264_slice_header_init(H264Context *, int); |
||
1703 | |||
1704 | static int h264_set_parameter_from_sps(H264Context *h); |
||
1705 | |||
1706 | static int decode_update_thread_context(AVCodecContext *dst, |
||
1707 | const AVCodecContext *src) |
||
1708 | { |
||
1709 | H264Context *h = dst->priv_data, *h1 = src->priv_data; |
||
1710 | int inited = h->context_initialized, err = 0; |
||
1711 | int context_reinitialized = 0; |
||
1712 | int i, ret; |
||
1713 | |||
1714 | if (dst == src) |
||
1715 | return 0; |
||
1716 | |||
1717 | if (inited && |
||
1718 | (h->width != h1->width || |
||
1719 | h->height != h1->height || |
||
1720 | h->mb_width != h1->mb_width || |
||
1721 | h->mb_height != h1->mb_height || |
||
1722 | h->sps.bit_depth_luma != h1->sps.bit_depth_luma || |
||
1723 | h->sps.chroma_format_idc != h1->sps.chroma_format_idc || |
||
1724 | h->sps.colorspace != h1->sps.colorspace)) { |
||
1725 | |||
1726 | /* set bits_per_raw_sample to the previous value. the check for changed |
||
1727 | * bit depth in h264_set_parameter_from_sps() uses it and sets it to |
||
1728 | * the current value */ |
||
1729 | h->avctx->bits_per_raw_sample = h->sps.bit_depth_luma; |
||
1730 | |||
1731 | av_freep(&h->bipred_scratchpad); |
||
1732 | |||
1733 | h->width = h1->width; |
||
1734 | h->height = h1->height; |
||
1735 | h->mb_height = h1->mb_height; |
||
1736 | h->mb_width = h1->mb_width; |
||
1737 | h->mb_num = h1->mb_num; |
||
1738 | h->mb_stride = h1->mb_stride; |
||
1739 | h->b_stride = h1->b_stride; |
||
1740 | // SPS/PPS |
||
1741 | copy_parameter_set((void **)h->sps_buffers, (void **)h1->sps_buffers, |
||
1742 | MAX_SPS_COUNT, sizeof(SPS)); |
||
1743 | h->sps = h1->sps; |
||
1744 | copy_parameter_set((void **)h->pps_buffers, (void **)h1->pps_buffers, |
||
1745 | MAX_PPS_COUNT, sizeof(PPS)); |
||
1746 | h->pps = h1->pps; |
||
1747 | |||
1748 | if ((err = h264_slice_header_init(h, 1)) < 0) { |
||
1749 | av_log(h->avctx, AV_LOG_ERROR, "h264_slice_header_init() failed"); |
||
1750 | return err; |
||
1751 | } |
||
1752 | context_reinitialized = 1; |
||
1753 | |||
1754 | #if 0 |
||
1755 | h264_set_parameter_from_sps(h); |
||
1756 | //Note we set context_reinitialized which will cause h264_set_parameter_from_sps to be reexecuted |
||
1757 | h->cur_chroma_format_idc = h1->cur_chroma_format_idc; |
||
1758 | #endif |
||
1759 | } |
||
1760 | /* update linesize on resize for h264. The h264 decoder doesn't |
||
1761 | * necessarily call ff_MPV_frame_start in the new thread */ |
||
1762 | h->linesize = h1->linesize; |
||
1763 | h->uvlinesize = h1->uvlinesize; |
||
1764 | |||
1765 | /* copy block_offset since frame_start may not be called */ |
||
1766 | memcpy(h->block_offset, h1->block_offset, sizeof(h->block_offset)); |
||
1767 | |||
1768 | if (!inited) { |
||
1769 | for (i = 0; i < MAX_SPS_COUNT; i++) |
||
1770 | av_freep(h->sps_buffers + i); |
||
1771 | |||
1772 | for (i = 0; i < MAX_PPS_COUNT; i++) |
||
1773 | av_freep(h->pps_buffers + i); |
||
1774 | |||
1775 | av_freep(&h->rbsp_buffer[0]); |
||
1776 | av_freep(&h->rbsp_buffer[1]); |
||
1777 | memcpy(h, h1, offsetof(H264Context, intra_pcm_ptr)); |
||
1778 | memcpy(&h->cabac, &h1->cabac, |
||
1779 | sizeof(H264Context) - offsetof(H264Context, cabac)); |
||
1780 | av_assert0((void*)&h->cabac == &h->mb_padding + 1); |
||
1781 | |||
1782 | memset(h->sps_buffers, 0, sizeof(h->sps_buffers)); |
||
1783 | memset(h->pps_buffers, 0, sizeof(h->pps_buffers)); |
||
1784 | |||
1785 | memset(&h->er, 0, sizeof(h->er)); |
||
1786 | memset(&h->me, 0, sizeof(h->me)); |
||
1787 | memset(&h->mb, 0, sizeof(h->mb)); |
||
1788 | memset(&h->mb_luma_dc, 0, sizeof(h->mb_luma_dc)); |
||
1789 | memset(&h->mb_padding, 0, sizeof(h->mb_padding)); |
||
1790 | |||
1791 | h->avctx = dst; |
||
1792 | h->DPB = NULL; |
||
1793 | h->qscale_table_pool = NULL; |
||
1794 | h->mb_type_pool = NULL; |
||
1795 | h->ref_index_pool = NULL; |
||
1796 | h->motion_val_pool = NULL; |
||
1797 | for (i = 0; i < 2; i++) { |
||
1798 | h->rbsp_buffer[i] = NULL; |
||
1799 | h->rbsp_buffer_size[i] = 0; |
||
1800 | } |
||
1801 | |||
1802 | if (h1->context_initialized) { |
||
1803 | h->context_initialized = 0; |
||
1804 | |||
1805 | memset(&h->cur_pic, 0, sizeof(h->cur_pic)); |
||
1806 | avcodec_get_frame_defaults(&h->cur_pic.f); |
||
1807 | h->cur_pic.tf.f = &h->cur_pic.f; |
||
1808 | |||
1809 | ret = ff_h264_alloc_tables(h); |
||
1810 | if (ret < 0) { |
||
1811 | av_log(dst, AV_LOG_ERROR, "Could not allocate memory for h264\n"); |
||
1812 | return ret; |
||
1813 | } |
||
1814 | ret = context_init(h); |
||
1815 | if (ret < 0) { |
||
1816 | av_log(dst, AV_LOG_ERROR, "context_init() failed.\n"); |
||
1817 | return ret; |
||
1818 | } |
||
1819 | } |
||
1820 | |||
1821 | h->bipred_scratchpad = NULL; |
||
1822 | h->edge_emu_buffer = NULL; |
||
1823 | |||
1824 | h->thread_context[0] = h; |
||
1825 | h->context_initialized = h1->context_initialized; |
||
1826 | } |
||
1827 | |||
1828 | h->avctx->coded_height = h1->avctx->coded_height; |
||
1829 | h->avctx->coded_width = h1->avctx->coded_width; |
||
1830 | h->avctx->width = h1->avctx->width; |
||
1831 | h->avctx->height = h1->avctx->height; |
||
1832 | h->coded_picture_number = h1->coded_picture_number; |
||
1833 | h->first_field = h1->first_field; |
||
1834 | h->picture_structure = h1->picture_structure; |
||
1835 | h->qscale = h1->qscale; |
||
1836 | h->droppable = h1->droppable; |
||
1837 | h->data_partitioning = h1->data_partitioning; |
||
1838 | h->low_delay = h1->low_delay; |
||
1839 | |||
1840 | for (i = 0; h->DPB && i < MAX_PICTURE_COUNT; i++) { |
||
1841 | unref_picture(h, &h->DPB[i]); |
||
1842 | if (h1->DPB[i].f.data[0] && |
||
1843 | (ret = ref_picture(h, &h->DPB[i], &h1->DPB[i])) < 0) |
||
1844 | return ret; |
||
1845 | } |
||
1846 | |||
1847 | h->cur_pic_ptr = REBASE_PICTURE(h1->cur_pic_ptr, h, h1); |
||
1848 | unref_picture(h, &h->cur_pic); |
||
1849 | if (h1->cur_pic.f.buf[0] && (ret = ref_picture(h, &h->cur_pic, &h1->cur_pic)) < 0) |
||
1850 | return ret; |
||
1851 | |||
1852 | h->workaround_bugs = h1->workaround_bugs; |
||
1853 | h->low_delay = h1->low_delay; |
||
1854 | h->droppable = h1->droppable; |
||
1855 | |||
1856 | // extradata/NAL handling |
||
1857 | h->is_avc = h1->is_avc; |
||
1858 | |||
1859 | // SPS/PPS |
||
1860 | copy_parameter_set((void **)h->sps_buffers, (void **)h1->sps_buffers, |
||
1861 | MAX_SPS_COUNT, sizeof(SPS)); |
||
1862 | h->sps = h1->sps; |
||
1863 | copy_parameter_set((void **)h->pps_buffers, (void **)h1->pps_buffers, |
||
1864 | MAX_PPS_COUNT, sizeof(PPS)); |
||
1865 | h->pps = h1->pps; |
||
1866 | |||
1867 | // Dequantization matrices |
||
1868 | // FIXME these are big - can they be only copied when PPS changes? |
||
1869 | copy_fields(h, h1, dequant4_buffer, dequant4_coeff); |
||
1870 | |||
1871 | for (i = 0; i < 6; i++) |
||
1872 | h->dequant4_coeff[i] = h->dequant4_buffer[0] + |
||
1873 | (h1->dequant4_coeff[i] - h1->dequant4_buffer[0]); |
||
1874 | |||
1875 | for (i = 0; i < 6; i++) |
||
1876 | h->dequant8_coeff[i] = h->dequant8_buffer[0] + |
||
1877 | (h1->dequant8_coeff[i] - h1->dequant8_buffer[0]); |
||
1878 | |||
1879 | h->dequant_coeff_pps = h1->dequant_coeff_pps; |
||
1880 | |||
1881 | // POC timing |
||
1882 | copy_fields(h, h1, poc_lsb, redundant_pic_count); |
||
1883 | |||
1884 | // reference lists |
||
1885 | copy_fields(h, h1, short_ref, cabac_init_idc); |
||
1886 | |||
1887 | copy_picture_range(h->short_ref, h1->short_ref, 32, h, h1); |
||
1888 | copy_picture_range(h->long_ref, h1->long_ref, 32, h, h1); |
||
1889 | copy_picture_range(h->delayed_pic, h1->delayed_pic, |
||
1890 | MAX_DELAYED_PIC_COUNT + 2, h, h1); |
||
1891 | |||
1892 | h->sync = h1->sync; |
||
1893 | |||
1894 | if (context_reinitialized) |
||
1895 | h264_set_parameter_from_sps(h); |
||
1896 | |||
1897 | if (!h->cur_pic_ptr) |
||
1898 | return 0; |
||
1899 | |||
1900 | if (!h->droppable) { |
||
1901 | err = ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index); |
||
1902 | h->prev_poc_msb = h->poc_msb; |
||
1903 | h->prev_poc_lsb = h->poc_lsb; |
||
1904 | } |
||
1905 | h->prev_frame_num_offset = h->frame_num_offset; |
||
1906 | h->prev_frame_num = h->frame_num; |
||
1907 | h->outputed_poc = h->next_outputed_poc; |
||
1908 | |||
1909 | return err; |
||
1910 | } |
||
1911 | |||
1912 | static int h264_frame_start(H264Context *h) |
||
1913 | { |
||
1914 | Picture *pic; |
||
1915 | int i, ret; |
||
1916 | const int pixel_shift = h->pixel_shift; |
||
1917 | int c[4] = { |
||
1918 | 1<<(h->sps.bit_depth_luma-1), |
||
1919 | 1<<(h->sps.bit_depth_chroma-1), |
||
1920 | 1<<(h->sps.bit_depth_chroma-1), |
||
1921 | -1 |
||
1922 | }; |
||
1923 | |||
1924 | if (!ff_thread_can_start_frame(h->avctx)) { |
||
1925 | av_log(h->avctx, AV_LOG_ERROR, "Attempt to start a frame outside SETUP state\n"); |
||
1926 | return -1; |
||
1927 | } |
||
1928 | |||
1929 | release_unused_pictures(h, 1); |
||
1930 | h->cur_pic_ptr = NULL; |
||
1931 | |||
1932 | i = find_unused_picture(h); |
||
1933 | if (i < 0) { |
||
1934 | av_log(h->avctx, AV_LOG_ERROR, "no frame buffer available\n"); |
||
1935 | return i; |
||
1936 | } |
||
1937 | pic = &h->DPB[i]; |
||
1938 | |||
1939 | pic->reference = h->droppable ? 0 : h->picture_structure; |
||
1940 | pic->f.coded_picture_number = h->coded_picture_number++; |
||
1941 | pic->field_picture = h->picture_structure != PICT_FRAME; |
||
1942 | |||
1943 | /* |
||
1944 | * Zero key_frame here; IDR markings per slice in frame or fields are ORed |
||
1945 | * in later. |
||
1946 | * See decode_nal_units(). |
||
1947 | */ |
||
1948 | pic->f.key_frame = 0; |
||
1949 | pic->sync = 0; |
||
1950 | pic->mmco_reset = 0; |
||
1951 | |||
1952 | if ((ret = alloc_picture(h, pic)) < 0) |
||
1953 | return ret; |
||
1954 | if(!h->sync && !h->avctx->hwaccel && |
||
1955 | !(h->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU)) |
||
1956 | avpriv_color_frame(&pic->f, c); |
||
1957 | |||
1958 | h->cur_pic_ptr = pic; |
||
1959 | unref_picture(h, &h->cur_pic); |
||
1960 | if ((ret = ref_picture(h, &h->cur_pic, h->cur_pic_ptr)) < 0) |
||
1961 | return ret; |
||
1962 | |||
1963 | if (CONFIG_ERROR_RESILIENCE) { |
||
1964 | ff_er_frame_start(&h->er); |
||
1965 | h->er.last_pic = |
||
1966 | h->er.next_pic = NULL; |
||
1967 | } |
||
1968 | |||
1969 | assert(h->linesize && h->uvlinesize); |
||
1970 | |||
1971 | for (i = 0; i < 16; i++) { |
||
1972 | h->block_offset[i] = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 4 * h->linesize * ((scan8[i] - scan8[0]) >> 3); |
||
1973 | h->block_offset[48 + i] = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 8 * h->linesize * ((scan8[i] - scan8[0]) >> 3); |
||
1974 | } |
||
1975 | for (i = 0; i < 16; i++) { |
||
1976 | h->block_offset[16 + i] = |
||
1977 | h->block_offset[32 + i] = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 4 * h->uvlinesize * ((scan8[i] - scan8[0]) >> 3); |
||
1978 | h->block_offset[48 + 16 + i] = |
||
1979 | h->block_offset[48 + 32 + i] = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 8 * h->uvlinesize * ((scan8[i] - scan8[0]) >> 3); |
||
1980 | } |
||
1981 | |||
1982 | // s->decode = (h->flags & CODEC_FLAG_PSNR) || !s->encoding || |
||
1983 | // h->cur_pic.reference /* || h->contains_intra */ || 1; |
||
1984 | |||
1985 | /* We mark the current picture as non-reference after allocating it, so |
||
1986 | * that if we break out due to an error it can be released automatically |
||
1987 | * in the next ff_MPV_frame_start(). |
||
1988 | */ |
||
1989 | h->cur_pic_ptr->reference = 0; |
||
1990 | |||
1991 | h->cur_pic_ptr->field_poc[0] = h->cur_pic_ptr->field_poc[1] = INT_MAX; |
||
1992 | |||
1993 | h->next_output_pic = NULL; |
||
1994 | |||
1995 | assert(h->cur_pic_ptr->long_ref == 0); |
||
1996 | |||
1997 | return 0; |
||
1998 | } |
||
1999 | |||
2000 | /** |
||
2001 | * Run setup operations that must be run after slice header decoding. |
||
2002 | * This includes finding the next displayed frame. |
||
2003 | * |
||
2004 | * @param h h264 master context |
||
2005 | * @param setup_finished enough NALs have been read that we can call |
||
2006 | * ff_thread_finish_setup() |
||
2007 | */ |
||
2008 | static void decode_postinit(H264Context *h, int setup_finished) |
||
2009 | { |
||
2010 | Picture *out = h->cur_pic_ptr; |
||
2011 | Picture *cur = h->cur_pic_ptr; |
||
2012 | int i, pics, out_of_order, out_idx; |
||
2013 | |||
2014 | h->cur_pic_ptr->f.pict_type = h->pict_type; |
||
2015 | |||
2016 | if (h->next_output_pic) |
||
2017 | return; |
||
2018 | |||
2019 | if (cur->field_poc[0] == INT_MAX || cur->field_poc[1] == INT_MAX) { |
||
2020 | /* FIXME: if we have two PAFF fields in one packet, we can't start |
||
2021 | * the next thread here. If we have one field per packet, we can. |
||
2022 | * The check in decode_nal_units() is not good enough to find this |
||
2023 | * yet, so we assume the worst for now. */ |
||
2024 | // if (setup_finished) |
||
2025 | // ff_thread_finish_setup(h->avctx); |
||
2026 | return; |
||
2027 | } |
||
2028 | |||
2029 | cur->f.interlaced_frame = 0; |
||
2030 | cur->f.repeat_pict = 0; |
||
2031 | |||
2032 | /* Signal interlacing information externally. */ |
||
2033 | /* Prioritize picture timing SEI information over used |
||
2034 | * decoding process if it exists. */ |
||
2035 | |||
2036 | if (h->sps.pic_struct_present_flag) { |
||
2037 | switch (h->sei_pic_struct) { |
||
2038 | case SEI_PIC_STRUCT_FRAME: |
||
2039 | break; |
||
2040 | case SEI_PIC_STRUCT_TOP_FIELD: |
||
2041 | case SEI_PIC_STRUCT_BOTTOM_FIELD: |
||
2042 | cur->f.interlaced_frame = 1; |
||
2043 | break; |
||
2044 | case SEI_PIC_STRUCT_TOP_BOTTOM: |
||
2045 | case SEI_PIC_STRUCT_BOTTOM_TOP: |
||
2046 | if (FIELD_OR_MBAFF_PICTURE(h)) |
||
2047 | cur->f.interlaced_frame = 1; |
||
2048 | else |
||
2049 | // try to flag soft telecine progressive |
||
2050 | cur->f.interlaced_frame = h->prev_interlaced_frame; |
||
2051 | break; |
||
2052 | case SEI_PIC_STRUCT_TOP_BOTTOM_TOP: |
||
2053 | case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM: |
||
2054 | /* Signal the possibility of telecined film externally |
||
2055 | * (pic_struct 5,6). From these hints, let the applications |
||
2056 | * decide if they apply deinterlacing. */ |
||
2057 | cur->f.repeat_pict = 1; |
||
2058 | break; |
||
2059 | case SEI_PIC_STRUCT_FRAME_DOUBLING: |
||
2060 | cur->f.repeat_pict = 2; |
||
2061 | break; |
||
2062 | case SEI_PIC_STRUCT_FRAME_TRIPLING: |
||
2063 | cur->f.repeat_pict = 4; |
||
2064 | break; |
||
2065 | } |
||
2066 | |||
2067 | if ((h->sei_ct_type & 3) && |
||
2068 | h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP) |
||
2069 | cur->f.interlaced_frame = (h->sei_ct_type & (1 << 1)) != 0; |
||
2070 | } else { |
||
2071 | /* Derive interlacing flag from used decoding process. */ |
||
2072 | cur->f.interlaced_frame = FIELD_OR_MBAFF_PICTURE(h); |
||
2073 | } |
||
2074 | h->prev_interlaced_frame = cur->f.interlaced_frame; |
||
2075 | |||
2076 | if (cur->field_poc[0] != cur->field_poc[1]) { |
||
2077 | /* Derive top_field_first from field pocs. */ |
||
2078 | cur->f.top_field_first = cur->field_poc[0] < cur->field_poc[1]; |
||
2079 | } else { |
||
2080 | if (cur->f.interlaced_frame || h->sps.pic_struct_present_flag) { |
||
2081 | /* Use picture timing SEI information. Even if it is a |
||
2082 | * information of a past frame, better than nothing. */ |
||
2083 | if (h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM || |
||
2084 | h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP) |
||
2085 | cur->f.top_field_first = 1; |
||
2086 | else |
||
2087 | cur->f.top_field_first = 0; |
||
2088 | } else { |
||
2089 | /* Most likely progressive */ |
||
2090 | cur->f.top_field_first = 0; |
||
2091 | } |
||
2092 | } |
||
2093 | |||
2094 | cur->mmco_reset = h->mmco_reset; |
||
2095 | h->mmco_reset = 0; |
||
2096 | // FIXME do something with unavailable reference frames |
||
2097 | |||
2098 | /* Sort B-frames into display order */ |
||
2099 | |||
2100 | if (h->sps.bitstream_restriction_flag && |
||
2101 | h->avctx->has_b_frames < h->sps.num_reorder_frames) { |
||
2102 | h->avctx->has_b_frames = h->sps.num_reorder_frames; |
||
2103 | h->low_delay = 0; |
||
2104 | } |
||
2105 | |||
2106 | if (h->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT && |
||
2107 | !h->sps.bitstream_restriction_flag) { |
||
2108 | h->avctx->has_b_frames = MAX_DELAYED_PIC_COUNT - 1; |
||
2109 | h->low_delay = 0; |
||
2110 | } |
||
2111 | |||
2112 | for (i = 0; 1; i++) { |
||
2113 | if(i == MAX_DELAYED_PIC_COUNT || cur->poc < h->last_pocs[i]){ |
||
2114 | if(i) |
||
2115 | h->last_pocs[i-1] = cur->poc; |
||
2116 | break; |
||
2117 | } else if(i) { |
||
2118 | h->last_pocs[i-1]= h->last_pocs[i]; |
||
2119 | } |
||
2120 | } |
||
2121 | out_of_order = MAX_DELAYED_PIC_COUNT - i; |
||
2122 | if( cur->f.pict_type == AV_PICTURE_TYPE_B |
||
2123 | || (h->last_pocs[MAX_DELAYED_PIC_COUNT-2] > INT_MIN && h->last_pocs[MAX_DELAYED_PIC_COUNT-1] - h->last_pocs[MAX_DELAYED_PIC_COUNT-2] > 2)) |
||
2124 | out_of_order = FFMAX(out_of_order, 1); |
||
2125 | if (out_of_order == MAX_DELAYED_PIC_COUNT) { |
||
2126 | av_log(h->avctx, AV_LOG_VERBOSE, "Invalid POC %d<%d\n", cur->poc, h->last_pocs[0]); |
||
2127 | for (i = 1; i < MAX_DELAYED_PIC_COUNT; i++) |
||
2128 | h->last_pocs[i] = INT_MIN; |
||
2129 | h->last_pocs[0] = cur->poc; |
||
2130 | cur->mmco_reset = 1; |
||
2131 | } else if(h->avctx->has_b_frames < out_of_order && !h->sps.bitstream_restriction_flag){ |
||
2132 | av_log(h->avctx, AV_LOG_VERBOSE, "Increasing reorder buffer to %d\n", out_of_order); |
||
2133 | h->avctx->has_b_frames = out_of_order; |
||
2134 | h->low_delay = 0; |
||
2135 | } |
||
2136 | |||
2137 | pics = 0; |
||
2138 | while (h->delayed_pic[pics]) |
||
2139 | pics++; |
||
2140 | |||
2141 | av_assert0(pics <= MAX_DELAYED_PIC_COUNT); |
||
2142 | |||
2143 | h->delayed_pic[pics++] = cur; |
||
2144 | if (cur->reference == 0) |
||
2145 | cur->reference = DELAYED_PIC_REF; |
||
2146 | |||
2147 | out = h->delayed_pic[0]; |
||
2148 | out_idx = 0; |
||
2149 | for (i = 1; h->delayed_pic[i] && |
||
2150 | !h->delayed_pic[i]->f.key_frame && |
||
2151 | !h->delayed_pic[i]->mmco_reset; |
||
2152 | i++) |
||
2153 | if (h->delayed_pic[i]->poc < out->poc) { |
||
2154 | out = h->delayed_pic[i]; |
||
2155 | out_idx = i; |
||
2156 | } |
||
2157 | if (h->avctx->has_b_frames == 0 && |
||
2158 | (h->delayed_pic[0]->f.key_frame || h->delayed_pic[0]->mmco_reset)) |
||
2159 | h->next_outputed_poc = INT_MIN; |
||
2160 | out_of_order = out->poc < h->next_outputed_poc; |
||
2161 | |||
2162 | if (out_of_order || pics > h->avctx->has_b_frames) { |
||
2163 | out->reference &= ~DELAYED_PIC_REF; |
||
2164 | // for frame threading, the owner must be the second field's thread or |
||
2165 | // else the first thread can release the picture and reuse it unsafely |
||
2166 | for (i = out_idx; h->delayed_pic[i]; i++) |
||
2167 | h->delayed_pic[i] = h->delayed_pic[i + 1]; |
||
2168 | } |
||
2169 | if (!out_of_order && pics > h->avctx->has_b_frames) { |
||
2170 | h->next_output_pic = out; |
||
2171 | if (out_idx == 0 && h->delayed_pic[0] && (h->delayed_pic[0]->f.key_frame || h->delayed_pic[0]->mmco_reset)) { |
||
2172 | h->next_outputed_poc = INT_MIN; |
||
2173 | } else |
||
2174 | h->next_outputed_poc = out->poc; |
||
2175 | } else { |
||
2176 | av_log(h->avctx, AV_LOG_DEBUG, "no picture %s\n", out_of_order ? "ooo" : ""); |
||
2177 | } |
||
2178 | |||
2179 | if (h->next_output_pic && h->next_output_pic->sync) { |
||
2180 | h->sync |= 2; |
||
2181 | } |
||
2182 | |||
2183 | if (setup_finished && !h->avctx->hwaccel) |
||
2184 | ff_thread_finish_setup(h->avctx); |
||
2185 | } |
||
2186 | |||
2187 | static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y, |
||
2188 | uint8_t *src_cb, uint8_t *src_cr, |
||
2189 | int linesize, int uvlinesize, |
||
2190 | int simple) |
||
2191 | { |
||
2192 | uint8_t *top_border; |
||
2193 | int top_idx = 1; |
||
2194 | const int pixel_shift = h->pixel_shift; |
||
2195 | int chroma444 = CHROMA444(h); |
||
2196 | int chroma422 = CHROMA422(h); |
||
2197 | |||
2198 | src_y -= linesize; |
||
2199 | src_cb -= uvlinesize; |
||
2200 | src_cr -= uvlinesize; |
||
2201 | |||
2202 | if (!simple && FRAME_MBAFF(h)) { |
||
2203 | if (h->mb_y & 1) { |
||
2204 | if (!MB_MBAFF(h)) { |
||
2205 | top_border = h->top_borders[0][h->mb_x]; |
||
2206 | AV_COPY128(top_border, src_y + 15 * linesize); |
||
2207 | if (pixel_shift) |
||
2208 | AV_COPY128(top_border + 16, src_y + 15 * linesize + 16); |
||
2209 | if (simple || !CONFIG_GRAY || !(h->flags & CODEC_FLAG_GRAY)) { |
||
2210 | if (chroma444) { |
||
2211 | if (pixel_shift) { |
||
2212 | AV_COPY128(top_border + 32, src_cb + 15 * uvlinesize); |
||
2213 | AV_COPY128(top_border + 48, src_cb + 15 * uvlinesize + 16); |
||
2214 | AV_COPY128(top_border + 64, src_cr + 15 * uvlinesize); |
||
2215 | AV_COPY128(top_border + 80, src_cr + 15 * uvlinesize + 16); |
||
2216 | } else { |
||
2217 | AV_COPY128(top_border + 16, src_cb + 15 * uvlinesize); |
||
2218 | AV_COPY128(top_border + 32, src_cr + 15 * uvlinesize); |
||
2219 | } |
||
2220 | } else if (chroma422) { |
||
2221 | if (pixel_shift) { |
||
2222 | AV_COPY128(top_border + 32, src_cb + 15 * uvlinesize); |
||
2223 | AV_COPY128(top_border + 48, src_cr + 15 * uvlinesize); |
||
2224 | } else { |
||
2225 | AV_COPY64(top_border + 16, src_cb + 15 * uvlinesize); |
||
2226 | AV_COPY64(top_border + 24, src_cr + 15 * uvlinesize); |
||
2227 | } |
||
2228 | } else { |
||
2229 | if (pixel_shift) { |
||
2230 | AV_COPY128(top_border + 32, src_cb + 7 * uvlinesize); |
||
2231 | AV_COPY128(top_border + 48, src_cr + 7 * uvlinesize); |
||
2232 | } else { |
||
2233 | AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize); |
||
2234 | AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize); |
||
2235 | } |
||
2236 | } |
||
2237 | } |
||
2238 | } |
||
2239 | } else if (MB_MBAFF(h)) { |
||
2240 | top_idx = 0; |
||
2241 | } else |
||
2242 | return; |
||
2243 | } |
||
2244 | |||
2245 | top_border = h->top_borders[top_idx][h->mb_x]; |
||
2246 | /* There are two lines saved, the line above the top macroblock |
||
2247 | * of a pair, and the line above the bottom macroblock. */ |
||
2248 | AV_COPY128(top_border, src_y + 16 * linesize); |
||
2249 | if (pixel_shift) |
||
2250 | AV_COPY128(top_border + 16, src_y + 16 * linesize + 16); |
||
2251 | |||
2252 | if (simple || !CONFIG_GRAY || !(h->flags & CODEC_FLAG_GRAY)) { |
||
2253 | if (chroma444) { |
||
2254 | if (pixel_shift) { |
||
2255 | AV_COPY128(top_border + 32, src_cb + 16 * linesize); |
||
2256 | AV_COPY128(top_border + 48, src_cb + 16 * linesize + 16); |
||
2257 | AV_COPY128(top_border + 64, src_cr + 16 * linesize); |
||
2258 | AV_COPY128(top_border + 80, src_cr + 16 * linesize + 16); |
||
2259 | } else { |
||
2260 | AV_COPY128(top_border + 16, src_cb + 16 * linesize); |
||
2261 | AV_COPY128(top_border + 32, src_cr + 16 * linesize); |
||
2262 | } |
||
2263 | } else if (chroma422) { |
||
2264 | if (pixel_shift) { |
||
2265 | AV_COPY128(top_border + 32, src_cb + 16 * uvlinesize); |
||
2266 | AV_COPY128(top_border + 48, src_cr + 16 * uvlinesize); |
||
2267 | } else { |
||
2268 | AV_COPY64(top_border + 16, src_cb + 16 * uvlinesize); |
||
2269 | AV_COPY64(top_border + 24, src_cr + 16 * uvlinesize); |
||
2270 | } |
||
2271 | } else { |
||
2272 | if (pixel_shift) { |
||
2273 | AV_COPY128(top_border + 32, src_cb + 8 * uvlinesize); |
||
2274 | AV_COPY128(top_border + 48, src_cr + 8 * uvlinesize); |
||
2275 | } else { |
||
2276 | AV_COPY64(top_border + 16, src_cb + 8 * uvlinesize); |
||
2277 | AV_COPY64(top_border + 24, src_cr + 8 * uvlinesize); |
||
2278 | } |
||
2279 | } |
||
2280 | } |
||
2281 | } |
||
2282 | |||
2283 | static av_always_inline void xchg_mb_border(H264Context *h, uint8_t *src_y, |
||
2284 | uint8_t *src_cb, uint8_t *src_cr, |
||
2285 | int linesize, int uvlinesize, |
||
2286 | int xchg, int chroma444, |
||
2287 | int simple, int pixel_shift) |
||
2288 | { |
||
2289 | int deblock_topleft; |
||
2290 | int deblock_top; |
||
2291 | int top_idx = 1; |
||
2292 | uint8_t *top_border_m1; |
||
2293 | uint8_t *top_border; |
||
2294 | |||
2295 | if (!simple && FRAME_MBAFF(h)) { |
||
2296 | if (h->mb_y & 1) { |
||
2297 | if (!MB_MBAFF(h)) |
||
2298 | return; |
||
2299 | } else { |
||
2300 | top_idx = MB_MBAFF(h) ? 0 : 1; |
||
2301 | } |
||
2302 | } |
||
2303 | |||
2304 | if (h->deblocking_filter == 2) { |
||
2305 | deblock_topleft = h->slice_table[h->mb_xy - 1 - h->mb_stride] == h->slice_num; |
||
2306 | deblock_top = h->top_type; |
||
2307 | } else { |
||
2308 | deblock_topleft = (h->mb_x > 0); |
||
2309 | deblock_top = (h->mb_y > !!MB_FIELD(h)); |
||
2310 | } |
||
2311 | |||
2312 | src_y -= linesize + 1 + pixel_shift; |
||
2313 | src_cb -= uvlinesize + 1 + pixel_shift; |
||
2314 | src_cr -= uvlinesize + 1 + pixel_shift; |
||
2315 | |||
2316 | top_border_m1 = h->top_borders[top_idx][h->mb_x - 1]; |
||
2317 | top_border = h->top_borders[top_idx][h->mb_x]; |
||
2318 | |||
2319 | #define XCHG(a, b, xchg) \ |
||
2320 | if (pixel_shift) { \ |
||
2321 | if (xchg) { \ |
||
2322 | AV_SWAP64(b + 0, a + 0); \ |
||
2323 | AV_SWAP64(b + 8, a + 8); \ |
||
2324 | } else { \ |
||
2325 | AV_COPY128(b, a); \ |
||
2326 | } \ |
||
2327 | } else if (xchg) \ |
||
2328 | AV_SWAP64(b, a); \ |
||
2329 | else \ |
||
2330 | AV_COPY64(b, a); |
||
2331 | |||
2332 | if (deblock_top) { |
||
2333 | if (deblock_topleft) { |
||
2334 | XCHG(top_border_m1 + (8 << pixel_shift), |
||
2335 | src_y - (7 << pixel_shift), 1); |
||
2336 | } |
||
2337 | XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg); |
||
2338 | XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1); |
||
2339 | if (h->mb_x + 1 < h->mb_width) { |
||
2340 | XCHG(h->top_borders[top_idx][h->mb_x + 1], |
||
2341 | src_y + (17 << pixel_shift), 1); |
||
2342 | } |
||
2343 | if (simple || !CONFIG_GRAY || !(h->flags & CODEC_FLAG_GRAY)) { |
||
2344 | if (chroma444) { |
||
2345 | if (deblock_topleft) { |
||
2346 | XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1); |
||
2347 | XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1); |
||
2348 | } |
||
2349 | XCHG(top_border + (16 << pixel_shift), src_cb + (1 << pixel_shift), xchg); |
||
2350 | XCHG(top_border + (24 << pixel_shift), src_cb + (9 << pixel_shift), 1); |
||
2351 | XCHG(top_border + (32 << pixel_shift), src_cr + (1 << pixel_shift), xchg); |
||
2352 | XCHG(top_border + (40 << pixel_shift), src_cr + (9 << pixel_shift), 1); |
||
2353 | if (h->mb_x + 1 < h->mb_width) { |
||
2354 | XCHG(h->top_borders[top_idx][h->mb_x + 1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1); |
||
2355 | XCHG(h->top_borders[top_idx][h->mb_x + 1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1); |
||
2356 | } |
||
2357 | } else { |
||
2358 | if (deblock_topleft) { |
||
2359 | XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1); |
||
2360 | XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1); |
||
2361 | } |
||
2362 | XCHG(top_border + (16 << pixel_shift), src_cb + 1 + pixel_shift, 1); |
||
2363 | XCHG(top_border + (24 << pixel_shift), src_cr + 1 + pixel_shift, 1); |
||
2364 | } |
||
2365 | } |
||
2366 | } |
||
2367 | } |
||
2368 | |||
2369 | static av_always_inline int dctcoef_get(int16_t *mb, int high_bit_depth, |
||
2370 | int index) |
||
2371 | { |
||
2372 | if (high_bit_depth) { |
||
2373 | return AV_RN32A(((int32_t *)mb) + index); |
||
2374 | } else |
||
2375 | return AV_RN16A(mb + index); |
||
2376 | } |
||
2377 | |||
2378 | static av_always_inline void dctcoef_set(int16_t *mb, int high_bit_depth, |
||
2379 | int index, int value) |
||
2380 | { |
||
2381 | if (high_bit_depth) { |
||
2382 | AV_WN32A(((int32_t *)mb) + index, value); |
||
2383 | } else |
||
2384 | AV_WN16A(mb + index, value); |
||
2385 | } |
||
2386 | |||
2387 | static av_always_inline void hl_decode_mb_predict_luma(H264Context *h, |
||
2388 | int mb_type, int is_h264, |
||
2389 | int simple, |
||
2390 | int transform_bypass, |
||
2391 | int pixel_shift, |
||
2392 | int *block_offset, |
||
2393 | int linesize, |
||
2394 | uint8_t *dest_y, int p) |
||
2395 | { |
||
2396 | void (*idct_add)(uint8_t *dst, int16_t *block, int stride); |
||
2397 | void (*idct_dc_add)(uint8_t *dst, int16_t *block, int stride); |
||
2398 | int i; |
||
2399 | int qscale = p == 0 ? h->qscale : h->chroma_qp[p - 1]; |
||
2400 | block_offset += 16 * p; |
||
2401 | if (IS_INTRA4x4(mb_type)) { |
||
2402 | if (IS_8x8DCT(mb_type)) { |
||
2403 | if (transform_bypass) { |
||
2404 | idct_dc_add = |
||
2405 | idct_add = h->h264dsp.h264_add_pixels8_clear; |
||
2406 | } else { |
||
2407 | idct_dc_add = h->h264dsp.h264_idct8_dc_add; |
||
2408 | idct_add = h->h264dsp.h264_idct8_add; |
||
2409 | } |
||
2410 | for (i = 0; i < 16; i += 4) { |
||
2411 | uint8_t *const ptr = dest_y + block_offset[i]; |
||
2412 | const int dir = h->intra4x4_pred_mode_cache[scan8[i]]; |
||
2413 | if (transform_bypass && h->sps.profile_idc == 244 && dir <= 1) { |
||
2414 | h->hpc.pred8x8l_add[dir](ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize); |
||
2415 | } else { |
||
2416 | const int nnz = h->non_zero_count_cache[scan8[i + p * 16]]; |
||
2417 | h->hpc.pred8x8l[dir](ptr, (h->topleft_samples_available << i) & 0x8000, |
||
2418 | (h->topright_samples_available << i) & 0x4000, linesize); |
||
2419 | if (nnz) { |
||
2420 | if (nnz == 1 && dctcoef_get(h->mb, pixel_shift, i * 16 + p * 256)) |
||
2421 | idct_dc_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize); |
||
2422 | else |
||
2423 | idct_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize); |
||
2424 | } |
||
2425 | } |
||
2426 | } |
||
2427 | } else { |
||
2428 | if (transform_bypass) { |
||
2429 | idct_dc_add = |
||
2430 | idct_add = h->h264dsp.h264_add_pixels4_clear; |
||
2431 | } else { |
||
2432 | idct_dc_add = h->h264dsp.h264_idct_dc_add; |
||
2433 | idct_add = h->h264dsp.h264_idct_add; |
||
2434 | } |
||
2435 | for (i = 0; i < 16; i++) { |
||
2436 | uint8_t *const ptr = dest_y + block_offset[i]; |
||
2437 | const int dir = h->intra4x4_pred_mode_cache[scan8[i]]; |
||
2438 | |||
2439 | if (transform_bypass && h->sps.profile_idc == 244 && dir <= 1) { |
||
2440 | h->hpc.pred4x4_add[dir](ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize); |
||
2441 | } else { |
||
2442 | uint8_t *topright; |
||
2443 | int nnz, tr; |
||
2444 | uint64_t tr_high; |
||
2445 | if (dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED) { |
||
2446 | const int topright_avail = (h->topright_samples_available << i) & 0x8000; |
||
2447 | av_assert2(h->mb_y || linesize <= block_offset[i]); |
||
2448 | if (!topright_avail) { |
||
2449 | if (pixel_shift) { |
||
2450 | tr_high = ((uint16_t *)ptr)[3 - linesize / 2] * 0x0001000100010001ULL; |
||
2451 | topright = (uint8_t *)&tr_high; |
||
2452 | } else { |
||
2453 | tr = ptr[3 - linesize] * 0x01010101u; |
||
2454 | topright = (uint8_t *)&tr; |
||
2455 | } |
||
2456 | } else |
||
2457 | topright = ptr + (4 << pixel_shift) - linesize; |
||
2458 | } else |
||
2459 | topright = NULL; |
||
2460 | |||
2461 | h->hpc.pred4x4[dir](ptr, topright, linesize); |
||
2462 | nnz = h->non_zero_count_cache[scan8[i + p * 16]]; |
||
2463 | if (nnz) { |
||
2464 | if (is_h264) { |
||
2465 | if (nnz == 1 && dctcoef_get(h->mb, pixel_shift, i * 16 + p * 256)) |
||
2466 | idct_dc_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize); |
||
2467 | else |
||
2468 | idct_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize); |
||
2469 | } else if (CONFIG_SVQ3_DECODER) |
||
2470 | ff_svq3_add_idct_c(ptr, h->mb + i * 16 + p * 256, linesize, qscale, 0); |
||
2471 | } |
||
2472 | } |
||
2473 | } |
||
2474 | } |
||
2475 | } else { |
||
2476 | h->hpc.pred16x16[h->intra16x16_pred_mode](dest_y, linesize); |
||
2477 | if (is_h264) { |
||
2478 | if (h->non_zero_count_cache[scan8[LUMA_DC_BLOCK_INDEX + p]]) { |
||
2479 | if (!transform_bypass) |
||
2480 | h->h264dsp.h264_luma_dc_dequant_idct(h->mb + (p * 256 << pixel_shift), |
||
2481 | h->mb_luma_dc[p], |
||
2482 | h->dequant4_coeff[p][qscale][0]); |
||
2483 | else { |
||
2484 | static const uint8_t dc_mapping[16] = { |
||
2485 | |||
2486 | 2 * 16, 3 * 16, 6 * 16, 7 * 16, |
||
2487 | 8 * 16, 9 * 16, 12 * 16, 13 * 16, |
||
2488 | 10 * 16, 11 * 16, 14 * 16, 15 * 16 |
||
2489 | }; |
||
2490 | for (i = 0; i < 16; i++) |
||
2491 | dctcoef_set(h->mb + (p * 256 << pixel_shift), |
||
2492 | pixel_shift, dc_mapping[i], |
||
2493 | dctcoef_get(h->mb_luma_dc[p], |
||
2494 | pixel_shift, i)); |
||
2495 | } |
||
2496 | } |
||
2497 | } else if (CONFIG_SVQ3_DECODER) |
||
2498 | ff_svq3_luma_dc_dequant_idct_c(h->mb + p * 256, |
||
2499 | h->mb_luma_dc[p], qscale); |
||
2500 | } |
||
2501 | } |
||
2502 | |||
2503 | static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type, |
||
2504 | int is_h264, int simple, |
||
2505 | int transform_bypass, |
||
2506 | int pixel_shift, |
||
2507 | int *block_offset, |
||
2508 | int linesize, |
||
2509 | uint8_t *dest_y, int p) |
||
2510 | { |
||
2511 | void (*idct_add)(uint8_t *dst, int16_t *block, int stride); |
||
2512 | int i; |
||
2513 | block_offset += 16 * p; |
||
2514 | if (!IS_INTRA4x4(mb_type)) { |
||
2515 | if (is_h264) { |
||
2516 | if (IS_INTRA16x16(mb_type)) { |
||
2517 | if (transform_bypass) { |
||
2518 | if (h->sps.profile_idc == 244 && |
||
2519 | (h->intra16x16_pred_mode == VERT_PRED8x8 || |
||
2520 | h->intra16x16_pred_mode == HOR_PRED8x8)) { |
||
2521 | h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, |
||
2522 | h->mb + (p * 256 << pixel_shift), |
||
2523 | linesize); |
||
2524 | } else { |
||
2525 | for (i = 0; i < 16; i++) |
||
2526 | if (h->non_zero_count_cache[scan8[i + p * 16]] || |
||
2527 | dctcoef_get(h->mb, pixel_shift, i * 16 + p * 256)) |
||
2528 | h->h264dsp.h264_add_pixels4_clear(dest_y + block_offset[i], |
||
2529 | h->mb + (i * 16 + p * 256 << pixel_shift), |
||
2530 | linesize); |
||
2531 | } |
||
2532 | } else { |
||
2533 | h->h264dsp.h264_idct_add16intra(dest_y, block_offset, |
||
2534 | h->mb + (p * 256 << pixel_shift), |
||
2535 | linesize, |
||
2536 | h->non_zero_count_cache + p * 5 * 8); |
||
2537 | } |
||
2538 | } else if (h->cbp & 15) { |
||
2539 | if (transform_bypass) { |
||
2540 | const int di = IS_8x8DCT(mb_type) ? 4 : 1; |
||
2541 | idct_add = IS_8x8DCT(mb_type) ? h->h264dsp.h264_add_pixels8_clear |
||
2542 | : h->h264dsp.h264_add_pixels4_clear; |
||
2543 | for (i = 0; i < 16; i += di) |
||
2544 | if (h->non_zero_count_cache[scan8[i + p * 16]]) |
||
2545 | idct_add(dest_y + block_offset[i], |
||
2546 | h->mb + (i * 16 + p * 256 << pixel_shift), |
||
2547 | linesize); |
||
2548 | } else { |
||
2549 | if (IS_8x8DCT(mb_type)) |
||
2550 | h->h264dsp.h264_idct8_add4(dest_y, block_offset, |
||
2551 | h->mb + (p * 256 << pixel_shift), |
||
2552 | linesize, |
||
2553 | h->non_zero_count_cache + p * 5 * 8); |
||
2554 | else |
||
2555 | h->h264dsp.h264_idct_add16(dest_y, block_offset, |
||
2556 | h->mb + (p * 256 << pixel_shift), |
||
2557 | linesize, |
||
2558 | h->non_zero_count_cache + p * 5 * 8); |
||
2559 | } |
||
2560 | } |
||
2561 | } else if (CONFIG_SVQ3_DECODER) { |
||
2562 | for (i = 0; i < 16; i++) |
||
2563 | if (h->non_zero_count_cache[scan8[i + p * 16]] || h->mb[i * 16 + p * 256]) { |
||
2564 | // FIXME benchmark weird rule, & below |
||
2565 | uint8_t *const ptr = dest_y + block_offset[i]; |
||
2566 | ff_svq3_add_idct_c(ptr, h->mb + i * 16 + p * 256, linesize, |
||
2567 | h->qscale, IS_INTRA(mb_type) ? 1 : 0); |
||
2568 | } |
||
2569 | } |
||
2570 | } |
||
2571 | } |
||
2572 | |||
2573 | #define BITS 8 |
||
2574 | #define SIMPLE 1 |
||
2575 | #include "h264_mb_template.c" |
||
2576 | |||
2577 | #undef BITS |
||
2578 | #define BITS 16 |
||
2579 | #include "h264_mb_template.c" |
||
2580 | |||
2581 | #undef SIMPLE |
||
2582 | #define SIMPLE 0 |
||
2583 | #include "h264_mb_template.c" |
||
2584 | |||
2585 | void ff_h264_hl_decode_mb(H264Context *h) |
||
2586 | { |
||
2587 | const int mb_xy = h->mb_xy; |
||
2588 | const int mb_type = h->cur_pic.mb_type[mb_xy]; |
||
2589 | int is_complex = CONFIG_SMALL || h->is_complex || |
||
2590 | IS_INTRA_PCM(mb_type) || h->qscale == 0; |
||
2591 | |||
2592 | if (CHROMA444(h)) { |
||
2593 | if (is_complex || h->pixel_shift) |
||
2594 | hl_decode_mb_444_complex(h); |
||
2595 | else |
||
2596 | hl_decode_mb_444_simple_8(h); |
||
2597 | } else if (is_complex) { |
||
2598 | hl_decode_mb_complex(h); |
||
2599 | } else if (h->pixel_shift) { |
||
2600 | hl_decode_mb_simple_16(h); |
||
2601 | } else |
||
2602 | hl_decode_mb_simple_8(h); |
||
2603 | } |
||
2604 | |||
2605 | int ff_pred_weight_table(H264Context *h) |
||
2606 | { |
||
2607 | int list, i; |
||
2608 | int luma_def, chroma_def; |
||
2609 | |||
2610 | h->use_weight = 0; |
||
2611 | h->use_weight_chroma = 0; |
||
2612 | h->luma_log2_weight_denom = get_ue_golomb(&h->gb); |
||
2613 | if (h->sps.chroma_format_idc) |
||
2614 | h->chroma_log2_weight_denom = get_ue_golomb(&h->gb); |
||
2615 | luma_def = 1 << h->luma_log2_weight_denom; |
||
2616 | chroma_def = 1 << h->chroma_log2_weight_denom; |
||
2617 | |||
2618 | for (list = 0; list < 2; list++) { |
||
2619 | h->luma_weight_flag[list] = 0; |
||
2620 | h->chroma_weight_flag[list] = 0; |
||
2621 | for (i = 0; i < h->ref_count[list]; i++) { |
||
2622 | int luma_weight_flag, chroma_weight_flag; |
||
2623 | |||
2624 | luma_weight_flag = get_bits1(&h->gb); |
||
2625 | if (luma_weight_flag) { |
||
2626 | h->luma_weight[i][list][0] = get_se_golomb(&h->gb); |
||
2627 | h->luma_weight[i][list][1] = get_se_golomb(&h->gb); |
||
2628 | if (h->luma_weight[i][list][0] != luma_def || |
||
2629 | h->luma_weight[i][list][1] != 0) { |
||
2630 | h->use_weight = 1; |
||
2631 | h->luma_weight_flag[list] = 1; |
||
2632 | } |
||
2633 | } else { |
||
2634 | h->luma_weight[i][list][0] = luma_def; |
||
2635 | h->luma_weight[i][list][1] = 0; |
||
2636 | } |
||
2637 | |||
2638 | if (h->sps.chroma_format_idc) { |
||
2639 | chroma_weight_flag = get_bits1(&h->gb); |
||
2640 | if (chroma_weight_flag) { |
||
2641 | int j; |
||
2642 | for (j = 0; j < 2; j++) { |
||
2643 | h->chroma_weight[i][list][j][0] = get_se_golomb(&h->gb); |
||
2644 | h->chroma_weight[i][list][j][1] = get_se_golomb(&h->gb); |
||
2645 | if (h->chroma_weight[i][list][j][0] != chroma_def || |
||
2646 | h->chroma_weight[i][list][j][1] != 0) { |
||
2647 | h->use_weight_chroma = 1; |
||
2648 | h->chroma_weight_flag[list] = 1; |
||
2649 | } |
||
2650 | } |
||
2651 | } else { |
||
2652 | int j; |
||
2653 | for (j = 0; j < 2; j++) { |
||
2654 | h->chroma_weight[i][list][j][0] = chroma_def; |
||
2655 | h->chroma_weight[i][list][j][1] = 0; |
||
2656 | } |
||
2657 | } |
||
2658 | } |
||
2659 | } |
||
2660 | if (h->slice_type_nos != AV_PICTURE_TYPE_B) |
||
2661 | break; |
||
2662 | } |
||
2663 | h->use_weight = h->use_weight || h->use_weight_chroma; |
||
2664 | return 0; |
||
2665 | } |
||
2666 | |||
2667 | /** |
||
2668 | * Initialize implicit_weight table. |
||
2669 | * @param field 0/1 initialize the weight for interlaced MBAFF |
||
2670 | * -1 initializes the rest |
||
2671 | */ |
||
2672 | static void implicit_weight_table(H264Context *h, int field) |
||
2673 | { |
||
2674 | int ref0, ref1, i, cur_poc, ref_start, ref_count0, ref_count1; |
||
2675 | |||
2676 | for (i = 0; i < 2; i++) { |
||
2677 | h->luma_weight_flag[i] = 0; |
||
2678 | h->chroma_weight_flag[i] = 0; |
||
2679 | } |
||
2680 | |||
2681 | if (field < 0) { |
||
2682 | if (h->picture_structure == PICT_FRAME) { |
||
2683 | cur_poc = h->cur_pic_ptr->poc; |
||
2684 | } else { |
||
2685 | cur_poc = h->cur_pic_ptr->field_poc[h->picture_structure - 1]; |
||
2686 | } |
||
2687 | if (h->ref_count[0] == 1 && h->ref_count[1] == 1 && !FRAME_MBAFF(h) && |
||
2688 | h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2 * cur_poc) { |
||
2689 | h->use_weight = 0; |
||
2690 | h->use_weight_chroma = 0; |
||
2691 | return; |
||
2692 | } |
||
2693 | ref_start = 0; |
||
2694 | ref_count0 = h->ref_count[0]; |
||
2695 | ref_count1 = h->ref_count[1]; |
||
2696 | } else { |
||
2697 | cur_poc = h->cur_pic_ptr->field_poc[field]; |
||
2698 | ref_start = 16; |
||
2699 | ref_count0 = 16 + 2 * h->ref_count[0]; |
||
2700 | ref_count1 = 16 + 2 * h->ref_count[1]; |
||
2701 | } |
||
2702 | |||
2703 | h->use_weight = 2; |
||
2704 | h->use_weight_chroma = 2; |
||
2705 | h->luma_log2_weight_denom = 5; |
||
2706 | h->chroma_log2_weight_denom = 5; |
||
2707 | |||
2708 | for (ref0 = ref_start; ref0 < ref_count0; ref0++) { |
||
2709 | int poc0 = h->ref_list[0][ref0].poc; |
||
2710 | for (ref1 = ref_start; ref1 < ref_count1; ref1++) { |
||
2711 | int w = 32; |
||
2712 | if (!h->ref_list[0][ref0].long_ref && !h->ref_list[1][ref1].long_ref) { |
||
2713 | int poc1 = h->ref_list[1][ref1].poc; |
||
2714 | int td = av_clip(poc1 - poc0, -128, 127); |
||
2715 | if (td) { |
||
2716 | int tb = av_clip(cur_poc - poc0, -128, 127); |
||
2717 | int tx = (16384 + (FFABS(td) >> 1)) / td; |
||
2718 | int dist_scale_factor = (tb * tx + 32) >> 8; |
||
2719 | if (dist_scale_factor >= -64 && dist_scale_factor <= 128) |
||
2720 | w = 64 - dist_scale_factor; |
||
2721 | } |
||
2722 | } |
||
2723 | if (field < 0) { |
||
2724 | h->implicit_weight[ref0][ref1][0] = |
||
2725 | h->implicit_weight[ref0][ref1][1] = w; |
||
2726 | } else { |
||
2727 | h->implicit_weight[ref0][ref1][field] = w; |
||
2728 | } |
||
2729 | } |
||
2730 | } |
||
2731 | } |
||
2732 | |||
2733 | /** |
||
2734 | * instantaneous decoder refresh. |
||
2735 | */ |
||
2736 | static void idr(H264Context *h) |
||
2737 | { |
||
2738 | int i; |
||
2739 | ff_h264_remove_all_refs(h); |
||
2740 | h->prev_frame_num = 0; |
||
2741 | h->prev_frame_num_offset = 0; |
||
2742 | h->prev_poc_msb = 1<<16; |
||
2743 | h->prev_poc_lsb = 0; |
||
2744 | for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++) |
||
2745 | h->last_pocs[i] = INT_MIN; |
||
2746 | } |
||
2747 | |||
2748 | /* forget old pics after a seek */ |
||
2749 | static void flush_change(H264Context *h) |
||
2750 | { |
||
2751 | int i, j; |
||
2752 | |||
2753 | h->outputed_poc = h->next_outputed_poc = INT_MIN; |
||
2754 | h->prev_interlaced_frame = 1; |
||
2755 | idr(h); |
||
2756 | |||
2757 | h->prev_frame_num = -1; |
||
2758 | if (h->cur_pic_ptr) { |
||
2759 | h->cur_pic_ptr->reference = 0; |
||
2760 | for (j=i=0; h->delayed_pic[i]; i++) |
||
2761 | if (h->delayed_pic[i] != h->cur_pic_ptr) |
||
2762 | h->delayed_pic[j++] = h->delayed_pic[i]; |
||
2763 | h->delayed_pic[j] = NULL; |
||
2764 | } |
||
2765 | h->first_field = 0; |
||
2766 | memset(h->ref_list[0], 0, sizeof(h->ref_list[0])); |
||
2767 | memset(h->ref_list[1], 0, sizeof(h->ref_list[1])); |
||
2768 | memset(h->default_ref_list[0], 0, sizeof(h->default_ref_list[0])); |
||
2769 | memset(h->default_ref_list[1], 0, sizeof(h->default_ref_list[1])); |
||
2770 | ff_h264_reset_sei(h); |
||
2771 | h->recovery_frame= -1; |
||
2772 | h->sync= 0; |
||
2773 | h->list_count = 0; |
||
2774 | h->current_slice = 0; |
||
2775 | h->mmco_reset = 1; |
||
2776 | } |
||
2777 | |||
2778 | /* forget old pics after a seek */ |
||
2779 | static void flush_dpb(AVCodecContext *avctx) |
||
2780 | { |
||
2781 | H264Context *h = avctx->priv_data; |
||
2782 | int i; |
||
2783 | |||
2784 | for (i = 0; i <= MAX_DELAYED_PIC_COUNT; i++) { |
||
2785 | if (h->delayed_pic[i]) |
||
2786 | h->delayed_pic[i]->reference = 0; |
||
2787 | h->delayed_pic[i] = NULL; |
||
2788 | } |
||
2789 | |||
2790 | flush_change(h); |
||
2791 | |||
2792 | if (h->DPB) |
||
2793 | for (i = 0; i < MAX_PICTURE_COUNT; i++) |
||
2794 | unref_picture(h, &h->DPB[i]); |
||
2795 | h->cur_pic_ptr = NULL; |
||
2796 | unref_picture(h, &h->cur_pic); |
||
2797 | |||
2798 | h->mb_x = h->mb_y = 0; |
||
2799 | |||
2800 | h->parse_context.state = -1; |
||
2801 | h->parse_context.frame_start_found = 0; |
||
2802 | h->parse_context.overread = 0; |
||
2803 | h->parse_context.overread_index = 0; |
||
2804 | h->parse_context.index = 0; |
||
2805 | h->parse_context.last_index = 0; |
||
2806 | } |
||
2807 | |||
2808 | int ff_init_poc(H264Context *h, int pic_field_poc[2], int *pic_poc) |
||
2809 | { |
||
2810 | const int max_frame_num = 1 << h->sps.log2_max_frame_num; |
||
2811 | int field_poc[2]; |
||
2812 | |||
2813 | h->frame_num_offset = h->prev_frame_num_offset; |
||
2814 | if (h->frame_num < h->prev_frame_num) |
||
2815 | h->frame_num_offset += max_frame_num; |
||
2816 | |||
2817 | if (h->sps.poc_type == 0) { |
||
2818 | const int max_poc_lsb = 1 << h->sps.log2_max_poc_lsb; |
||
2819 | |||
2820 | if (h->poc_lsb < h->prev_poc_lsb && |
||
2821 | h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb / 2) |
||
2822 | h->poc_msb = h->prev_poc_msb + max_poc_lsb; |
||
2823 | else if (h->poc_lsb > h->prev_poc_lsb && |
||
2824 | h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb / 2) |
||
2825 | h->poc_msb = h->prev_poc_msb - max_poc_lsb; |
||
2826 | else |
||
2827 | h->poc_msb = h->prev_poc_msb; |
||
2828 | field_poc[0] = |
||
2829 | field_poc[1] = h->poc_msb + h->poc_lsb; |
||
2830 | if (h->picture_structure == PICT_FRAME) |
||
2831 | field_poc[1] += h->delta_poc_bottom; |
||
2832 | } else if (h->sps.poc_type == 1) { |
||
2833 | int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc; |
||
2834 | int i; |
||
2835 | |||
2836 | if (h->sps.poc_cycle_length != 0) |
||
2837 | abs_frame_num = h->frame_num_offset + h->frame_num; |
||
2838 | else |
||
2839 | abs_frame_num = 0; |
||
2840 | |||
2841 | if (h->nal_ref_idc == 0 && abs_frame_num > 0) |
||
2842 | abs_frame_num--; |
||
2843 | |||
2844 | expected_delta_per_poc_cycle = 0; |
||
2845 | for (i = 0; i < h->sps.poc_cycle_length; i++) |
||
2846 | // FIXME integrate during sps parse |
||
2847 | expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[i]; |
||
2848 | |||
2849 | if (abs_frame_num > 0) { |
||
2850 | int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length; |
||
2851 | int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length; |
||
2852 | |||
2853 | expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle; |
||
2854 | for (i = 0; i <= frame_num_in_poc_cycle; i++) |
||
2855 | expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[i]; |
||
2856 | } else |
||
2857 | expectedpoc = 0; |
||
2858 | |||
2859 | if (h->nal_ref_idc == 0) |
||
2860 | expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic; |
||
2861 | |||
2862 | field_poc[0] = expectedpoc + h->delta_poc[0]; |
||
2863 | field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field; |
||
2864 | |||
2865 | if (h->picture_structure == PICT_FRAME) |
||
2866 | field_poc[1] += h->delta_poc[1]; |
||
2867 | } else { |
||
2868 | int poc = 2 * (h->frame_num_offset + h->frame_num); |
||
2869 | |||
2870 | if (!h->nal_ref_idc) |
||
2871 | poc--; |
||
2872 | |||
2873 | field_poc[0] = poc; |
||
2874 | field_poc[1] = poc; |
||
2875 | } |
||
2876 | |||
2877 | if (h->picture_structure != PICT_BOTTOM_FIELD) |
||
2878 | pic_field_poc[0] = field_poc[0]; |
||
2879 | if (h->picture_structure != PICT_TOP_FIELD) |
||
2880 | pic_field_poc[1] = field_poc[1]; |
||
2881 | *pic_poc = FFMIN(pic_field_poc[0], pic_field_poc[1]); |
||
2882 | |||
2883 | return 0; |
||
2884 | } |
||
2885 | |||
2886 | /** |
||
2887 | * initialize scan tables |
||
2888 | */ |
||
2889 | static void init_scan_tables(H264Context *h) |
||
2890 | { |
||
2891 | int i; |
||
2892 | for (i = 0; i < 16; i++) { |
||
2893 | #define T(x) (x >> 2) | ((x << 2) & 0xF) |
||
2894 | h->zigzag_scan[i] = T(zigzag_scan[i]); |
||
2895 | h->field_scan[i] = T(field_scan[i]); |
||
2896 | #undef T |
||
2897 | } |
||
2898 | for (i = 0; i < 64; i++) { |
||
2899 | #define T(x) (x >> 3) | ((x & 7) << 3) |
||
2900 | h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]); |
||
2901 | h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]); |
||
2902 | h->field_scan8x8[i] = T(field_scan8x8[i]); |
||
2903 | h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]); |
||
2904 | #undef T |
||
2905 | } |
||
2906 | if (h->sps.transform_bypass) { // FIXME same ugly |
||
2907 | memcpy(h->zigzag_scan_q0 , zigzag_scan , sizeof(h->zigzag_scan_q0 )); |
||
2908 | memcpy(h->zigzag_scan8x8_q0 , ff_zigzag_direct , sizeof(h->zigzag_scan8x8_q0 )); |
||
2909 | memcpy(h->zigzag_scan8x8_cavlc_q0 , zigzag_scan8x8_cavlc , sizeof(h->zigzag_scan8x8_cavlc_q0)); |
||
2910 | memcpy(h->field_scan_q0 , field_scan , sizeof(h->field_scan_q0 )); |
||
2911 | memcpy(h->field_scan8x8_q0 , field_scan8x8 , sizeof(h->field_scan8x8_q0 )); |
||
2912 | memcpy(h->field_scan8x8_cavlc_q0 , field_scan8x8_cavlc , sizeof(h->field_scan8x8_cavlc_q0 )); |
||
2913 | } else { |
||
2914 | memcpy(h->zigzag_scan_q0 , h->zigzag_scan , sizeof(h->zigzag_scan_q0 )); |
||
2915 | memcpy(h->zigzag_scan8x8_q0 , h->zigzag_scan8x8 , sizeof(h->zigzag_scan8x8_q0 )); |
||
2916 | memcpy(h->zigzag_scan8x8_cavlc_q0 , h->zigzag_scan8x8_cavlc , sizeof(h->zigzag_scan8x8_cavlc_q0)); |
||
2917 | memcpy(h->field_scan_q0 , h->field_scan , sizeof(h->field_scan_q0 )); |
||
2918 | memcpy(h->field_scan8x8_q0 , h->field_scan8x8 , sizeof(h->field_scan8x8_q0 )); |
||
2919 | memcpy(h->field_scan8x8_cavlc_q0 , h->field_scan8x8_cavlc , sizeof(h->field_scan8x8_cavlc_q0 )); |
||
2920 | } |
||
2921 | } |
||
2922 | |||
2923 | static int field_end(H264Context *h, int in_setup) |
||
2924 | { |
||
2925 | AVCodecContext *const avctx = h->avctx; |
||
2926 | int err = 0; |
||
2927 | h->mb_y = 0; |
||
2928 | |||
2929 | if (CONFIG_H264_VDPAU_DECODER && |
||
2930 | h->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU) |
||
2931 | ff_vdpau_h264_set_reference_frames(h); |
||
2932 | |||
2933 | if (in_setup || !(avctx->active_thread_type & FF_THREAD_FRAME)) { |
||
2934 | if (!h->droppable) { |
||
2935 | err = ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index); |
||
2936 | h->prev_poc_msb = h->poc_msb; |
||
2937 | h->prev_poc_lsb = h->poc_lsb; |
||
2938 | } |
||
2939 | h->prev_frame_num_offset = h->frame_num_offset; |
||
2940 | h->prev_frame_num = h->frame_num; |
||
2941 | h->outputed_poc = h->next_outputed_poc; |
||
2942 | } |
||
2943 | |||
2944 | if (avctx->hwaccel) { |
||
2945 | if (avctx->hwaccel->end_frame(avctx) < 0) |
||
2946 | av_log(avctx, AV_LOG_ERROR, |
||
2947 | "hardware accelerator failed to decode picture\n"); |
||
2948 | } |
||
2949 | |||
2950 | if (CONFIG_H264_VDPAU_DECODER && |
||
2951 | h->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU) |
||
2952 | ff_vdpau_h264_picture_complete(h); |
||
2953 | |||
2954 | /* |
||
2955 | * FIXME: Error handling code does not seem to support interlaced |
||
2956 | * when slices span multiple rows |
||
2957 | * The ff_er_add_slice calls don't work right for bottom |
||
2958 | * fields; they cause massive erroneous error concealing |
||
2959 | * Error marking covers both fields (top and bottom). |
||
2960 | * This causes a mismatched s->error_count |
||
2961 | * and a bad error table. Further, the error count goes to |
||
2962 | * INT_MAX when called for bottom field, because mb_y is |
||
2963 | * past end by one (callers fault) and resync_mb_y != 0 |
||
2964 | * causes problems for the first MB line, too. |
||
2965 | */ |
||
2966 | if (CONFIG_ERROR_RESILIENCE && |
||
2967 | !FIELD_PICTURE(h) && h->current_slice && !h->sps.new) { |
||
2968 | h->er.cur_pic = h->cur_pic_ptr; |
||
2969 | ff_er_frame_end(&h->er); |
||
2970 | } |
||
2971 | if (!in_setup && !h->droppable) |
||
2972 | ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX, |
||
2973 | h->picture_structure == PICT_BOTTOM_FIELD); |
||
2974 | emms_c(); |
||
2975 | |||
2976 | h->current_slice = 0; |
||
2977 | |||
2978 | return err; |
||
2979 | } |
||
2980 | |||
2981 | /** |
||
2982 | * Replicate H264 "master" context to thread contexts. |
||
2983 | */ |
||
2984 | static int clone_slice(H264Context *dst, H264Context *src) |
||
2985 | { |
||
2986 | memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset)); |
||
2987 | dst->cur_pic_ptr = src->cur_pic_ptr; |
||
2988 | dst->cur_pic = src->cur_pic; |
||
2989 | dst->linesize = src->linesize; |
||
2990 | dst->uvlinesize = src->uvlinesize; |
||
2991 | dst->first_field = src->first_field; |
||
2992 | |||
2993 | dst->prev_poc_msb = src->prev_poc_msb; |
||
2994 | dst->prev_poc_lsb = src->prev_poc_lsb; |
||
2995 | dst->prev_frame_num_offset = src->prev_frame_num_offset; |
||
2996 | dst->prev_frame_num = src->prev_frame_num; |
||
2997 | dst->short_ref_count = src->short_ref_count; |
||
2998 | |||
2999 | memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref)); |
||
3000 | memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref)); |
||
3001 | memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list)); |
||
3002 | |||
3003 | memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff)); |
||
3004 | memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff)); |
||
3005 | |||
3006 | return 0; |
||
3007 | } |
||
3008 | |||
3009 | /** |
||
3010 | * Compute profile from profile_idc and constraint_set?_flags. |
||
3011 | * |
||
3012 | * @param sps SPS |
||
3013 | * |
||
3014 | * @return profile as defined by FF_PROFILE_H264_* |
||
3015 | */ |
||
3016 | int ff_h264_get_profile(SPS *sps) |
||
3017 | { |
||
3018 | int profile = sps->profile_idc; |
||
3019 | |||
3020 | switch (sps->profile_idc) { |
||
3021 | case FF_PROFILE_H264_BASELINE: |
||
3022 | // constraint_set1_flag set to 1 |
||
3023 | profile |= (sps->constraint_set_flags & 1 << 1) ? FF_PROFILE_H264_CONSTRAINED : 0; |
||
3024 | break; |
||
3025 | case FF_PROFILE_H264_HIGH_10: |
||
3026 | case FF_PROFILE_H264_HIGH_422: |
||
3027 | case FF_PROFILE_H264_HIGH_444_PREDICTIVE: |
||
3028 | // constraint_set3_flag set to 1 |
||
3029 | profile |= (sps->constraint_set_flags & 1 << 3) ? FF_PROFILE_H264_INTRA : 0; |
||
3030 | break; |
||
3031 | } |
||
3032 | |||
3033 | return profile; |
||
3034 | } |
||
3035 | |||
3036 | static int h264_set_parameter_from_sps(H264Context *h) |
||
3037 | { |
||
3038 | if (h->flags & CODEC_FLAG_LOW_DELAY || |
||
3039 | (h->sps.bitstream_restriction_flag && |
||
3040 | !h->sps.num_reorder_frames)) { |
||
3041 | if (h->avctx->has_b_frames > 1 || h->delayed_pic[0]) |
||
3042 | av_log(h->avctx, AV_LOG_WARNING, "Delayed frames seen. " |
||
3043 | "Reenabling low delay requires a codec flush.\n"); |
||
3044 | else |
||
3045 | h->low_delay = 1; |
||
3046 | } |
||
3047 | |||
3048 | if (h->avctx->has_b_frames < 2) |
||
3049 | h->avctx->has_b_frames = !h->low_delay; |
||
3050 | |||
3051 | if (h->sps.bit_depth_luma != h->sps.bit_depth_chroma) { |
||
3052 | avpriv_request_sample(h->avctx, |
||
3053 | "Different chroma and luma bit depth"); |
||
3054 | return AVERROR_PATCHWELCOME; |
||
3055 | } |
||
3056 | |||
3057 | if (h->avctx->bits_per_raw_sample != h->sps.bit_depth_luma || |
||
3058 | h->cur_chroma_format_idc != h->sps.chroma_format_idc) { |
||
3059 | if (h->avctx->codec && |
||
3060 | h->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU && |
||
3061 | (h->sps.bit_depth_luma != 8 || h->sps.chroma_format_idc > 1)) { |
||
3062 | av_log(h->avctx, AV_LOG_ERROR, |
||
3063 | "VDPAU decoding does not support video colorspace.\n"); |
||
3064 | return AVERROR_INVALIDDATA; |
||
3065 | } |
||
3066 | if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 14 && |
||
3067 | h->sps.bit_depth_luma != 11 && h->sps.bit_depth_luma != 13) { |
||
3068 | h->avctx->bits_per_raw_sample = h->sps.bit_depth_luma; |
||
3069 | h->cur_chroma_format_idc = h->sps.chroma_format_idc; |
||
3070 | h->pixel_shift = h->sps.bit_depth_luma > 8; |
||
3071 | |||
3072 | ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma, |
||
3073 | h->sps.chroma_format_idc); |
||
3074 | ff_h264chroma_init(&h->h264chroma, h->sps.bit_depth_chroma); |
||
3075 | ff_h264qpel_init(&h->h264qpel, h->sps.bit_depth_luma); |
||
3076 | ff_h264_pred_init(&h->hpc, h->avctx->codec_id, h->sps.bit_depth_luma, |
||
3077 | h->sps.chroma_format_idc); |
||
3078 | |||
3079 | if (CONFIG_ERROR_RESILIENCE) |
||
3080 | ff_dsputil_init(&h->dsp, h->avctx); |
||
3081 | ff_videodsp_init(&h->vdsp, h->sps.bit_depth_luma); |
||
3082 | } else { |
||
3083 | av_log(h->avctx, AV_LOG_ERROR, "Unsupported bit depth: %d\n", |
||
3084 | h->sps.bit_depth_luma); |
||
3085 | return AVERROR_INVALIDDATA; |
||
3086 | } |
||
3087 | } |
||
3088 | return 0; |
||
3089 | } |
||
3090 | |||
3091 | static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback) |
||
3092 | { |
||
3093 | switch (h->sps.bit_depth_luma) { |
||
3094 | case 9: |
||
3095 | if (CHROMA444(h)) { |
||
3096 | if (h->avctx->colorspace == AVCOL_SPC_RGB) { |
||
3097 | return AV_PIX_FMT_GBRP9; |
||
3098 | } else |
||
3099 | return AV_PIX_FMT_YUV444P9; |
||
3100 | } else if (CHROMA422(h)) |
||
3101 | return AV_PIX_FMT_YUV422P9; |
||
3102 | else |
||
3103 | return AV_PIX_FMT_YUV420P9; |
||
3104 | break; |
||
3105 | case 10: |
||
3106 | if (CHROMA444(h)) { |
||
3107 | if (h->avctx->colorspace == AVCOL_SPC_RGB) { |
||
3108 | return AV_PIX_FMT_GBRP10; |
||
3109 | } else |
||
3110 | return AV_PIX_FMT_YUV444P10; |
||
3111 | } else if (CHROMA422(h)) |
||
3112 | return AV_PIX_FMT_YUV422P10; |
||
3113 | else |
||
3114 | return AV_PIX_FMT_YUV420P10; |
||
3115 | break; |
||
3116 | case 12: |
||
3117 | if (CHROMA444(h)) { |
||
3118 | if (h->avctx->colorspace == AVCOL_SPC_RGB) { |
||
3119 | return AV_PIX_FMT_GBRP12; |
||
3120 | } else |
||
3121 | return AV_PIX_FMT_YUV444P12; |
||
3122 | } else if (CHROMA422(h)) |
||
3123 | return AV_PIX_FMT_YUV422P12; |
||
3124 | else |
||
3125 | return AV_PIX_FMT_YUV420P12; |
||
3126 | break; |
||
3127 | case 14: |
||
3128 | if (CHROMA444(h)) { |
||
3129 | if (h->avctx->colorspace == AVCOL_SPC_RGB) { |
||
3130 | return AV_PIX_FMT_GBRP14; |
||
3131 | } else |
||
3132 | return AV_PIX_FMT_YUV444P14; |
||
3133 | } else if (CHROMA422(h)) |
||
3134 | return AV_PIX_FMT_YUV422P14; |
||
3135 | else |
||
3136 | return AV_PIX_FMT_YUV420P14; |
||
3137 | break; |
||
3138 | case 8: |
||
3139 | if (CHROMA444(h)) { |
||
3140 | if (h->avctx->colorspace == AVCOL_SPC_RGB) { |
||
3141 | av_log(h->avctx, AV_LOG_DEBUG, "Detected GBR colorspace.\n"); |
||
3142 | return AV_PIX_FMT_GBR24P; |
||
3143 | } else if (h->avctx->colorspace == AVCOL_SPC_YCGCO) { |
||
3144 | av_log(h->avctx, AV_LOG_WARNING, "Detected unsupported YCgCo colorspace.\n"); |
||
3145 | } |
||
3146 | return h->avctx->color_range == AVCOL_RANGE_JPEG ? AV_PIX_FMT_YUVJ444P |
||
3147 | : AV_PIX_FMT_YUV444P; |
||
3148 | } else if (CHROMA422(h)) { |
||
3149 | return h->avctx->color_range == AVCOL_RANGE_JPEG ? AV_PIX_FMT_YUVJ422P |
||
3150 | : AV_PIX_FMT_YUV422P; |
||
3151 | } else { |
||
3152 | int i; |
||
3153 | const enum AVPixelFormat * fmt = h->avctx->codec->pix_fmts ? |
||
3154 | h->avctx->codec->pix_fmts : |
||
3155 | h->avctx->color_range == AVCOL_RANGE_JPEG ? |
||
3156 | h264_hwaccel_pixfmt_list_jpeg_420 : |
||
3157 | h264_hwaccel_pixfmt_list_420; |
||
3158 | |||
3159 | for (i=0; fmt[i] != AV_PIX_FMT_NONE; i++) |
||
3160 | if (fmt[i] == h->avctx->pix_fmt && !force_callback) |
||
3161 | return fmt[i]; |
||
3162 | return ff_thread_get_format(h->avctx, fmt); |
||
3163 | } |
||
3164 | break; |
||
3165 | default: |
||
3166 | av_log(h->avctx, AV_LOG_ERROR, |
||
3167 | "Unsupported bit depth: %d\n", h->sps.bit_depth_luma); |
||
3168 | return AVERROR_INVALIDDATA; |
||
3169 | } |
||
3170 | } |
||
3171 | |||
3172 | /* export coded and cropped frame dimensions to AVCodecContext */ |
||
3173 | static int init_dimensions(H264Context *h) |
||
3174 | { |
||
3175 | int width = h->width - (h->sps.crop_right + h->sps.crop_left); |
||
3176 | int height = h->height - (h->sps.crop_top + h->sps.crop_bottom); |
||
3177 | av_assert0(h->sps.crop_right + h->sps.crop_left < (unsigned)h->width); |
||
3178 | av_assert0(h->sps.crop_top + h->sps.crop_bottom < (unsigned)h->height); |
||
3179 | |||
3180 | /* handle container cropping */ |
||
3181 | if (!h->sps.crop && |
||
3182 | FFALIGN(h->avctx->width, 16) == h->width && |
||
3183 | FFALIGN(h->avctx->height, 16) == h->height) { |
||
3184 | width = h->avctx->width; |
||
3185 | height = h->avctx->height; |
||
3186 | } |
||
3187 | |||
3188 | if (width <= 0 || height <= 0) { |
||
3189 | av_log(h->avctx, AV_LOG_ERROR, "Invalid cropped dimensions: %dx%d.\n", |
||
3190 | width, height); |
||
3191 | if (h->avctx->err_recognition & AV_EF_EXPLODE) |
||
3192 | return AVERROR_INVALIDDATA; |
||
3193 | |||
3194 | av_log(h->avctx, AV_LOG_WARNING, "Ignoring cropping information.\n"); |
||
3195 | h->sps.crop_bottom = h->sps.crop_top = h->sps.crop_right = h->sps.crop_left = 0; |
||
3196 | h->sps.crop = 0; |
||
3197 | |||
3198 | width = h->width; |
||
3199 | height = h->height; |
||
3200 | } |
||
3201 | |||
3202 | h->avctx->coded_width = h->width; |
||
3203 | h->avctx->coded_height = h->height; |
||
3204 | h->avctx->width = width; |
||
3205 | h->avctx->height = height; |
||
3206 | |||
3207 | return 0; |
||
3208 | } |
||
3209 | |||
3210 | static int h264_slice_header_init(H264Context *h, int reinit) |
||
3211 | { |
||
3212 | int nb_slices = (HAVE_THREADS && |
||
3213 | h->avctx->active_thread_type & FF_THREAD_SLICE) ? |
||
3214 | h->avctx->thread_count : 1; |
||
3215 | int i, ret; |
||
3216 | |||
3217 | h->avctx->sample_aspect_ratio = h->sps.sar; |
||
3218 | av_assert0(h->avctx->sample_aspect_ratio.den); |
||
3219 | av_pix_fmt_get_chroma_sub_sample(h->avctx->pix_fmt, |
||
3220 | &h->chroma_x_shift, &h->chroma_y_shift); |
||
3221 | |||
3222 | if (h->sps.timing_info_present_flag) { |
||
3223 | int64_t den = h->sps.time_scale; |
||
3224 | if (h->x264_build < 44U) |
||
3225 | den *= 2; |
||
3226 | av_reduce(&h->avctx->time_base.num, &h->avctx->time_base.den, |
||
3227 | h->sps.num_units_in_tick, den, 1 << 30); |
||
3228 | } |
||
3229 | |||
3230 | h->avctx->hwaccel = ff_find_hwaccel(h->avctx->codec->id, h->avctx->pix_fmt); |
||
3231 | |||
3232 | if (reinit) |
||
3233 | free_tables(h, 0); |
||
3234 | h->first_field = 0; |
||
3235 | h->prev_interlaced_frame = 1; |
||
3236 | |||
3237 | init_scan_tables(h); |
||
3238 | ret = ff_h264_alloc_tables(h); |
||
3239 | if (ret < 0) { |
||
3240 | av_log(h->avctx, AV_LOG_ERROR, |
||
3241 | "Could not allocate memory for h264\n"); |
||
3242 | return ret; |
||
3243 | } |
||
3244 | |||
3245 | if (nb_slices > MAX_THREADS || (nb_slices > h->mb_height && h->mb_height)) { |
||
3246 | int max_slices; |
||
3247 | if (h->mb_height) |
||
3248 | max_slices = FFMIN(MAX_THREADS, h->mb_height); |
||
3249 | else |
||
3250 | max_slices = MAX_THREADS; |
||
3251 | av_log(h->avctx, AV_LOG_WARNING, "too many threads/slices (%d)," |
||
3252 | " reducing to %d\n", nb_slices, max_slices); |
||
3253 | nb_slices = max_slices; |
||
3254 | } |
||
3255 | h->slice_context_count = nb_slices; |
||
3256 | |||
3257 | if (!HAVE_THREADS || !(h->avctx->active_thread_type & FF_THREAD_SLICE)) { |
||
3258 | ret = context_init(h); |
||
3259 | if (ret < 0) { |
||
3260 | av_log(h->avctx, AV_LOG_ERROR, "context_init() failed.\n"); |
||
3261 | return ret; |
||
3262 | } |
||
3263 | } else { |
||
3264 | for (i = 1; i < h->slice_context_count; i++) { |
||
3265 | H264Context *c; |
||
3266 | c = h->thread_context[i] = av_mallocz(sizeof(H264Context)); |
||
3267 | c->avctx = h->avctx; |
||
3268 | if (CONFIG_ERROR_RESILIENCE) { |
||
3269 | c->dsp = h->dsp; |
||
3270 | } |
||
3271 | c->vdsp = h->vdsp; |
||
3272 | c->h264dsp = h->h264dsp; |
||
3273 | c->h264qpel = h->h264qpel; |
||
3274 | c->h264chroma = h->h264chroma; |
||
3275 | c->sps = h->sps; |
||
3276 | c->pps = h->pps; |
||
3277 | c->pixel_shift = h->pixel_shift; |
||
3278 | c->cur_chroma_format_idc = h->cur_chroma_format_idc; |
||
3279 | c->width = h->width; |
||
3280 | c->height = h->height; |
||
3281 | c->linesize = h->linesize; |
||
3282 | c->uvlinesize = h->uvlinesize; |
||
3283 | c->chroma_x_shift = h->chroma_x_shift; |
||
3284 | c->chroma_y_shift = h->chroma_y_shift; |
||
3285 | c->qscale = h->qscale; |
||
3286 | c->droppable = h->droppable; |
||
3287 | c->data_partitioning = h->data_partitioning; |
||
3288 | c->low_delay = h->low_delay; |
||
3289 | c->mb_width = h->mb_width; |
||
3290 | c->mb_height = h->mb_height; |
||
3291 | c->mb_stride = h->mb_stride; |
||
3292 | c->mb_num = h->mb_num; |
||
3293 | c->flags = h->flags; |
||
3294 | c->workaround_bugs = h->workaround_bugs; |
||
3295 | c->pict_type = h->pict_type; |
||
3296 | |||
3297 | init_scan_tables(c); |
||
3298 | clone_tables(c, h, i); |
||
3299 | c->context_initialized = 1; |
||
3300 | } |
||
3301 | |||
3302 | for (i = 0; i < h->slice_context_count; i++) |
||
3303 | if ((ret = context_init(h->thread_context[i])) < 0) { |
||
3304 | av_log(h->avctx, AV_LOG_ERROR, "context_init() failed.\n"); |
||
3305 | return ret; |
||
3306 | } |
||
3307 | } |
||
3308 | |||
3309 | h->context_initialized = 1; |
||
3310 | |||
3311 | return 0; |
||
3312 | } |
||
3313 | |||
3314 | int ff_set_ref_count(H264Context *h) |
||
3315 | { |
||
3316 | int num_ref_idx_active_override_flag; |
||
3317 | |||
3318 | // set defaults, might be overridden a few lines later |
||
3319 | h->ref_count[0] = h->pps.ref_count[0]; |
||
3320 | h->ref_count[1] = h->pps.ref_count[1]; |
||
3321 | |||
3322 | if (h->slice_type_nos != AV_PICTURE_TYPE_I) { |
||
3323 | unsigned max[2]; |
||
3324 | max[0] = max[1] = h->picture_structure == PICT_FRAME ? 15 : 31; |
||
3325 | |||
3326 | if (h->slice_type_nos == AV_PICTURE_TYPE_B) |
||
3327 | h->direct_spatial_mv_pred = get_bits1(&h->gb); |
||
3328 | num_ref_idx_active_override_flag = get_bits1(&h->gb); |
||
3329 | |||
3330 | if (num_ref_idx_active_override_flag) { |
||
3331 | h->ref_count[0] = get_ue_golomb(&h->gb) + 1; |
||
3332 | if (h->slice_type_nos == AV_PICTURE_TYPE_B) { |
||
3333 | h->ref_count[1] = get_ue_golomb(&h->gb) + 1; |
||
3334 | } else |
||
3335 | // full range is spec-ok in this case, even for frames |
||
3336 | h->ref_count[1] = 1; |
||
3337 | } |
||
3338 | |||
3339 | if (h->ref_count[0]-1 > max[0] || h->ref_count[1]-1 > max[1]){ |
||
3340 | av_log(h->avctx, AV_LOG_ERROR, "reference overflow %u > %u or %u > %u\n", h->ref_count[0]-1, max[0], h->ref_count[1]-1, max[1]); |
||
3341 | h->ref_count[0] = h->ref_count[1] = 0; |
||
3342 | return AVERROR_INVALIDDATA; |
||
3343 | } |
||
3344 | |||
3345 | if (h->slice_type_nos == AV_PICTURE_TYPE_B) |
||
3346 | h->list_count = 2; |
||
3347 | else |
||
3348 | h->list_count = 1; |
||
3349 | } else { |
||
3350 | h->list_count = 0; |
||
3351 | h->ref_count[0] = h->ref_count[1] = 0; |
||
3352 | } |
||
3353 | |||
3354 | return 0; |
||
3355 | } |
||
3356 | |||
3357 | /** |
||
3358 | * Decode a slice header. |
||
3359 | * This will also call ff_MPV_common_init() and frame_start() as needed. |
||
3360 | * |
||
3361 | * @param h h264context |
||
3362 | * @param h0 h264 master context (differs from 'h' when doing sliced based |
||
3363 | * parallel decoding) |
||
3364 | * |
||
3365 | * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded |
||
3366 | */ |
||
3367 | static int decode_slice_header(H264Context *h, H264Context *h0) |
||
3368 | { |
||
3369 | unsigned int first_mb_in_slice; |
||
3370 | unsigned int pps_id; |
||
3371 | int ret; |
||
3372 | unsigned int slice_type, tmp, i, j; |
||
3373 | int last_pic_structure, last_pic_droppable; |
||
3374 | int must_reinit; |
||
3375 | int needs_reinit = 0; |
||
3376 | int field_pic_flag, bottom_field_flag; |
||
3377 | |||
3378 | h->me.qpel_put = h->h264qpel.put_h264_qpel_pixels_tab; |
||
3379 | h->me.qpel_avg = h->h264qpel.avg_h264_qpel_pixels_tab; |
||
3380 | |||
3381 | first_mb_in_slice = get_ue_golomb_long(&h->gb); |
||
3382 | |||
3383 | if (first_mb_in_slice == 0) { // FIXME better field boundary detection |
||
3384 | if (h0->current_slice && FIELD_PICTURE(h)) { |
||
3385 | field_end(h, 1); |
||
3386 | } |
||
3387 | |||
3388 | h0->current_slice = 0; |
||
3389 | if (!h0->first_field) { |
||
3390 | if (h->cur_pic_ptr && !h->droppable) { |
||
3391 | ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX, |
||
3392 | h->picture_structure == PICT_BOTTOM_FIELD); |
||
3393 | } |
||
3394 | h->cur_pic_ptr = NULL; |
||
3395 | } |
||
3396 | } |
||
3397 | |||
3398 | slice_type = get_ue_golomb_31(&h->gb); |
||
3399 | if (slice_type > 9) { |
||
3400 | av_log(h->avctx, AV_LOG_ERROR, |
||
3401 | "slice type too large (%d) at %d %d\n", |
||
3402 | slice_type, h->mb_x, h->mb_y); |
||
3403 | return AVERROR_INVALIDDATA; |
||
3404 | } |
||
3405 | if (slice_type > 4) { |
||
3406 | slice_type -= 5; |
||
3407 | h->slice_type_fixed = 1; |
||
3408 | } else |
||
3409 | h->slice_type_fixed = 0; |
||
3410 | |||
3411 | slice_type = golomb_to_pict_type[slice_type]; |
||
3412 | h->slice_type = slice_type; |
||
3413 | h->slice_type_nos = slice_type & 3; |
||
3414 | |||
3415 | // to make a few old functions happy, it's wrong though |
||
3416 | h->pict_type = h->slice_type; |
||
3417 | |||
3418 | pps_id = get_ue_golomb(&h->gb); |
||
3419 | if (pps_id >= MAX_PPS_COUNT) { |
||
3420 | av_log(h->avctx, AV_LOG_ERROR, "pps_id %d out of range\n", pps_id); |
||
3421 | return AVERROR_INVALIDDATA; |
||
3422 | } |
||
3423 | if (!h0->pps_buffers[pps_id]) { |
||
3424 | av_log(h->avctx, AV_LOG_ERROR, |
||
3425 | "non-existing PPS %u referenced\n", |
||
3426 | pps_id); |
||
3427 | return AVERROR_INVALIDDATA; |
||
3428 | } |
||
3429 | h->pps = *h0->pps_buffers[pps_id]; |
||
3430 | |||
3431 | if (!h0->sps_buffers[h->pps.sps_id]) { |
||
3432 | av_log(h->avctx, AV_LOG_ERROR, |
||
3433 | "non-existing SPS %u referenced\n", |
||
3434 | h->pps.sps_id); |
||
3435 | return AVERROR_INVALIDDATA; |
||
3436 | } |
||
3437 | |||
3438 | if (h->pps.sps_id != h->current_sps_id || |
||
3439 | h0->sps_buffers[h->pps.sps_id]->new) { |
||
3440 | h0->sps_buffers[h->pps.sps_id]->new = 0; |
||
3441 | |||
3442 | h->current_sps_id = h->pps.sps_id; |
||
3443 | h->sps = *h0->sps_buffers[h->pps.sps_id]; |
||
3444 | |||
3445 | if (h->mb_width != h->sps.mb_width || |
||
3446 | h->mb_height != h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag) || |
||
3447 | h->avctx->bits_per_raw_sample != h->sps.bit_depth_luma || |
||
3448 | h->cur_chroma_format_idc != h->sps.chroma_format_idc |
||
3449 | ) |
||
3450 | needs_reinit = 1; |
||
3451 | |||
3452 | if (h->bit_depth_luma != h->sps.bit_depth_luma || |
||
3453 | h->chroma_format_idc != h->sps.chroma_format_idc) { |
||
3454 | h->bit_depth_luma = h->sps.bit_depth_luma; |
||
3455 | h->chroma_format_idc = h->sps.chroma_format_idc; |
||
3456 | needs_reinit = 1; |
||
3457 | } |
||
3458 | if ((ret = h264_set_parameter_from_sps(h)) < 0) |
||
3459 | return ret; |
||
3460 | } |
||
3461 | |||
3462 | h->avctx->profile = ff_h264_get_profile(&h->sps); |
||
3463 | h->avctx->level = h->sps.level_idc; |
||
3464 | h->avctx->refs = h->sps.ref_frame_count; |
||
3465 | |||
3466 | must_reinit = (h->context_initialized && |
||
3467 | ( 16*h->sps.mb_width != h->avctx->coded_width |
||
3468 | || 16*h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag) != h->avctx->coded_height |
||
3469 | || h->avctx->bits_per_raw_sample != h->sps.bit_depth_luma |
||
3470 | || h->cur_chroma_format_idc != h->sps.chroma_format_idc |
||
3471 | || av_cmp_q(h->sps.sar, h->avctx->sample_aspect_ratio) |
||
3472 | || h->mb_width != h->sps.mb_width |
||
3473 | || h->mb_height != h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag) |
||
3474 | )); |
||
3475 | if (h0->avctx->pix_fmt != get_pixel_format(h0, 0)) |
||
3476 | must_reinit = 1; |
||
3477 | |||
3478 | h->mb_width = h->sps.mb_width; |
||
3479 | h->mb_height = h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag); |
||
3480 | h->mb_num = h->mb_width * h->mb_height; |
||
3481 | h->mb_stride = h->mb_width + 1; |
||
3482 | |||
3483 | h->b_stride = h->mb_width * 4; |
||
3484 | |||
3485 | h->chroma_y_shift = h->sps.chroma_format_idc <= 1; // 400 uses yuv420p |
||
3486 | |||
3487 | h->width = 16 * h->mb_width; |
||
3488 | h->height = 16 * h->mb_height; |
||
3489 | |||
3490 | ret = init_dimensions(h); |
||
3491 | if (ret < 0) |
||
3492 | return ret; |
||
3493 | |||
3494 | if (h->sps.video_signal_type_present_flag) { |
||
3495 | h->avctx->color_range = h->sps.full_range>0 ? AVCOL_RANGE_JPEG |
||
3496 | : AVCOL_RANGE_MPEG; |
||
3497 | if (h->sps.colour_description_present_flag) { |
||
3498 | if (h->avctx->colorspace != h->sps.colorspace) |
||
3499 | needs_reinit = 1; |
||
3500 | h->avctx->color_primaries = h->sps.color_primaries; |
||
3501 | h->avctx->color_trc = h->sps.color_trc; |
||
3502 | h->avctx->colorspace = h->sps.colorspace; |
||
3503 | } |
||
3504 | } |
||
3505 | |||
3506 | if (h->context_initialized && |
||
3507 | (h->width != h->avctx->coded_width || |
||
3508 | h->height != h->avctx->coded_height || |
||
3509 | must_reinit || |
||
3510 | needs_reinit)) { |
||
3511 | if (h != h0) { |
||
3512 | av_log(h->avctx, AV_LOG_ERROR, "changing width/height on " |
||
3513 | "slice %d\n", h0->current_slice + 1); |
||
3514 | return AVERROR_INVALIDDATA; |
||
3515 | } |
||
3516 | |||
3517 | flush_change(h); |
||
3518 | |||
3519 | if ((ret = get_pixel_format(h, 1)) < 0) |
||
3520 | return ret; |
||
3521 | h->avctx->pix_fmt = ret; |
||
3522 | |||
3523 | av_log(h->avctx, AV_LOG_INFO, "Reinit context to %dx%d, " |
||
3524 | "pix_fmt: %s\n", h->width, h->height, av_get_pix_fmt_name(h->avctx->pix_fmt)); |
||
3525 | |||
3526 | if ((ret = h264_slice_header_init(h, 1)) < 0) { |
||
3527 | av_log(h->avctx, AV_LOG_ERROR, |
||
3528 | "h264_slice_header_init() failed\n"); |
||
3529 | return ret; |
||
3530 | } |
||
3531 | } |
||
3532 | if (!h->context_initialized) { |
||
3533 | if (h != h0) { |
||
3534 | av_log(h->avctx, AV_LOG_ERROR, |
||
3535 | "Cannot (re-)initialize context during parallel decoding.\n"); |
||
3536 | return AVERROR_PATCHWELCOME; |
||
3537 | } |
||
3538 | |||
3539 | if ((ret = get_pixel_format(h, 1)) < 0) |
||
3540 | return ret; |
||
3541 | h->avctx->pix_fmt = ret; |
||
3542 | |||
3543 | if ((ret = h264_slice_header_init(h, 0)) < 0) { |
||
3544 | av_log(h->avctx, AV_LOG_ERROR, |
||
3545 | "h264_slice_header_init() failed\n"); |
||
3546 | return ret; |
||
3547 | } |
||
3548 | } |
||
3549 | |||
3550 | if (h == h0 && h->dequant_coeff_pps != pps_id) { |
||
3551 | h->dequant_coeff_pps = pps_id; |
||
3552 | init_dequant_tables(h); |
||
3553 | } |
||
3554 | |||
3555 | h->frame_num = get_bits(&h->gb, h->sps.log2_max_frame_num); |
||
3556 | |||
3557 | h->mb_mbaff = 0; |
||
3558 | h->mb_aff_frame = 0; |
||
3559 | last_pic_structure = h0->picture_structure; |
||
3560 | last_pic_droppable = h0->droppable; |
||
3561 | h->droppable = h->nal_ref_idc == 0; |
||
3562 | if (h->sps.frame_mbs_only_flag) { |
||
3563 | h->picture_structure = PICT_FRAME; |
||
3564 | } else { |
||
3565 | if (!h->sps.direct_8x8_inference_flag && slice_type == AV_PICTURE_TYPE_B) { |
||
3566 | av_log(h->avctx, AV_LOG_ERROR, "This stream was generated by a broken encoder, invalid 8x8 inference\n"); |
||
3567 | return -1; |
||
3568 | } |
||
3569 | field_pic_flag = get_bits1(&h->gb); |
||
3570 | if (field_pic_flag) { |
||
3571 | bottom_field_flag = get_bits1(&h->gb); |
||
3572 | h->picture_structure = PICT_TOP_FIELD + bottom_field_flag; |
||
3573 | } else { |
||
3574 | h->picture_structure = PICT_FRAME; |
||
3575 | h->mb_aff_frame = h->sps.mb_aff; |
||
3576 | } |
||
3577 | } |
||
3578 | h->mb_field_decoding_flag = h->picture_structure != PICT_FRAME; |
||
3579 | |||
3580 | if (h0->current_slice != 0) { |
||
3581 | if (last_pic_structure != h->picture_structure || |
||
3582 | last_pic_droppable != h->droppable) { |
||
3583 | av_log(h->avctx, AV_LOG_ERROR, |
||
3584 | "Changing field mode (%d -> %d) between slices is not allowed\n", |
||
3585 | last_pic_structure, h->picture_structure); |
||
3586 | h->picture_structure = last_pic_structure; |
||
3587 | h->droppable = last_pic_droppable; |
||
3588 | return AVERROR_INVALIDDATA; |
||
3589 | } else if (!h0->cur_pic_ptr) { |
||
3590 | av_log(h->avctx, AV_LOG_ERROR, |
||
3591 | "unset cur_pic_ptr on %d. slice\n", |
||
3592 | h0->current_slice + 1); |
||
3593 | return AVERROR_INVALIDDATA; |
||
3594 | } |
||
3595 | } else { |
||
3596 | /* Shorten frame num gaps so we don't have to allocate reference |
||
3597 | * frames just to throw them away */ |
||
3598 | if (h->frame_num != h->prev_frame_num) { |
||
3599 | int unwrap_prev_frame_num = h->prev_frame_num; |
||
3600 | int max_frame_num = 1 << h->sps.log2_max_frame_num; |
||
3601 | |||
3602 | if (unwrap_prev_frame_num > h->frame_num) |
||
3603 | unwrap_prev_frame_num -= max_frame_num; |
||
3604 | |||
3605 | if ((h->frame_num - unwrap_prev_frame_num) > h->sps.ref_frame_count) { |
||
3606 | unwrap_prev_frame_num = (h->frame_num - h->sps.ref_frame_count) - 1; |
||
3607 | if (unwrap_prev_frame_num < 0) |
||
3608 | unwrap_prev_frame_num += max_frame_num; |
||
3609 | |||
3610 | h->prev_frame_num = unwrap_prev_frame_num; |
||
3611 | } |
||
3612 | } |
||
3613 | |||
3614 | /* See if we have a decoded first field looking for a pair... |
||
3615 | * Here, we're using that to see if we should mark previously |
||
3616 | * decode frames as "finished". |
||
3617 | * We have to do that before the "dummy" in-between frame allocation, |
||
3618 | * since that can modify h->cur_pic_ptr. */ |
||
3619 | if (h0->first_field) { |
||
3620 | assert(h0->cur_pic_ptr); |
||
3621 | assert(h0->cur_pic_ptr->f.data[0]); |
||
3622 | assert(h0->cur_pic_ptr->reference != DELAYED_PIC_REF); |
||
3623 | |||
3624 | /* Mark old field/frame as completed */ |
||
3625 | if (h0->cur_pic_ptr->tf.owner == h0->avctx) { |
||
3626 | ff_thread_report_progress(&h0->cur_pic_ptr->tf, INT_MAX, |
||
3627 | last_pic_structure == PICT_BOTTOM_FIELD); |
||
3628 | } |
||
3629 | |||
3630 | /* figure out if we have a complementary field pair */ |
||
3631 | if (!FIELD_PICTURE(h) || h->picture_structure == last_pic_structure) { |
||
3632 | /* Previous field is unmatched. Don't display it, but let it |
||
3633 | * remain for reference if marked as such. */ |
||
3634 | if (last_pic_structure != PICT_FRAME) { |
||
3635 | ff_thread_report_progress(&h0->cur_pic_ptr->tf, INT_MAX, |
||
3636 | last_pic_structure == PICT_TOP_FIELD); |
||
3637 | } |
||
3638 | } else { |
||
3639 | if (h0->cur_pic_ptr->frame_num != h->frame_num) { |
||
3640 | /* This and previous field were reference, but had |
||
3641 | * different frame_nums. Consider this field first in |
||
3642 | * pair. Throw away previous field except for reference |
||
3643 | * purposes. */ |
||
3644 | if (last_pic_structure != PICT_FRAME) { |
||
3645 | ff_thread_report_progress(&h0->cur_pic_ptr->tf, INT_MAX, |
||
3646 | last_pic_structure == PICT_TOP_FIELD); |
||
3647 | } |
||
3648 | } else { |
||
3649 | /* Second field in complementary pair */ |
||
3650 | if (!((last_pic_structure == PICT_TOP_FIELD && |
||
3651 | h->picture_structure == PICT_BOTTOM_FIELD) || |
||
3652 | (last_pic_structure == PICT_BOTTOM_FIELD && |
||
3653 | h->picture_structure == PICT_TOP_FIELD))) { |
||
3654 | av_log(h->avctx, AV_LOG_ERROR, |
||
3655 | "Invalid field mode combination %d/%d\n", |
||
3656 | last_pic_structure, h->picture_structure); |
||
3657 | h->picture_structure = last_pic_structure; |
||
3658 | h->droppable = last_pic_droppable; |
||
3659 | return AVERROR_INVALIDDATA; |
||
3660 | } else if (last_pic_droppable != h->droppable) { |
||
3661 | avpriv_request_sample(h->avctx, |
||
3662 | "Found reference and non-reference fields in the same frame, which"); |
||
3663 | h->picture_structure = last_pic_structure; |
||
3664 | h->droppable = last_pic_droppable; |
||
3665 | return AVERROR_PATCHWELCOME; |
||
3666 | } |
||
3667 | } |
||
3668 | } |
||
3669 | } |
||
3670 | |||
3671 | while (h->frame_num != h->prev_frame_num && !h0->first_field && |
||
3672 | h->frame_num != (h->prev_frame_num + 1) % (1 << h->sps.log2_max_frame_num)) { |
||
3673 | Picture *prev = h->short_ref_count ? h->short_ref[0] : NULL; |
||
3674 | av_log(h->avctx, AV_LOG_DEBUG, "Frame num gap %d %d\n", |
||
3675 | h->frame_num, h->prev_frame_num); |
||
3676 | if (!h->sps.gaps_in_frame_num_allowed_flag) |
||
3677 | for(i=0; i |
||
3678 | h->last_pocs[i] = INT_MIN; |
||
3679 | ret = h264_frame_start(h); |
||
3680 | if (ret < 0) |
||
3681 | return ret; |
||
3682 | h->prev_frame_num++; |
||
3683 | h->prev_frame_num %= 1 << h->sps.log2_max_frame_num; |
||
3684 | h->cur_pic_ptr->frame_num = h->prev_frame_num; |
||
3685 | ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX, 0); |
||
3686 | ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX, 1); |
||
3687 | ret = ff_generate_sliding_window_mmcos(h, 1); |
||
3688 | if (ret < 0 && (h->avctx->err_recognition & AV_EF_EXPLODE)) |
||
3689 | return ret; |
||
3690 | ret = ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index); |
||
3691 | if (ret < 0 && (h->avctx->err_recognition & AV_EF_EXPLODE)) |
||
3692 | return ret; |
||
3693 | /* Error concealment: If a ref is missing, copy the previous ref |
||
3694 | * in its place. |
||
3695 | * FIXME: Avoiding a memcpy would be nice, but ref handling makes |
||
3696 | * many assumptions about there being no actual duplicates. |
||
3697 | * FIXME: This does not copy padding for out-of-frame motion |
||
3698 | * vectors. Given we are concealing a lost frame, this probably |
||
3699 | * is not noticeable by comparison, but it should be fixed. */ |
||
3700 | if (h->short_ref_count) { |
||
3701 | if (prev) { |
||
3702 | av_image_copy(h->short_ref[0]->f.data, |
||
3703 | h->short_ref[0]->f.linesize, |
||
3704 | (const uint8_t **)prev->f.data, |
||
3705 | prev->f.linesize, |
||
3706 | h->avctx->pix_fmt, |
||
3707 | h->mb_width * 16, |
||
3708 | h->mb_height * 16); |
||
3709 | h->short_ref[0]->poc = prev->poc + 2; |
||
3710 | } |
||
3711 | h->short_ref[0]->frame_num = h->prev_frame_num; |
||
3712 | } |
||
3713 | } |
||
3714 | |||
3715 | /* See if we have a decoded first field looking for a pair... |
||
3716 | * We're using that to see whether to continue decoding in that |
||
3717 | * frame, or to allocate a new one. */ |
||
3718 | if (h0->first_field) { |
||
3719 | assert(h0->cur_pic_ptr); |
||
3720 | assert(h0->cur_pic_ptr->f.data[0]); |
||
3721 | assert(h0->cur_pic_ptr->reference != DELAYED_PIC_REF); |
||
3722 | |||
3723 | /* figure out if we have a complementary field pair */ |
||
3724 | if (!FIELD_PICTURE(h) || h->picture_structure == last_pic_structure) { |
||
3725 | /* Previous field is unmatched. Don't display it, but let it |
||
3726 | * remain for reference if marked as such. */ |
||
3727 | h0->cur_pic_ptr = NULL; |
||
3728 | h0->first_field = FIELD_PICTURE(h); |
||
3729 | } else { |
||
3730 | if (h0->cur_pic_ptr->frame_num != h->frame_num) { |
||
3731 | ff_thread_report_progress(&h0->cur_pic_ptr->tf, INT_MAX, |
||
3732 | h0->picture_structure==PICT_BOTTOM_FIELD); |
||
3733 | /* This and the previous field had different frame_nums. |
||
3734 | * Consider this field first in pair. Throw away previous |
||
3735 | * one except for reference purposes. */ |
||
3736 | h0->first_field = 1; |
||
3737 | h0->cur_pic_ptr = NULL; |
||
3738 | } else { |
||
3739 | /* Second field in complementary pair */ |
||
3740 | h0->first_field = 0; |
||
3741 | } |
||
3742 | } |
||
3743 | } else { |
||
3744 | /* Frame or first field in a potentially complementary pair */ |
||
3745 | h0->first_field = FIELD_PICTURE(h); |
||
3746 | } |
||
3747 | |||
3748 | if (!FIELD_PICTURE(h) || h0->first_field) { |
||
3749 | if (h264_frame_start(h) < 0) { |
||
3750 | h0->first_field = 0; |
||
3751 | return AVERROR_INVALIDDATA; |
||
3752 | } |
||
3753 | } else { |
||
3754 | release_unused_pictures(h, 0); |
||
3755 | } |
||
3756 | /* Some macroblocks can be accessed before they're available in case |
||
3757 | * of lost slices, MBAFF or threading. */ |
||
3758 | if (FIELD_PICTURE(h)) { |
||
3759 | for(i = (h->picture_structure == PICT_BOTTOM_FIELD); i |
||
3760 | memset(h->slice_table + i*h->mb_stride, -1, (h->mb_stride - (i+1==h->mb_height)) * sizeof(*h->slice_table)); |
||
3761 | } else { |
||
3762 | memset(h->slice_table, -1, |
||
3763 | (h->mb_height * h->mb_stride - 1) * sizeof(*h->slice_table)); |
||
3764 | } |
||
3765 | h0->last_slice_type = -1; |
||
3766 | } |
||
3767 | if (h != h0 && (ret = clone_slice(h, h0)) < 0) |
||
3768 | return ret; |
||
3769 | |||
3770 | /* can't be in alloc_tables because linesize isn't known there. |
||
3771 | * FIXME: redo bipred weight to not require extra buffer? */ |
||
3772 | for (i = 0; i < h->slice_context_count; i++) |
||
3773 | if (h->thread_context[i]) { |
||
3774 | ret = alloc_scratch_buffers(h->thread_context[i], h->linesize); |
||
3775 | if (ret < 0) |
||
3776 | return ret; |
||
3777 | } |
||
3778 | |||
3779 | h->cur_pic_ptr->frame_num = h->frame_num; // FIXME frame_num cleanup |
||
3780 | |||
3781 | av_assert1(h->mb_num == h->mb_width * h->mb_height); |
||
3782 | if (first_mb_in_slice << FIELD_OR_MBAFF_PICTURE(h) >= h->mb_num || |
||
3783 | first_mb_in_slice >= h->mb_num) { |
||
3784 | av_log(h->avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n"); |
||
3785 | return AVERROR_INVALIDDATA; |
||
3786 | } |
||
3787 | h->resync_mb_x = h->mb_x = first_mb_in_slice % h->mb_width; |
||
3788 | h->resync_mb_y = h->mb_y = (first_mb_in_slice / h->mb_width) << |
||
3789 | FIELD_OR_MBAFF_PICTURE(h); |
||
3790 | if (h->picture_structure == PICT_BOTTOM_FIELD) |
||
3791 | h->resync_mb_y = h->mb_y = h->mb_y + 1; |
||
3792 | av_assert1(h->mb_y < h->mb_height); |
||
3793 | |||
3794 | if (h->picture_structure == PICT_FRAME) { |
||
3795 | h->curr_pic_num = h->frame_num; |
||
3796 | h->max_pic_num = 1 << h->sps.log2_max_frame_num; |
||
3797 | } else { |
||
3798 | h->curr_pic_num = 2 * h->frame_num + 1; |
||
3799 | h->max_pic_num = 1 << (h->sps.log2_max_frame_num + 1); |
||
3800 | } |
||
3801 | |||
3802 | if (h->nal_unit_type == NAL_IDR_SLICE) |
||
3803 | get_ue_golomb(&h->gb); /* idr_pic_id */ |
||
3804 | |||
3805 | if (h->sps.poc_type == 0) { |
||
3806 | h->poc_lsb = get_bits(&h->gb, h->sps.log2_max_poc_lsb); |
||
3807 | |||
3808 | if (h->pps.pic_order_present == 1 && h->picture_structure == PICT_FRAME) |
||
3809 | h->delta_poc_bottom = get_se_golomb(&h->gb); |
||
3810 | } |
||
3811 | |||
3812 | if (h->sps.poc_type == 1 && !h->sps.delta_pic_order_always_zero_flag) { |
||
3813 | h->delta_poc[0] = get_se_golomb(&h->gb); |
||
3814 | |||
3815 | if (h->pps.pic_order_present == 1 && h->picture_structure == PICT_FRAME) |
||
3816 | h->delta_poc[1] = get_se_golomb(&h->gb); |
||
3817 | } |
||
3818 | |||
3819 | ff_init_poc(h, h->cur_pic_ptr->field_poc, &h->cur_pic_ptr->poc); |
||
3820 | |||
3821 | if (h->pps.redundant_pic_cnt_present) |
||
3822 | h->redundant_pic_count = get_ue_golomb(&h->gb); |
||
3823 | |||
3824 | ret = ff_set_ref_count(h); |
||
3825 | if (ret < 0) |
||
3826 | return ret; |
||
3827 | |||
3828 | if (slice_type != AV_PICTURE_TYPE_I && |
||
3829 | (h0->current_slice == 0 || |
||
3830 | slice_type != h0->last_slice_type || |
||
3831 | memcmp(h0->last_ref_count, h0->ref_count, sizeof(h0->ref_count)))) { |
||
3832 | |||
3833 | ff_h264_fill_default_ref_list(h); |
||
3834 | } |
||
3835 | |||
3836 | if (h->slice_type_nos != AV_PICTURE_TYPE_I) { |
||
3837 | ret = ff_h264_decode_ref_pic_list_reordering(h); |
||
3838 | if (ret < 0) { |
||
3839 | h->ref_count[1] = h->ref_count[0] = 0; |
||
3840 | return ret; |
||
3841 | } |
||
3842 | } |
||
3843 | |||
3844 | if ((h->pps.weighted_pred && h->slice_type_nos == AV_PICTURE_TYPE_P) || |
||
3845 | (h->pps.weighted_bipred_idc == 1 && |
||
3846 | h->slice_type_nos == AV_PICTURE_TYPE_B)) |
||
3847 | ff_pred_weight_table(h); |
||
3848 | else if (h->pps.weighted_bipred_idc == 2 && |
||
3849 | h->slice_type_nos == AV_PICTURE_TYPE_B) { |
||
3850 | implicit_weight_table(h, -1); |
||
3851 | } else { |
||
3852 | h->use_weight = 0; |
||
3853 | for (i = 0; i < 2; i++) { |
||
3854 | h->luma_weight_flag[i] = 0; |
||
3855 | h->chroma_weight_flag[i] = 0; |
||
3856 | } |
||
3857 | } |
||
3858 | |||
3859 | // If frame-mt is enabled, only update mmco tables for the first slice |
||
3860 | // in a field. Subsequent slices can temporarily clobber h->mmco_index |
||
3861 | // or h->mmco, which will cause ref list mix-ups and decoding errors |
||
3862 | // further down the line. This may break decoding if the first slice is |
||
3863 | // corrupt, thus we only do this if frame-mt is enabled. |
||
3864 | if (h->nal_ref_idc) { |
||
3865 | ret = ff_h264_decode_ref_pic_marking(h0, &h->gb, |
||
3866 | !(h->avctx->active_thread_type & FF_THREAD_FRAME) || |
||
3867 | h0->current_slice == 0); |
||
3868 | if (ret < 0 && (h->avctx->err_recognition & AV_EF_EXPLODE)) |
||
3869 | return AVERROR_INVALIDDATA; |
||
3870 | } |
||
3871 | |||
3872 | if (FRAME_MBAFF(h)) { |
||
3873 | ff_h264_fill_mbaff_ref_list(h); |
||
3874 | |||
3875 | if (h->pps.weighted_bipred_idc == 2 && h->slice_type_nos == AV_PICTURE_TYPE_B) { |
||
3876 | implicit_weight_table(h, 0); |
||
3877 | implicit_weight_table(h, 1); |
||
3878 | } |
||
3879 | } |
||
3880 | |||
3881 | if (h->slice_type_nos == AV_PICTURE_TYPE_B && !h->direct_spatial_mv_pred) |
||
3882 | ff_h264_direct_dist_scale_factor(h); |
||
3883 | ff_h264_direct_ref_list_init(h); |
||
3884 | |||
3885 | if (h->slice_type_nos != AV_PICTURE_TYPE_I && h->pps.cabac) { |
||
3886 | tmp = get_ue_golomb_31(&h->gb); |
||
3887 | if (tmp > 2) { |
||
3888 | av_log(h->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n"); |
||
3889 | return AVERROR_INVALIDDATA; |
||
3890 | } |
||
3891 | h->cabac_init_idc = tmp; |
||
3892 | } |
||
3893 | |||
3894 | h->last_qscale_diff = 0; |
||
3895 | tmp = h->pps.init_qp + get_se_golomb(&h->gb); |
||
3896 | if (tmp > 51 + 6 * (h->sps.bit_depth_luma - 8)) { |
||
3897 | av_log(h->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp); |
||
3898 | return AVERROR_INVALIDDATA; |
||
3899 | } |
||
3900 | h->qscale = tmp; |
||
3901 | h->chroma_qp[0] = get_chroma_qp(h, 0, h->qscale); |
||
3902 | h->chroma_qp[1] = get_chroma_qp(h, 1, h->qscale); |
||
3903 | // FIXME qscale / qp ... stuff |
||
3904 | if (h->slice_type == AV_PICTURE_TYPE_SP) |
||
3905 | get_bits1(&h->gb); /* sp_for_switch_flag */ |
||
3906 | if (h->slice_type == AV_PICTURE_TYPE_SP || |
||
3907 | h->slice_type == AV_PICTURE_TYPE_SI) |
||
3908 | get_se_golomb(&h->gb); /* slice_qs_delta */ |
||
3909 | |||
3910 | h->deblocking_filter = 1; |
||
3911 | h->slice_alpha_c0_offset = 52; |
||
3912 | h->slice_beta_offset = 52; |
||
3913 | if (h->pps.deblocking_filter_parameters_present) { |
||
3914 | tmp = get_ue_golomb_31(&h->gb); |
||
3915 | if (tmp > 2) { |
||
3916 | av_log(h->avctx, AV_LOG_ERROR, |
||
3917 | "deblocking_filter_idc %u out of range\n", tmp); |
||
3918 | return AVERROR_INVALIDDATA; |
||
3919 | } |
||
3920 | h->deblocking_filter = tmp; |
||
3921 | if (h->deblocking_filter < 2) |
||
3922 | h->deblocking_filter ^= 1; // 1<->0 |
||
3923 | |||
3924 | if (h->deblocking_filter) { |
||
3925 | h->slice_alpha_c0_offset += get_se_golomb(&h->gb) << 1; |
||
3926 | h->slice_beta_offset += get_se_golomb(&h->gb) << 1; |
||
3927 | if (h->slice_alpha_c0_offset > 104U || |
||
3928 | h->slice_beta_offset > 104U) { |
||
3929 | av_log(h->avctx, AV_LOG_ERROR, |
||
3930 | "deblocking filter parameters %d %d out of range\n", |
||
3931 | h->slice_alpha_c0_offset, h->slice_beta_offset); |
||
3932 | return AVERROR_INVALIDDATA; |
||
3933 | } |
||
3934 | } |
||
3935 | } |
||
3936 | |||
3937 | if (h->avctx->skip_loop_filter >= AVDISCARD_ALL || |
||
3938 | (h->avctx->skip_loop_filter >= AVDISCARD_NONKEY && |
||
3939 | h->slice_type_nos != AV_PICTURE_TYPE_I) || |
||
3940 | (h->avctx->skip_loop_filter >= AVDISCARD_BIDIR && |
||
3941 | h->slice_type_nos == AV_PICTURE_TYPE_B) || |
||
3942 | (h->avctx->skip_loop_filter >= AVDISCARD_NONREF && |
||
3943 | h->nal_ref_idc == 0)) |
||
3944 | h->deblocking_filter = 0; |
||
3945 | |||
3946 | if (h->deblocking_filter == 1 && h0->max_contexts > 1) { |
||
3947 | if (h->avctx->flags2 & CODEC_FLAG2_FAST) { |
||
3948 | /* Cheat slightly for speed: |
||
3949 | * Do not bother to deblock across slices. */ |
||
3950 | h->deblocking_filter = 2; |
||
3951 | } else { |
||
3952 | h0->max_contexts = 1; |
||
3953 | if (!h0->single_decode_warning) { |
||
3954 | av_log(h->avctx, AV_LOG_INFO, |
||
3955 | "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n"); |
||
3956 | h0->single_decode_warning = 1; |
||
3957 | } |
||
3958 | if (h != h0) { |
||
3959 | av_log(h->avctx, AV_LOG_ERROR, |
||
3960 | "Deblocking switched inside frame.\n"); |
||
3961 | return 1; |
||
3962 | } |
||
3963 | } |
||
3964 | } |
||
3965 | h->qp_thresh = 15 + 52 - |
||
3966 | FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset) - |
||
3967 | FFMAX3(0, |
||
3968 | h->pps.chroma_qp_index_offset[0], |
||
3969 | h->pps.chroma_qp_index_offset[1]) + |
||
3970 | 6 * (h->sps.bit_depth_luma - 8); |
||
3971 | |||
3972 | h0->last_slice_type = slice_type; |
||
3973 | memcpy(h0->last_ref_count, h0->ref_count, sizeof(h0->last_ref_count)); |
||
3974 | h->slice_num = ++h0->current_slice; |
||
3975 | |||
3976 | if (h->slice_num) |
||
3977 | h0->slice_row[(h->slice_num-1)&(MAX_SLICES-1)]= h->resync_mb_y; |
||
3978 | if ( h0->slice_row[h->slice_num&(MAX_SLICES-1)] + 3 >= h->resync_mb_y |
||
3979 | && h0->slice_row[h->slice_num&(MAX_SLICES-1)] <= h->resync_mb_y |
||
3980 | && h->slice_num >= MAX_SLICES) { |
||
3981 | //in case of ASO this check needs to be updated depending on how we decide to assign slice numbers in this case |
||
3982 | av_log(h->avctx, AV_LOG_WARNING, "Possibly too many slices (%d >= %d), increase MAX_SLICES and recompile if there are artifacts\n", h->slice_num, MAX_SLICES); |
||
3983 | } |
||
3984 | |||
3985 | for (j = 0; j < 2; j++) { |
||
3986 | int id_list[16]; |
||
3987 | int *ref2frm = h->ref2frm[h->slice_num & (MAX_SLICES - 1)][j]; |
||
3988 | for (i = 0; i < 16; i++) { |
||
3989 | id_list[i] = 60; |
||
3990 | if (j < h->list_count && i < h->ref_count[j] && |
||
3991 | h->ref_list[j][i].f.buf[0]) { |
||
3992 | int k; |
||
3993 | AVBuffer *buf = h->ref_list[j][i].f.buf[0]->buffer; |
||
3994 | for (k = 0; k < h->short_ref_count; k++) |
||
3995 | if (h->short_ref[k]->f.buf[0]->buffer == buf) { |
||
3996 | id_list[i] = k; |
||
3997 | break; |
||
3998 | } |
||
3999 | for (k = 0; k < h->long_ref_count; k++) |
||
4000 | if (h->long_ref[k] && h->long_ref[k]->f.buf[0]->buffer == buf) { |
||
4001 | id_list[i] = h->short_ref_count + k; |
||
4002 | break; |
||
4003 | } |
||
4004 | } |
||
4005 | } |
||
4006 | |||
4007 | ref2frm[0] = |
||
4008 | ref2frm[1] = -1; |
||
4009 | for (i = 0; i < 16; i++) |
||
4010 | ref2frm[i + 2] = 4 * id_list[i] + (h->ref_list[j][i].reference & 3); |
||
4011 | ref2frm[18 + 0] = |
||
4012 | ref2frm[18 + 1] = -1; |
||
4013 | for (i = 16; i < 48; i++) |
||
4014 | ref2frm[i + 4] = 4 * id_list[(i - 16) >> 1] + |
||
4015 | (h->ref_list[j][i].reference & 3); |
||
4016 | } |
||
4017 | |||
4018 | if (h->ref_count[0]) h->er.last_pic = &h->ref_list[0][0]; |
||
4019 | if (h->ref_count[1]) h->er.next_pic = &h->ref_list[1][0]; |
||
4020 | h->er.ref_count = h->ref_count[0]; |
||
4021 | |||
4022 | if (h->avctx->debug & FF_DEBUG_PICT_INFO) { |
||
4023 | av_log(h->avctx, AV_LOG_DEBUG, |
||
4024 | "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n", |
||
4025 | h->slice_num, |
||
4026 | (h->picture_structure == PICT_FRAME ? "F" : h->picture_structure == PICT_TOP_FIELD ? "T" : "B"), |
||
4027 | first_mb_in_slice, |
||
4028 | av_get_picture_type_char(h->slice_type), |
||
4029 | h->slice_type_fixed ? " fix" : "", |
||
4030 | h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "", |
||
4031 | pps_id, h->frame_num, |
||
4032 | h->cur_pic_ptr->field_poc[0], |
||
4033 | h->cur_pic_ptr->field_poc[1], |
||
4034 | h->ref_count[0], h->ref_count[1], |
||
4035 | h->qscale, |
||
4036 | h->deblocking_filter, |
||
4037 | h->slice_alpha_c0_offset / 2 - 26, h->slice_beta_offset / 2 - 26, |
||
4038 | h->use_weight, |
||
4039 | h->use_weight == 1 && h->use_weight_chroma ? "c" : "", |
||
4040 | h->slice_type == AV_PICTURE_TYPE_B ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""); |
||
4041 | } |
||
4042 | |||
4043 | return 0; |
||
4044 | } |
||
4045 | |||
4046 | int ff_h264_get_slice_type(const H264Context *h) |
||
4047 | { |
||
4048 | switch (h->slice_type) { |
||
4049 | case AV_PICTURE_TYPE_P: |
||
4050 | return 0; |
||
4051 | case AV_PICTURE_TYPE_B: |
||
4052 | return 1; |
||
4053 | case AV_PICTURE_TYPE_I: |
||
4054 | return 2; |
||
4055 | case AV_PICTURE_TYPE_SP: |
||
4056 | return 3; |
||
4057 | case AV_PICTURE_TYPE_SI: |
||
4058 | return 4; |
||
4059 | default: |
||
4060 | return AVERROR_INVALIDDATA; |
||
4061 | } |
||
4062 | } |
||
4063 | |||
/**
 * Fill the motion vector cache and reference cache of one reference list
 * for the macroblock that is about to be loop filtered.
 *
 * The caches are written relative to the scan8[0] entry: the row above it
 * receives data of the top neighbour, the column left of it data of the
 * left neighbour, and the 4x4 area starting at it data of the current MB.
 *
 * @param h         decoder context
 * @param mb_type   type of the current macroblock
 * @param top_xy    index of the top neighbour macroblock
 * @param left_xy   indices of the left neighbour macroblocks
 * @param top_type  type of the top neighbour (0 when it must be ignored)
 * @param left_type types of the left neighbours (0 when they must be ignored)
 * @param mb_xy     index of the current macroblock
 * @param list      reference list (0 or 1) whose caches are filled
 */
static av_always_inline void fill_filter_caches_inter(H264Context *h,
                                                      int mb_type, int top_xy,
                                                      int left_xy[LEFT_MBS],
                                                      int top_type,
                                                      int left_type[LEFT_MBS],
                                                      int mb_xy, int list)
{
    int b_stride = h->b_stride;
    int16_t(*mv_dst)[2] = &h->mv_cache[list][scan8[0]];
    int8_t *ref_cache = &h->ref_cache[list][scan8[0]];
    /* Neighbour data is only loaded for inter/direct macroblocks. */
    if (IS_INTER(mb_type) || IS_DIRECT(mb_type)) {
        if (USES_LIST(top_type, list)) {
            /* last (4th) row of 4x4 blocks of the top neighbour */
            const int b_xy = h->mb2b_xy[top_xy] + 3 * b_stride;
            /* the two bottom 8x8 reference indices of the top neighbour */
            const int b8_xy = 4 * top_xy + 2;
            /* Reference indices are translated through the ref2frm table of
             * the slice the neighbour belongs to; the table is entered at
             * offset 20 for MBAFF macroblocks and at offset 2 otherwise. */
            int (*ref2frm)[64] = (void*)(h->ref2frm[h->slice_table[top_xy] & (MAX_SLICES - 1)][0] + (MB_MBAFF(h) ? 20 : 2));
            AV_COPY128(mv_dst - 1 * 8, h->cur_pic.motion_val[list][b_xy + 0]);
            ref_cache[0 - 1 * 8] =
            ref_cache[1 - 1 * 8] = ref2frm[list][h->cur_pic.ref_index[list][b8_xy + 0]];
            ref_cache[2 - 1 * 8] =
            ref_cache[3 - 1 * 8] = ref2frm[list][h->cur_pic.ref_index[list][b8_xy + 1]];
        } else {
            /* top neighbour does not use this list: zero MVs, refs unused */
            AV_ZERO128(mv_dst - 1 * 8);
            AV_WN32A(&ref_cache[0 - 1 * 8], ((LIST_NOT_USED) & 0xFF) * 0x01010101u);
        }

        /* The left column is only filled when the interlaced flag of the
         * current and the left macroblock type does not differ. */
        if (!IS_INTERLACED(mb_type ^ left_type[LTOP])) {
            if (USES_LIST(left_type[LTOP], list)) {
                /* rightmost column of 4x4 blocks of the left neighbour */
                const int b_xy = h->mb2b_xy[left_xy[LTOP]] + 3;
                /* the two right 8x8 reference indices of the left neighbour */
                const int b8_xy = 4 * left_xy[LTOP] + 1;
                int (*ref2frm)[64] =(void*)( h->ref2frm[h->slice_table[left_xy[LTOP]] & (MAX_SLICES - 1)][0] + (MB_MBAFF(h) ? 20 : 2));
                AV_COPY32(mv_dst - 1 + 0, h->cur_pic.motion_val[list][b_xy + b_stride * 0]);
                AV_COPY32(mv_dst - 1 + 8, h->cur_pic.motion_val[list][b_xy + b_stride * 1]);
                AV_COPY32(mv_dst - 1 + 16, h->cur_pic.motion_val[list][b_xy + b_stride * 2]);
                AV_COPY32(mv_dst - 1 + 24, h->cur_pic.motion_val[list][b_xy + b_stride * 3]);
                ref_cache[-1 + 0] =
                ref_cache[-1 + 8] = ref2frm[list][h->cur_pic.ref_index[list][b8_xy + 2 * 0]];
                ref_cache[-1 + 16] =
                ref_cache[-1 + 24] = ref2frm[list][h->cur_pic.ref_index[list][b8_xy + 2 * 1]];
            } else {
                AV_ZERO32(mv_dst - 1 + 0);
                AV_ZERO32(mv_dst - 1 + 8);
                AV_ZERO32(mv_dst - 1 + 16);
                AV_ZERO32(mv_dst - 1 + 24);
                ref_cache[-1 + 0] =
                ref_cache[-1 + 8] =
                ref_cache[-1 + 16] =
                ref_cache[-1 + 24] = LIST_NOT_USED;
            }
        }
    }

    /* Current MB does not use this list: clear its 4x4 area and stop. */
    if (!USES_LIST(mb_type, list)) {
        fill_rectangle(mv_dst, 4, 4, 8, pack16to32(0, 0), 4);
        AV_WN32A(&ref_cache[0 * 8], ((LIST_NOT_USED) & 0xFF) * 0x01010101u);
        AV_WN32A(&ref_cache[1 * 8], ((LIST_NOT_USED) & 0xFF) * 0x01010101u);
        AV_WN32A(&ref_cache[2 * 8], ((LIST_NOT_USED) & 0xFF) * 0x01010101u);
        AV_WN32A(&ref_cache[3 * 8], ((LIST_NOT_USED) & 0xFF) * 0x01010101u);
        return;
    }

    {
        /* Four reference indices of the current MB, translated through the
         * ref2frm table of the current slice. Masking with 0x00FF00FF and
         * multiplying by 0x0101 duplicates each of the two packed bytes into
         * the neighbouring byte, so one 32-bit store fills a cache row
         * (presumably pack16to32() packs its arguments as two 16-bit
         * halves -- confirm against its definition). */
        int8_t *ref = &h->cur_pic.ref_index[list][4 * mb_xy];
        int (*ref2frm)[64] = (void*)(h->ref2frm[h->slice_num & (MAX_SLICES - 1)][0] + (MB_MBAFF(h) ? 20 : 2));
        uint32_t ref01 = (pack16to32(ref2frm[list][ref[0]], ref2frm[list][ref[1]]) & 0x00FF00FF) * 0x0101;
        uint32_t ref23 = (pack16to32(ref2frm[list][ref[2]], ref2frm[list][ref[3]]) & 0x00FF00FF) * 0x0101;
        AV_WN32A(&ref_cache[0 * 8], ref01);
        AV_WN32A(&ref_cache[1 * 8], ref01);
        AV_WN32A(&ref_cache[2 * 8], ref23);
        AV_WN32A(&ref_cache[3 * 8], ref23);
    }

    {
        /* Copy the four rows of motion vectors of the current MB. */
        int16_t(*mv_src)[2] = &h->cur_pic.motion_val[list][4 * h->mb_x + 4 * h->mb_y * b_stride];
        AV_COPY128(mv_dst + 8 * 0, mv_src + 0 * b_stride);
        AV_COPY128(mv_dst + 8 * 1, mv_src + 1 * b_stride);
        AV_COPY128(mv_dst + 8 * 2, mv_src + 2 * b_stride);
        AV_COPY128(mv_dst + 8 * 3, mv_src + 3 * b_stride);
    }
}
||
4143 | |||
/**
 * Prepare the per-macroblock caches needed by the loop filter.
 *
 * Determines the top and left neighbour macroblocks of h->mb_xy, stores
 * their indices and types in the context, and for non-intra macroblocks
 * fills the motion vector, reference and non-zero-count caches.
 *
 * @param h       decoder context; h->mb_xy selects the macroblock
 * @param mb_type type of the current macroblock
 * @return non zero if the loop filter can be skipped
 */
static int fill_filter_caches(H264Context *h, int mb_type)
{
    const int mb_xy = h->mb_xy;
    int top_xy, left_xy[LEFT_MBS];
    int top_type, left_type[LEFT_MBS];
    uint8_t *nnz;
    uint8_t *nnz_cache;

    /* one MB row up, or two rows up when MB_FIELD(h) is set */
    top_xy = mb_xy - (h->mb_stride << MB_FIELD(h));

    /* Wow, what a mess, why didn't they simplify the interlacing & intra
     * stuff, I can't imagine that these complex rules are worth it. */

    left_xy[LBOT] = left_xy[LTOP] = mb_xy - 1;
    if (FRAME_MBAFF(h)) {
        const int left_mb_field_flag = IS_INTERLACED(h->cur_pic.mb_type[mb_xy - 1]);
        const int curr_mb_field_flag = IS_INTERLACED(mb_type);
        if (h->mb_y & 1) {
            /* odd MB row: a field/frame mismatch moves the upper left
             * neighbour one MB row up */
            if (left_mb_field_flag != curr_mb_field_flag)
                left_xy[LTOP] -= h->mb_stride;
        } else {
            /* even MB row: for a field MB, step top_xy one row down again
             * when bit 7 of the top neighbour's mb_type is clear */
            if (curr_mb_field_flag)
                top_xy += h->mb_stride &
                          (((h->cur_pic.mb_type[top_xy] >> 7) & 1) - 1);
            if (left_mb_field_flag != curr_mb_field_flag)
                left_xy[LBOT] += h->mb_stride;
        }
    }

    h->top_mb_xy = top_xy;
    h->left_mb_xy[LTOP] = left_xy[LTOP];
    h->left_mb_xy[LBOT] = left_xy[LBOT];
    {
        /* For sufficiently low qp, filtering wouldn't do anything.
         * This is a conservative estimate: could also check beta_offset
         * and more accurate chroma_qp. */
        int qp_thresh = h->qp_thresh; // FIXME strictly we should store qp_thresh for each mb of a slice
        int qp = h->cur_pic.qscale_table[mb_xy];
        /* skip when the MB's own qp and the rounded average with each
         * existing neighbour are all at or below the threshold */
        if (qp <= qp_thresh &&
            (left_xy[LTOP] < 0 ||
             ((qp + h->cur_pic.qscale_table[left_xy[LTOP]] + 1) >> 1) <= qp_thresh) &&
            (top_xy < 0 ||
             ((qp + h->cur_pic.qscale_table[top_xy] + 1) >> 1) <= qp_thresh)) {
            if (!FRAME_MBAFF(h))
                return 1;
            /* MBAFF: additionally test the lower left neighbour and the MB
             * one row above top_xy */
            if ((left_xy[LTOP] < 0 ||
                 ((qp + h->cur_pic.qscale_table[left_xy[LBOT]] + 1) >> 1) <= qp_thresh) &&
                (top_xy < h->mb_stride ||
                 ((qp + h->cur_pic.qscale_table[top_xy - h->mb_stride] + 1) >> 1) <= qp_thresh))
                return 1;
        }
    }

    top_type = h->cur_pic.mb_type[top_xy];
    left_type[LTOP] = h->cur_pic.mb_type[left_xy[LTOP]];
    left_type[LBOT] = h->cur_pic.mb_type[left_xy[LBOT]];
    if (h->deblocking_filter == 2) {
        /* mode 2: neighbours belonging to another slice are ignored */
        if (h->slice_table[top_xy] != h->slice_num)
            top_type = 0;
        if (h->slice_table[left_xy[LBOT]] != h->slice_num)
            left_type[LTOP] = left_type[LBOT] = 0;
    } else {
        /* otherwise only neighbours whose slice_table entry is 0xFFFF are
         * ignored */
        if (h->slice_table[top_xy] == 0xFFFF)
            top_type = 0;
        if (h->slice_table[left_xy[LBOT]] == 0xFFFF)
            left_type[LTOP] = left_type[LBOT] = 0;
    }
    h->top_type = top_type;
    h->left_type[LTOP] = left_type[LTOP];
    h->left_type[LBOT] = left_type[LBOT];

    /* intra macroblocks need none of the caches filled below */
    if (IS_INTRA(mb_type))
        return 0;

    fill_filter_caches_inter(h, mb_type, top_xy, left_xy,
                             top_type, left_type, mb_xy, 0);
    if (h->list_count == 2)
        fill_filter_caches_inter(h, mb_type, top_xy, left_xy,
                                 top_type, left_type, mb_xy, 1);

    /* non-zero counts of the current MB: four rows of four entries */
    nnz = h->non_zero_count[mb_xy];
    nnz_cache = h->non_zero_count_cache;
    AV_COPY32(&nnz_cache[4 + 8 * 1], &nnz[0]);
    AV_COPY32(&nnz_cache[4 + 8 * 2], &nnz[4]);
    AV_COPY32(&nnz_cache[4 + 8 * 3], &nnz[8]);
    AV_COPY32(&nnz_cache[4 + 8 * 4], &nnz[12]);
    h->cbp = h->cbp_table[mb_xy];

    if (top_type) {
        /* last row of the top neighbour goes into the row above */
        nnz = h->non_zero_count[top_xy];
        AV_COPY32(&nnz_cache[4 + 8 * 0], &nnz[3 * 4]);
    }

    if (left_type[LTOP]) {
        /* last column of the left neighbour goes into the column to the left */
        nnz = h->non_zero_count[left_xy[LTOP]];
        nnz_cache[3 + 8 * 1] = nnz[3 + 0 * 4];
        nnz_cache[3 + 8 * 2] = nnz[3 + 1 * 4];
        nnz_cache[3 + 8 * 3] = nnz[3 + 2 * 4];
        nnz_cache[3 + 8 * 4] = nnz[3 + 3 * 4];
    }

    /* CAVLC 8x8dct requires NNZ values for residual decoding that differ
     * from what the loop filter needs */
    if (!CABAC(h) && h->pps.transform_8x8_mode) {
        /* For 8x8 transformed macroblocks the cache entries are derived
         * from bits 12..15 of the stored cbp value instead. */
        if (IS_8x8DCT(top_type)) {
            nnz_cache[4 + 8 * 0] =
            nnz_cache[5 + 8 * 0] = (h->cbp_table[top_xy] & 0x4000) >> 12;
            nnz_cache[6 + 8 * 0] =
            nnz_cache[7 + 8 * 0] = (h->cbp_table[top_xy] & 0x8000) >> 12;
        }
        if (IS_8x8DCT(left_type[LTOP])) {
            nnz_cache[3 + 8 * 1] =
            nnz_cache[3 + 8 * 2] = (h->cbp_table[left_xy[LTOP]] & 0x2000) >> 12; // FIXME check MBAFF
        }
        if (IS_8x8DCT(left_type[LBOT])) {
            nnz_cache[3 + 8 * 3] =
            nnz_cache[3 + 8 * 4] = (h->cbp_table[left_xy[LBOT]] & 0x8000) >> 12; // FIXME check MBAFF
        }

        if (IS_8x8DCT(mb_type)) {
            nnz_cache[scan8[0]] =
            nnz_cache[scan8[1]] =
            nnz_cache[scan8[2]] =
            nnz_cache[scan8[3]] = (h->cbp & 0x1000) >> 12;

            nnz_cache[scan8[0 + 4]] =
            nnz_cache[scan8[1 + 4]] =
            nnz_cache[scan8[2 + 4]] =
            nnz_cache[scan8[3 + 4]] = (h->cbp & 0x2000) >> 12;

            nnz_cache[scan8[0 + 8]] =
            nnz_cache[scan8[1 + 8]] =
            nnz_cache[scan8[2 + 8]] =
            nnz_cache[scan8[3 + 8]] = (h->cbp & 0x4000) >> 12;

            nnz_cache[scan8[0 + 12]] =
            nnz_cache[scan8[1 + 12]] =
            nnz_cache[scan8[2 + 12]] =
            nnz_cache[scan8[3 + 12]] = (h->cbp & 0x8000) >> 12;
        }
    }

    return 0;
}
||
4292 | |||
/**
 * Run the deblocking filter over a horizontal run of macroblocks of the
 * current macroblock row (both rows of the pair when FRAME_MBAFF is set).
 *
 * The per-macroblock context fields (mb_xy, slice_num, list_count, mb_x,
 * mb_y, line sizes, chroma_qp) are overwritten while iterating. Afterwards
 * slice_type, mb_x, mb_y and chroma_qp are set again from the values saved
 * on entry, end_x and h->qscale.
 *
 * @param h       decoder context
 * @param start_x first macroblock column to filter
 * @param end_x   column after the last one to filter; h->mb_x is set to
 *                this value on return
 */
static void loop_filter(H264Context *h, int start_x, int end_x)
{
    uint8_t *dest_y, *dest_cb, *dest_cr;
    int linesize, uvlinesize, mb_x, mb_y;
    /* last MB row to process: the second row of the pair for MBAFF */
    const int end_mb_y = h->mb_y + FRAME_MBAFF(h);
    const int old_slice_type = h->slice_type;
    const int pixel_shift = h->pixel_shift;
    /* chroma block height in lines */
    const int block_h = 16 >> h->chroma_y_shift;

    if (h->deblocking_filter) {
        for (mb_x = start_x; mb_x < end_x; mb_x++)
            for (mb_y = end_mb_y - FRAME_MBAFF(h); mb_y <= end_mb_y; mb_y++) {
                int mb_xy, mb_type;
                /* load the per-MB state recorded during decoding */
                mb_xy = h->mb_xy = mb_x + mb_y * h->mb_stride;
                h->slice_num = h->slice_table[mb_xy];
                mb_type = h->cur_pic.mb_type[mb_xy];
                h->list_count = h->list_counts[mb_xy];

                if (FRAME_MBAFF(h))
                    h->mb_mbaff =
                    h->mb_field_decoding_flag = !!IS_INTERLACED(mb_type);

                h->mb_x = mb_x;
                h->mb_y = mb_y;
                dest_y = h->cur_pic.f.data[0] +
                         ((mb_x << pixel_shift) + mb_y * h->linesize) * 16;
                dest_cb = h->cur_pic.f.data[1] +
                          (mb_x << pixel_shift) * (8 << CHROMA444(h)) +
                          mb_y * h->uvlinesize * block_h;
                dest_cr = h->cur_pic.f.data[2] +
                          (mb_x << pixel_shift) * (8 << CHROMA444(h)) +
                          mb_y * h->uvlinesize * block_h;
                // FIXME simplify above

                if (MB_FIELD(h)) {
                    /* field MB: lines are interleaved, so the stride is
                     * doubled; the MB in the odd row starts one line below
                     * the start of the MB above it */
                    linesize = h->mb_linesize = h->linesize * 2;
                    uvlinesize = h->mb_uvlinesize = h->uvlinesize * 2;
                    if (mb_y & 1) { // FIXME move out of this function?
                        dest_y -= h->linesize * 15;
                        dest_cb -= h->uvlinesize * (block_h - 1);
                        dest_cr -= h->uvlinesize * (block_h - 1);
                    }
                } else {
                    linesize = h->mb_linesize = h->linesize;
                    uvlinesize = h->mb_uvlinesize = h->uvlinesize;
                }
                backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize,
                                 uvlinesize, 0);
                /* non-zero return: filtering this MB can be skipped */
                if (fill_filter_caches(h, mb_type))
                    continue;
                h->chroma_qp[0] = get_chroma_qp(h, 0, h->cur_pic.qscale_table[mb_xy]);
                h->chroma_qp[1] = get_chroma_qp(h, 1, h->cur_pic.qscale_table[mb_xy]);

                if (FRAME_MBAFF(h)) {
                    ff_h264_filter_mb(h, mb_x, mb_y, dest_y, dest_cb, dest_cr,
                                      linesize, uvlinesize);
                } else {
                    ff_h264_filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb,
                                           dest_cr, linesize, uvlinesize);
                }
            }
    }
    /* put back the state the slice decoder expects to continue with */
    h->slice_type = old_slice_type;
    h->mb_x = end_x;
    h->mb_y = end_mb_y - FRAME_MBAFF(h);
    h->chroma_qp[0] = get_chroma_qp(h, 0, h->qscale);
    h->chroma_qp[1] = get_chroma_qp(h, 1, h->qscale);
}
||
4361 | |||
4362 | static void predict_field_decoding_flag(H264Context *h) |
||
4363 | { |
||
4364 | const int mb_xy = h->mb_x + h->mb_y * h->mb_stride; |
||
4365 | int mb_type = (h->slice_table[mb_xy - 1] == h->slice_num) ? |
||
4366 | h->cur_pic.mb_type[mb_xy - 1] : |
||
4367 | (h->slice_table[mb_xy - h->mb_stride] == h->slice_num) ? |
||
4368 | h->cur_pic.mb_type[mb_xy - h->mb_stride] : 0; |
||
4369 | h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0; |
||
4370 | } |
||
4371 | |||
4372 | /** |
||
4373 | * Draw edges and report progress for the last MB row. |
||
4374 | */ |
||
4375 | static void decode_finish_row(H264Context *h) |
||
4376 | { |
||
4377 | int top = 16 * (h->mb_y >> FIELD_PICTURE(h)); |
||
4378 | int pic_height = 16 * h->mb_height >> FIELD_PICTURE(h); |
||
4379 | int height = 16 << FRAME_MBAFF(h); |
||
4380 | int deblock_border = (16 + 4) << FRAME_MBAFF(h); |
||
4381 | |||
4382 | if (h->deblocking_filter) { |
||
4383 | if ((top + height) >= pic_height) |
||
4384 | height += deblock_border; |
||
4385 | top -= deblock_border; |
||
4386 | } |
||
4387 | |||
4388 | if (top >= pic_height || (top + height) < 0) |
||
4389 | return; |
||
4390 | |||
4391 | height = FFMIN(height, pic_height - top); |
||
4392 | if (top < 0) { |
||
4393 | height = top + height; |
||
4394 | top = 0; |
||
4395 | } |
||
4396 | |||
4397 | ff_h264_draw_horiz_band(h, top, height); |
||
4398 | |||
4399 | if (h->droppable || h->er.error_occurred) |
||
4400 | return; |
||
4401 | |||
4402 | ff_thread_report_progress(&h->cur_pic_ptr->tf, top + height - 1, |
||
4403 | h->picture_structure == PICT_BOTTOM_FIELD); |
||
4404 | } |
||
4405 | |||
4406 | static void er_add_slice(H264Context *h, int startx, int starty, |
||
4407 | int endx, int endy, int status) |
||
4408 | { |
||
4409 | if (CONFIG_ERROR_RESILIENCE) { |
||
4410 | ERContext *er = &h->er; |
||
4411 | |||
4412 | ff_er_add_slice(er, startx, starty, endx, endy, status); |
||
4413 | } |
||
4414 | } |
||
4415 | |||
/**
 * Decode all macroblocks of one slice, loop filtering each finished run of
 * macroblocks and reporting finished rows.
 *
 * The signature matches the callback expected by AVCodecContext.execute().
 *
 * @param avctx codec context
 * @param arg   pointer to a pointer to the H264Context of the slice
 * @return 0 when the end of the slice was reached, a negative error code
 *         (AVERROR_INVALIDDATA or the value returned by the CAVLC
 *         macroblock decoder) otherwise
 */
static int decode_slice(struct AVCodecContext *avctx, void *arg)
{
    H264Context *h = *(void **)arg;
    /* first column of the current row that has not been loop filtered yet */
    int lf_x_start = h->mb_x;

    h->mb_skip_run = -1;

    /* sanity check of the block offset table against scan8 */
    av_assert0(h->block_offset[15] == (4 * ((scan8[15] - scan8[0]) & 7) << h->pixel_shift) + 4 * h->linesize * ((scan8[15] - scan8[0]) >> 3));

    h->is_complex = FRAME_MBAFF(h) || h->picture_structure != PICT_FRAME ||
                    avctx->codec_id != AV_CODEC_ID_H264 ||
                    (CONFIG_GRAY && (h->flags & CODEC_FLAG_GRAY));

    /* Without slice threading, for frame pictures: if the macroblock right
     * before the start of this slice was not marked as completely decoded,
     * flag that an error occurred. */
    if (!(h->avctx->active_thread_type & FF_THREAD_SLICE) && h->picture_structure == PICT_FRAME && h->er.error_status_table) {
        const int start_i = av_clip(h->resync_mb_x + h->resync_mb_y * h->mb_width, 0, h->mb_num - 1);
        if (start_i) {
            int prev_status = h->er.error_status_table[h->er.mb_index2xy[start_i - 1]];
            prev_status &= ~ VP_START;
            if (prev_status != (ER_MV_END | ER_DC_END | ER_AC_END))
                h->er.error_occurred = 1;
        }
    }

    if (h->pps.cabac) {
        /* realign */
        align_get_bits(&h->gb);

        /* init cabac */
        ff_init_cabac_decoder(&h->cabac,
                              h->gb.buffer + get_bits_count(&h->gb) / 8,
                              (get_bits_left(&h->gb) + 7) / 8);

        ff_h264_init_cabac_states(h);

        for (;;) {
            // START_TIMER
            int ret = ff_h264_decode_mb_cabac(h);
            int eos;
            // STOP_TIMER("decode_mb_cabac")

            if (ret >= 0)
                ff_h264_hl_decode_mb(h);

            // FIXME optimal? or let mb_decode decode 16x32 ?
            /* MBAFF: decode the second macroblock of the pair as well */
            if (ret >= 0 && FRAME_MBAFF(h)) {
                h->mb_y++;

                ret = ff_h264_decode_mb_cabac(h);

                if (ret >= 0)
                    ff_h264_hl_decode_mb(h);
                h->mb_y--;
            }
            eos = get_cabac_terminate(&h->cabac);

            /* With the truncated-stream workaround, reading more than two
             * bytes past the end ends the slice without an error. */
            if ((h->workaround_bugs & FF_BUG_TRUNCATED) &&
                h->cabac.bytestream > h->cabac.bytestream_end + 2) {
                er_add_slice(h, h->resync_mb_x, h->resync_mb_y, h->mb_x - 1,
                             h->mb_y, ER_MB_END);
                if (h->mb_x >= lf_x_start)
                    loop_filter(h, lf_x_start, h->mb_x + 1);
                return 0;
            }
            if (h->cabac.bytestream > h->cabac.bytestream_end + 2 )
                av_log(h->avctx, AV_LOG_DEBUG, "bytestream overread %td\n", h->cabac.bytestream_end - h->cabac.bytestream);
            /* a decoding error or an overread of more than four bytes
             * aborts the slice */
            if (ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 4) {
                av_log(h->avctx, AV_LOG_ERROR,
                       "error while decoding MB %d %d, bytestream (%td)\n",
                       h->mb_x, h->mb_y,
                       h->cabac.bytestream_end - h->cabac.bytestream);
                er_add_slice(h, h->resync_mb_x, h->resync_mb_y, h->mb_x,
                             h->mb_y, ER_MB_ERROR);
                return AVERROR_INVALIDDATA;
            }

            /* end of the row: filter what is left of it, report the row
             * and move on to the next one (skipping a row for field and
             * MBAFF pictures) */
            if (++h->mb_x >= h->mb_width) {
                loop_filter(h, lf_x_start, h->mb_x);
                h->mb_x = lf_x_start = 0;
                decode_finish_row(h);
                ++h->mb_y;
                if (FIELD_OR_MBAFF_PICTURE(h)) {
                    ++h->mb_y;
                    if (FRAME_MBAFF(h) && h->mb_y < h->mb_height)
                        predict_field_decoding_flag(h);
                }
            }

            if (eos || h->mb_y >= h->mb_height) {
                tprintf(h->avctx, "slice end %d %d\n",
                        get_bits_count(&h->gb), h->gb.size_in_bits);
                er_add_slice(h, h->resync_mb_x, h->resync_mb_y, h->mb_x - 1,
                             h->mb_y, ER_MB_END);
                /* filter the macroblocks of a partially decoded row */
                if (h->mb_x > lf_x_start)
                    loop_filter(h, lf_x_start, h->mb_x);
                return 0;
            }
        }
    } else {
        for (;;) {
            int ret = ff_h264_decode_mb_cavlc(h);

            if (ret >= 0)
                ff_h264_hl_decode_mb(h);

            // FIXME optimal? or let mb_decode decode 16x32 ?
            /* MBAFF: decode the second macroblock of the pair as well */
            if (ret >= 0 && FRAME_MBAFF(h)) {
                h->mb_y++;
                ret = ff_h264_decode_mb_cavlc(h);

                if (ret >= 0)
                    ff_h264_hl_decode_mb(h);
                h->mb_y--;
            }

            if (ret < 0) {
                av_log(h->avctx, AV_LOG_ERROR,
                       "error while decoding MB %d %d\n", h->mb_x, h->mb_y);
                er_add_slice(h, h->resync_mb_x, h->resync_mb_y, h->mb_x,
                             h->mb_y, ER_MB_ERROR);
                return ret;
            }

            /* end of the row: same handling as in the CABAC branch */
            if (++h->mb_x >= h->mb_width) {
                loop_filter(h, lf_x_start, h->mb_x);
                h->mb_x = lf_x_start = 0;
                decode_finish_row(h);
                ++h->mb_y;
                if (FIELD_OR_MBAFF_PICTURE(h)) {
                    ++h->mb_y;
                    if (FRAME_MBAFF(h) && h->mb_y < h->mb_height)
                        predict_field_decoding_flag(h);
                }
                if (h->mb_y >= h->mb_height) {
                    tprintf(h->avctx, "slice end %d %d\n",
                            get_bits_count(&h->gb), h->gb.size_in_bits);

                    /* End of picture: success when all bits were consumed,
                     * or when bits are left over and aggressive error
                     * recognition is not requested. */
                    if ( get_bits_left(&h->gb) == 0
                        || get_bits_left(&h->gb) > 0 && !(h->avctx->err_recognition & AV_EF_AGGRESSIVE)) {
                        er_add_slice(h, h->resync_mb_x, h->resync_mb_y,
                                     h->mb_x - 1, h->mb_y,
                                     ER_MB_END);

                        return 0;
                    } else {
                        er_add_slice(h, h->resync_mb_x, h->resync_mb_y,
                                     h->mb_x, h->mb_y,
                                     ER_MB_END);

                        return AVERROR_INVALIDDATA;
                    }
                }
            }

            /* bitstream exhausted and no skip run pending: end of slice */
            if (get_bits_left(&h->gb) <= 0 && h->mb_skip_run <= 0) {
                tprintf(h->avctx, "slice end %d %d\n",
                        get_bits_count(&h->gb), h->gb.size_in_bits);

                if (get_bits_left(&h->gb) == 0) {
                    er_add_slice(h, h->resync_mb_x, h->resync_mb_y,
                                 h->mb_x - 1, h->mb_y,
                                 ER_MB_END);
                    if (h->mb_x > lf_x_start)
                        loop_filter(h, lf_x_start, h->mb_x);

                    return 0;
                } else {
                    /* more bits were read than the slice contains */
                    er_add_slice(h, h->resync_mb_x, h->resync_mb_y, h->mb_x,
                                 h->mb_y, ER_MB_ERROR);

                    return AVERROR_INVALIDDATA;
                }
            }
        }
    }
}
||
4591 | |||
4592 | /** |
||
4593 | * Call decode_slice() for each context. |
||
4594 | * |
||
4595 | * @param h h264 master context |
||
4596 | * @param context_count number of contexts to execute |
||
4597 | */ |
||
4598 | static int execute_decode_slices(H264Context *h, int context_count) |
||
4599 | { |
||
4600 | AVCodecContext *const avctx = h->avctx; |
||
4601 | H264Context *hx; |
||
4602 | int i; |
||
4603 | |||
4604 | if (h->avctx->hwaccel || |
||
4605 | h->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU) |
||
4606 | return 0; |
||
4607 | if (context_count == 1) { |
||
4608 | return decode_slice(avctx, &h); |
||
4609 | } else { |
||
4610 | av_assert0(context_count > 0); |
||
4611 | for (i = 1; i < context_count; i++) { |
||
4612 | hx = h->thread_context[i]; |
||
4613 | if (CONFIG_ERROR_RESILIENCE) { |
||
4614 | hx->er.error_count = 0; |
||
4615 | } |
||
4616 | hx->x264_build = h->x264_build; |
||
4617 | } |
||
4618 | |||
4619 | avctx->execute(avctx, decode_slice, h->thread_context, |
||
4620 | NULL, context_count, sizeof(void *)); |
||
4621 | |||
4622 | /* pull back stuff from slices to master context */ |
||
4623 | hx = h->thread_context[context_count - 1]; |
||
4624 | h->mb_x = hx->mb_x; |
||
4625 | h->mb_y = hx->mb_y; |
||
4626 | h->droppable = hx->droppable; |
||
4627 | h->picture_structure = hx->picture_structure; |
||
4628 | if (CONFIG_ERROR_RESILIENCE) { |
||
4629 | for (i = 1; i < context_count; i++) |
||
4630 | h->er.error_count += h->thread_context[i]->er.error_count; |
||
4631 | } |
||
4632 | } |
||
4633 | |||
4634 | return 0; |
||
4635 | } |
||
4636 | |||
/* Three-byte start code prefix: 00 00 01. */
static const uint8_t start_code[3] = { 0, 0, 1 };
||
4638 | |||
4639 | static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size, |
||
4640 | int parse_extradata) |
||
4641 | { |
||
4642 | AVCodecContext *const avctx = h->avctx; |
||
4643 | H264Context *hx; ///< thread context |
||
4644 | int buf_index; |
||
4645 | int context_count; |
||
4646 | int next_avc; |
||
4647 | int pass = !(avctx->active_thread_type & FF_THREAD_FRAME); |
||
4648 | int nals_needed = 0; ///< number of NALs that need decoding before the next frame thread starts |
||
4649 | int nal_index; |
||
4650 | int idr_cleared=0; |
||
4651 | int first_slice = 0; |
||
4652 | int ret = 0; |
||
4653 | |||
4654 | h->nal_unit_type= 0; |
||
4655 | |||
4656 | if(!h->slice_context_count) |
||
4657 | h->slice_context_count= 1; |
||
4658 | h->max_contexts = h->slice_context_count; |
||
4659 | if (!(avctx->flags2 & CODEC_FLAG2_CHUNKS)) { |
||
4660 | h->current_slice = 0; |
||
4661 | if (!h->first_field) |
||
4662 | h->cur_pic_ptr = NULL; |
||
4663 | ff_h264_reset_sei(h); |
||
4664 | } |
||
4665 | |||
4666 | if (h->nal_length_size == 4) { |
||
4667 | if (buf_size > 8 && AV_RB32(buf) == 1 && AV_RB32(buf+5) > (unsigned)buf_size) { |
||
4668 | h->is_avc = 0; |
||
4669 | }else if(buf_size > 3 && AV_RB32(buf) > 1 && AV_RB32(buf) <= (unsigned)buf_size) |
||
4670 | h->is_avc = 1; |
||
4671 | } |
||
4672 | |||
4673 | for (; pass <= 1; pass++) { |
||
4674 | buf_index = 0; |
||
4675 | context_count = 0; |
||
4676 | next_avc = h->is_avc ? 0 : buf_size; |
||
4677 | nal_index = 0; |
||
4678 | for (;;) { |
||
4679 | int consumed; |
||
4680 | int dst_length; |
||
4681 | int bit_length; |
||
4682 | const uint8_t *ptr; |
||
4683 | int i, nalsize = 0; |
||
4684 | int err; |
||
4685 | |||
4686 | if (buf_index >= next_avc) { |
||
4687 | if (buf_index >= buf_size - h->nal_length_size) |
||
4688 | break; |
||
4689 | nalsize = 0; |
||
4690 | for (i = 0; i < h->nal_length_size; i++) |
||
4691 | nalsize = (nalsize << 8) | buf[buf_index++]; |
||
4692 | if (nalsize <= 0 || nalsize > buf_size - buf_index) { |
||
4693 | av_log(h->avctx, AV_LOG_ERROR, |
||
4694 | "AVC: nal size %d\n", nalsize); |
||
4695 | break; |
||
4696 | } |
||
4697 | next_avc = buf_index + nalsize; |
||
4698 | } else { |
||
4699 | // start code prefix search |
||
4700 | for (; buf_index + 3 < next_avc; buf_index++) |
||
4701 | // This should always succeed in the first iteration. |
||
4702 | if (buf[buf_index] == 0 && |
||
4703 | buf[buf_index + 1] == 0 && |
||
4704 | buf[buf_index + 2] == 1) |
||
4705 | break; |
||
4706 | |||
4707 | if (buf_index + 3 >= buf_size) { |
||
4708 | buf_index = buf_size; |
||
4709 | break; |
||
4710 | } |
||
4711 | |||
4712 | buf_index += 3; |
||
4713 | if (buf_index >= next_avc) |
||
4714 | continue; |
||
4715 | } |
||
4716 | |||
4717 | hx = h->thread_context[context_count]; |
||
4718 | |||
4719 | ptr = ff_h264_decode_nal(hx, buf + buf_index, &dst_length, |
||
4720 | &consumed, next_avc - buf_index); |
||
4721 | if (ptr == NULL || dst_length < 0) { |
||
4722 | ret = -1; |
||
4723 | goto end; |
||
4724 | } |
||
4725 | i = buf_index + consumed; |
||
4726 | if ((h->workaround_bugs & FF_BUG_AUTODETECT) && i + 3 < next_avc && |
||
4727 | buf[i] == 0x00 && buf[i + 1] == 0x00 && |
||
4728 | buf[i + 2] == 0x01 && buf[i + 3] == 0xE0) |
||
4729 | h->workaround_bugs |= FF_BUG_TRUNCATED; |
||
4730 | |||
4731 | if (!(h->workaround_bugs & FF_BUG_TRUNCATED)) |
||
4732 | while(dst_length > 0 && ptr[dst_length - 1] == 0) |
||
4733 | dst_length--; |
||
4734 | bit_length = !dst_length ? 0 |
||
4735 | : (8 * dst_length - |
||
4736 | decode_rbsp_trailing(h, ptr + dst_length - 1)); |
||
4737 | |||
4738 | if (h->avctx->debug & FF_DEBUG_STARTCODE) |
||
4739 | av_log(h->avctx, AV_LOG_DEBUG, "NAL %d/%d at %d/%d length %d pass %d\n", hx->nal_unit_type, hx->nal_ref_idc, buf_index, buf_size, dst_length, pass); |
||
4740 | |||
4741 | if (h->is_avc && (nalsize != consumed) && nalsize) |
||
4742 | av_log(h->avctx, AV_LOG_DEBUG, |
||
4743 | "AVC: Consumed only %d bytes instead of %d\n", |
||
4744 | consumed, nalsize); |
||
4745 | |||
4746 | buf_index += consumed; |
||
4747 | nal_index++; |
||
4748 | |||
4749 | if (pass == 0) { |
||
4750 | /* packets can sometimes contain multiple PPS/SPS, |
||
4751 | * e.g. two PAFF field pictures in one packet, or a demuxer |
||
4752 | * which splits NALs strangely if so, when frame threading we |
||
4753 | * can't start the next thread until we've read all of them */ |
||
4754 | switch (hx->nal_unit_type) { |
||
4755 | case NAL_SPS: |
||
4756 | case NAL_PPS: |
||
4757 | nals_needed = nal_index; |
||
4758 | break; |
||
4759 | case NAL_DPA: |
||
4760 | case NAL_IDR_SLICE: |
||
4761 | case NAL_SLICE: |
||
4762 | init_get_bits(&hx->gb, ptr, bit_length); |
||
4763 | if (!get_ue_golomb(&hx->gb) || !first_slice) |
||
4764 | nals_needed = nal_index; |
||
4765 | if (!first_slice) |
||
4766 | first_slice = hx->nal_unit_type; |
||
4767 | } |
||
4768 | continue; |
||
4769 | } |
||
4770 | |||
4771 | if (!first_slice) |
||
4772 | switch (hx->nal_unit_type) { |
||
4773 | case NAL_DPA: |
||
4774 | case NAL_IDR_SLICE: |
||
4775 | case NAL_SLICE: |
||
4776 | first_slice = hx->nal_unit_type; |
||
4777 | } |
||
4778 | |||
4779 | if (avctx->skip_frame >= AVDISCARD_NONREF && |
||
4780 | h->nal_ref_idc == 0 && |
||
4781 | h->nal_unit_type != NAL_SEI) |
||
4782 | continue; |
||
4783 | |||
4784 | again: |
||
4785 | /* Ignore per frame NAL unit type during extradata |
||
4786 | * parsing. Decoding slices is not possible in codec init |
||
4787 | * with frame-mt */ |
||
4788 | if (parse_extradata) { |
||
4789 | switch (hx->nal_unit_type) { |
||
4790 | case NAL_IDR_SLICE: |
||
4791 | case NAL_SLICE: |
||
4792 | case NAL_DPA: |
||
4793 | case NAL_DPB: |
||
4794 | case NAL_DPC: |
||
4795 | av_log(h->avctx, AV_LOG_WARNING, |
||
4796 | "Ignoring NAL %d in global header/extradata\n", |
||
4797 | hx->nal_unit_type); |
||
4798 | // fall through to next case |
||
4799 | case NAL_AUXILIARY_SLICE: |
||
4800 | hx->nal_unit_type = NAL_FF_IGNORE; |
||
4801 | } |
||
4802 | } |
||
4803 | |||
4804 | err = 0; |
||
4805 | |||
4806 | switch (hx->nal_unit_type) { |
||
4807 | case NAL_IDR_SLICE: |
||
4808 | if (first_slice != NAL_IDR_SLICE) { |
||
4809 | av_log(h->avctx, AV_LOG_ERROR, |
||
4810 | "Invalid mix of idr and non-idr slices\n"); |
||
4811 | ret = -1; |
||
4812 | goto end; |
||
4813 | } |
||
4814 | if(!idr_cleared) |
||
4815 | idr(h); // FIXME ensure we don't lose some frames if there is reordering |
||
4816 | idr_cleared = 1; |
||
4817 | case NAL_SLICE: |
||
4818 | init_get_bits(&hx->gb, ptr, bit_length); |
||
4819 | hx->intra_gb_ptr = |
||
4820 | hx->inter_gb_ptr = &hx->gb; |
||
4821 | hx->data_partitioning = 0; |
||
4822 | |||
4823 | if ((err = decode_slice_header(hx, h))) |
||
4824 | break; |
||
4825 | |||
4826 | if (h->sei_recovery_frame_cnt >= 0 && (h->frame_num != h->sei_recovery_frame_cnt || hx->slice_type_nos != AV_PICTURE_TYPE_I)) |
||
4827 | h->valid_recovery_point = 1; |
||
4828 | |||
4829 | if ( h->sei_recovery_frame_cnt >= 0 |
||
4830 | && ( h->recovery_frame<0 |
||
4831 | || ((h->recovery_frame - h->frame_num) & ((1 << h->sps.log2_max_frame_num)-1)) > h->sei_recovery_frame_cnt)) { |
||
4832 | h->recovery_frame = (h->frame_num + h->sei_recovery_frame_cnt) % |
||
4833 | (1 << h->sps.log2_max_frame_num); |
||
4834 | |||
4835 | if (!h->valid_recovery_point) |
||
4836 | h->recovery_frame = h->frame_num; |
||
4837 | } |
||
4838 | |||
4839 | h->cur_pic_ptr->f.key_frame |= |
||
4840 | (hx->nal_unit_type == NAL_IDR_SLICE); |
||
4841 | |||
4842 | if (h->recovery_frame == h->frame_num) { |
||
4843 | h->cur_pic_ptr->sync |= 1; |
||
4844 | h->recovery_frame = -1; |
||
4845 | } |
||
4846 | |||
4847 | h->sync |= !!h->cur_pic_ptr->f.key_frame; |
||
4848 | h->sync |= 3*!!(avctx->flags2 & CODEC_FLAG2_SHOW_ALL); |
||
4849 | h->cur_pic_ptr->sync |= h->sync; |
||
4850 | |||
4851 | if (h->current_slice == 1) { |
||
4852 | if (!(avctx->flags2 & CODEC_FLAG2_CHUNKS)) |
||
4853 | decode_postinit(h, nal_index >= nals_needed); |
||
4854 | |||
4855 | if (h->avctx->hwaccel && |
||
4856 | (ret = h->avctx->hwaccel->start_frame(h->avctx, NULL, 0)) < 0) |
||
4857 | return ret; |
||
4858 | if (CONFIG_H264_VDPAU_DECODER && |
||
4859 | h->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU) |
||
4860 | ff_vdpau_h264_picture_start(h); |
||
4861 | } |
||
4862 | |||
4863 | if (hx->redundant_pic_count == 0 && |
||
4864 | (avctx->skip_frame < AVDISCARD_NONREF || |
||
4865 | hx->nal_ref_idc) && |
||
4866 | (avctx->skip_frame < AVDISCARD_BIDIR || |
||
4867 | hx->slice_type_nos != AV_PICTURE_TYPE_B) && |
||
4868 | (avctx->skip_frame < AVDISCARD_NONKEY || |
||
4869 | hx->slice_type_nos == AV_PICTURE_TYPE_I) && |
||
4870 | avctx->skip_frame < AVDISCARD_ALL) { |
||
4871 | if (avctx->hwaccel) { |
||
4872 | ret = avctx->hwaccel->decode_slice(avctx, |
||
4873 | &buf[buf_index - consumed], |
||
4874 | consumed); |
||
4875 | if (ret < 0) |
||
4876 | return ret; |
||
4877 | } else if (CONFIG_H264_VDPAU_DECODER && |
||
4878 | h->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU) { |
||
4879 | ff_vdpau_add_data_chunk(h->cur_pic_ptr->f.data[0], |
||
4880 | start_code, |
||
4881 | sizeof(start_code)); |
||
4882 | ff_vdpau_add_data_chunk(h->cur_pic_ptr->f.data[0], |
||
4883 | &buf[buf_index - consumed], |
||
4884 | consumed); |
||
4885 | } else |
||
4886 | context_count++; |
||
4887 | } |
||
4888 | break; |
||
4889 | case NAL_DPA: |
||
4890 | init_get_bits(&hx->gb, ptr, bit_length); |
||
4891 | hx->intra_gb_ptr = |
||
4892 | hx->inter_gb_ptr = NULL; |
||
4893 | |||
4894 | if ((err = decode_slice_header(hx, h)) < 0) |
||
4895 | break; |
||
4896 | |||
4897 | hx->data_partitioning = 1; |
||
4898 | break; |
||
4899 | case NAL_DPB: |
||
4900 | init_get_bits(&hx->intra_gb, ptr, bit_length); |
||
4901 | hx->intra_gb_ptr = &hx->intra_gb; |
||
4902 | break; |
||
4903 | case NAL_DPC: |
||
4904 | init_get_bits(&hx->inter_gb, ptr, bit_length); |
||
4905 | hx->inter_gb_ptr = &hx->inter_gb; |
||
4906 | |||
4907 | av_log(h->avctx, AV_LOG_ERROR, "Partitioned H.264 support is incomplete\n"); |
||
4908 | break; |
||
4909 | |||
4910 | if (hx->redundant_pic_count == 0 && |
||
4911 | hx->intra_gb_ptr && |
||
4912 | hx->data_partitioning && |
||
4913 | h->cur_pic_ptr && h->context_initialized && |
||
4914 | (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc) && |
||
4915 | (avctx->skip_frame < AVDISCARD_BIDIR || |
||
4916 | hx->slice_type_nos != AV_PICTURE_TYPE_B) && |
||
4917 | (avctx->skip_frame < AVDISCARD_NONKEY || |
||
4918 | hx->slice_type_nos == AV_PICTURE_TYPE_I) && |
||
4919 | avctx->skip_frame < AVDISCARD_ALL) |
||
4920 | context_count++; |
||
4921 | break; |
||
4922 | case NAL_SEI: |
||
4923 | init_get_bits(&h->gb, ptr, bit_length); |
||
4924 | ff_h264_decode_sei(h); |
||
4925 | break; |
||
4926 | case NAL_SPS: |
||
4927 | init_get_bits(&h->gb, ptr, bit_length); |
||
4928 | if (ff_h264_decode_seq_parameter_set(h) < 0 && (h->is_avc ? nalsize : 1)) { |
||
4929 | av_log(h->avctx, AV_LOG_DEBUG, |
||
4930 | "SPS decoding failure, trying again with the complete NAL\n"); |
||
4931 | if (h->is_avc) |
||
4932 | av_assert0(next_avc - buf_index + consumed == nalsize); |
||
4933 | if ((next_avc - buf_index + consumed - 1) >= INT_MAX/8) |
||
4934 | break; |
||
4935 | init_get_bits(&h->gb, &buf[buf_index + 1 - consumed], |
||
4936 | 8*(next_avc - buf_index + consumed - 1)); |
||
4937 | ff_h264_decode_seq_parameter_set(h); |
||
4938 | } |
||
4939 | |||
4940 | break; |
||
4941 | case NAL_PPS: |
||
4942 | init_get_bits(&h->gb, ptr, bit_length); |
||
4943 | ff_h264_decode_picture_parameter_set(h, bit_length); |
||
4944 | break; |
||
4945 | case NAL_AUD: |
||
4946 | case NAL_END_SEQUENCE: |
||
4947 | case NAL_END_STREAM: |
||
4948 | case NAL_FILLER_DATA: |
||
4949 | case NAL_SPS_EXT: |
||
4950 | case NAL_AUXILIARY_SLICE: |
||
4951 | break; |
||
4952 | case NAL_FF_IGNORE: |
||
4953 | break; |
||
4954 | default: |
||
4955 | av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", |
||
4956 | hx->nal_unit_type, bit_length); |
||
4957 | } |
||
4958 | |||
4959 | if (context_count == h->max_contexts) { |
||
4960 | execute_decode_slices(h, context_count); |
||
4961 | context_count = 0; |
||
4962 | } |
||
4963 | |||
4964 | if (err < 0) |
||
4965 | av_log(h->avctx, AV_LOG_ERROR, "decode_slice_header error\n"); |
||
4966 | else if (err == 1) { |
||
4967 | /* Slice could not be decoded in parallel mode, copy down |
||
4968 | * NAL unit stuff to context 0 and restart. Note that |
||
4969 | * rbsp_buffer is not transferred, but since we no longer |
||
4970 | * run in parallel mode this should not be an issue. */ |
||
4971 | h->nal_unit_type = hx->nal_unit_type; |
||
4972 | h->nal_ref_idc = hx->nal_ref_idc; |
||
4973 | hx = h; |
||
4974 | goto again; |
||
4975 | } |
||
4976 | } |
||
4977 | } |
||
4978 | if (context_count) |
||
4979 | execute_decode_slices(h, context_count); |
||
4980 | |||
4981 | end: |
||
4982 | /* clean up */ |
||
4983 | if (h->cur_pic_ptr && !h->droppable) { |
||
4984 | ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX, |
||
4985 | h->picture_structure == PICT_BOTTOM_FIELD); |
||
4986 | } |
||
4987 | |||
4988 | return (ret < 0) ? ret : buf_index; |
||
4989 | } |
||
4990 | |||
/**
 * Return the number of bytes consumed for building the current frame.
 *
 * @param pos      byte position reached in the input buffer
 * @param buf_size total size of the input buffer in bytes
 * @return pos, except that 0 is bumped to 1 and a position with fewer
 *         than 10 bytes left before buf_size is rounded up to buf_size
 */
static int get_consumed_bytes(int pos, int buf_size)
{
    /* Never report zero progress, to avoid infinite loops. */
    if (pos == 0)
        pos = 1;

    /* Fewer than 10 bytes would remain: report the whole buffer as
     * consumed. The sum is computed in a wider type so that it cannot
     * overflow (undefined behaviour for int) when pos is within 10 of
     * INT_MAX. */
    if ((long long)pos + 10 > buf_size)
        pos = buf_size;

    return pos;
}
||
5003 | |||
5004 | static int output_frame(H264Context *h, AVFrame *dst, Picture *srcp) |
||
5005 | { |
||
5006 | AVFrame *src = &srcp->f; |
||
5007 | int i; |
||
5008 | int ret = av_frame_ref(dst, src); |
||
5009 | if (ret < 0) |
||
5010 | return ret; |
||
5011 | |||
5012 | av_dict_set(&dst->metadata, "stereo_mode", ff_h264_sei_stereo_mode(h), 0); |
||
5013 | |||
5014 | if (!srcp->crop) |
||
5015 | return 0; |
||
5016 | |||
5017 | for (i = 0; i < 3; i++) { |
||
5018 | int hshift = (i > 0) ? h->chroma_x_shift : 0; |
||
5019 | int vshift = (i > 0) ? h->chroma_y_shift : 0; |
||
5020 | int off = ((srcp->crop_left >> hshift) << h->pixel_shift) + |
||
5021 | (srcp->crop_top >> vshift) * dst->linesize[i]; |
||
5022 | dst->data[i] += off; |
||
5023 | } |
||
5024 | return 0; |
||
5025 | } |
||
5026 | |||
5027 | static int decode_frame(AVCodecContext *avctx, void *data, |
||
5028 | int *got_frame, AVPacket *avpkt) |
||
5029 | { |
||
5030 | const uint8_t *buf = avpkt->data; |
||
5031 | int buf_size = avpkt->size; |
||
5032 | H264Context *h = avctx->priv_data; |
||
5033 | AVFrame *pict = data; |
||
5034 | int buf_index = 0; |
||
5035 | Picture *out; |
||
5036 | int i, out_idx; |
||
5037 | int ret; |
||
5038 | |||
5039 | h->flags = avctx->flags; |
||
5040 | |||
5041 | /* end of stream, output what is still in the buffers */ |
||
5042 | if (buf_size == 0) { |
||
5043 | out: |
||
5044 | |||
5045 | h->cur_pic_ptr = NULL; |
||
5046 | h->first_field = 0; |
||
5047 | |||
5048 | // FIXME factorize this with the output code below |
||
5049 | out = h->delayed_pic[0]; |
||
5050 | out_idx = 0; |
||
5051 | for (i = 1; |
||
5052 | h->delayed_pic[i] && |
||
5053 | !h->delayed_pic[i]->f.key_frame && |
||
5054 | !h->delayed_pic[i]->mmco_reset; |
||
5055 | i++) |
||
5056 | if (h->delayed_pic[i]->poc < out->poc) { |
||
5057 | out = h->delayed_pic[i]; |
||
5058 | out_idx = i; |
||
5059 | } |
||
5060 | |||
5061 | for (i = out_idx; h->delayed_pic[i]; i++) |
||
5062 | h->delayed_pic[i] = h->delayed_pic[i + 1]; |
||
5063 | |||
5064 | if (out) { |
||
5065 | out->reference &= ~DELAYED_PIC_REF; |
||
5066 | ret = output_frame(h, pict, out); |
||
5067 | if (ret < 0) |
||
5068 | return ret; |
||
5069 | *got_frame = 1; |
||
5070 | } |
||
5071 | |||
5072 | return buf_index; |
||
5073 | } |
||
5074 | if(h->is_avc && buf_size >= 9 && buf[0]==1 && buf[2]==0 && (buf[4]&0xFC)==0xFC && (buf[5]&0x1F) && buf[8]==0x67){ |
||
5075 | int cnt= buf[5]&0x1f; |
||
5076 | const uint8_t *p= buf+6; |
||
5077 | while(cnt--){ |
||
5078 | int nalsize= AV_RB16(p) + 2; |
||
5079 | if(nalsize > buf_size - (p-buf) || p[2]!=0x67) |
||
5080 | goto not_extra; |
||
5081 | p += nalsize; |
||
5082 | } |
||
5083 | cnt = *(p++); |
||
5084 | if(!cnt) |
||
5085 | goto not_extra; |
||
5086 | while(cnt--){ |
||
5087 | int nalsize= AV_RB16(p) + 2; |
||
5088 | if(nalsize > buf_size - (p-buf) || p[2]!=0x68) |
||
5089 | goto not_extra; |
||
5090 | p += nalsize; |
||
5091 | } |
||
5092 | |||
5093 | return ff_h264_decode_extradata(h, buf, buf_size); |
||
5094 | } |
||
5095 | not_extra: |
||
5096 | |||
5097 | buf_index = decode_nal_units(h, buf, buf_size, 0); |
||
5098 | if (buf_index < 0) |
||
5099 | return AVERROR_INVALIDDATA; |
||
5100 | |||
5101 | if (!h->cur_pic_ptr && h->nal_unit_type == NAL_END_SEQUENCE) { |
||
5102 | av_assert0(buf_index <= buf_size); |
||
5103 | goto out; |
||
5104 | } |
||
5105 | |||
5106 | if (!(avctx->flags2 & CODEC_FLAG2_CHUNKS) && !h->cur_pic_ptr) { |
||
5107 | if (avctx->skip_frame >= AVDISCARD_NONREF || |
||
5108 | buf_size >= 4 && !memcmp("Q264", buf, 4)) |
||
5109 | return buf_size; |
||
5110 | av_log(avctx, AV_LOG_ERROR, "no frame!\n"); |
||
5111 | return AVERROR_INVALIDDATA; |
||
5112 | } |
||
5113 | |||
5114 | if (!(avctx->flags2 & CODEC_FLAG2_CHUNKS) || |
||
5115 | (h->mb_y >= h->mb_height && h->mb_height)) { |
||
5116 | if (avctx->flags2 & CODEC_FLAG2_CHUNKS) |
||
5117 | decode_postinit(h, 1); |
||
5118 | |||
5119 | field_end(h, 0); |
||
5120 | |||
5121 | /* Wait for second field. */ |
||
5122 | *got_frame = 0; |
||
5123 | if (h->next_output_pic && (h->next_output_pic->sync || h->sync>1)) { |
||
5124 | ret = output_frame(h, pict, h->next_output_pic); |
||
5125 | if (ret < 0) |
||
5126 | return ret; |
||
5127 | *got_frame = 1; |
||
5128 | if (CONFIG_MPEGVIDEO) { |
||
5129 | ff_print_debug_info2(h->avctx, h->next_output_pic, pict, h->er.mbskip_table, |
||
5130 | &h->low_delay, |
||
5131 | h->mb_width, h->mb_height, h->mb_stride, 1); |
||
5132 | } |
||
5133 | } |
||
5134 | } |
||
5135 | |||
5136 | assert(pict->data[0] || !*got_frame); |
||
5137 | |||
5138 | return get_consumed_bytes(buf_index, buf_size); |
||
5139 | } |
||
5140 | |||
5141 | av_cold void ff_h264_free_context(H264Context *h) |
||
5142 | { |
||
5143 | int i; |
||
5144 | |||
5145 | free_tables(h, 1); // FIXME cleanup init stuff perhaps |
||
5146 | |||
5147 | for (i = 0; i < MAX_SPS_COUNT; i++) |
||
5148 | av_freep(h->sps_buffers + i); |
||
5149 | |||
5150 | for (i = 0; i < MAX_PPS_COUNT; i++) |
||
5151 | av_freep(h->pps_buffers + i); |
||
5152 | } |
||
5153 | |||
5154 | static av_cold int h264_decode_end(AVCodecContext *avctx) |
||
5155 | { |
||
5156 | H264Context *h = avctx->priv_data; |
||
5157 | |||
5158 | ff_h264_remove_all_refs(h); |
||
5159 | ff_h264_free_context(h); |
||
5160 | |||
5161 | unref_picture(h, &h->cur_pic); |
||
5162 | |||
5163 | return 0; |
||
5164 | } |
||
5165 | |||
5166 | static const AVProfile profiles[] = { |
||
5167 | { FF_PROFILE_H264_BASELINE, "Baseline" }, |
||
5168 | { FF_PROFILE_H264_CONSTRAINED_BASELINE, "Constrained Baseline" }, |
||
5169 | { FF_PROFILE_H264_MAIN, "Main" }, |
||
5170 | { FF_PROFILE_H264_EXTENDED, "Extended" }, |
||
5171 | { FF_PROFILE_H264_HIGH, "High" }, |
||
5172 | { FF_PROFILE_H264_HIGH_10, "High 10" }, |
||
5173 | { FF_PROFILE_H264_HIGH_10_INTRA, "High 10 Intra" }, |
||
5174 | { FF_PROFILE_H264_HIGH_422, "High 4:2:2" }, |
||
5175 | { FF_PROFILE_H264_HIGH_422_INTRA, "High 4:2:2 Intra" }, |
||
5176 | { FF_PROFILE_H264_HIGH_444, "High 4:4:4" }, |
||
5177 | { FF_PROFILE_H264_HIGH_444_PREDICTIVE, "High 4:4:4 Predictive" }, |
||
5178 | { FF_PROFILE_H264_HIGH_444_INTRA, "High 4:4:4 Intra" }, |
||
5179 | { FF_PROFILE_H264_CAVLC_444, "CAVLC 4:4:4" }, |
||
5180 | { FF_PROFILE_UNKNOWN }, |
||
5181 | }; |
||
5182 | |||
5183 | static const AVOption h264_options[] = { |
||
5184 | {"is_avc", "is avc", offsetof(H264Context, is_avc), FF_OPT_TYPE_INT, {.i64 = 0}, 0, 1, 0}, |
||
5185 | {"nal_length_size", "nal_length_size", offsetof(H264Context, nal_length_size), FF_OPT_TYPE_INT, {.i64 = 0}, 0, 4, 0}, |
||
5186 | {NULL} |
||
5187 | }; |
||
5188 | |||
5189 | static const AVClass h264_class = { |
||
5190 | .class_name = "H264 Decoder", |
||
5191 | .item_name = av_default_item_name, |
||
5192 | .option = h264_options, |
||
5193 | .version = LIBAVUTIL_VERSION_INT, |
||
5194 | }; |
||
5195 | |||
5196 | static const AVClass h264_vdpau_class = { |
||
5197 | .class_name = "H264 VDPAU Decoder", |
||
5198 | .item_name = av_default_item_name, |
||
5199 | .option = h264_options, |
||
5200 | .version = LIBAVUTIL_VERSION_INT, |
||
5201 | }; |
||
5202 | |||
5203 | AVCodec ff_h264_decoder = { |
||
5204 | .name = "h264", |
||
5205 | .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"), |
||
5206 | .type = AVMEDIA_TYPE_VIDEO, |
||
5207 | .id = AV_CODEC_ID_H264, |
||
5208 | .priv_data_size = sizeof(H264Context), |
||
5209 | .init = ff_h264_decode_init, |
||
5210 | .close = h264_decode_end, |
||
5211 | .decode = decode_frame, |
||
5212 | .capabilities = /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | |
||
5213 | CODEC_CAP_DELAY | CODEC_CAP_SLICE_THREADS | |
||
5214 | CODEC_CAP_FRAME_THREADS, |
||
5215 | .flush = flush_dpb, |
||
5216 | .init_thread_copy = ONLY_IF_THREADS_ENABLED(decode_init_thread_copy), |
||
5217 | .update_thread_context = ONLY_IF_THREADS_ENABLED(decode_update_thread_context), |
||
5218 | .profiles = NULL_IF_CONFIG_SMALL(profiles), |
||
5219 | .priv_class = &h264_class, |
||
5220 | }; |
||
5221 | |||
#if CONFIG_H264_VDPAU_DECODER
/* Codec descriptor for the VDPAU variant. It uses the same init, decode,
 * flush and close callbacks as ff_h264_decoder but advertises
 * CODEC_CAP_HWACCEL_VDPAU and a single pixel format. */
AVCodec ff_h264_vdpau_decoder = {
    /* identification */
    .name           = "h264_vdpau",
    .long_name      = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_H264,
    .profiles       = NULL_IF_CONFIG_SMALL(profiles),
    .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_VDPAU_H264,
                                                     AV_PIX_FMT_NONE },

    /* private state */
    .priv_data_size = sizeof(H264Context),
    .priv_class     = &h264_vdpau_class,

    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,

    /* callbacks */
    .init           = ff_h264_decode_init,
    .decode         = decode_frame,
    .flush          = flush_dpb,
    .close          = h264_decode_end,
};
#endif