Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5361 | serge | 1 | /* |
2 | * Copyright © 2012 Intel Corporation |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the |
||
6 | * "Software"), to deal in the Software without restriction, including |
||
7 | * without limitation the rights to use, copy, modify, merge, publish, |
||
8 | * distribute, sub license, and/or sell copies of the Software, and to |
||
9 | * permit persons to whom the Software is furnished to do so, subject to |
||
10 | * the following conditions: |
||
11 | * |
||
12 | * The above copyright notice and this permission notice (including the |
||
13 | * next paragraph) shall be included in all copies or substantial portions |
||
14 | * of the Software. |
||
15 | * |
||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
||
17 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||
18 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
||
19 | * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR |
||
20 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||
21 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||
22 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
23 | * |
||
24 | * Authors: |
||
25 | * Xiang Haihao |
||
26 | * Zhao Yakui |
||
27 | * |
||
28 | */ |
||
29 | |||
30 | #include |
||
31 | #include |
||
32 | #include |
||
33 | #include |
||
34 | #include |
||
35 | |||
36 | #include "intel_batchbuffer.h" |
||
37 | #include "i965_defines.h" |
||
38 | #include "i965_structs.h" |
||
39 | #include "i965_drv_video.h" |
||
40 | #include "i965_encoder.h" |
||
41 | #include "i965_encoder_utils.h" |
||
42 | #include "gen6_mfc.h" |
||
43 | #include "gen6_vme.h" |
||
44 | #include "intel_media.h" |
||
45 | |||
/*
 * Clamp the lvalue x into the inclusive range [min, max], in place.
 *
 * x is evaluated several times, so it must not have side effects.
 * The body is wrapped in do { } while (0) so that "BRC_CLIP(...);" is exactly
 * one statement and remains valid inside an unbraced if/else.
 */
#define BRC_CLIP(x, min, max)                                               \
    do {                                                                    \
        (x) = (((x) > (max)) ? (max) : (((x) < (min)) ? (min) : (x)));      \
    } while (0)
||
50 | |||
51 | #define BRC_P_B_QP_DIFF 4 |
||
52 | #define BRC_I_P_QP_DIFF 2 |
||
53 | #define BRC_I_B_QP_DIFF (BRC_I_P_QP_DIFF + BRC_P_B_QP_DIFF) |
||
54 | |||
55 | #define BRC_PWEIGHT 0.6 /* weight of a P slice relative to an I slice */ |
||
56 | #define BRC_BWEIGHT 0.25 /* weight of a B slice relative to an I slice */ |
||
57 | |||
58 | #define BRC_QP_MAX_CHANGE 5 /* maximum qp modification */ |
||
59 | #define BRC_CY 0.1 /* weight for */ |
||
60 | #define BRC_CX_UNDERFLOW 5. |
||
61 | #define BRC_CX_OVERFLOW -4. |
||
62 | |||
63 | #define BRC_PI_0_5 1.5707963267948966192313216916398 |
||
64 | |||
65 | #ifndef HAVE_LOG2F |
||
66 | #define log2f(x) (logf(x)/(float)M_LN2) |
||
67 | #endif |
||
68 | |||
69 | int intel_avc_enc_slice_type_fixup(int slice_type) |
||
70 | { |
||
71 | if (slice_type == SLICE_TYPE_SP || |
||
72 | slice_type == SLICE_TYPE_P) |
||
73 | slice_type = SLICE_TYPE_P; |
||
74 | else if (slice_type == SLICE_TYPE_SI || |
||
75 | slice_type == SLICE_TYPE_I) |
||
76 | slice_type = SLICE_TYPE_I; |
||
77 | else { |
||
78 | if (slice_type != SLICE_TYPE_B) |
||
79 | WARN_ONCE("Invalid slice type for H.264 encoding!\n"); |
||
80 | |||
81 | slice_type = SLICE_TYPE_B; |
||
82 | } |
||
83 | |||
84 | return slice_type; |
||
85 | } |
||
86 | |||
87 | static void |
||
88 | intel_mfc_bit_rate_control_context_init(struct encode_state *encode_state, |
||
89 | struct gen6_mfc_context *mfc_context) |
||
90 | { |
||
91 | VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; |
||
92 | int width_in_mbs = pSequenceParameter->picture_width_in_mbs; |
||
93 | int height_in_mbs = pSequenceParameter->picture_height_in_mbs; |
||
94 | float fps = pSequenceParameter->time_scale * 0.5 / pSequenceParameter->num_units_in_tick ; |
||
95 | int inter_mb_size = pSequenceParameter->bits_per_second * 1.0 / (fps+4.0) / width_in_mbs / height_in_mbs; |
||
96 | int intra_mb_size = inter_mb_size * 5.0; |
||
97 | int i; |
||
98 | |||
99 | mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_mb_size = intra_mb_size; |
||
100 | mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_frame_size = intra_mb_size * width_in_mbs * height_in_mbs; |
||
101 | mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_mb_size = inter_mb_size; |
||
102 | mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs; |
||
103 | mfc_context->bit_rate_control_context[SLICE_TYPE_B].target_mb_size = inter_mb_size; |
||
104 | mfc_context->bit_rate_control_context[SLICE_TYPE_B].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs; |
||
105 | |||
106 | for(i = 0 ; i < 3; i++) { |
||
107 | mfc_context->bit_rate_control_context[i].QpPrimeY = 26; |
||
108 | mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6; |
||
109 | mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6; |
||
110 | mfc_context->bit_rate_control_context[i].GrowInit = 6; |
||
111 | mfc_context->bit_rate_control_context[i].GrowResistance = 4; |
||
112 | mfc_context->bit_rate_control_context[i].ShrinkInit = 6; |
||
113 | mfc_context->bit_rate_control_context[i].ShrinkResistance = 4; |
||
114 | |||
115 | mfc_context->bit_rate_control_context[i].Correct[0] = 8; |
||
116 | mfc_context->bit_rate_control_context[i].Correct[1] = 4; |
||
117 | mfc_context->bit_rate_control_context[i].Correct[2] = 2; |
||
118 | mfc_context->bit_rate_control_context[i].Correct[3] = 2; |
||
119 | mfc_context->bit_rate_control_context[i].Correct[4] = 4; |
||
120 | mfc_context->bit_rate_control_context[i].Correct[5] = 8; |
||
121 | } |
||
122 | |||
123 | mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord = (intra_mb_size + 16)/ 16; |
||
124 | mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord = (inter_mb_size + 16)/ 16; |
||
125 | mfc_context->bit_rate_control_context[SLICE_TYPE_B].TargetSizeInWord = (inter_mb_size + 16)/ 16; |
||
126 | |||
127 | mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord * 1.5; |
||
128 | mfc_context->bit_rate_control_context[SLICE_TYPE_P].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord * 1.5; |
||
129 | mfc_context->bit_rate_control_context[SLICE_TYPE_B].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_B].TargetSizeInWord * 1.5; |
||
130 | } |
||
131 | |||
132 | static void intel_mfc_brc_init(struct encode_state *encode_state, |
||
133 | struct intel_encoder_context* encoder_context) |
||
134 | { |
||
135 | struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; |
||
136 | VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; |
||
137 | VAEncMiscParameterBuffer* pMiscParamHRD = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeHRD]->buffer; |
||
138 | VAEncMiscParameterHRD* pParameterHRD = (VAEncMiscParameterHRD*)pMiscParamHRD->data; |
||
139 | double bitrate = pSequenceParameter->bits_per_second; |
||
140 | double framerate = (double)pSequenceParameter->time_scale /(2 * (double)pSequenceParameter->num_units_in_tick); |
||
141 | int inum = 1, pnum = 0, bnum = 0; /* Gop structure: number of I, P, B frames in the Gop. */ |
||
142 | int intra_period = pSequenceParameter->intra_period; |
||
143 | int ip_period = pSequenceParameter->ip_period; |
||
144 | double qp1_size = 0.1 * 8 * 3 * (pSequenceParameter->picture_width_in_mbs<<4) * (pSequenceParameter->picture_height_in_mbs<<4)/2; |
||
145 | double qp51_size = 0.001 * 8 * 3 * (pSequenceParameter->picture_width_in_mbs<<4) * (pSequenceParameter->picture_height_in_mbs<<4)/2; |
||
146 | double bpf; |
||
147 | |||
148 | if (pSequenceParameter->ip_period) { |
||
149 | pnum = (intra_period + ip_period - 1)/ip_period - 1; |
||
150 | bnum = intra_period - inum - pnum; |
||
151 | } |
||
152 | |||
153 | mfc_context->brc.mode = encoder_context->rate_control_mode; |
||
154 | |||
155 | mfc_context->brc.target_frame_size[SLICE_TYPE_I] = (int)((double)((bitrate * intra_period)/framerate) / |
||
156 | (double)(inum + BRC_PWEIGHT * pnum + BRC_BWEIGHT * bnum)); |
||
157 | mfc_context->brc.target_frame_size[SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I]; |
||
158 | mfc_context->brc.target_frame_size[SLICE_TYPE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I]; |
||
159 | |||
160 | mfc_context->brc.gop_nums[SLICE_TYPE_I] = inum; |
||
161 | mfc_context->brc.gop_nums[SLICE_TYPE_P] = pnum; |
||
162 | mfc_context->brc.gop_nums[SLICE_TYPE_B] = bnum; |
||
163 | |||
164 | bpf = mfc_context->brc.bits_per_frame = bitrate/framerate; |
||
165 | |||
166 | mfc_context->hrd.buffer_size = (double)pParameterHRD->buffer_size; |
||
167 | mfc_context->hrd.current_buffer_fullness = |
||
168 | (double)(pParameterHRD->initial_buffer_fullness < mfc_context->hrd.buffer_size)? |
||
169 | pParameterHRD->initial_buffer_fullness: mfc_context->hrd.buffer_size/2.; |
||
170 | mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size/2.; |
||
171 | mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size/qp1_size; |
||
172 | mfc_context->hrd.violation_noted = 0; |
||
173 | |||
174 | if ((bpf > qp51_size) && (bpf < qp1_size)) { |
||
175 | mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 51 - 50*(bpf - qp51_size)/(qp1_size - qp51_size); |
||
176 | } |
||
177 | else if (bpf >= qp1_size) |
||
178 | mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 1; |
||
179 | else if (bpf <= qp51_size) |
||
180 | mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 51; |
||
181 | |||
182 | mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY = mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY; |
||
183 | mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY = mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY; |
||
184 | |||
185 | BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY, 1, 51); |
||
186 | BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY, 1, 51); |
||
187 | BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY, 1, 51); |
||
188 | } |
||
189 | |||
190 | int intel_mfc_update_hrd(struct encode_state *encode_state, |
||
191 | struct gen6_mfc_context *mfc_context, |
||
192 | int frame_bits) |
||
193 | { |
||
194 | double prev_bf = mfc_context->hrd.current_buffer_fullness; |
||
195 | |||
196 | mfc_context->hrd.current_buffer_fullness -= frame_bits; |
||
197 | |||
198 | if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness <= 0.) { |
||
199 | mfc_context->hrd.current_buffer_fullness = prev_bf; |
||
200 | return BRC_UNDERFLOW; |
||
201 | } |
||
202 | |||
203 | mfc_context->hrd.current_buffer_fullness += mfc_context->brc.bits_per_frame; |
||
204 | if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness > mfc_context->hrd.buffer_size) { |
||
205 | if (mfc_context->brc.mode == VA_RC_VBR) |
||
206 | mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size; |
||
207 | else { |
||
208 | mfc_context->hrd.current_buffer_fullness = prev_bf; |
||
209 | return BRC_OVERFLOW; |
||
210 | } |
||
211 | } |
||
212 | return BRC_NO_HRD_VIOLATION; |
||
213 | } |
||
214 | |||
215 | int intel_mfc_brc_postpack(struct encode_state *encode_state, |
||
216 | struct gen6_mfc_context *mfc_context, |
||
217 | int frame_bits) |
||
218 | { |
||
219 | gen6_brc_status sts = BRC_NO_HRD_VIOLATION; |
||
220 | VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; |
||
221 | int slicetype = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type); |
||
222 | int qpi = mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY; |
||
223 | int qpp = mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY; |
||
224 | int qpb = mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY; |
||
225 | int qp; // quantizer of previously encoded slice of current type |
||
226 | int qpn; // predicted quantizer for next frame of current type in integer format |
||
227 | double qpf; // predicted quantizer for next frame of current type in float format |
||
228 | double delta_qp; // QP correction |
||
229 | int target_frame_size, frame_size_next; |
||
230 | /* Notes: |
||
231 | * x - how far we are from HRD buffer borders |
||
232 | * y - how far we are from target HRD buffer fullness |
||
233 | */ |
||
234 | double x, y; |
||
235 | double frame_size_alpha; |
||
236 | |||
237 | qp = mfc_context->bit_rate_control_context[slicetype].QpPrimeY; |
||
238 | |||
239 | target_frame_size = mfc_context->brc.target_frame_size[slicetype]; |
||
240 | if (mfc_context->hrd.buffer_capacity < 5) |
||
241 | frame_size_alpha = 0; |
||
242 | else |
||
243 | frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype]; |
||
244 | if (frame_size_alpha > 30) frame_size_alpha = 30; |
||
245 | frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) / |
||
246 | (double)(frame_size_alpha + 1.); |
||
247 | |||
248 | /* frame_size_next: avoiding negative number and too small value */ |
||
249 | if ((double)frame_size_next < (double)(target_frame_size * 0.25)) |
||
250 | frame_size_next = (int)((double)target_frame_size * 0.25); |
||
251 | |||
252 | qpf = (double)qp * target_frame_size / frame_size_next; |
||
253 | qpn = (int)(qpf + 0.5); |
||
254 | |||
255 | if (qpn == qp) { |
||
256 | /* setting qpn we round qpf making mistakes: now we are trying to compensate this */ |
||
257 | mfc_context->brc.qpf_rounding_accumulator += qpf - qpn; |
||
258 | if (mfc_context->brc.qpf_rounding_accumulator > 1.0) { |
||
259 | qpn++; |
||
260 | mfc_context->brc.qpf_rounding_accumulator = 0.; |
||
261 | } else if (mfc_context->brc.qpf_rounding_accumulator < -1.0) { |
||
262 | qpn--; |
||
263 | mfc_context->brc.qpf_rounding_accumulator = 0.; |
||
264 | } |
||
265 | } |
||
266 | /* making sure that QP is not changing too fast */ |
||
267 | if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE; |
||
268 | else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE; |
||
269 | /* making sure that with QP predictions we did do not leave QPs range */ |
||
270 | BRC_CLIP(qpn, 1, 51); |
||
271 | |||
272 | /* checking wthether HRD compliance is still met */ |
||
273 | sts = intel_mfc_update_hrd(encode_state, mfc_context, frame_bits); |
||
274 | |||
275 | /* calculating QP delta as some function*/ |
||
276 | x = mfc_context->hrd.target_buffer_fullness - mfc_context->hrd.current_buffer_fullness; |
||
277 | if (x > 0) { |
||
278 | x /= mfc_context->hrd.target_buffer_fullness; |
||
279 | y = mfc_context->hrd.current_buffer_fullness; |
||
280 | } |
||
281 | else { |
||
282 | x /= (mfc_context->hrd.buffer_size - mfc_context->hrd.target_buffer_fullness); |
||
283 | y = mfc_context->hrd.buffer_size - mfc_context->hrd.current_buffer_fullness; |
||
284 | } |
||
285 | if (y < 0.01) y = 0.01; |
||
286 | if (x > 1) x = 1; |
||
287 | else if (x < -1) x = -1; |
||
288 | |||
289 | delta_qp = BRC_QP_MAX_CHANGE*exp(-1/y)*sin(BRC_PI_0_5 * x); |
||
290 | qpn = (int)(qpn + delta_qp + 0.5); |
||
291 | |||
292 | /* making sure that with QP predictions we did do not leave QPs range */ |
||
293 | BRC_CLIP(qpn, 1, 51); |
||
294 | |||
295 | if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation |
||
296 | /* correcting QPs of slices of other types */ |
||
297 | if (slicetype == SLICE_TYPE_P) { |
||
298 | if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2) |
||
299 | mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1; |
||
300 | if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2) |
||
301 | mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1; |
||
302 | } else if (slicetype == SLICE_TYPE_I) { |
||
303 | if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4) |
||
304 | mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2; |
||
305 | if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2) |
||
306 | mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2; |
||
307 | } else { // SLICE_TYPE_B |
||
308 | if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2) |
||
309 | mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1; |
||
310 | if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4) |
||
311 | mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2; |
||
312 | } |
||
313 | BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY, 1, 51); |
||
314 | BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY, 1, 51); |
||
315 | BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY, 1, 51); |
||
316 | } else if (sts == BRC_UNDERFLOW) { // underflow |
||
317 | if (qpn <= qp) qpn = qp + 1; |
||
318 | if (qpn > 51) { |
||
319 | qpn = 51; |
||
320 | sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP |
||
321 | } |
||
322 | } else if (sts == BRC_OVERFLOW) { |
||
323 | if (qpn >= qp) qpn = qp - 1; |
||
324 | if (qpn < 1) { // < 0 (?) overflow with minQP |
||
325 | qpn = 1; |
||
326 | sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done |
||
327 | } |
||
328 | } |
||
329 | |||
330 | mfc_context->bit_rate_control_context[slicetype].QpPrimeY = qpn; |
||
331 | |||
332 | return sts; |
||
333 | } |
||
334 | |||
335 | static void intel_mfc_hrd_context_init(struct encode_state *encode_state, |
||
336 | struct intel_encoder_context *encoder_context) |
||
337 | { |
||
338 | struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; |
||
339 | VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; |
||
340 | unsigned int rate_control_mode = encoder_context->rate_control_mode; |
||
341 | int target_bit_rate = pSequenceParameter->bits_per_second; |
||
342 | |||
343 | // current we only support CBR mode. |
||
344 | if (rate_control_mode == VA_RC_CBR) { |
||
345 | mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10; |
||
346 | mfc_context->vui_hrd.i_cpb_size_value = (target_bit_rate * 8) >> 10; |
||
347 | mfc_context->vui_hrd.i_initial_cpb_removal_delay = mfc_context->vui_hrd.i_cpb_size_value * 0.5 * 1024 / target_bit_rate * 90000; |
||
348 | mfc_context->vui_hrd.i_cpb_removal_delay = 2; |
||
349 | mfc_context->vui_hrd.i_frame_number = 0; |
||
350 | |||
351 | mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24; |
||
352 | mfc_context->vui_hrd.i_cpb_removal_delay_length = 24; |
||
353 | mfc_context->vui_hrd.i_dpb_output_delay_length = 24; |
||
354 | } |
||
355 | |||
356 | } |
||
357 | |||
358 | void |
||
359 | intel_mfc_hrd_context_update(struct encode_state *encode_state, |
||
360 | struct gen6_mfc_context *mfc_context) |
||
361 | { |
||
362 | mfc_context->vui_hrd.i_frame_number++; |
||
363 | } |
||
364 | |||
365 | int intel_mfc_interlace_check(VADriverContextP ctx, |
||
366 | struct encode_state *encode_state, |
||
367 | struct intel_encoder_context *encoder_context) |
||
368 | { |
||
369 | struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; |
||
370 | VAEncSliceParameterBufferH264 *pSliceParameter; |
||
371 | int i; |
||
372 | int mbCount = 0; |
||
373 | int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; |
||
374 | int height_in_mbs = (mfc_context->surface_state.height + 15) / 16; |
||
375 | |||
376 | for (i = 0; i < encode_state->num_slice_params_ext; i++) { |
||
377 | pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[i]->buffer; |
||
378 | mbCount += pSliceParameter->num_macroblocks; |
||
379 | } |
||
380 | |||
381 | if ( mbCount == ( width_in_mbs * height_in_mbs ) ) |
||
382 | return 0; |
||
383 | |||
384 | return 1; |
||
385 | } |
||
386 | |||
387 | /* |
||
388 | * Check whether the parameters related with CBR are updated and decide whether |
||
389 | * it needs to reinitialize the configuration related with CBR. |
||
390 | * Currently it will check the following parameters: |
||
391 | * bits_per_second |
||
392 | * frame_rate |
||
393 | * gop_configuration(intra_period, ip_period, intra_idr_period) |
||
394 | */ |
||
395 | static bool intel_mfc_brc_updated_check(struct encode_state *encode_state, |
||
396 | struct intel_encoder_context *encoder_context) |
||
397 | { |
||
398 | unsigned int rate_control_mode = encoder_context->rate_control_mode; |
||
399 | struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; |
||
400 | double cur_fps, cur_bitrate; |
||
401 | VAEncSequenceParameterBufferH264 *pSequenceParameter; |
||
402 | |||
403 | |||
404 | if (rate_control_mode != VA_RC_CBR) { |
||
405 | return false; |
||
406 | } |
||
407 | |||
408 | pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; |
||
409 | |||
410 | cur_bitrate = pSequenceParameter->bits_per_second; |
||
411 | cur_fps = (double)pSequenceParameter->time_scale / |
||
412 | (2 * (double)pSequenceParameter->num_units_in_tick); |
||
413 | |||
414 | if ((cur_bitrate == mfc_context->brc.saved_bps) && |
||
415 | (cur_fps == mfc_context->brc.saved_fps) && |
||
416 | (pSequenceParameter->intra_period == mfc_context->brc.saved_intra_period) && |
||
417 | (pSequenceParameter->intra_idr_period == mfc_context->brc.saved_idr_period) && |
||
418 | (pSequenceParameter->intra_period == mfc_context->brc.saved_intra_period)) { |
||
419 | /* the parameters related with CBR are not updaetd */ |
||
420 | return false; |
||
421 | } |
||
422 | |||
423 | mfc_context->brc.saved_ip_period = pSequenceParameter->ip_period; |
||
424 | mfc_context->brc.saved_intra_period = pSequenceParameter->intra_period; |
||
425 | mfc_context->brc.saved_idr_period = pSequenceParameter->intra_idr_period; |
||
426 | mfc_context->brc.saved_fps = cur_fps; |
||
427 | mfc_context->brc.saved_bps = cur_bitrate; |
||
428 | return true; |
||
429 | } |
||
430 | |||
431 | void intel_mfc_brc_prepare(struct encode_state *encode_state, |
||
432 | struct intel_encoder_context *encoder_context) |
||
433 | { |
||
434 | unsigned int rate_control_mode = encoder_context->rate_control_mode; |
||
435 | struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; |
||
436 | |||
437 | if (rate_control_mode == VA_RC_CBR) { |
||
438 | bool brc_updated; |
||
439 | assert(encoder_context->codec != CODEC_MPEG2); |
||
440 | |||
441 | brc_updated = intel_mfc_brc_updated_check(encode_state, encoder_context); |
||
442 | |||
443 | /*Programing bit rate control */ |
||
444 | if ((mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord == 0) || |
||
445 | brc_updated) { |
||
446 | intel_mfc_bit_rate_control_context_init(encode_state, mfc_context); |
||
447 | intel_mfc_brc_init(encode_state, encoder_context); |
||
448 | } |
||
449 | |||
450 | /*Programing HRD control */ |
||
451 | if ((mfc_context->vui_hrd.i_cpb_size_value == 0) || brc_updated ) |
||
452 | intel_mfc_hrd_context_init(encode_state, encoder_context); |
||
453 | } |
||
454 | } |
||
455 | |||
/*
 * Compute how many leading bytes of a packed header are counted as the
 * "skip emulation" prefix: everything up to and including the first start
 * code (00 00 01 or 00 00 00 01), plus the NAL unit header byte, plus three
 * more header bytes for NAL unit types 14, 20 and 21.
 *
 * buf         - packed header data
 * bits_length - length of the header in bits; the search covers the length
 *               rounded up to a multiple of 32 bits, minus the last 4 bytes
 *
 * Returns the byte count, or 0 (after a one-time warning) when no start code
 * is found.  A one-time warning is also emitted when the count exceeds
 * HW_MAX_SKIP_LENGTH, but the count is still returned.
 */
static int intel_avc_find_skipemulcnt(unsigned char *buf, int bits_length)
{
    int byte_length;
    int start_pos = -1;
    int pos;
    int prefix_len;
    int nal_unit_type;
    int skip_cnt;

#define NAL_UNIT_TYPE_MASK 0x1f
#define HW_MAX_SKIP_LENGTH 15

    byte_length = ALIGN(bits_length, 32) >> 3;

    /* look for the first 3-byte or 4-byte start code */
    for (pos = 0; pos < byte_length - 4; pos++) {
        if (buf[pos] != 0 || buf[pos + 1] != 0)
            continue;

        if (buf[pos + 2] == 1 ||
            (buf[pos + 2] == 0 && buf[pos + 3] == 1)) {
            start_pos = pos;
            break;
        }
    }

    if (start_pos < 0) {
        /* warning message is complained. But anyway it will be inserted. */
        WARN_ONCE("Invalid packed header data. "
                  "Can't find the 000001 start_prefix code\n");
        return 0;
    }

    /* a 4-byte start code carries one extra zero byte in front of 00 00 01 */
    prefix_len = (buf[start_pos + 2] == 1) ? 3 : 4;
    skip_cnt = start_pos + prefix_len;

    /* the unit header byte is accounted */
    nal_unit_type = (buf[skip_cnt]) & NAL_UNIT_TYPE_MASK;
    skip_cnt += 1;

    switch (nal_unit_type) {
    case 14:
    case 20:
    case 21:
        /* more unit header bytes are accounted for MVC/SVC */
        skip_cnt += 3;
        break;
    default:
        break;
    }

    if (skip_cnt > HW_MAX_SKIP_LENGTH) {
        WARN_ONCE("Too many leading zeros are padded for packed data. "
                  "It is beyond the HW range.!!!\n");
    }
    return skip_cnt;
}
||
507 | |||
508 | void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx, |
||
509 | struct encode_state *encode_state, |
||
510 | struct intel_encoder_context *encoder_context, |
||
511 | struct intel_batchbuffer *slice_batch) |
||
512 | { |
||
513 | struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; |
||
514 | int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS); |
||
515 | unsigned int rate_control_mode = encoder_context->rate_control_mode; |
||
516 | unsigned int skip_emul_byte_cnt; |
||
517 | |||
518 | if (encode_state->packed_header_data[idx]) { |
||
519 | VAEncPackedHeaderParameterBuffer *param = NULL; |
||
520 | unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer; |
||
521 | unsigned int length_in_bits; |
||
522 | |||
523 | assert(encode_state->packed_header_param[idx]); |
||
524 | param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer; |
||
525 | length_in_bits = param->bit_length; |
||
526 | |||
527 | skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits); |
||
528 | mfc_context->insert_object(ctx, |
||
529 | encoder_context, |
||
530 | header_data, |
||
531 | ALIGN(length_in_bits, 32) >> 5, |
||
532 | length_in_bits & 0x1f, |
||
533 | skip_emul_byte_cnt, |
||
534 | 0, |
||
535 | 0, |
||
536 | !param->has_emulation_bytes, |
||
537 | slice_batch); |
||
538 | } |
||
539 | |||
540 | idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS); |
||
541 | |||
542 | if (encode_state->packed_header_data[idx]) { |
||
543 | VAEncPackedHeaderParameterBuffer *param = NULL; |
||
544 | unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer; |
||
545 | unsigned int length_in_bits; |
||
546 | |||
547 | assert(encode_state->packed_header_param[idx]); |
||
548 | param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer; |
||
549 | length_in_bits = param->bit_length; |
||
550 | |||
551 | skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits); |
||
552 | |||
553 | mfc_context->insert_object(ctx, |
||
554 | encoder_context, |
||
555 | header_data, |
||
556 | ALIGN(length_in_bits, 32) >> 5, |
||
557 | length_in_bits & 0x1f, |
||
558 | skip_emul_byte_cnt, |
||
559 | 0, |
||
560 | 0, |
||
561 | !param->has_emulation_bytes, |
||
562 | slice_batch); |
||
563 | } |
||
564 | |||
565 | idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI); |
||
566 | |||
567 | if (encode_state->packed_header_data[idx]) { |
||
568 | VAEncPackedHeaderParameterBuffer *param = NULL; |
||
569 | unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer; |
||
570 | unsigned int length_in_bits; |
||
571 | |||
572 | assert(encode_state->packed_header_param[idx]); |
||
573 | param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer; |
||
574 | length_in_bits = param->bit_length; |
||
575 | |||
576 | skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits); |
||
577 | mfc_context->insert_object(ctx, |
||
578 | encoder_context, |
||
579 | header_data, |
||
580 | ALIGN(length_in_bits, 32) >> 5, |
||
581 | length_in_bits & 0x1f, |
||
582 | skip_emul_byte_cnt, |
||
583 | 0, |
||
584 | 0, |
||
585 | !param->has_emulation_bytes, |
||
586 | slice_batch); |
||
587 | } else if (rate_control_mode == VA_RC_CBR) { |
||
588 | // this is frist AU |
||
589 | struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; |
||
590 | |||
591 | unsigned char *sei_data = NULL; |
||
592 | |||
593 | int length_in_bits = build_avc_sei_buffer_timing( |
||
594 | mfc_context->vui_hrd.i_initial_cpb_removal_delay_length, |
||
595 | mfc_context->vui_hrd.i_initial_cpb_removal_delay, |
||
596 | 0, |
||
597 | mfc_context->vui_hrd.i_cpb_removal_delay_length, mfc_context->vui_hrd.i_cpb_removal_delay * mfc_context->vui_hrd.i_frame_number, |
||
598 | mfc_context->vui_hrd.i_dpb_output_delay_length, |
||
599 | 0, |
||
600 | &sei_data); |
||
601 | mfc_context->insert_object(ctx, |
||
602 | encoder_context, |
||
603 | (unsigned int *)sei_data, |
||
604 | ALIGN(length_in_bits, 32) >> 5, |
||
605 | length_in_bits & 0x1f, |
||
606 | 4, |
||
607 | 0, |
||
608 | 0, |
||
609 | 1, |
||
610 | slice_batch); |
||
611 | free(sei_data); |
||
612 | } |
||
613 | } |
||
614 | |||
615 | VAStatus intel_mfc_avc_prepare(VADriverContextP ctx, |
||
616 | struct encode_state *encode_state, |
||
617 | struct intel_encoder_context *encoder_context) |
||
618 | { |
||
619 | struct i965_driver_data *i965 = i965_driver_data(ctx); |
||
620 | struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; |
||
621 | struct object_surface *obj_surface; |
||
622 | struct object_buffer *obj_buffer; |
||
623 | GenAvcSurface *gen6_avc_surface; |
||
624 | dri_bo *bo; |
||
625 | VAStatus vaStatus = VA_STATUS_SUCCESS; |
||
626 | int i, j, enable_avc_ildb = 0; |
||
627 | VAEncSliceParameterBufferH264 *slice_param; |
||
628 | struct i965_coded_buffer_segment *coded_buffer_segment; |
||
629 | VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; |
||
630 | int width_in_mbs = pSequenceParameter->picture_width_in_mbs; |
||
631 | int height_in_mbs = pSequenceParameter->picture_height_in_mbs; |
||
632 | |||
633 | if (IS_GEN6(i965->intel.device_info)) { |
||
634 | /* On the SNB it should be fixed to 128 for the DMV buffer */ |
||
635 | width_in_mbs = 128; |
||
636 | } |
||
637 | |||
638 | for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) { |
||
639 | assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer); |
||
640 | slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer; |
||
641 | |||
642 | for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) { |
||
643 | assert((slice_param->slice_type == SLICE_TYPE_I) || |
||
644 | (slice_param->slice_type == SLICE_TYPE_SI) || |
||
645 | (slice_param->slice_type == SLICE_TYPE_P) || |
||
646 | (slice_param->slice_type == SLICE_TYPE_SP) || |
||
647 | (slice_param->slice_type == SLICE_TYPE_B)); |
||
648 | |||
649 | if (slice_param->disable_deblocking_filter_idc != 1) { |
||
650 | enable_avc_ildb = 1; |
||
651 | break; |
||
652 | } |
||
653 | |||
654 | slice_param++; |
||
655 | } |
||
656 | } |
||
657 | |||
658 | /*Setup all the input&output object*/ |
||
659 | |||
660 | /* Setup current frame and current direct mv buffer*/ |
||
661 | obj_surface = encode_state->reconstructed_object; |
||
662 | i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); |
||
663 | |||
664 | if ( obj_surface->private_data == NULL) { |
||
665 | gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1); |
||
666 | gen6_avc_surface->dmv_top = |
||
667 | dri_bo_alloc(i965->intel.bufmgr, |
||
668 | "Buffer", |
||
669 | 68 * width_in_mbs * height_in_mbs, |
||
670 | 64); |
||
671 | gen6_avc_surface->dmv_bottom = |
||
672 | dri_bo_alloc(i965->intel.bufmgr, |
||
673 | "Buffer", |
||
674 | 68 * width_in_mbs * height_in_mbs, |
||
675 | 64); |
||
676 | assert(gen6_avc_surface->dmv_top); |
||
677 | assert(gen6_avc_surface->dmv_bottom); |
||
678 | obj_surface->private_data = (void *)gen6_avc_surface; |
||
679 | obj_surface->free_private_data = (void *)gen_free_avc_surface; |
||
680 | } |
||
681 | gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data; |
||
682 | mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top; |
||
683 | mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom; |
||
684 | dri_bo_reference(gen6_avc_surface->dmv_top); |
||
685 | dri_bo_reference(gen6_avc_surface->dmv_bottom); |
||
686 | |||
687 | if (enable_avc_ildb) { |
||
688 | mfc_context->post_deblocking_output.bo = obj_surface->bo; |
||
689 | dri_bo_reference(mfc_context->post_deblocking_output.bo); |
||
690 | } else { |
||
691 | mfc_context->pre_deblocking_output.bo = obj_surface->bo; |
||
692 | dri_bo_reference(mfc_context->pre_deblocking_output.bo); |
||
693 | } |
||
694 | |||
695 | mfc_context->surface_state.width = obj_surface->orig_width; |
||
696 | mfc_context->surface_state.height = obj_surface->orig_height; |
||
697 | mfc_context->surface_state.w_pitch = obj_surface->width; |
||
698 | mfc_context->surface_state.h_pitch = obj_surface->height; |
||
699 | |||
700 | /* Setup reference frames and direct mv buffers*/ |
||
701 | for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) { |
||
702 | obj_surface = encode_state->reference_objects[i]; |
||
703 | |||
704 | if (obj_surface && obj_surface->bo) { |
||
705 | mfc_context->reference_surfaces[i].bo = obj_surface->bo; |
||
706 | dri_bo_reference(obj_surface->bo); |
||
707 | |||
708 | /* Check DMV buffer */ |
||
709 | if ( obj_surface->private_data == NULL) { |
||
710 | |||
711 | gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1); |
||
712 | gen6_avc_surface->dmv_top = |
||
713 | dri_bo_alloc(i965->intel.bufmgr, |
||
714 | "Buffer", |
||
715 | 68 * width_in_mbs * height_in_mbs, |
||
716 | 64); |
||
717 | gen6_avc_surface->dmv_bottom = |
||
718 | dri_bo_alloc(i965->intel.bufmgr, |
||
719 | "Buffer", |
||
720 | 68 * width_in_mbs * height_in_mbs, |
||
721 | 64); |
||
722 | assert(gen6_avc_surface->dmv_top); |
||
723 | assert(gen6_avc_surface->dmv_bottom); |
||
724 | obj_surface->private_data = gen6_avc_surface; |
||
725 | obj_surface->free_private_data = gen_free_avc_surface; |
||
726 | } |
||
727 | |||
728 | gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data; |
||
729 | /* Setup DMV buffer */ |
||
730 | mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top; |
||
731 | mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom; |
||
732 | dri_bo_reference(gen6_avc_surface->dmv_top); |
||
733 | dri_bo_reference(gen6_avc_surface->dmv_bottom); |
||
734 | } else { |
||
735 | break; |
||
736 | } |
||
737 | } |
||
738 | |||
739 | mfc_context->uncompressed_picture_source.bo = encode_state->input_yuv_object->bo; |
||
740 | dri_bo_reference(mfc_context->uncompressed_picture_source.bo); |
||
741 | |||
742 | obj_buffer = encode_state->coded_buf_object; |
||
743 | bo = obj_buffer->buffer_store->bo; |
||
744 | mfc_context->mfc_indirect_pak_bse_object.bo = bo; |
||
745 | mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE; |
||
746 | mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000); |
||
747 | dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo); |
||
748 | |||
749 | dri_bo_map(bo, 1); |
||
750 | coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual; |
||
751 | coded_buffer_segment->mapped = 0; |
||
752 | coded_buffer_segment->codec = encoder_context->codec; |
||
753 | dri_bo_unmap(bo); |
||
754 | |||
755 | return vaStatus; |
||
756 | } |
||
/*
 * The LUT stores every cost as a pair of 4-bit fields, (shift, base),
 * packed as (shift << 4) | base and standing for the value
 *     2^K * X = value            (K = shift, X = base).
 * So it is necessary to convert a cost into the nearest LUT format.
 * The derivation is:
 *     2^K * X = 2^n * (1 + deltaX)
 *     K + log2(X) = n + log2(1 + deltaX)
 *     log2(X) = n - K + log2(1 + deltaX)
 * As X is in the range of [1, 15]
 *     4 > n - K + log2(1 + deltaX) >= 0
 *     =>  n + log2(1 + deltaX) >= K > n - 4 + log2(1 + deltaX)
 * Then we can derive the corresponding K and get the nearest LUT format.
 */

/*
 * Convert a cost into the (shift, base) LUT encoding described above.
 *
 * value: the cost to encode; values <= 0 encode as 0.
 * max:   the largest LUT encoding the caller accepts, in the same
 *        (shift << 4) | base format.
 *
 * Returns the encoding whose decoded value (base << shift) is nearest to
 * `value`, or `max` when that decoded value would exceed what `max`
 * decodes to.  Costs so large that their shift does not fit in the 4-bit
 * shift field are clamped to `max` as well.
 */
int intel_format_lutvalue(int value, int max)
{
    int ret;
    int logvalue, temp1, temp2;

    if (value <= 0)
        return 0;

    /* floor(log2(value)) using integers only, so the result is exact for
     * every positive int and no floating point rounding is involved. */
    logvalue = 0;
    while ((value >> (logvalue + 1)) != 0)
        logvalue++;

    /* Decoded magnitude of the upper limit. */
    temp2 = (max & 0xf) << ((max & 0xf0) >> 4);

    if (logvalue < 4) {
        /* value fits in the base field with a zero shift */
        ret = value;
        temp1 = value;
    } else {
        long long error = value;
        int best_shift = -1;
        int best_base = 0;
        int j;

        /* Try the four shifts that can bring value into [1, 15].  As
         * logvalue >= 4 here the shift is always >= 1.  64-bit arithmetic
         * keeps the rounding term from overflowing for large values. */
        for (j = logvalue - 4 + 1; j <= logvalue; j++) {
            long long base = ((long long)value + (1LL << (j - 1)) - 1) >> j;
            long long temp_err;

            if (base >= 16)
                continue;

            temp_err = (long long)value - (base << j);
            if (temp_err < 0)
                temp_err = -temp_err;

            if (temp_err < error) {
                error = temp_err;
                best_shift = j;
                best_base = (int)base;
                if (temp_err == 0)
                    break;
            }
        }

        /* A shift above 15 cannot be stored in the 4-bit shift field;
         * such a cost is larger than anything the LUT can express. */
        if (best_shift < 0 || best_shift > 15)
            return max;

        ret = (best_shift << 4) | best_base;
        temp1 = best_base << best_shift;
    }

    if (temp1 > temp2)
        ret = max;
    return ret;
}
812 | |||
813 | |||
814 | #define QP_MAX 52 |
||
815 | |||
816 | |||
817 | static float intel_lambda_qp(int qp) |
||
818 | { |
||
819 | float value, lambdaf; |
||
820 | value = qp; |
||
821 | value = value / 6 - 2; |
||
822 | if (value < 0) |
||
823 | value = 0; |
||
824 | lambdaf = roundf(powf(2, value)); |
||
825 | return lambdaf; |
||
826 | } |
||
827 | |||
828 | |||
829 | void intel_vme_update_mbmv_cost(VADriverContextP ctx, |
||
830 | struct encode_state *encode_state, |
||
831 | struct intel_encoder_context *encoder_context) |
||
832 | { |
||
833 | struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; |
||
834 | struct gen6_vme_context *vme_context = encoder_context->vme_context; |
||
835 | VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; |
||
836 | VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; |
||
837 | int qp, m_cost, j, mv_count; |
||
838 | uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message); |
||
839 | float lambda, m_costf; |
||
840 | |||
841 | int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); |
||
842 | |||
843 | |||
844 | if (encoder_context->rate_control_mode == VA_RC_CQP) |
||
845 | qp = pic_param->pic_init_qp + slice_param->slice_qp_delta; |
||
846 | else |
||
847 | qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; |
||
848 | |||
849 | if (vme_state_message == NULL) |
||
850 | return; |
||
851 | |||
852 | assert(qp <= QP_MAX); |
||
853 | lambda = intel_lambda_qp(qp); |
||
854 | if (slice_type == SLICE_TYPE_I) { |
||
855 | vme_state_message[MODE_INTRA_16X16] = 0; |
||
856 | m_cost = lambda * 4; |
||
857 | vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f); |
||
858 | m_cost = lambda * 16; |
||
859 | vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f); |
||
860 | m_cost = lambda * 3; |
||
861 | vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f); |
||
862 | } else { |
||
863 | m_cost = 0; |
||
864 | vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f); |
||
865 | for (j = 1; j < 3; j++) { |
||
866 | m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda; |
||
867 | m_cost = (int)m_costf; |
||
868 | vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f); |
||
869 | } |
||
870 | mv_count = 3; |
||
871 | for (j = 4; j <= 64; j *= 2) { |
||
872 | m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda; |
||
873 | m_cost = (int)m_costf; |
||
874 | vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f); |
||
875 | mv_count++; |
||
876 | } |
||
877 | |||
878 | if (qp <= 25) { |
||
879 | vme_state_message[MODE_INTRA_16X16] = 0x4a; |
||
880 | vme_state_message[MODE_INTRA_8X8] = 0x4a; |
||
881 | vme_state_message[MODE_INTRA_4X4] = 0x4a; |
||
882 | vme_state_message[MODE_INTRA_NONPRED] = 0x4a; |
||
883 | vme_state_message[MODE_INTER_16X16] = 0x4a; |
||
884 | vme_state_message[MODE_INTER_16X8] = 0x4a; |
||
885 | vme_state_message[MODE_INTER_8X8] = 0x4a; |
||
886 | vme_state_message[MODE_INTER_8X4] = 0x4a; |
||
887 | vme_state_message[MODE_INTER_4X4] = 0x4a; |
||
888 | vme_state_message[MODE_INTER_BWD] = 0x2a; |
||
889 | return; |
||
890 | } |
||
891 | m_costf = lambda * 10; |
||
892 | vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f); |
||
893 | m_cost = lambda * 14; |
||
894 | vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f); |
||
895 | m_cost = lambda * 24; |
||
896 | vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f); |
||
897 | m_costf = lambda * 3.5; |
||
898 | m_cost = m_costf; |
||
899 | vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f); |
||
900 | if (slice_type == SLICE_TYPE_P) { |
||
901 | m_costf = lambda * 2.5; |
||
902 | m_cost = m_costf; |
||
903 | vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f); |
||
904 | m_costf = lambda * 4; |
||
905 | m_cost = m_costf; |
||
906 | vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f); |
||
907 | m_costf = lambda * 1.5; |
||
908 | m_cost = m_costf; |
||
909 | vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f); |
||
910 | m_costf = lambda * 3; |
||
911 | m_cost = m_costf; |
||
912 | vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f); |
||
913 | m_costf = lambda * 5; |
||
914 | m_cost = m_costf; |
||
915 | vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f); |
||
916 | /* BWD is not used in P-frame */ |
||
917 | vme_state_message[MODE_INTER_BWD] = 0; |
||
918 | } else { |
||
919 | m_costf = lambda * 2.5; |
||
920 | m_cost = m_costf; |
||
921 | vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f); |
||
922 | m_costf = lambda * 5.5; |
||
923 | m_cost = m_costf; |
||
924 | vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f); |
||
925 | m_costf = lambda * 3.5; |
||
926 | m_cost = m_costf; |
||
927 | vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f); |
||
928 | m_costf = lambda * 5.0; |
||
929 | m_cost = m_costf; |
||
930 | vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f); |
||
931 | m_costf = lambda * 6.5; |
||
932 | m_cost = m_costf; |
||
933 | vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f); |
||
934 | m_costf = lambda * 1.5; |
||
935 | m_cost = m_costf; |
||
936 | vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f); |
||
937 | } |
||
938 | } |
||
939 | } |
||
940 | |||
941 | |||
942 | #define MB_SCOREBOARD_A (1 << 0) |
||
943 | #define MB_SCOREBOARD_B (1 << 1) |
||
944 | #define MB_SCOREBOARD_C (1 << 2) |
||
945 | void |
||
946 | gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_context) |
||
947 | { |
||
948 | vme_context->gpe_context.vfe_desc5.scoreboard0.enable = 1; |
||
949 | vme_context->gpe_context.vfe_desc5.scoreboard0.type = SCOREBOARD_STALLING; |
||
950 | vme_context->gpe_context.vfe_desc5.scoreboard0.mask = (MB_SCOREBOARD_A | |
||
951 | MB_SCOREBOARD_B | |
||
952 | MB_SCOREBOARD_C); |
||
953 | |||
954 | /* In VME prediction the current mb depends on the neighbour |
||
955 | * A/B/C macroblock. So the left/up/up-right dependency should |
||
956 | * be considered. |
||
957 | */ |
||
958 | vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x0 = -1; |
||
959 | vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y0 = 0; |
||
960 | vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x1 = 0; |
||
961 | vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y1 = -1; |
||
962 | vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x2 = 1; |
||
963 | vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y2 = -1; |
||
964 | |||
965 | vme_context->gpe_context.vfe_desc7.dword = 0; |
||
966 | return; |
||
967 | } |
||
968 | |||
969 | /* check whether the mb of (x_index, y_index) is out of bound */ |
||
970 | static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height) |
||
971 | { |
||
972 | int mb_index; |
||
973 | if (x_index < 0 || x_index >= mb_width) |
||
974 | return -1; |
||
975 | if (y_index < 0 || y_index >= mb_height) |
||
976 | return -1; |
||
977 | |||
978 | mb_index = y_index * mb_width + x_index; |
||
979 | if (mb_index < first_mb || mb_index > (first_mb + num_mb)) |
||
980 | return -1; |
||
981 | return 0; |
||
982 | } |
||
983 | |||
984 | void |
||
985 | gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, |
||
986 | struct encode_state *encode_state, |
||
987 | int mb_width, int mb_height, |
||
988 | int kernel, |
||
989 | int transform_8x8_mode_flag, |
||
990 | struct intel_encoder_context *encoder_context) |
||
991 | { |
||
992 | struct gen6_vme_context *vme_context = encoder_context->vme_context; |
||
993 | int mb_row; |
||
994 | int s; |
||
995 | unsigned int *command_ptr; |
||
996 | |||
997 | #define USE_SCOREBOARD (1 << 21) |
||
998 | |||
999 | dri_bo_map(vme_context->vme_batchbuffer.bo, 1); |
||
1000 | command_ptr = vme_context->vme_batchbuffer.bo->virtual; |
||
1001 | |||
1002 | for (s = 0; s < encode_state->num_slice_params_ext; s++) { |
||
1003 | VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; |
||
1004 | int first_mb = pSliceParameter->macroblock_address; |
||
1005 | int num_mb = pSliceParameter->num_macroblocks; |
||
1006 | unsigned int mb_intra_ub, score_dep; |
||
1007 | int x_outer, y_outer, x_inner, y_inner; |
||
1008 | int xtemp_outer = 0; |
||
1009 | |||
1010 | x_outer = first_mb % mb_width; |
||
1011 | y_outer = first_mb / mb_width; |
||
1012 | mb_row = y_outer; |
||
1013 | |||
1014 | for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { |
||
1015 | x_inner = x_outer; |
||
1016 | y_inner = y_outer; |
||
1017 | for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) { |
||
1018 | mb_intra_ub = 0; |
||
1019 | score_dep = 0; |
||
1020 | if (x_inner != 0) { |
||
1021 | mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE; |
||
1022 | score_dep |= MB_SCOREBOARD_A; |
||
1023 | } |
||
1024 | if (y_inner != mb_row) { |
||
1025 | mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B; |
||
1026 | score_dep |= MB_SCOREBOARD_B; |
||
1027 | if (x_inner != 0) |
||
1028 | mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D; |
||
1029 | if (x_inner != (mb_width -1)) { |
||
1030 | mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C; |
||
1031 | score_dep |= MB_SCOREBOARD_C; |
||
1032 | } |
||
1033 | } |
||
1034 | |||
1035 | *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); |
||
1036 | *command_ptr++ = kernel; |
||
1037 | *command_ptr++ = USE_SCOREBOARD; |
||
1038 | /* Indirect data */ |
||
1039 | *command_ptr++ = 0; |
||
1040 | /* the (X, Y) term of scoreboard */ |
||
1041 | *command_ptr++ = ((y_inner << 16) | x_inner); |
||
1042 | *command_ptr++ = score_dep; |
||
1043 | /*inline data */ |
||
1044 | *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner); |
||
1045 | *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); |
||
1046 | x_inner -= 2; |
||
1047 | y_inner += 1; |
||
1048 | } |
||
1049 | x_outer += 1; |
||
1050 | } |
||
1051 | |||
1052 | xtemp_outer = mb_width - 2; |
||
1053 | if (xtemp_outer < 0) |
||
1054 | xtemp_outer = 0; |
||
1055 | x_outer = xtemp_outer; |
||
1056 | y_outer = first_mb / mb_width; |
||
1057 | for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { |
||
1058 | y_inner = y_outer; |
||
1059 | x_inner = x_outer; |
||
1060 | for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) { |
||
1061 | mb_intra_ub = 0; |
||
1062 | score_dep = 0; |
||
1063 | if (x_inner != 0) { |
||
1064 | mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE; |
||
1065 | score_dep |= MB_SCOREBOARD_A; |
||
1066 | } |
||
1067 | if (y_inner != mb_row) { |
||
1068 | mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B; |
||
1069 | score_dep |= MB_SCOREBOARD_B; |
||
1070 | if (x_inner != 0) |
||
1071 | mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D; |
||
1072 | |||
1073 | if (x_inner != (mb_width -1)) { |
||
1074 | mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C; |
||
1075 | score_dep |= MB_SCOREBOARD_C; |
||
1076 | } |
||
1077 | } |
||
1078 | |||
1079 | *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); |
||
1080 | *command_ptr++ = kernel; |
||
1081 | *command_ptr++ = USE_SCOREBOARD; |
||
1082 | /* Indirect data */ |
||
1083 | *command_ptr++ = 0; |
||
1084 | /* the (X, Y) term of scoreboard */ |
||
1085 | *command_ptr++ = ((y_inner << 16) | x_inner); |
||
1086 | *command_ptr++ = score_dep; |
||
1087 | /*inline data */ |
||
1088 | *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner); |
||
1089 | *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); |
||
1090 | |||
1091 | x_inner -= 2; |
||
1092 | y_inner += 1; |
||
1093 | } |
||
1094 | x_outer++; |
||
1095 | if (x_outer >= mb_width) { |
||
1096 | y_outer += 1; |
||
1097 | x_outer = xtemp_outer; |
||
1098 | } |
||
1099 | } |
||
1100 | } |
||
1101 | |||
1102 | *command_ptr++ = 0; |
||
1103 | *command_ptr++ = MI_BATCH_BUFFER_END; |
||
1104 | |||
1105 | dri_bo_unmap(vme_context->vme_batchbuffer.bo); |
||
1106 | } |
||
1107 | |||
1108 | static uint8_t |
||
1109 | intel_get_ref_idx_state_1(VAPictureH264 *va_pic, unsigned int frame_store_id) |
||
1110 | { |
||
1111 | unsigned int is_long_term = |
||
1112 | !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE); |
||
1113 | unsigned int is_top_field = |
||
1114 | !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD); |
||
1115 | unsigned int is_bottom_field = |
||
1116 | !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD); |
||
1117 | |||
1118 | return ((is_long_term << 6) | |
||
1119 | ((is_top_field ^ is_bottom_field ^ 1) << 5) | |
||
1120 | (frame_store_id << 1) | |
||
1121 | ((is_top_field ^ 1) & is_bottom_field)); |
||
1122 | } |
||
1123 | |||
1124 | void |
||
1125 | intel_mfc_avc_ref_idx_state(VADriverContextP ctx, |
||
1126 | struct encode_state *encode_state, |
||
1127 | struct intel_encoder_context *encoder_context) |
||
1128 | { |
||
1129 | struct gen6_vme_context *vme_context = encoder_context->vme_context; |
||
1130 | struct intel_batchbuffer *batch = encoder_context->base.batch; |
||
1131 | int slice_type; |
||
1132 | struct object_surface *obj_surface; |
||
1133 | unsigned int fref_entry, bref_entry; |
||
1134 | int frame_index, i; |
||
1135 | VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; |
||
1136 | |||
1137 | fref_entry = 0x80808080; |
||
1138 | bref_entry = 0x80808080; |
||
1139 | slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); |
||
1140 | |||
1141 | if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) { |
||
1142 | int ref_idx_l0 = (vme_context->ref_index_in_mb[0] & 0xff); |
||
1143 | |||
1144 | if (ref_idx_l0 > 3) { |
||
1145 | WARN_ONCE("ref_idx_l0 is out of range\n"); |
||
1146 | ref_idx_l0 = 0; |
||
1147 | } |
||
1148 | |||
1149 | obj_surface = vme_context->used_reference_objects[0]; |
||
1150 | frame_index = -1; |
||
1151 | for (i = 0; i < 16; i++) { |
||
1152 | if (obj_surface && |
||
1153 | obj_surface == encode_state->reference_objects[i]) { |
||
1154 | frame_index = i; |
||
1155 | break; |
||
1156 | } |
||
1157 | } |
||
1158 | if (frame_index == -1) { |
||
1159 | WARN_ONCE("RefPicList0 is not found in DPB!\n"); |
||
1160 | } else { |
||
1161 | int ref_idx_l0_shift = ref_idx_l0 * 8; |
||
1162 | fref_entry &= ~(0xFF << ref_idx_l0_shift); |
||
1163 | fref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[0], frame_index) << ref_idx_l0_shift); |
||
1164 | } |
||
1165 | } |
||
1166 | |||
1167 | if (slice_type == SLICE_TYPE_B) { |
||
1168 | int ref_idx_l1 = (vme_context->ref_index_in_mb[1] & 0xff); |
||
1169 | |||
1170 | if (ref_idx_l1 > 3) { |
||
1171 | WARN_ONCE("ref_idx_l1 is out of range\n"); |
||
1172 | ref_idx_l1 = 0; |
||
1173 | } |
||
1174 | |||
1175 | obj_surface = vme_context->used_reference_objects[1]; |
||
1176 | frame_index = -1; |
||
1177 | for (i = 0; i < 16; i++) { |
||
1178 | if (obj_surface && |
||
1179 | obj_surface == encode_state->reference_objects[i]) { |
||
1180 | frame_index = i; |
||
1181 | break; |
||
1182 | } |
||
1183 | } |
||
1184 | if (frame_index == -1) { |
||
1185 | WARN_ONCE("RefPicList1 is not found in DPB!\n"); |
||
1186 | } else { |
||
1187 | int ref_idx_l1_shift = ref_idx_l1 * 8; |
||
1188 | bref_entry &= ~(0xFF << ref_idx_l1_shift); |
||
1189 | bref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[1], frame_index) << ref_idx_l1_shift); |
||
1190 | } |
||
1191 | } |
||
1192 | |||
1193 | BEGIN_BCS_BATCH(batch, 10); |
||
1194 | OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); |
||
1195 | OUT_BCS_BATCH(batch, 0); //Select L0 |
||
1196 | OUT_BCS_BATCH(batch, fref_entry); //Only 1 reference |
||
1197 | for(i = 0; i < 7; i++) { |
||
1198 | OUT_BCS_BATCH(batch, 0x80808080); |
||
1199 | } |
||
1200 | ADVANCE_BCS_BATCH(batch); |
||
1201 | |||
1202 | BEGIN_BCS_BATCH(batch, 10); |
||
1203 | OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); |
||
1204 | OUT_BCS_BATCH(batch, 1); //Select L1 |
||
1205 | OUT_BCS_BATCH(batch, bref_entry); //Only 1 reference |
||
1206 | for(i = 0; i < 7; i++) { |
||
1207 | OUT_BCS_BATCH(batch, 0x80808080); |
||
1208 | } |
||
1209 | ADVANCE_BCS_BATCH(batch); |
||
1210 | } |
||
1211 | |||
1212 | |||
1213 | void intel_vme_mpeg2_state_setup(VADriverContextP ctx, |
||
1214 | struct encode_state *encode_state, |
||
1215 | struct intel_encoder_context *encoder_context) |
||
1216 | { |
||
1217 | struct gen6_vme_context *vme_context = encoder_context->vme_context; |
||
1218 | uint32_t *vme_state_message = (uint32_t *)(vme_context->vme_state_message); |
||
1219 | VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; |
||
1220 | int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16; |
||
1221 | int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16; |
||
1222 | uint32_t mv_x, mv_y; |
||
1223 | VAEncSliceParameterBufferMPEG2 *slice_param = NULL; |
||
1224 | VAEncPictureParameterBufferMPEG2 *pic_param = NULL; |
||
1225 | slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer; |
||
1226 | |||
1227 | if (vme_context->mpeg2_level == MPEG2_LEVEL_LOW) { |
||
1228 | mv_x = 512; |
||
1229 | mv_y = 64; |
||
1230 | } else if (vme_context->mpeg2_level == MPEG2_LEVEL_MAIN) { |
||
1231 | mv_x = 1024; |
||
1232 | mv_y = 128; |
||
1233 | } else if (vme_context->mpeg2_level == MPEG2_LEVEL_HIGH) { |
||
1234 | mv_x = 2048; |
||
1235 | mv_y = 128; |
||
1236 | } else { |
||
1237 | WARN_ONCE("Incorrect Mpeg2 level setting!\n"); |
||
1238 | mv_x = 512; |
||
1239 | mv_y = 64; |
||
1240 | } |
||
1241 | |||
1242 | pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer; |
||
1243 | if (pic_param->picture_type != VAEncPictureTypeIntra) { |
||
1244 | int qp, m_cost, j, mv_count; |
||
1245 | float lambda, m_costf; |
||
1246 | slice_param = (VAEncSliceParameterBufferMPEG2 *) |
||
1247 | encode_state->slice_params_ext[0]->buffer; |
||
1248 | qp = slice_param->quantiser_scale_code; |
||
1249 | lambda = intel_lambda_qp(qp); |
||
1250 | /* No Intra prediction. So it is zero */ |
||
1251 | vme_state_message[MODE_INTRA_8X8] = 0; |
||
1252 | vme_state_message[MODE_INTRA_4X4] = 0; |
||
1253 | vme_state_message[MODE_INTER_MV0] = 0; |
||
1254 | for (j = 1; j < 3; j++) { |
||
1255 | m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda; |
||
1256 | m_cost = (int)m_costf; |
||
1257 | vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f); |
||
1258 | } |
||
1259 | mv_count = 3; |
||
1260 | for (j = 4; j <= 64; j *= 2) { |
||
1261 | m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda; |
||
1262 | m_cost = (int)m_costf; |
||
1263 | vme_state_message[MODE_INTER_MV0 + mv_count] = |
||
1264 | intel_format_lutvalue(m_cost, 0x6f); |
||
1265 | mv_count++; |
||
1266 | } |
||
1267 | m_cost = lambda; |
||
1268 | /* It can only perform the 16x16 search. So mode cost can be ignored for |
||
1269 | * the other mode. for example: 16x8/8x8 |
||
1270 | */ |
||
1271 | vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f); |
||
1272 | vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f); |
||
1273 | |||
1274 | vme_state_message[MODE_INTER_16X8] = 0; |
||
1275 | vme_state_message[MODE_INTER_8X8] = 0; |
||
1276 | vme_state_message[MODE_INTER_8X4] = 0; |
||
1277 | vme_state_message[MODE_INTER_4X4] = 0; |
||
1278 | vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f); |
||
1279 | |||
1280 | } |
||
1281 | vme_state_message[MPEG2_MV_RANGE] = (mv_y << 16) | (mv_x); |
||
1282 | |||
1283 | vme_state_message[MPEG2_PIC_WIDTH_HEIGHT] = (height_in_mbs << 16) | |
||
1284 | width_in_mbs; |
||
1285 | } |
||
1286 | |||
1287 | void |
||
1288 | gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx, |
||
1289 | struct encode_state *encode_state, |
||
1290 | int mb_width, int mb_height, |
||
1291 | int kernel, |
||
1292 | struct intel_encoder_context *encoder_context) |
||
1293 | { |
||
1294 | struct gen6_vme_context *vme_context = encoder_context->vme_context; |
||
1295 | unsigned int *command_ptr; |
||
1296 | |||
1297 | #define MPEG2_SCOREBOARD (1 << 21) |
||
1298 | |||
1299 | dri_bo_map(vme_context->vme_batchbuffer.bo, 1); |
||
1300 | command_ptr = vme_context->vme_batchbuffer.bo->virtual; |
||
1301 | |||
1302 | { |
||
1303 | unsigned int mb_intra_ub, score_dep; |
||
1304 | int x_outer, y_outer, x_inner, y_inner; |
||
1305 | int xtemp_outer = 0; |
||
1306 | int first_mb = 0; |
||
1307 | int num_mb = mb_width * mb_height; |
||
1308 | |||
1309 | x_outer = 0; |
||
1310 | y_outer = 0; |
||
1311 | |||
1312 | |||
1313 | for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { |
||
1314 | x_inner = x_outer; |
||
1315 | y_inner = y_outer; |
||
1316 | for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) { |
||
1317 | mb_intra_ub = 0; |
||
1318 | score_dep = 0; |
||
1319 | if (x_inner != 0) { |
||
1320 | mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE; |
||
1321 | score_dep |= MB_SCOREBOARD_A; |
||
1322 | } |
||
1323 | if (y_inner != 0) { |
||
1324 | mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B; |
||
1325 | score_dep |= MB_SCOREBOARD_B; |
||
1326 | |||
1327 | if (x_inner != 0) |
||
1328 | mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D; |
||
1329 | |||
1330 | if (x_inner != (mb_width -1)) { |
||
1331 | mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C; |
||
1332 | score_dep |= MB_SCOREBOARD_C; |
||
1333 | } |
||
1334 | } |
||
1335 | |||
1336 | *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); |
||
1337 | *command_ptr++ = kernel; |
||
1338 | *command_ptr++ = MPEG2_SCOREBOARD; |
||
1339 | /* Indirect data */ |
||
1340 | *command_ptr++ = 0; |
||
1341 | /* the (X, Y) term of scoreboard */ |
||
1342 | *command_ptr++ = ((y_inner << 16) | x_inner); |
||
1343 | *command_ptr++ = score_dep; |
||
1344 | /*inline data */ |
||
1345 | *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner); |
||
1346 | *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8)); |
||
1347 | x_inner -= 2; |
||
1348 | y_inner += 1; |
||
1349 | } |
||
1350 | x_outer += 1; |
||
1351 | } |
||
1352 | |||
1353 | xtemp_outer = mb_width - 2; |
||
1354 | if (xtemp_outer < 0) |
||
1355 | xtemp_outer = 0; |
||
1356 | x_outer = xtemp_outer; |
||
1357 | y_outer = 0; |
||
1358 | for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { |
||
1359 | y_inner = y_outer; |
||
1360 | x_inner = x_outer; |
||
1361 | for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) { |
||
1362 | mb_intra_ub = 0; |
||
1363 | score_dep = 0; |
||
1364 | if (x_inner != 0) { |
||
1365 | mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE; |
||
1366 | score_dep |= MB_SCOREBOARD_A; |
||
1367 | } |
||
1368 | if (y_inner != 0) { |
||
1369 | mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B; |
||
1370 | score_dep |= MB_SCOREBOARD_B; |
||
1371 | |||
1372 | if (x_inner != 0) |
||
1373 | mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D; |
||
1374 | |||
1375 | if (x_inner != (mb_width -1)) { |
||
1376 | mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C; |
||
1377 | score_dep |= MB_SCOREBOARD_C; |
||
1378 | } |
||
1379 | } |
||
1380 | |||
1381 | *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); |
||
1382 | *command_ptr++ = kernel; |
||
1383 | *command_ptr++ = MPEG2_SCOREBOARD; |
||
1384 | /* Indirect data */ |
||
1385 | *command_ptr++ = 0; |
||
1386 | /* the (X, Y) term of scoreboard */ |
||
1387 | *command_ptr++ = ((y_inner << 16) | x_inner); |
||
1388 | *command_ptr++ = score_dep; |
||
1389 | /*inline data */ |
||
1390 | *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner); |
||
1391 | *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8)); |
||
1392 | |||
1393 | x_inner -= 2; |
||
1394 | y_inner += 1; |
||
1395 | } |
||
1396 | x_outer++; |
||
1397 | if (x_outer >= mb_width) { |
||
1398 | y_outer += 1; |
||
1399 | x_outer = xtemp_outer; |
||
1400 | } |
||
1401 | } |
||
1402 | } |
||
1403 | |||
1404 | *command_ptr++ = 0; |
||
1405 | *command_ptr++ = MI_BATCH_BUFFER_END; |
||
1406 | |||
1407 | dri_bo_unmap(vme_context->vme_batchbuffer.bo); |
||
1408 | return; |
||
1409 | } |
||
1410 | |||
1411 | static int |
||
1412 | avc_temporal_find_surface(VAPictureH264 *curr_pic, |
||
1413 | VAPictureH264 *ref_list, |
||
1414 | int num_pictures, |
||
1415 | int dir) |
||
1416 | { |
||
1417 | int i, found = -1, min = 0x7FFFFFFF; |
||
1418 | |||
1419 | for (i = 0; i < num_pictures; i++) { |
||
1420 | int tmp; |
||
1421 | |||
1422 | if ((ref_list[i].flags & VA_PICTURE_H264_INVALID) || |
||
1423 | (ref_list[i].picture_id == VA_INVALID_SURFACE)) |
||
1424 | break; |
||
1425 | |||
1426 | tmp = curr_pic->TopFieldOrderCnt - ref_list[i].TopFieldOrderCnt; |
||
1427 | |||
1428 | if (dir) |
||
1429 | tmp = -tmp; |
||
1430 | |||
1431 | if (tmp > 0 && tmp < min) { |
||
1432 | min = tmp; |
||
1433 | found = i; |
||
1434 | } |
||
1435 | } |
||
1436 | |||
1437 | return found; |
||
1438 | } |
||
1439 | |||
1440 | void |
||
1441 | intel_avc_vme_reference_state(VADriverContextP ctx, |
||
1442 | struct encode_state *encode_state, |
||
1443 | struct intel_encoder_context *encoder_context, |
||
1444 | int list_index, |
||
1445 | int surface_index, |
||
1446 | void (* vme_source_surface_state)( |
||
1447 | VADriverContextP ctx, |
||
1448 | int index, |
||
1449 | struct object_surface *obj_surface, |
||
1450 | struct intel_encoder_context *encoder_context)) |
||
1451 | { |
||
1452 | struct gen6_vme_context *vme_context = encoder_context->vme_context; |
||
1453 | struct object_surface *obj_surface = NULL; |
||
1454 | struct i965_driver_data *i965 = i965_driver_data(ctx); |
||
1455 | VASurfaceID ref_surface_id; |
||
1456 | VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; |
||
1457 | VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; |
||
1458 | int max_num_references; |
||
1459 | VAPictureH264 *curr_pic; |
||
1460 | VAPictureH264 *ref_list; |
||
1461 | int ref_idx; |
||
1462 | |||
1463 | if (list_index == 0) { |
||
1464 | max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1; |
||
1465 | ref_list = slice_param->RefPicList0; |
||
1466 | } else { |
||
1467 | max_num_references = pic_param->num_ref_idx_l1_active_minus1 + 1; |
||
1468 | ref_list = slice_param->RefPicList1; |
||
1469 | } |
||
1470 | |||
1471 | if (max_num_references == 1) { |
||
1472 | if (list_index == 0) { |
||
1473 | ref_surface_id = slice_param->RefPicList0[0].picture_id; |
||
1474 | vme_context->used_references[0] = &slice_param->RefPicList0[0]; |
||
1475 | } else { |
||
1476 | ref_surface_id = slice_param->RefPicList1[0].picture_id; |
||
1477 | vme_context->used_references[1] = &slice_param->RefPicList1[0]; |
||
1478 | } |
||
1479 | |||
1480 | if (ref_surface_id != VA_INVALID_SURFACE) |
||
1481 | obj_surface = SURFACE(ref_surface_id); |
||
1482 | |||
1483 | if (!obj_surface || |
||
1484 | !obj_surface->bo) { |
||
1485 | obj_surface = encode_state->reference_objects[list_index]; |
||
1486 | vme_context->used_references[list_index] = &pic_param->ReferenceFrames[list_index]; |
||
1487 | } |
||
1488 | |||
1489 | ref_idx = 0; |
||
1490 | } else { |
||
1491 | curr_pic = &pic_param->CurrPic; |
||
1492 | |||
1493 | /* select the reference frame in temporal space */ |
||
1494 | ref_idx = avc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1); |
||
1495 | ref_surface_id = ref_list[ref_idx].picture_id; |
||
1496 | |||
1497 | if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */ |
||
1498 | obj_surface = SURFACE(ref_surface_id); |
||
1499 | |||
1500 | vme_context->used_reference_objects[list_index] = obj_surface; |
||
1501 | vme_context->used_references[list_index] = &ref_list[ref_idx]; |
||
1502 | } |
||
1503 | |||
1504 | if (obj_surface && |
||
1505 | obj_surface->bo) { |
||
1506 | assert(ref_idx >= 0); |
||
1507 | vme_context->used_reference_objects[list_index] = obj_surface; |
||
1508 | vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context); |
||
1509 | vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 | |
||
1510 | ref_idx << 16 | |
||
1511 | ref_idx << 8 | |
||
1512 | ref_idx); |
||
1513 | } else { |
||
1514 | vme_context->used_reference_objects[list_index] = NULL; |
||
1515 | vme_context->used_references[list_index] = NULL; |
||
1516 | vme_context->ref_index_in_mb[list_index] = 0; |
||
1517 | } |
||
1518 | } |
||
1519 | |||
1520 | void intel_avc_slice_insert_packed_data(VADriverContextP ctx, |
||
1521 | struct encode_state *encode_state, |
||
1522 | struct intel_encoder_context *encoder_context, |
||
1523 | int slice_index, |
||
1524 | struct intel_batchbuffer *slice_batch) |
||
1525 | { |
||
1526 | int count, i, start_index; |
||
1527 | unsigned int length_in_bits; |
||
1528 | VAEncPackedHeaderParameterBuffer *param = NULL; |
||
1529 | unsigned int *header_data = NULL; |
||
1530 | struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; |
||
1531 | int slice_header_index; |
||
1532 | |||
1533 | if (encode_state->slice_header_index[slice_index] == 0) |
||
1534 | slice_header_index = -1; |
||
1535 | else |
||
1536 | slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK); |
||
1537 | |||
1538 | count = encode_state->slice_rawdata_count[slice_index]; |
||
1539 | start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK); |
||
1540 | |||
1541 | for (i = 0; i < count; i++) { |
||
1542 | unsigned int skip_emul_byte_cnt; |
||
1543 | |||
1544 | header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer; |
||
1545 | |||
1546 | param = (VAEncPackedHeaderParameterBuffer *) |
||
1547 | (encode_state->packed_header_params_ext[start_index + i]->buffer); |
||
1548 | |||
1549 | /* skip the slice header packed data type as it is lastly inserted */ |
||
1550 | if (param->type == VAEncPackedHeaderSlice) |
||
1551 | continue; |
||
1552 | |||
1553 | length_in_bits = param->bit_length; |
||
1554 | |||
1555 | skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits); |
||
1556 | |||
1557 | /* as the slice header is still required, the last header flag is set to |
||
1558 | * zero. |
||
1559 | */ |
||
1560 | mfc_context->insert_object(ctx, |
||
1561 | encoder_context, |
||
1562 | header_data, |
||
1563 | ALIGN(length_in_bits, 32) >> 5, |
||
1564 | length_in_bits & 0x1f, |
||
1565 | skip_emul_byte_cnt, |
||
1566 | 0, |
||
1567 | 0, |
||
1568 | !param->has_emulation_bytes, |
||
1569 | slice_batch); |
||
1570 | } |
||
1571 | |||
1572 | if (slice_header_index == -1) { |
||
1573 | unsigned char *slice_header = NULL; |
||
1574 | int slice_header_length_in_bits = 0; |
||
1575 | VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; |
||
1576 | VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; |
||
1577 | VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; |
||
1578 | |||
1579 | /* No slice header data is passed. And the driver needs to generate it */ |
||
1580 | /* For the Normal H264 */ |
||
1581 | slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, |
||
1582 | pPicParameter, |
||
1583 | pSliceParameter, |
||
1584 | &slice_header); |
||
1585 | mfc_context->insert_object(ctx, encoder_context, |
||
1586 | (unsigned int *)slice_header, |
||
1587 | ALIGN(slice_header_length_in_bits, 32) >> 5, |
||
1588 | slice_header_length_in_bits & 0x1f, |
||
1589 | 5, /* first 5 bytes are start code + nal unit type */ |
||
1590 | 1, 0, 1, slice_batch); |
||
1591 | |||
1592 | free(slice_header); |
||
1593 | } else { |
||
1594 | unsigned int skip_emul_byte_cnt; |
||
1595 | |||
1596 | header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer; |
||
1597 | |||
1598 | param = (VAEncPackedHeaderParameterBuffer *) |
||
1599 | (encode_state->packed_header_params_ext[slice_header_index]->buffer); |
||
1600 | length_in_bits = param->bit_length; |
||
1601 | |||
1602 | /* as the slice header is the last header data for one slice, |
||
1603 | * the last header flag is set to one. |
||
1604 | */ |
||
1605 | skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits); |
||
1606 | |||
1607 | mfc_context->insert_object(ctx, |
||
1608 | encoder_context, |
||
1609 | header_data, |
||
1610 | ALIGN(length_in_bits, 32) >> 5, |
||
1611 | length_in_bits & 0x1f, |
||
1612 | skip_emul_byte_cnt, |
||
1613 | 1, |
||
1614 | 0, |
||
1615 | !param->has_emulation_bytes, |
||
1616 | slice_batch); |
||
1617 | } |
||
1618 | |||
1619 | return; |
||
1620 | }>><>><>><>>>><>><>><>><>><>><>>><>><>><>><>><>><>>><>><>><>=>>>>><>><>>><>><>>><>><>><>><>><>><>><>><>><>>><>><>><>><>><>><>>>><>>>>><>><>><>=>=>>=>>><>><>><>>><>><>=>>=>>>>>>>>=>>>>>>>=>=>>>4)/2; |