Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5564 | serge | 1 | /* |
2 | * Copyright 2013 Ilia Mirkin |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
8 | * and/or sell copies of the Software, and to permit persons to whom the |
||
9 | * Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice shall be included in |
||
12 | * all copies or substantial portions of the Software. |
||
13 | * |
||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
||
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
||
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
||
20 | * OTHER DEALINGS IN THE SOFTWARE. |
||
21 | */ |
||
22 | |||
23 | #include "nv50/nv84_video.h" |
||
24 | |||
/*
 * Parameter block copied to the start of the decoder's bitstream buffer.
 *
 * The hex value in each trailing comment is the field's byte offset within
 * its own struct. The nested records are declared as separate file-scope
 * structs; in C this yields exactly the same tags and memory layout as
 * declaring them inline inside struct iparm.
 */

/* One reference-frame record; 0x20 bytes each. */
struct iref {
   uint32_t u00;                /* 0x00 */
   uint32_t field_is_ref;       /* 0x04; bit0: top, bit1: bottom */
   uint8_t  is_long_term;       /* 0x08 */
   uint8_t  non_existing;       /* 0x09 */
   uint8_t  u0a;                /* 0x0a */
   uint8_t  u0b;                /* 0x0b */
   uint32_t frame_idx;          /* 0x0c */
   uint32_t field_order_cnt[2]; /* 0x10 */
   uint32_t mvidx;              /* 0x18 */
   uint8_t  field_pic_flag;     /* 0x1c */
   uint8_t  u1d;                /* 0x1d */
   uint8_t  u1e;                /* 0x1e */
   uint8_t  u1f;                /* 0x1f */
};                              /* 0x20 */

/* Sequence-level parameters; 0x150 bytes. */
struct iseqparm {
   uint32_t chroma_format_idc;                 /* 0x000 */
   uint32_t pad[(0x128 - 0x4) / 4];            /* 0x004 .. 0x127 */
   uint32_t log2_max_frame_num_minus4;         /* 0x128 */
   uint32_t pic_order_cnt_type;                /* 0x12c */
   uint32_t log2_max_pic_order_cnt_lsb_minus4; /* 0x130 */
   uint32_t delta_pic_order_always_zero_flag;  /* 0x134 */
   uint32_t num_ref_frames;                    /* 0x138 */
   uint32_t pic_width_in_mbs_minus1;           /* 0x13c */
   uint32_t pic_height_in_map_units_minus1;    /* 0x140 */
   uint32_t frame_mbs_only_flag;               /* 0x144 */
   uint32_t mb_adaptive_frame_field_flag;      /* 0x148 */
   uint32_t direct_8x8_inference_flag;         /* 0x14c */
};

/* Picture-level parameters followed by the 16-entry reference table. */
struct ipicparm {
   uint32_t entropy_coding_mode_flag;               /* 0x000 */
   uint32_t pic_order_present_flag;                 /* 0x004 */
   uint32_t num_slice_groups_minus1;                /* 0x008 */
   uint32_t slice_group_map_type;                   /* 0x00c */
   uint32_t pad1[0x60 / 4];                         /* 0x010 .. 0x06f */
   uint32_t u70;                                    /* 0x070 */
   uint32_t u74;                                    /* 0x074 */
   uint32_t u78;                                    /* 0x078 */
   uint32_t num_ref_idx_l0_active_minus1;           /* 0x07c */
   uint32_t num_ref_idx_l1_active_minus1;           /* 0x080 */
   uint32_t weighted_pred_flag;                     /* 0x084 */
   uint32_t weighted_bipred_idc;                    /* 0x088 */
   uint32_t pic_init_qp_minus26;                    /* 0x08c */
   uint32_t chroma_qp_index_offset;                 /* 0x090 */
   uint32_t deblocking_filter_control_present_flag; /* 0x094 */
   uint32_t constrained_intra_pred_flag;            /* 0x098 */
   uint32_t redundant_pic_cnt_present_flag;         /* 0x09c */
   uint32_t transform_8x8_mode_flag;                /* 0x0a0 */
   uint32_t pad2[(0x1c8 - 0xa0 - 4) / 4];           /* 0x0a4 .. 0x1c7 */
   uint32_t second_chroma_qp_index_offset;          /* 0x1c8 */
   uint32_t u1cc;                                   /* 0x1cc */
   uint32_t curr_pic_order_cnt;                     /* 0x1d0 */
   uint32_t field_order_cnt[2];                     /* 0x1d4 */
   uint32_t curr_mvidx;                             /* 0x1dc */
   struct iref refs[0x10];                          /* 0x1e0 */
};

/* Complete block: sequence parameters at 0x000, picture parameters at 0x150. */
struct iparm {
   struct iseqparm iseqparm; /* 0x000 */
   struct ipicparm ipicparm; /* 0x150 */
};
||
83 | |||
84 | int |
||
85 | nv84_decoder_bsp(struct nv84_decoder *dec, |
||
86 | struct pipe_h264_picture_desc *desc, |
||
87 | unsigned num_buffers, |
||
88 | const void *const *data, |
||
89 | const unsigned *num_bytes, |
||
90 | struct nv84_video_buffer *dest) |
||
91 | { |
||
92 | struct iparm params; |
||
93 | uint32_t more_params[0x44 / 4] = {0}; |
||
94 | unsigned total_bytes = 0; |
||
95 | int i; |
||
96 | static const uint32_t end[] = {0x0b010000, 0, 0x0b010000, 0}; |
||
97 | char indexes[17] = {0}; |
||
98 | struct nouveau_pushbuf *push = dec->bsp_pushbuf; |
||
99 | struct nouveau_pushbuf_refn bo_refs[] = { |
||
100 | { dec->vpring, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, |
||
101 | { dec->mbring, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, |
||
102 | { dec->bitstream, NOUVEAU_BO_RDWR | NOUVEAU_BO_GART }, |
||
103 | { dec->fence, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, |
||
104 | }; |
||
105 | |||
106 | nouveau_bo_wait(dec->fence, NOUVEAU_BO_RDWR, dec->client); |
||
107 | |||
108 | STATIC_ASSERT(sizeof(struct iparm) == 0x530); |
||
109 | |||
110 | memset(¶ms, 0, sizeof(params)); |
||
111 | |||
112 | dest->frame_num = dest->frame_num_max = desc->frame_num; |
||
113 | |||
114 | for (i = 0; i < 16; i++) { |
||
115 | struct iref *ref = ¶ms.ipicparm.refs[i]; |
||
116 | struct nv84_video_buffer *frame = (struct nv84_video_buffer *)desc->ref[i]; |
||
117 | if (!frame) break; |
||
118 | /* The frame index is relative to the last IDR frame. So once the frame |
||
119 | * num goes back to 0, previous reference frames need to have a negative |
||
120 | * index. |
||
121 | */ |
||
122 | if (desc->frame_num >= frame->frame_num_max) { |
||
123 | frame->frame_num_max = desc->frame_num; |
||
124 | } else { |
||
125 | frame->frame_num -= frame->frame_num_max + 1; |
||
126 | frame->frame_num_max = desc->frame_num; |
||
127 | } |
||
128 | ref->non_existing = 0; |
||
129 | ref->field_is_ref = (desc->top_is_reference[i] ? 1 : 0) | |
||
130 | (desc->bottom_is_reference[i] ? 2 : 0); |
||
131 | ref->is_long_term = desc->is_long_term[i]; |
||
132 | ref->field_order_cnt[0] = desc->field_order_cnt_list[i][0]; |
||
133 | ref->field_order_cnt[1] = desc->field_order_cnt_list[i][1]; |
||
134 | ref->frame_idx = frame->frame_num; |
||
135 | ref->u00 = ref->mvidx = frame->mvidx; |
||
136 | ref->field_pic_flag = desc->field_pic_flag; |
||
137 | indexes[frame->mvidx] = 1; |
||
138 | } |
||
139 | |||
140 | /* Needs to be adjusted if we ever support non-4:2:0 videos */ |
||
141 | params.iseqparm.chroma_format_idc = 1; |
||
142 | |||
143 | params.iseqparm.pic_width_in_mbs_minus1 = mb(dec->base.width) - 1; |
||
144 | if (desc->field_pic_flag || desc->pps->sps->mb_adaptive_frame_field_flag) |
||
145 | params.iseqparm.pic_height_in_map_units_minus1 = mb_half(dec->base.height) - 1; |
||
146 | else |
||
147 | params.iseqparm.pic_height_in_map_units_minus1 = mb(dec->base.height) - 1; |
||
148 | |||
149 | if (desc->bottom_field_flag) |
||
150 | params.ipicparm.curr_pic_order_cnt = desc->field_order_cnt[1]; |
||
151 | else |
||
152 | params.ipicparm.curr_pic_order_cnt = desc->field_order_cnt[0]; |
||
153 | params.ipicparm.field_order_cnt[0] = desc->field_order_cnt[0]; |
||
154 | params.ipicparm.field_order_cnt[1] = desc->field_order_cnt[1]; |
||
155 | if (desc->is_reference) { |
||
156 | if (dest->mvidx < 0) { |
||
157 | for (i = 0; i < desc->num_ref_frames + 1; i++) { |
||
158 | if (!indexes[i]) { |
||
159 | dest->mvidx = i; |
||
160 | break; |
||
161 | } |
||
162 | } |
||
163 | assert(i != desc->num_ref_frames + 1); |
||
164 | } |
||
165 | |||
166 | params.ipicparm.u1cc = params.ipicparm.curr_mvidx = dest->mvidx; |
||
167 | } |
||
168 | |||
169 | params.iseqparm.num_ref_frames = desc->num_ref_frames; |
||
170 | params.iseqparm.mb_adaptive_frame_field_flag = desc->pps->sps->mb_adaptive_frame_field_flag; |
||
171 | params.ipicparm.constrained_intra_pred_flag = desc->pps->constrained_intra_pred_flag; |
||
172 | params.ipicparm.weighted_pred_flag = desc->pps->weighted_pred_flag; |
||
173 | params.ipicparm.weighted_bipred_idc = desc->pps->weighted_bipred_idc; |
||
174 | params.iseqparm.frame_mbs_only_flag = desc->pps->sps->frame_mbs_only_flag; |
||
175 | params.ipicparm.transform_8x8_mode_flag = desc->pps->transform_8x8_mode_flag; |
||
176 | params.ipicparm.chroma_qp_index_offset = desc->pps->chroma_qp_index_offset; |
||
177 | params.ipicparm.second_chroma_qp_index_offset = desc->pps->second_chroma_qp_index_offset; |
||
178 | params.ipicparm.pic_init_qp_minus26 = desc->pps->pic_init_qp_minus26; |
||
179 | params.ipicparm.num_ref_idx_l0_active_minus1 = desc->num_ref_idx_l0_active_minus1; |
||
180 | params.ipicparm.num_ref_idx_l1_active_minus1 = desc->num_ref_idx_l1_active_minus1; |
||
181 | params.iseqparm.log2_max_frame_num_minus4 = desc->pps->sps->log2_max_frame_num_minus4; |
||
182 | params.iseqparm.pic_order_cnt_type = desc->pps->sps->pic_order_cnt_type; |
||
183 | params.iseqparm.log2_max_pic_order_cnt_lsb_minus4 = desc->pps->sps->log2_max_pic_order_cnt_lsb_minus4; |
||
184 | params.iseqparm.delta_pic_order_always_zero_flag = desc->pps->sps->delta_pic_order_always_zero_flag; |
||
185 | params.iseqparm.direct_8x8_inference_flag = desc->pps->sps->direct_8x8_inference_flag; |
||
186 | params.ipicparm.entropy_coding_mode_flag = desc->pps->entropy_coding_mode_flag; |
||
187 | params.ipicparm.pic_order_present_flag = desc->pps->bottom_field_pic_order_in_frame_present_flag; |
||
188 | params.ipicparm.deblocking_filter_control_present_flag = desc->pps->deblocking_filter_control_present_flag; |
||
189 | params.ipicparm.redundant_pic_cnt_present_flag = desc->pps->redundant_pic_cnt_present_flag; |
||
190 | |||
191 | memcpy(dec->bitstream->map, ¶ms, sizeof(params)); |
||
192 | for (i = 0; i < num_buffers; i++) { |
||
193 | assert(total_bytes + num_bytes[i] < dec->bitstream->size / 2 - 0x700); |
||
194 | memcpy(dec->bitstream->map + 0x700 + total_bytes, data[i], num_bytes[i]); |
||
195 | total_bytes += num_bytes[i]; |
||
196 | } |
||
197 | memcpy(dec->bitstream->map + 0x700 + total_bytes, end, sizeof(end)); |
||
198 | total_bytes += sizeof(end); |
||
199 | more_params[1] = total_bytes; |
||
200 | memcpy(dec->bitstream->map + 0x600, more_params, sizeof(more_params)); |
||
201 | |||
202 | PUSH_SPACE(push, 5 + 21 + 3 + 2 + 4 + 2); |
||
203 | nouveau_pushbuf_refn(push, bo_refs, sizeof(bo_refs)/sizeof(bo_refs[0])); |
||
204 | |||
205 | /* Wait for the fence = 1 */ |
||
206 | BEGIN_NV04(push, SUBC_BSP(0x10), 4); |
||
207 | PUSH_DATAh(push, dec->fence->offset); |
||
208 | PUSH_DATA (push, dec->fence->offset); |
||
209 | PUSH_DATA (push, 1); |
||
210 | PUSH_DATA (push, 1); |
||
211 | |||
212 | /* TODO: Use both halves of bitstream/vpring for alternating frames */ |
||
213 | |||
214 | /* Kick off the BSP */ |
||
215 | BEGIN_NV04(push, SUBC_BSP(0x400), 20); |
||
216 | PUSH_DATA (push, dec->bitstream->offset >> 8); |
||
217 | PUSH_DATA (push, (dec->bitstream->offset >> 8) + 7); |
||
218 | PUSH_DATA (push, dec->bitstream->size / 2 - 0x700); |
||
219 | PUSH_DATA (push, (dec->bitstream->offset >> 8) + 6); |
||
220 | PUSH_DATA (push, 1); |
||
221 | PUSH_DATA (push, dec->mbring->offset >> 8); |
||
222 | PUSH_DATA (push, dec->frame_size); |
||
223 | PUSH_DATA (push, (dec->mbring->offset + dec->frame_size) >> 8); |
||
224 | PUSH_DATA (push, dec->vpring->offset >> 8); |
||
225 | PUSH_DATA (push, dec->vpring->size / 2); |
||
226 | PUSH_DATA (push, dec->vpring_residual); |
||
227 | PUSH_DATA (push, dec->vpring_ctrl); |
||
228 | PUSH_DATA (push, 0); |
||
229 | PUSH_DATA (push, dec->vpring_residual); |
||
230 | PUSH_DATA (push, dec->vpring_residual + dec->vpring_ctrl); |
||
231 | PUSH_DATA (push, dec->vpring_deblock); |
||
232 | PUSH_DATA (push, (dec->vpring->offset + dec->vpring_ctrl + |
||
233 | dec->vpring_residual + dec->vpring_deblock) >> 8); |
||
234 | PUSH_DATA (push, 0x654321); |
||
235 | PUSH_DATA (push, 0); |
||
236 | PUSH_DATA (push, 0x100008); |
||
237 | |||
238 | BEGIN_NV04(push, SUBC_BSP(0x620), 2); |
||
239 | PUSH_DATA (push, 0); |
||
240 | PUSH_DATA (push, 0); |
||
241 | |||
242 | BEGIN_NV04(push, SUBC_BSP(0x300), 1); |
||
243 | PUSH_DATA (push, 0); |
||
244 | |||
245 | /* Write fence = 2, intr */ |
||
246 | BEGIN_NV04(push, SUBC_BSP(0x610), 3); |
||
247 | PUSH_DATAh(push, dec->fence->offset); |
||
248 | PUSH_DATA (push, dec->fence->offset); |
||
249 | PUSH_DATA (push, 2); |
||
250 | |||
251 | BEGIN_NV04(push, SUBC_BSP(0x304), 1); |
||
252 | PUSH_DATA (push, 0x101); |
||
253 | PUSH_KICK (push); |
||
254 | return 0; |
||
255 | }>>>>> |