Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4358 Serge 1
/**************************************************************************
2
 *
3
 * Copyright 2011 Advanced Micro Devices, Inc.
4
 * All Rights Reserved.
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a
7
 * copy of this software and associated documentation files (the
8
 * "Software"), to deal in the Software without restriction, including
9
 * without limitation the rights to use, copy, modify, merge, publish,
10
 * distribute, sub license, and/or sell copies of the Software, and to
11
 * permit persons to whom the Software is furnished to do so, subject to
12
 * the following conditions:
13
 *
14
 * The above copyright notice and this permission notice (including the
15
 * next paragraph) shall be included in all copies or substantial portions
16
 * of the Software.
17
 *
18
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
22
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
 *
26
 **************************************************************************/
27
 
28
/*
29
 * Authors:
30
 *	Christian König 
31
 *
32
 */
33
 
34
#include 
35
#include 
36
#include 
37
#include 
38
#include 
39
 
40
#include "pipe/p_video_decoder.h"
41
 
42
#include "util/u_memory.h"
43
#include "util/u_video.h"
44
 
45
#include "vl/vl_defines.h"
46
#include "vl/vl_mpeg12_decoder.h"
47
 
48
#include "../../winsys/radeon/drm/radeon_winsys.h"
49
#include "radeon_uvd.h"
50
 
51
/* print an error to stderr, prefixed with source file, line and function */
#define RUVD_ERR(fmt, ...) \
	fprintf(stderr, "EE %s:%d %s UVD - " fmt, __FILE__, __LINE__, __func__, ##__VA_ARGS__)

/* number of message/feedback and bitstream buffers the decoder cycles through */
#define NUM_BUFFERS 4

/* reference frame counts used for DPB sizing and reference index clamping */
#define NUM_MPEG2_REFS 6
#define NUM_H264_REFS 17
58
 
59
/* UVD buffer representation: a winsys buffer plus its command stream handle */
struct ruvd_buffer {
	/* buffer object returned by the winsys buffer_create() hook */
	struct pb_buffer *buf;
	/* handle for the same buffer, obtained via buffer_get_cs_handle() */
	struct radeon_winsys_cs_handle *cs_handle;
};
65
 
66
/* UVD decoder representation */
67
struct ruvd_decoder {
68
	struct pipe_video_decoder	base;
69
 
70
	ruvd_set_dtb			set_dtb;
71
 
72
	unsigned			stream_handle;
73
	unsigned			frame_number;
74
 
75
	struct radeon_winsys*		ws;
76
	struct radeon_winsys_cs*	cs;
77
 
78
	unsigned			cur_buffer;
79
 
80
	struct ruvd_buffer		msg_fb_buffers[NUM_BUFFERS];
81
	struct ruvd_buffer		bs_buffers[NUM_BUFFERS];
82
	void*				bs_ptr;
83
	unsigned			bs_size;
84
 
85
	struct ruvd_buffer		dpb;
86
};
87
 
88
/*
 * Generate an UVD stream handle.
 *
 * The handle is the bit-reversed process id XORed with a counter that is
 * incremented on every call, so consecutive calls inside one process return
 * different values. The counter is a plain static variable and is not
 * synchronized between threads.
 */
static unsigned alloc_stream_handle(void)
{
	static unsigned counter = 0;
	unsigned stream_handle = 0;
	unsigned pid = getpid();
	unsigned i;

	/* mirror the pid so its low (fast changing) bits end up on top */
	for (i = 0; i < 32; ++i)
		stream_handle |= ((pid >> i) & 1) << (31 - i);

	stream_handle ^= ++counter;
	return stream_handle;
}
102
 
103
/* flush IB to the hardware */
104
static void flush(struct ruvd_decoder *dec)
105
{
106
	uint32_t *pm4 =	dec->cs->buf;
107
 
108
	// align IB
109
	while(dec->cs->cdw % 16)
110
		pm4[dec->cs->cdw++] = RUVD_PKT2();
111
 
112
	dec->ws->cs_flush(dec->cs, 0, 0);
113
}
114
 
115
/* add a new set register command to the IB */
116
static void set_reg(struct ruvd_decoder *dec, unsigned reg, uint32_t val)
117
{
118
	uint32_t *pm4 =	dec->cs->buf;
119
	pm4[dec->cs->cdw++] = RUVD_PKT0(reg >> 2, 0);
120
	pm4[dec->cs->cdw++] = val;
121
}
122
 
123
/* send a command to the VCPU through the GPCOM registers */
124
static void send_cmd(struct ruvd_decoder *dec, unsigned cmd,
125
		     struct radeon_winsys_cs_handle* cs_buf, uint32_t off,
126
		     enum radeon_bo_usage usage, enum radeon_bo_domain domain)
127
{
128
	int reloc_idx;
129
 
130
	reloc_idx = dec->ws->cs_add_reloc(dec->cs, cs_buf, usage, domain);
131
	set_reg(dec, RUVD_GPCOM_VCPU_DATA0, off);
132
	set_reg(dec, RUVD_GPCOM_VCPU_DATA1, reloc_idx * 4);
133
	set_reg(dec, RUVD_GPCOM_VCPU_CMD, cmd << 1);
134
}
135
 
136
/* send a message command to the VCPU */
137
static void send_msg(struct ruvd_decoder *dec, struct ruvd_msg *msg)
138
{
139
	struct ruvd_buffer* buf;
140
	void *ptr;
141
 
142
	/* grap a message buffer */
143
	buf = &dec->msg_fb_buffers[dec->cur_buffer];
144
 
145
	/* copy the message into it */
146
	ptr = dec->ws->buffer_map(buf->cs_handle, dec->cs, PIPE_TRANSFER_WRITE);
147
	if (!ptr)
148
		return;
149
 
150
	memcpy(ptr, msg, sizeof(*msg));
151
	memset(ptr + sizeof(*msg), 0, buf->buf->size - sizeof(*msg));
152
	dec->ws->buffer_unmap(buf->cs_handle);
153
 
154
	/* and send it to the hardware */
155
	send_cmd(dec, RUVD_CMD_MSG_BUFFER, buf->cs_handle, 0,
156
		 RADEON_USAGE_READ, RADEON_DOMAIN_VRAM);
157
}
158
 
159
/* create a buffer in the winsys */
160
static bool create_buffer(struct ruvd_decoder *dec,
161
			  struct ruvd_buffer *buffer,
162
			  unsigned size)
163
{
164
	buffer->buf = dec->ws->buffer_create(dec->ws, size, 4096, false,
165
					     RADEON_DOMAIN_GTT | RADEON_DOMAIN_VRAM);
166
	if (!buffer->buf)
167
		return false;
168
 
169
	buffer->cs_handle = dec->ws->buffer_get_cs_handle(buffer->buf);
170
	if (!buffer->cs_handle)
171
		return false;
172
 
173
	return true;
174
}
175
 
176
/* destroy a buffer */
177
static void destroy_buffer(struct ruvd_buffer *buffer)
178
{
179
	pb_reference(&buffer->buf, NULL);
180
	buffer->cs_handle = NULL;
181
}
182
 
183
/* reallocate a buffer, preserving its content */
184
static bool resize_buffer(struct ruvd_decoder *dec,
185
			  struct ruvd_buffer *new_buf,
186
			  unsigned new_size)
187
{
188
	unsigned bytes = MIN2(new_buf->buf->size, new_size);
189
	struct ruvd_buffer old_buf = *new_buf;
190
	void *src = NULL, *dst = NULL;
191
 
192
	if (!create_buffer(dec, new_buf, new_size))
193
		goto error;
194
 
195
	src = dec->ws->buffer_map(old_buf.cs_handle, dec->cs, PIPE_TRANSFER_READ);
196
	if (!src)
197
		goto error;
198
 
199
	dst = dec->ws->buffer_map(new_buf->cs_handle, dec->cs, PIPE_TRANSFER_WRITE);
200
	if (!dst)
201
		goto error;
202
 
203
	memcpy(dst, src, bytes);
204
	if (new_size > bytes) {
205
		new_size -= bytes;
206
		dst += bytes;
207
		memset(dst, 0, new_size);
208
	}
209
	dec->ws->buffer_unmap(new_buf->cs_handle);
210
	dec->ws->buffer_unmap(old_buf.cs_handle);
211
	destroy_buffer(&old_buf);
212
	return true;
213
 
214
error:
215
	if (src) dec->ws->buffer_unmap(old_buf.cs_handle);
216
	destroy_buffer(new_buf);
217
	*new_buf = old_buf;
218
	return false;
219
}
220
 
221
/* clear the buffer with zeros */
222
static void clear_buffer(struct ruvd_decoder *dec,
223
			 struct ruvd_buffer* buffer)
224
{
225
	//TODO: let the GPU do the job
226
	void *ptr = dec->ws->buffer_map(buffer->cs_handle, dec->cs,
227
					PIPE_TRANSFER_WRITE);
228
	if (!ptr)
229
		return;
230
 
231
	memset(ptr, 0, buffer->buf->size);
232
	dec->ws->buffer_unmap(buffer->cs_handle);
233
}
234
 
235
/* cycle to the next set of buffers */
236
static void next_buffer(struct ruvd_decoder *dec)
237
{
238
	++dec->cur_buffer;
239
	dec->cur_buffer %= NUM_BUFFERS;
240
}
241
 
242
/* convert the profile into something UVD understands */
243
static uint32_t profile2stream_type(enum pipe_video_profile profile)
244
{
245
	switch (u_reduce_video_profile(profile)) {
246
	case PIPE_VIDEO_CODEC_MPEG4_AVC:
247
		return RUVD_CODEC_H264;
248
 
249
	case PIPE_VIDEO_CODEC_VC1:
250
		return RUVD_CODEC_VC1;
251
 
252
	case PIPE_VIDEO_CODEC_MPEG12:
253
		return RUVD_CODEC_MPEG2;
254
 
255
	case PIPE_VIDEO_CODEC_MPEG4:
256
		return RUVD_CODEC_MPEG4;
257
 
258
	default:
259
		assert(0);
260
		return 0;
261
	}
262
}
263
 
264
/* calculate size of reference picture buffer */
265
static unsigned calc_dpb_size(enum pipe_video_profile profile,
266
			      unsigned width, unsigned height,
267
			      unsigned max_references)
268
{
269
	unsigned width_in_mb, height_in_mb, image_size, dpb_size;
270
 
271
	// always align them to MB size for dpb calculation
272
	width = align(width, VL_MACROBLOCK_WIDTH);
273
	height = align(height, VL_MACROBLOCK_HEIGHT);
274
 
275
	// always one more for currently decoded picture
276
	max_references += 1;
277
 
278
	// aligned size of a single frame
279
	image_size = width * height;
280
	image_size += image_size / 2;
281
	image_size = align(image_size, 1024);
282
 
283
	// picture width & height in 16 pixel units
284
	width_in_mb = width / VL_MACROBLOCK_WIDTH;
285
	height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2);
286
 
287
	switch (u_reduce_video_profile(profile)) {
288
	case PIPE_VIDEO_CODEC_MPEG4_AVC:
289
		// the firmware seems to allways assume a minimum of ref frames
290
		max_references = MAX2(NUM_H264_REFS, max_references);
291
 
292
		// reference picture buffer
293
		dpb_size = image_size * max_references;
294
 
295
		// macroblock context buffer
296
		dpb_size += width_in_mb * height_in_mb * max_references * 192;
297
 
298
		// IT surface buffer
299
		dpb_size += width_in_mb * height_in_mb * 32;
300
		break;
301
 
302
	case PIPE_VIDEO_CODEC_VC1:
303
		// reference picture buffer
304
		dpb_size = image_size * max_references;
305
 
306
		// CONTEXT_BUFFER
307
		dpb_size += width_in_mb * height_in_mb * 128;
308
 
309
		// IT surface buffer
310
		dpb_size += width_in_mb * 64;
311
 
312
		// DB surface buffer
313
		dpb_size += width_in_mb * 128;
314
 
315
		// BP
316
		dpb_size += align(MAX2(width_in_mb, height_in_mb) * 7 * 16, 64);
317
		break;
318
 
319
	case PIPE_VIDEO_CODEC_MPEG12:
320
		// reference picture buffer, must be big enough for all frames
321
		dpb_size = image_size * NUM_MPEG2_REFS;
322
		break;
323
 
324
	case PIPE_VIDEO_CODEC_MPEG4:
325
		// reference picture buffer
326
		dpb_size = image_size * max_references;
327
 
328
		// CM
329
		dpb_size += width_in_mb * height_in_mb * 64;
330
 
331
		// IT surface buffer
332
		dpb_size += align(width_in_mb * height_in_mb * 32, 64);
333
		break;
334
 
335
	default:
336
		// something is missing here
337
		assert(0);
338
 
339
		// at least use a sane default value
340
		dpb_size = 32 * 1024 * 1024;
341
		break;
342
	}
343
	return dpb_size;
344
}
345
 
346
/* get h264 specific message bits */
347
static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_picture_desc *pic)
348
{
349
	struct ruvd_h264 result;
350
 
351
	memset(&result, 0, sizeof(result));
352
	switch (pic->base.profile) {
353
	case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
354
		result.profile = RUVD_H264_PROFILE_BASELINE;
355
		break;
356
 
357
	case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
358
		result.profile = RUVD_H264_PROFILE_MAIN;
359
		break;
360
 
361
	case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
362
		result.profile = RUVD_H264_PROFILE_HIGH;
363
		break;
364
 
365
	default:
366
		assert(0);
367
		break;
368
	}
369
	if (((dec->base.width * dec->base.height) >> 8) <= 1620)
370
		result.level = 30;
371
	else
372
		result.level = 41;
373
 
374
	result.sps_info_flags = 0;
375
	result.sps_info_flags |= pic->direct_8x8_inference_flag << 0;
376
	result.sps_info_flags |= pic->mb_adaptive_frame_field_flag << 1;
377
	result.sps_info_flags |= pic->frame_mbs_only_flag << 2;
378
	result.sps_info_flags |= pic->delta_pic_order_always_zero_flag << 3;
379
 
380
	result.pps_info_flags = 0;
381
	result.pps_info_flags |= pic->transform_8x8_mode_flag << 0;
382
	result.pps_info_flags |= pic->redundant_pic_cnt_present_flag << 1;
383
	result.pps_info_flags |= pic->constrained_intra_pred_flag << 2;
384
	result.pps_info_flags |= pic->deblocking_filter_control_present_flag << 3;
385
	result.pps_info_flags |= pic->weighted_bipred_idc << 4;
386
	result.pps_info_flags |= pic->weighted_pred_flag << 6;
387
	result.pps_info_flags |= pic->pic_order_present_flag << 7;
388
	result.pps_info_flags |= pic->entropy_coding_mode_flag << 8;
389
 
390
	result.chroma_format = 0x1;
391
	result.bit_depth_luma_minus8 = 0;
392
	result.bit_depth_chroma_minus8 = 0;
393
 
394
	result.log2_max_frame_num_minus4 = pic->log2_max_frame_num_minus4;
395
	result.pic_order_cnt_type = pic->pic_order_cnt_type;
396
	result.log2_max_pic_order_cnt_lsb_minus4 = pic->log2_max_pic_order_cnt_lsb_minus4;
397
	result.num_ref_frames = pic->num_ref_frames;
398
	result.pic_init_qp_minus26 = pic->pic_init_qp_minus26;
399
	result.chroma_qp_index_offset = pic->chroma_qp_index_offset;
400
	result.second_chroma_qp_index_offset = pic->second_chroma_qp_index_offset;
401
 
402
	result.num_slice_groups_minus1 = 0;
403
	result.slice_group_map_type = 0;
404
 
405
	result.num_ref_idx_l0_active_minus1 = pic->num_ref_idx_l0_active_minus1;
406
	result.num_ref_idx_l1_active_minus1 = pic->num_ref_idx_l1_active_minus1;
407
 
408
	result.slice_group_change_rate_minus1 = 0;
409
 
410
	memcpy(result.scaling_list_4x4, pic->scaling_lists_4x4, 6*64);
411
	memcpy(result.scaling_list_8x8, pic->scaling_lists_8x8, 2*64);
412
 
413
	result.frame_num = pic->frame_num;
414
	memcpy(result.frame_num_list, pic->frame_num_list, 4*16);
415
	result.curr_field_order_cnt_list[0] = pic->field_order_cnt[0];
416
	result.curr_field_order_cnt_list[1] = pic->field_order_cnt[1];
417
	memcpy(result.field_order_cnt_list, pic->field_order_cnt_list, 4*16*2);
418
 
419
	result.decoded_pic_idx = pic->frame_num;
420
 
421
	return result;
422
}
423
 
424
/* get vc1 specific message bits */
425
static struct ruvd_vc1 get_vc1_msg(struct pipe_vc1_picture_desc *pic)
426
{
427
	struct ruvd_vc1 result;
428
 
429
	memset(&result, 0, sizeof(result));
430
	switch(pic->base.profile) {
431
	case PIPE_VIDEO_PROFILE_VC1_SIMPLE:
432
		result.profile = RUVD_VC1_PROFILE_SIMPLE;
433
		break;
434
 
435
	case PIPE_VIDEO_PROFILE_VC1_MAIN:
436
		result.profile = RUVD_VC1_PROFILE_MAIN;
437
		break;
438
 
439
	case PIPE_VIDEO_PROFILE_VC1_ADVANCED:
440
		result.profile = RUVD_VC1_PROFILE_ADVANCED;
441
		break;
442
	default:
443
		assert(0);
444
	}
445
 
446
	if (pic->base.profile == PIPE_VIDEO_PROFILE_VC1_ADVANCED) {
447
		result.level = 0;
448
 
449
		result.sps_info_flags |= pic->postprocflag << 7;
450
		result.sps_info_flags |= pic->pulldown << 6;
451
		result.sps_info_flags |= pic->interlace << 5;
452
		result.sps_info_flags |= pic->tfcntrflag << 4;
453
		result.sps_info_flags |= pic->psf << 1;
454
 
455
		result.pps_info_flags |= pic->panscan_flag << 7;
456
		result.pps_info_flags |= pic->refdist_flag << 6;
457
		result.pps_info_flags |= pic->extended_dmv << 8;
458
		result.pps_info_flags |= pic->range_mapy_flag << 31;
459
		result.pps_info_flags |= pic->range_mapy << 28;
460
		result.pps_info_flags |= pic->range_mapuv_flag << 27;
461
		result.pps_info_flags |= pic->range_mapuv << 24;
462
 
463
	} else {
464
		result.level = 0;
465
		result.pps_info_flags |= pic->multires << 21;
466
		result.pps_info_flags |= pic->syncmarker << 20;
467
		result.pps_info_flags |= pic->rangered << 19;
468
		result.pps_info_flags |= pic->maxbframes << 16;
469
	}
470
 
471
	result.sps_info_flags |= pic->finterpflag << 3;
472
	//(((unsigned int)(pPicParams->advance.reserved1))        << SPS_INFO_VC1_RESERVED_SHIFT)
473
 
474
	result.pps_info_flags |= pic->loopfilter << 5;
475
	result.pps_info_flags |= pic->fastuvmc << 4;
476
	result.pps_info_flags |= pic->extended_mv << 3;
477
	result.pps_info_flags |= pic->dquant << 1;
478
	result.pps_info_flags |= pic->vstransform << 0;
479
	result.pps_info_flags |= pic->overlap << 11;
480
	result.pps_info_flags |= pic->quantizer << 9;
481
 
482
 
483
#if 0
484
uint32_t 	slice_count
485
uint8_t 	picture_type
486
uint8_t 	frame_coding_mode
487
uint8_t 	deblockEnable
488
uint8_t 	pquant
489
#endif
490
 
491
        result.chroma_format  = 1;
492
	return result;
493
}
494
 
495
/* extract the frame number from a referenced video buffer */
496
static uint32_t get_ref_pic_idx(struct ruvd_decoder *dec, struct pipe_video_buffer *ref)
497
{
498
	uint32_t min = dec->frame_number - NUM_MPEG2_REFS;
499
	uint32_t max = dec->frame_number - 1;
500
	uintptr_t frame;
501
 
502
	/* seems to be the most sane fallback */
503
	if (!ref)
504
		return max;
505
 
506
	/* get the frame number from the associated data */
507
	frame = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base);
508
 
509
	/* limit the frame number to a valid range */
510
	return MAX2(MIN2(frame, max), min);
511
}
512
 
513
/* get mpeg2 specific msg bits */
514
static struct ruvd_mpeg2 get_mpeg2_msg(struct ruvd_decoder *dec,
515
				       struct pipe_mpeg12_picture_desc *pic)
516
{
517
	const int *zscan = pic->alternate_scan ? vl_zscan_alternate : vl_zscan_normal;
518
	struct ruvd_mpeg2 result;
519
	unsigned i;
520
 
521
	memset(&result, 0, sizeof(result));
522
	result.decoded_pic_idx = dec->frame_number;
523
	for (i = 0; i < 2; ++i)
524
		result.ref_pic_idx[i] = get_ref_pic_idx(dec, pic->ref[i]);
525
 
526
	result.load_intra_quantiser_matrix = 1;
527
	result.load_nonintra_quantiser_matrix = 1;
528
 
529
	for (i = 0; i < 64; ++i) {
530
		result.intra_quantiser_matrix[i] = pic->intra_matrix[zscan[i]];
531
		result.nonintra_quantiser_matrix[i] = pic->non_intra_matrix[zscan[i]];
532
	}
533
 
534
	result.profile_and_level_indication = 0;
535
	result.chroma_format = 0x1;
536
 
537
	result.picture_coding_type = pic->picture_coding_type;
538
	result.f_code[0][0] = pic->f_code[0][0] + 1;
539
	result.f_code[0][1] = pic->f_code[0][1] + 1;
540
	result.f_code[1][0] = pic->f_code[1][0] + 1;
541
	result.f_code[1][1] = pic->f_code[1][1] + 1;
542
	result.intra_dc_precision = pic->intra_dc_precision;
543
	result.pic_structure = pic->picture_structure;
544
	result.top_field_first = pic->top_field_first;
545
	result.frame_pred_frame_dct = pic->frame_pred_frame_dct;
546
	result.concealment_motion_vectors = pic->concealment_motion_vectors;
547
	result.q_scale_type = pic->q_scale_type;
548
	result.intra_vlc_format = pic->intra_vlc_format;
549
	result.alternate_scan = pic->alternate_scan;
550
 
551
	return result;
552
}
553
 
554
/* get mpeg4 specific msg bits */
555
static struct ruvd_mpeg4 get_mpeg4_msg(struct ruvd_decoder *dec,
556
				       struct pipe_mpeg4_picture_desc *pic)
557
{
558
	struct ruvd_mpeg4 result;
559
	unsigned i;
560
 
561
	memset(&result, 0, sizeof(result));
562
	result.decoded_pic_idx = dec->frame_number;
563
	for (i = 0; i < 2; ++i)
564
		result.ref_pic_idx[i] = get_ref_pic_idx(dec, pic->ref[i]);
565
 
566
	result.variant_type = 0;
567
	result.profile_and_level_indication = 0xF0; // ASP Level0
568
 
569
	result.video_object_layer_verid = 0x5; // advanced simple
570
	result.video_object_layer_shape = 0x0; // rectangular
571
 
572
	result.video_object_layer_width = dec->base.width;
573
	result.video_object_layer_height = dec->base.height;
574
 
575
	result.vop_time_increment_resolution = pic->vop_time_increment_resolution;
576
 
577
	result.flags |= pic->short_video_header << 0;
578
	//result.flags |= obmc_disable << 1;
579
	result.flags |= pic->interlaced << 2;
580
        result.flags |= 1 << 3; // load_intra_quant_mat
581
	result.flags |= 1 << 4; // load_nonintra_quant_mat
582
	result.flags |= pic->quarter_sample << 5;
583
	result.flags |= 1 << 6; // complexity_estimation_disable
584
	result.flags |= pic->resync_marker_disable << 7;
585
	//result.flags |= data_partitioned << 8;
586
	//result.flags |= reversible_vlc << 9;
587
	result.flags |= 0 << 10; // newpred_enable
588
	result.flags |= 0 << 11; // reduced_resolution_vop_enable
589
	//result.flags |= scalability << 12;
590
	//result.flags |= is_object_layer_identifier << 13;
591
	//result.flags |= fixed_vop_rate << 14;
592
	//result.flags |= newpred_segment_type << 15;
593
 
594
	result.quant_type = pic->quant_type;
595
 
596
	for (i = 0; i < 64; ++i) {
597
		result.intra_quant_mat[i] = pic->intra_matrix[vl_zscan_normal[i]];
598
		result.nonintra_quant_mat[i] = pic->non_intra_matrix[vl_zscan_normal[i]];
599
	}
600
 
601
	/*
602
	int32_t 	trd [2]
603
	int32_t 	trb [2]
604
	uint8_t 	vop_coding_type
605
	uint8_t 	vop_fcode_forward
606
	uint8_t 	vop_fcode_backward
607
	uint8_t 	rounding_control
608
	uint8_t 	alternate_vertical_scan_flag
609
	uint8_t 	top_field_first
610
	*/
611
 
612
	return result;
613
}
614
 
615
/**
616
 * destroy this video decoder
617
 */
618
static void ruvd_destroy(struct pipe_video_decoder *decoder)
619
{
620
	struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;
621
	struct ruvd_msg msg;
622
	unsigned i;
623
 
624
	assert(decoder);
625
 
626
	memset(&msg, 0, sizeof(msg));
627
	msg.size = sizeof(msg);
628
	msg.msg_type = RUVD_MSG_DESTROY;
629
	msg.stream_handle = dec->stream_handle;
630
	send_msg(dec, &msg);
631
 
632
	flush(dec);
633
 
634
	dec->ws->cs_destroy(dec->cs);
635
 
636
	for (i = 0; i < NUM_BUFFERS; ++i) {
637
		destroy_buffer(&dec->msg_fb_buffers[i]);
638
		destroy_buffer(&dec->bs_buffers[i]);
639
	}
640
 
641
	destroy_buffer(&dec->dpb);
642
 
643
	FREE(dec);
644
}
645
 
646
/* callback to free the data associated with a video buffer */
static void ruvd_destroy_associated_data(void *data)
{
	/* nothing to release: the "pointer" is just a frame number */
	(void)data;
}
651
 
652
/**
653
 * start decoding of a new frame
654
 */
655
static void ruvd_begin_frame(struct pipe_video_decoder *decoder,
656
			     struct pipe_video_buffer *target,
657
			     struct pipe_picture_desc *picture)
658
{
659
	struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;
660
	uintptr_t frame;
661
 
662
	assert(decoder);
663
 
664
	frame = ++dec->frame_number;
665
	vl_video_buffer_set_associated_data(target, decoder, (void *)frame,
666
					    &ruvd_destroy_associated_data);
667
 
668
	dec->bs_size = 0;
669
	dec->bs_ptr = dec->ws->buffer_map(
670
		dec->bs_buffers[dec->cur_buffer].cs_handle,
671
		dec->cs, PIPE_TRANSFER_WRITE);
672
}
673
 
674
/**
 * decode a macroblock
 *
 * Macroblock level decoding is not implemented, calling this asserts.
 */
static void ruvd_decode_macroblock(struct pipe_video_decoder *decoder,
				   struct pipe_video_buffer *target,
				   struct pipe_picture_desc *picture,
				   const struct pipe_macroblock *macroblocks,
				   unsigned num_macroblocks)
{
	(void)decoder;
	(void)target;
	(void)picture;
	(void)macroblocks;
	(void)num_macroblocks;

	/* not supported (yet) */
	assert(0);
}
686
 
687
/**
688
 * decode a bitstream
689
 */
690
static void ruvd_decode_bitstream(struct pipe_video_decoder *decoder,
691
				  struct pipe_video_buffer *target,
692
				  struct pipe_picture_desc *picture,
693
				  unsigned num_buffers,
694
				  const void * const *buffers,
695
				  const unsigned *sizes)
696
{
697
	struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;
698
	unsigned i;
699
 
700
	assert(decoder);
701
 
702
	if (!dec->bs_ptr)
703
		return;
704
 
705
	for (i = 0; i < num_buffers; ++i) {
706
		struct ruvd_buffer *buf = &dec->bs_buffers[dec->cur_buffer];
707
		unsigned new_size = dec->bs_size + sizes[i];
708
 
709
		if (new_size > buf->buf->size) {
710
			dec->ws->buffer_unmap(buf->cs_handle);
711
			if (!resize_buffer(dec, buf, new_size)) {
712
				RUVD_ERR("Can't resize bitstream buffer!");
713
				return;
714
			}
715
 
716
			dec->bs_ptr = dec->ws->buffer_map(buf->cs_handle, dec->cs,
717
							  PIPE_TRANSFER_WRITE);
718
			if (!dec->bs_ptr)
719
				return;
720
 
721
			dec->bs_ptr += dec->bs_size;
722
		}
723
 
724
		memcpy(dec->bs_ptr, buffers[i], sizes[i]);
725
		dec->bs_size += sizes[i];
726
		dec->bs_ptr += sizes[i];
727
	}
728
}
729
 
730
/**
731
 * end decoding of the current frame
732
 */
733
static void ruvd_end_frame(struct pipe_video_decoder *decoder,
734
			   struct pipe_video_buffer *target,
735
			   struct pipe_picture_desc *picture)
736
{
737
	struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;
738
	struct radeon_winsys_cs_handle *dt;
739
	struct ruvd_buffer *msg_fb_buf, *bs_buf;
740
	struct ruvd_msg msg;
741
	unsigned bs_size;
742
 
743
	assert(decoder);
744
 
745
	if (!dec->bs_ptr)
746
		return;
747
 
748
	msg_fb_buf = &dec->msg_fb_buffers[dec->cur_buffer];
749
	bs_buf = &dec->bs_buffers[dec->cur_buffer];
750
 
751
	bs_size = align(dec->bs_size, 128);
752
	memset(dec->bs_ptr, 0, bs_size - dec->bs_size);
753
	dec->ws->buffer_unmap(bs_buf->cs_handle);
754
 
755
	memset(&msg, 0, sizeof(msg));
756
	msg.size = sizeof(msg);
757
	msg.msg_type = RUVD_MSG_DECODE;
758
	msg.stream_handle = dec->stream_handle;
759
	msg.status_report_feedback_number = dec->frame_number;
760
 
761
	msg.body.decode.stream_type = profile2stream_type(dec->base.profile);
762
	msg.body.decode.decode_flags = 0x1;
763
	msg.body.decode.width_in_samples = dec->base.width;
764
	msg.body.decode.height_in_samples = dec->base.height;
765
 
766
	msg.body.decode.dpb_size = dec->dpb.buf->size;
767
	msg.body.decode.bsd_size = bs_size;
768
 
769
	dt = dec->set_dtb(&msg, (struct vl_video_buffer *)target);
770
 
771
	switch (u_reduce_video_profile(picture->profile)) {
772
	case PIPE_VIDEO_CODEC_MPEG4_AVC:
773
		msg.body.decode.codec.h264 = get_h264_msg(dec, (struct pipe_h264_picture_desc*)picture);
774
		break;
775
 
776
	case PIPE_VIDEO_CODEC_VC1:
777
		msg.body.decode.codec.vc1 = get_vc1_msg((struct pipe_vc1_picture_desc*)picture);
778
		break;
779
 
780
	case PIPE_VIDEO_CODEC_MPEG12:
781
		msg.body.decode.codec.mpeg2 = get_mpeg2_msg(dec, (struct pipe_mpeg12_picture_desc*)picture);
782
		break;
783
 
784
	case PIPE_VIDEO_CODEC_MPEG4:
785
		msg.body.decode.codec.mpeg4 = get_mpeg4_msg(dec, (struct pipe_mpeg4_picture_desc*)picture);
786
		break;
787
 
788
	default:
789
		assert(0);
790
		return;
791
	}
792
 
793
	msg.body.decode.db_surf_tile_config = msg.body.decode.dt_surf_tile_config;
794
	msg.body.decode.extension_support = 0x1;
795
 
796
	send_msg(dec, &msg);
797
	send_cmd(dec, RUVD_CMD_DPB_BUFFER, dec->dpb.cs_handle, 0,
798
		 RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM);
799
	send_cmd(dec, RUVD_CMD_BITSTREAM_BUFFER, bs_buf->cs_handle,
800
		 0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
801
	send_cmd(dec, RUVD_CMD_DECODING_TARGET_BUFFER, dt, 0,
802
		 RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM);
803
	send_cmd(dec, RUVD_CMD_FEEDBACK_BUFFER, msg_fb_buf->cs_handle,
804
		 0x1000, RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM);
805
	set_reg(dec, RUVD_ENGINE_CNTL, 1);
806
 
807
	flush(dec);
808
	next_buffer(dec);
809
}
810
 
811
/**
 * flush any outstanding command buffers to the hardware
 *
 * Intentionally empty: ruvd_end_frame() already flushes the IB itself.
 */
static void ruvd_flush(struct pipe_video_decoder *decoder)
{
	(void)decoder;
}
817
 
818
/**
819
 * create and UVD decoder
820
 */
821
struct pipe_video_decoder *ruvd_create_decoder(struct pipe_context *context,
822
					       enum pipe_video_profile profile,
823
					       enum pipe_video_entrypoint entrypoint,
824
					       enum pipe_video_chroma_format chroma_format,
825
					       unsigned width, unsigned height,
826
					       unsigned max_references, bool expect_chunked_decode,
827
					       struct radeon_winsys* ws,
828
					       ruvd_set_dtb set_dtb)
829
{
830
	unsigned dpb_size = calc_dpb_size(profile, width, height, max_references);
831
	struct radeon_info info;
832
	struct ruvd_decoder *dec;
833
	struct ruvd_msg msg;
834
	int i;
835
 
836
	ws->query_info(ws, &info);
837
 
838
	switch(u_reduce_video_profile(profile)) {
839
	case PIPE_VIDEO_CODEC_MPEG12:
840
		if (entrypoint > PIPE_VIDEO_ENTRYPOINT_BITSTREAM || info.family < CHIP_PALM)
841
			return vl_create_mpeg12_decoder(context, profile, entrypoint,
842
							chroma_format, width,
843
							height, max_references, expect_chunked_decode);
844
 
845
		/* fall through */
846
	case PIPE_VIDEO_CODEC_MPEG4:
847
	case PIPE_VIDEO_CODEC_MPEG4_AVC:
848
		width = align(width, VL_MACROBLOCK_WIDTH);
849
		height = align(height, VL_MACROBLOCK_HEIGHT);
850
		break;
851
 
852
	default:
853
		break;
854
	}
855
 
856
 
857
	dec = CALLOC_STRUCT(ruvd_decoder);
858
 
859
	if (!dec)
860
		return NULL;
861
 
862
	dec->base.context = context;
863
	dec->base.profile = profile;
864
	dec->base.entrypoint = entrypoint;
865
	dec->base.chroma_format = chroma_format;
866
	dec->base.width = width;
867
	dec->base.height = height;
868
 
869
	dec->base.destroy = ruvd_destroy;
870
	dec->base.begin_frame = ruvd_begin_frame;
871
	dec->base.decode_macroblock = ruvd_decode_macroblock;
872
	dec->base.decode_bitstream = ruvd_decode_bitstream;
873
	dec->base.end_frame = ruvd_end_frame;
874
	dec->base.flush = ruvd_flush;
875
 
876
	dec->set_dtb = set_dtb;
877
	dec->stream_handle = alloc_stream_handle();
878
	dec->ws = ws;
879
	dec->cs = ws->cs_create(ws, RING_UVD, NULL);
880
	if (!dec->cs) {
881
		RUVD_ERR("Can't get command submission context.\n");
882
		goto error;
883
	}
884
 
885
	for (i = 0; i < NUM_BUFFERS; ++i) {
886
		unsigned msg_fb_size = align(sizeof(struct ruvd_msg), 0x1000) + 0x1000;
887
		if (!create_buffer(dec, &dec->msg_fb_buffers[i], msg_fb_size)) {
888
			RUVD_ERR("Can't allocated message buffers.\n");
889
			goto error;
890
		}
891
 
892
		if (!create_buffer(dec, &dec->bs_buffers[i], 4096)) {
893
			RUVD_ERR("Can't allocated bitstream buffers.\n");
894
			goto error;
895
		}
896
 
897
		clear_buffer(dec, &dec->msg_fb_buffers[i]);
898
		clear_buffer(dec, &dec->bs_buffers[i]);
899
	}
900
 
901
	if (!create_buffer(dec, &dec->dpb, dpb_size)) {
902
		RUVD_ERR("Can't allocated dpb.\n");
903
		goto error;
904
	}
905
 
906
	clear_buffer(dec, &dec->dpb);
907
 
908
	memset(&msg, 0, sizeof(msg));
909
	msg.size = sizeof(msg);
910
	msg.msg_type = RUVD_MSG_CREATE;
911
	msg.stream_handle = dec->stream_handle;
912
	msg.body.create.stream_type = profile2stream_type(dec->base.profile);
913
	msg.body.create.width_in_samples = dec->base.width;
914
	msg.body.create.height_in_samples = dec->base.height;
915
	msg.body.create.dpb_size = dec->dpb.buf->size;
916
	send_msg(dec, &msg);
917
	flush(dec);
918
	next_buffer(dec);
919
 
920
	return &dec->base;
921
 
922
error:
923
	if (dec->cs) dec->ws->cs_destroy(dec->cs);
924
 
925
	for (i = 0; i < NUM_BUFFERS; ++i) {
926
		destroy_buffer(&dec->msg_fb_buffers[i]);
927
		destroy_buffer(&dec->bs_buffers[i]);
928
	}
929
 
930
	destroy_buffer(&dec->dpb);
931
 
932
	FREE(dec);
933
 
934
	return NULL;
935
}
936
 
937
/**
938
 * join surfaces into the same buffer with identical tiling params
939
 * sumup their sizes and replace the backend buffers with a single bo
940
 */
941
void ruvd_join_surfaces(struct radeon_winsys* ws, unsigned bind,
942
			struct pb_buffer** buffers[VL_NUM_COMPONENTS],
943
			struct radeon_surface *surfaces[VL_NUM_COMPONENTS])
944
{
945
	unsigned best_tiling, best_wh, off;
946
	unsigned size, alignment;
947
	struct pb_buffer *pb;
948
	unsigned i, j;
949
 
950
	for (i = 0, best_tiling = 0, best_wh = ~0; i < VL_NUM_COMPONENTS; ++i) {
951
		unsigned wh;
952
 
953
		if (!surfaces[i])
954
			continue;
955
 
956
		/* choose the smallest bank w/h for now */
957
		wh = surfaces[i]->bankw * surfaces[i]->bankh;
958
		if (wh < best_wh) {
959
			best_wh = wh;
960
			best_tiling = i;
961
		}
962
	}
963
 
964
	for (i = 0, off = 0; i < VL_NUM_COMPONENTS; ++i) {
965
		if (!surfaces[i])
966
			continue;
967
 
968
		/* copy the tiling parameters */
969
		surfaces[i]->bankw = surfaces[best_tiling]->bankw;
970
		surfaces[i]->bankh = surfaces[best_tiling]->bankh;
971
		surfaces[i]->mtilea = surfaces[best_tiling]->mtilea;
972
		surfaces[i]->tile_split = surfaces[best_tiling]->tile_split;
973
 
974
		/* adjust the texture layer offsets */
975
		off = align(off, surfaces[i]->bo_alignment);
976
		for (j = 0; j < Elements(surfaces[i]->level); ++j)
977
			surfaces[i]->level[j].offset += off;
978
		off += surfaces[i]->bo_size;
979
	}
980
 
981
	for (i = 0, size = 0, alignment = 0; i < VL_NUM_COMPONENTS; ++i) {
982
		if (!buffers[i] || !*buffers[i])
983
			continue;
984
 
985
		size = align(size, (*buffers[i])->alignment);
986
		size += (*buffers[i])->size;
987
		alignment = MAX2(alignment, (*buffers[i])->alignment * 1);
988
	}
989
 
990
	if (!size)
991
		return;
992
 
993
	/* TODO: 2D tiling workaround */
994
	alignment *= 2;
995
 
996
	pb = ws->buffer_create(ws, size, alignment, bind, RADEON_DOMAIN_VRAM);
997
	if (!pb)
998
		return;
999
 
1000
	for (i = 0; i < VL_NUM_COMPONENTS; ++i) {
1001
		if (!buffers[i] || !*buffers[i])
1002
			continue;
1003
 
1004
		pb_reference(buffers[i], pb);
1005
	}
1006
 
1007
	pb_reference(&pb, NULL);
1008
}
1009
 
1010
/* calculate top/bottom offset */
1011
static unsigned texture_offset(struct radeon_surface *surface, unsigned layer)
1012
{
1013
	return surface->level[0].offset +
1014
		layer * surface->level[0].slice_size;
1015
}
1016
 
1017
/*
 * hw encode the aspect of macro tiles
 *
 * Maps 1, 2, 4, 8 to 0, 1, 2, 3. Every other value is encoded as 0,
 * the same as an aspect of 1.
 */
static unsigned macro_tile_aspect(unsigned macro_tile_aspect)
{
	switch (macro_tile_aspect) {
	case 2:
		return 1;
	case 4:
		return 2;
	case 8:
		return 3;
	case 1:
	default:
		return 0;
	}
}
1029
 
1030
/*
 * hw encode the bank width and height
 *
 * Maps 1, 2, 4, 8 to 0, 1, 2, 3. Every other value is encoded as 0,
 * the same as a bank width/height of 1.
 */
static unsigned bank_wh(unsigned bankwh)
{
	switch (bankwh) {
	case 2:
		return 1;
	case 4:
		return 2;
	case 8:
		return 3;
	case 1:
	default:
		return 0;
	}
}
1042
 
1043
/**
1044
 * fill decoding target field from the luma and chroma surfaces
1045
 */
1046
void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surface *luma,
1047
			  struct radeon_surface *chroma)
1048
{
1049
	msg->body.decode.dt_pitch = luma->level[0].pitch_bytes;
1050
	switch (luma->level[0].mode) {
1051
	case RADEON_SURF_MODE_LINEAR_ALIGNED:
1052
		msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR;
1053
		msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR;
1054
		break;
1055
	case RADEON_SURF_MODE_1D:
1056
		msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
1057
		msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_1D_THIN;
1058
		break;
1059
	case RADEON_SURF_MODE_2D:
1060
		msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
1061
		msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_2D_THIN;
1062
		break;
1063
	default:
1064
		assert(0);
1065
		break;
1066
	}
1067
 
1068
	msg->body.decode.dt_luma_top_offset = texture_offset(luma, 0);
1069
	msg->body.decode.dt_chroma_top_offset = texture_offset(chroma, 0);
1070
	if (msg->body.decode.dt_field_mode) {
1071
		msg->body.decode.dt_luma_bottom_offset = texture_offset(luma, 1);
1072
		msg->body.decode.dt_chroma_bottom_offset = texture_offset(chroma, 1);
1073
	} else {
1074
		msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset;
1075
		msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset;
1076
	}
1077
 
1078
	assert(luma->bankw == chroma->bankw);
1079
	assert(luma->bankh == chroma->bankh);
1080
	assert(luma->mtilea == chroma->mtilea);
1081
 
1082
	msg->body.decode.dt_surf_tile_config |= RUVD_BANK_WIDTH(bank_wh(luma->bankw));
1083
	msg->body.decode.dt_surf_tile_config |= RUVD_BANK_HEIGHT(bank_wh(luma->bankh));
1084
	msg->body.decode.dt_surf_tile_config |= RUVD_MACRO_TILE_ASPECT_RATIO(macro_tile_aspect(luma->mtilea));
1085
}
1086
 
1087
int ruvd_get_video_param(struct pipe_screen *screen,
1088
			 enum pipe_video_profile profile,
1089
			 enum pipe_video_cap param)
1090
{
1091
	switch (param) {
1092
	case PIPE_VIDEO_CAP_SUPPORTED:
1093
		switch (u_reduce_video_profile(profile)) {
1094
		case PIPE_VIDEO_CODEC_MPEG12:
1095
		case PIPE_VIDEO_CODEC_MPEG4:
1096
		case PIPE_VIDEO_CODEC_MPEG4_AVC:
1097
		case PIPE_VIDEO_CODEC_VC1:
1098
			return true;
1099
		default:
1100
			return false;
1101
		}
1102
	case PIPE_VIDEO_CAP_NPOT_TEXTURES:
1103
		return 1;
1104
	case PIPE_VIDEO_CAP_MAX_WIDTH:
1105
		return 2048;
1106
	case PIPE_VIDEO_CAP_MAX_HEIGHT:
1107
		return 1152;
1108
	case PIPE_VIDEO_CAP_PREFERED_FORMAT:
1109
		return PIPE_FORMAT_NV12;
1110
	case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
1111
		return true;
1112
	case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
1113
		return true;
1114
	case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
1115
		return true;
1116
	default:
1117
		return 0;
1118
	}
1119
}
1120
 
1121
boolean ruvd_is_format_supported(struct pipe_screen *screen,
1122
				 enum pipe_format format,
1123
				 enum pipe_video_profile profile)
1124
{
1125
	/* we can only handle this one anyway */
1126
	return format == PIPE_FORMAT_NV12;
1127
}