Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright 2011-2013 Maarten Lankhorst
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice shall be included in
  12.  * all copies or substantial portions of the Software.
  13.  *
  14.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18.  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19.  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20.  * OTHER DEALINGS IN THE SOFTWARE.
  21.  */
  22.  
  23. #include "nvc0_video.h"
  24. #include <sys/mman.h>
  25.  
/*
 * MPEG-1/2 picture parameter block.  nvc0_decoder_fill_picparm_mpeg12_vp()
 * fills a zero-initialised instance and memcpy()s it to the picparm area
 * of the BSP buffer.  The trailing comments give each field's byte offset
 * (hex) within the structure.
 */
struct mpeg12_picparm_vp {
        uint16_t width; // 00 in mb units
        uint16_t height; // 02 in mb units

        uint32_t unk04; // 04 stride for Y?
        uint32_t unk08; // 08 stride for CbCr?

        uint32_t ofs[6]; // 0c..20 ofs
        uint32_t bucket_size; // 24
        uint32_t inter_ring_data_size; // 28
        uint16_t unk2c; // 2c
        uint16_t alternate_scan; // 2e
        uint16_t unk30; // 30 not seen set yet
        uint16_t picture_structure; // 32
        uint16_t pad2[3]; // 34, 36, 38
        uint16_t unk3a; // 3a set on I frame?

        uint32_t f_code[4]; // 3c
        uint32_t picture_coding_type; // 4c
        uint32_t intra_dc_precision; // 50
        uint32_t q_scale_type; // 54
        uint32_t top_field_first; // 58
        uint32_t full_pel_forward_vector; // 5c
        uint32_t full_pel_backward_vector; // 60
        uint8_t intra_quantizer_matrix[0x40]; // 64
        uint8_t non_intra_quantizer_matrix[0x40]; // a4
};
  53.  
/*
 * MPEG-4 part 2 picture parameter block.
 * nvc0_decoder_fill_picparm_mpeg4_vp() fills a zero-initialised instance
 * and memcpy()s it to the picparm area of the BSP buffer.  The trailing
 * comments give each field's byte offset (hex) within the structure.
 */
struct mpeg4_picparm_vp {
        uint32_t width; // 00 in normal units
        uint32_t height; // 04 in normal units
        uint32_t unk08; // 08 stride 1
        uint32_t unk0c; // 0c stride 2
        uint32_t ofs[6]; // 10..24 ofs
        uint32_t bucket_size; // 28
        uint32_t pad1; // 2c, pad
        uint32_t pad2; // 30
        uint32_t inter_ring_data_size; // 34

        uint32_t trd[2]; // 38, 3c
        uint32_t trb[2]; // 40, 44
        uint32_t u48; // 48 XXX codec selection? Should test with different values of VdpDecoderProfile
        uint16_t f_code_fw; // 4c
        uint16_t f_code_bw; // 4e
        uint8_t interlaced; // 50

        uint8_t quant_type; // 51 bool, written to 528
        uint8_t quarter_sample; // 52 bool, written to 548
        uint8_t short_video_header; // 53 bool, negated written to 528 shifted by 1
        uint8_t u54; // 54 bool, written to 0x740
        uint8_t vop_coding_type; // 55
        uint8_t rounding_control; // 56
        uint8_t alternate_vertical_scan_flag; // 57 bool
        uint8_t top_field_first; // 58 bool, written to vuc

        uint8_t pad4[3]; // 59, 5a, 5b, contains garbage on blob
        uint32_t pad5[0x10]; // 5c...9c non-inclusive, but WHY?

        uint32_t intra[0x10]; // 9c
        uint32_t non_intra[0x10]; // dc (0x9c + 0x40; ends at 0x11c)
        // NOTE(review): an earlier note here read "udc..uff pad?", which does
        // not match the offsets above -- any trailing pad would start at 0x11c.
};
  88.  
// Full version, with data pumped from BSP
/*
 * VC-1 picture parameter block.  nvc0_decoder_fill_picparm_vc1_vp()
 * writes every field of this structure directly into the mapped picparm
 * area.  The trailing comments give each field's byte offset (hex).
 */
struct vc1_picparm_vp {
        uint32_t bucket_size; // 00
        uint32_t pad; // 04

        uint32_t inter_ring_data_size; // 08
        uint32_t unk0c; // 0c stride 1
        uint32_t unk10; // 10 stride 2
        uint32_t ofs[6]; // 14..28 ofs

        uint16_t width; // 2c
        uint16_t height; // 2e

        uint8_t profile; // 30 0 = simple, 1 = main, 2 = advanced
        uint8_t loopfilter; // 31 written into vuc
        uint8_t fastuvmc; // 32, written into vuc
        uint8_t dquant; // 33

        uint8_t overlap; // 34
        uint8_t quantizer; // 35
        uint8_t u36; // 36, bool
        uint8_t pad2; // 37, to align to 0x38
};
  112.  
/*
 * H.264 picture parameter block.  nvc0_decoder_fill_picparm_h264_vp()
 * fills it and nvc0_decoder_fill_picparm_h264_vp_refs() later patches
 * tmp_idx in the mapped copy.  The layout is relied upon at run time:
 * the fill function asserts that u224 sits at byte offset 0x224.
 * Bitfield comments read "<byte offset> <bit range>".
 */
struct h264_picparm_vp { // 700..a00
        uint16_t width, height; // 00, 02 (filled in macroblock units)
        uint32_t stride1, stride2; // 04 08
        uint32_t ofs[6]; // 0c..24 in-image offset

        uint32_t u24; // nfi ac8 ?
        uint32_t bucket_size; // 28 bucket size
        uint32_t inter_ring_data_size; // 2c

        unsigned f0 : 1; // 0 0x01: into 640 shifted by 3, 540 shifted by 5, half size something?
        unsigned f1 : 1; // 1 0x02: into vuc ofs 56
        unsigned weighted_pred_flag : 1; // 2 0x04
        unsigned f3 : 1; // 3 0x08: into vuc ofs 68
        unsigned is_reference : 1; // 4
        unsigned interlace : 1; // 5 field_pic_flag
        unsigned bottom_field_flag : 1; // 6
        unsigned f7 : 1; // 7 0x80: nfi yet

        signed log2_max_frame_num_minus4 : 4; // 31 0..3
        unsigned u31_45 : 2; // 31 4..5
        unsigned pic_order_cnt_type : 2; // 31 6..7
        signed pic_init_qp_minus26 : 6; // 32 0..5
        signed chroma_qp_index_offset : 5; // 32 6..10
        signed second_chroma_qp_index_offset : 5; // 32 11..15

        unsigned weighted_bipred_idc : 2; // 34 0..1
        unsigned fifo_dec_index : 7; // 34 2..8
        unsigned tmp_idx : 5; // 34 9..13
        unsigned frame_number : 16; // 34 14..29
        unsigned u34_3030 : 1; // 34 30..30 pp.u34[30:30]
        unsigned u34_3131 : 1; // 34 31..31 pad?

        uint32_t field_order_cnt[2]; // 38, 3c

        // One 16-byte entry per reference picture.
        struct { // 40
                // 0x00223102
                // nfi (needs: top_is_reference, bottom_is_reference, is_long_term, maybe some other state that was saved..
                unsigned fifo_idx : 7; // 00 0..6
                unsigned tmp_idx : 5; // 00 7..11
                unsigned unk12 : 1; // 00 12 not seen yet, but set, maybe top_is_reference
                unsigned unk13 : 1; // 00 13 not seen yet, but set, maybe bottom_is_reference?
                unsigned unk14 : 1; // 00 14 skipped?
                unsigned notseenyet : 1; // 00 15 pad?
                unsigned unk16 : 1; // 00 16
                unsigned unk17 : 4; // 00 17..20
                unsigned unk21 : 4; // 00 21..24
                unsigned pad : 7; // 00 d25..31

                uint32_t field_order_cnt[2]; // 04,08
                uint32_t frame_idx; // 0c
        } refs[0x10];

        uint8_t m4x4[6][16]; // 140
        uint8_t m8x8[2][64]; // 1a0
        uint32_t u220; // 220 number of extra reorder_list to append?
        uint8_t u224[0x20]; // 224..244 reorder_list append ?
        uint8_t nfi244[0xb0]; // add some pad to make sure nulls are read
};
  171.  
  172. static void
  173. nvc0_decoder_handle_references(struct nvc0_decoder *dec, struct nvc0_video_buffer *refs[16], unsigned seq, struct nvc0_video_buffer *target)
  174. {
  175.    unsigned h264 = u_reduce_video_profile(dec->base.profile) == PIPE_VIDEO_CODEC_MPEG4_AVC;
  176.    unsigned i, idx, empty_spot = dec->base.max_references + 1;
  177.    for (i = 0; i < dec->base.max_references; ++i) {
  178.       if (!refs[i])
  179.          continue;
  180.  
  181.       idx = refs[i]->valid_ref;
  182.       //debug_printf("ref[%i] %p in slot %i\n", i, refs[i], idx);
  183.       assert(target != refs[i] ||
  184.              (h264 && empty_spot &&
  185.               (!dec->refs[idx].decoded_bottom || !dec->refs[idx].decoded_top)));
  186.       if (target == refs[i])
  187.          empty_spot = 0;
  188.  
  189.       if (dec->refs[idx].vidbuf != refs[i]) {
  190.          debug_printf("%p is not a real ref\n", refs[i]);
  191.          // FIXME: Maybe do m2mf copy here if a application really depends on it?
  192.          continue;
  193.       }
  194.  
  195.       assert(dec->refs[idx].vidbuf == refs[i]);
  196.       dec->refs[idx].last_used = seq;
  197.    }
  198.    if (!empty_spot)
  199.       return;
  200.  
  201.    /* Try to find a real empty spot first, there should be one..
  202.     */
  203.    for (i = 0; i < dec->base.max_references + 1; ++i) {
  204.       if (dec->refs[i].last_used < seq) {
  205.          if (!dec->refs[i].vidbuf) {
  206.             empty_spot = i;
  207.             break;
  208.          }
  209.          if (empty_spot < dec->base.max_references+1 &&
  210.              dec->refs[empty_spot].last_used < dec->refs[i].last_used)
  211.             continue;
  212.          empty_spot = i;
  213.       }
  214.    }
  215.    assert(empty_spot < dec->base.max_references+1);
  216.    dec->refs[empty_spot].last_used = seq;
  217. //   debug_printf("Kicked %p to add %p to slot %i\n", dec->refs[empty_spot].vidbuf, target, i);
  218.    dec->refs[empty_spot].vidbuf = target;
  219.    dec->refs[empty_spot].decoded_bottom = dec->refs[empty_spot].decoded_top = 0;
  220.    target->valid_ref = empty_spot;
  221. }
  222.  
  223. static void
  224. nvc0_decoder_kick_ref(struct nvc0_decoder *dec, struct nvc0_video_buffer *target)
  225. {
  226.    dec->refs[target->valid_ref].vidbuf = NULL;
  227.    dec->refs[target->valid_ref].last_used = 0;
  228. //   debug_printf("Unreffed %p\n", target);
  229. }
  230.  
  231. static uint32_t
  232. nvc0_decoder_fill_picparm_mpeg12_vp(struct nvc0_decoder *dec,
  233.                                     struct pipe_mpeg12_picture_desc *desc,
  234.                                     struct nvc0_video_buffer *refs[16],
  235.                                     unsigned *is_ref,
  236.                                     char *map)
  237. {
  238.    struct mpeg12_picparm_vp pic_vp_stub = {}, *pic_vp = &pic_vp_stub;
  239.    uint32_t i, ret = 0x01010, ring; // !async_shutdown << 16 | watchdog << 12 | irq_record << 4 | unk;
  240.    assert(!(dec->base.width & 0xf));
  241.    *is_ref = desc->picture_coding_type <= 2;
  242.  
  243.    if (dec->base.profile == PIPE_VIDEO_PROFILE_MPEG1)
  244.       pic_vp->picture_structure = 3;
  245.    else
  246.       pic_vp->picture_structure = desc->picture_structure;
  247.  
  248.    assert(desc->picture_structure != 4);
  249.    if (desc->picture_structure == 4) // Untested, but should work
  250.       ret |= 0x100;
  251.    pic_vp->width = mb(dec->base.width);
  252.    pic_vp->height = mb(dec->base.height);
  253.    pic_vp->unk08 = pic_vp->unk04 = (dec->base.width+0xf)&~0xf; // Stride
  254.  
  255.    nvc0_decoder_ycbcr_offsets(dec, &pic_vp->ofs[1], &pic_vp->ofs[3], &pic_vp->ofs[4]);
  256.    pic_vp->ofs[5] = pic_vp->ofs[3];
  257.    pic_vp->ofs[0] = pic_vp->ofs[2] = 0;
  258.    nvc0_decoder_inter_sizes(dec, 1, &ring, &pic_vp->bucket_size, &pic_vp->inter_ring_data_size);
  259.  
  260.    pic_vp->alternate_scan = desc->alternate_scan;
  261.    pic_vp->pad2[0] = pic_vp->pad2[1] = pic_vp->pad2[2] = 0;
  262.    pic_vp->unk30 = desc->picture_structure < 3 && (desc->picture_structure == 2 - desc->top_field_first);
  263.    pic_vp->unk3a = (desc->picture_coding_type == 1);
  264.    for (i = 0; i < 4; ++i)
  265.       pic_vp->f_code[i] = desc->f_code[i/2][i%2] + 1; // FU
  266.    pic_vp->picture_coding_type = desc->picture_coding_type;
  267.    pic_vp->intra_dc_precision = desc->intra_dc_precision;
  268.    pic_vp->q_scale_type = desc->q_scale_type;
  269.    pic_vp->top_field_first = desc->top_field_first;
  270.    pic_vp->full_pel_forward_vector = desc->full_pel_forward_vector;
  271.    pic_vp->full_pel_backward_vector = desc->full_pel_backward_vector;
  272.    memcpy(pic_vp->intra_quantizer_matrix, desc->intra_matrix, 0x40);
  273.    memcpy(pic_vp->non_intra_quantizer_matrix, desc->non_intra_matrix, 0x40);
  274.    memcpy(map, pic_vp, sizeof(*pic_vp));
  275.    refs[0] = (struct nvc0_video_buffer *)desc->ref[0];
  276.    refs[!!refs[0]] = (struct nvc0_video_buffer *)desc->ref[1];
  277.    return ret | (dec->base.profile != PIPE_VIDEO_PROFILE_MPEG1);
  278. }
  279.  
  280. static uint32_t
  281. nvc0_decoder_fill_picparm_mpeg4_vp(struct nvc0_decoder *dec,
  282.                                    struct pipe_mpeg4_picture_desc *desc,
  283.                                    struct nvc0_video_buffer *refs[16],
  284.                                    unsigned *is_ref,
  285.                                    char *map)
  286. {
  287.    struct mpeg4_picparm_vp pic_vp_stub = {}, *pic_vp = &pic_vp_stub;
  288.    uint32_t ring, ret = 0x01014; // !async_shutdown << 16 | watchdog << 12 | irq_record << 4 | unk;
  289.    assert(!(dec->base.width & 0xf));
  290.    *is_ref = desc->vop_coding_type <= 1;
  291.  
  292.    pic_vp->width = dec->base.width;
  293.    pic_vp->height = mb(dec->base.height)<<4;
  294.    pic_vp->unk0c = pic_vp->unk08 = mb(dec->base.width)<<4; // Stride
  295.  
  296.    nvc0_decoder_ycbcr_offsets(dec, &pic_vp->ofs[1], &pic_vp->ofs[3], &pic_vp->ofs[4]);
  297.    pic_vp->ofs[5] = pic_vp->ofs[3];
  298.    pic_vp->ofs[0] = pic_vp->ofs[2] = 0;
  299.    pic_vp->pad1 = pic_vp->pad2 = 0;
  300.    nvc0_decoder_inter_sizes(dec, 1, &ring, &pic_vp->bucket_size, &pic_vp->inter_ring_data_size);
  301.  
  302.    pic_vp->trd[0] = desc->trd[0];
  303.    pic_vp->trd[1] = desc->trd[1];
  304.    pic_vp->trb[0] = desc->trb[0];
  305.    pic_vp->trb[1] = desc->trb[1];
  306.    pic_vp->u48 = 0; // Codec?
  307.    pic_vp->pad1 = pic_vp->pad2 = 0;
  308.    pic_vp->f_code_fw = desc->vop_fcode_forward;
  309.    pic_vp->f_code_bw = desc->vop_fcode_backward;
  310.    pic_vp->interlaced = desc->interlaced;
  311.    pic_vp->quant_type = desc->quant_type;
  312.    pic_vp->quarter_sample = desc->quarter_sample;
  313.    pic_vp->short_video_header = desc->short_video_header;
  314.    pic_vp->u54 = 0;
  315.    pic_vp->vop_coding_type = desc->vop_coding_type;
  316.    pic_vp->rounding_control = desc->rounding_control;
  317.    pic_vp->alternate_vertical_scan_flag = desc->alternate_vertical_scan_flag;
  318.    pic_vp->top_field_first = desc->top_field_first;
  319.  
  320.    memcpy(pic_vp->intra, desc->intra_matrix, 0x40);
  321.    memcpy(pic_vp->non_intra, desc->non_intra_matrix, 0x40);
  322.    memcpy(map, pic_vp, sizeof(*pic_vp));
  323.    refs[0] = (struct nvc0_video_buffer *)desc->ref[0];
  324.    refs[!!refs[0]] = (struct nvc0_video_buffer *)desc->ref[1];
  325.    return ret;
  326. }
  327.  
  328. static uint32_t
  329. nvc0_decoder_fill_picparm_h264_vp(struct nvc0_decoder *dec,
  330.                                   const struct pipe_h264_picture_desc *d,
  331.                                   struct nvc0_video_buffer *refs[16],
  332.                                   unsigned *is_ref,
  333.                                   char *map)
  334. {
  335.    struct h264_picparm_vp stub_h = {}, *h = &stub_h;
  336.    unsigned ring, i, j = 0;
  337.    assert(offsetof(struct h264_picparm_vp, u224) == 0x224);
  338.    *is_ref = d->is_reference;
  339.    dec->last_frame_num = d->frame_num;
  340.  
  341.    h->width = mb(dec->base.width);
  342.    h->height = mb(dec->base.height);
  343.    h->stride1 = h->stride2 = mb(dec->base.width)*16;
  344.    nvc0_decoder_ycbcr_offsets(dec, &h->ofs[1], &h->ofs[3], &h->ofs[4]);
  345.    h->ofs[5] = h->ofs[3];
  346.    h->ofs[0] = h->ofs[2] = 0;
  347.    h->u24 = dec->tmp_stride >> 8;
  348.    assert(h->u24);
  349.    nvc0_decoder_inter_sizes(dec, 1, &ring, &h->bucket_size, &h->inter_ring_data_size);
  350.  
  351.    h->u220 = 0;
  352.    h->f0 = d->mb_adaptive_frame_field_flag;
  353.    h->f1 = d->direct_8x8_inference_flag;
  354.    h->weighted_pred_flag = d->weighted_pred_flag;
  355.    h->f3 = d->constrained_intra_pred_flag;
  356.    h->is_reference = d->is_reference;
  357.    h->interlace = d->field_pic_flag;
  358.    h->bottom_field_flag = d->bottom_field_flag;
  359.    h->f7 = 0; // TODO: figure out when set..
  360.    h->log2_max_frame_num_minus4 = d->log2_max_frame_num_minus4;
  361.    h->u31_45 = 1;
  362.  
  363.    h->pic_order_cnt_type = d->pic_order_cnt_type;
  364.    h->pic_init_qp_minus26 = d->pic_init_qp_minus26;
  365.    h->chroma_qp_index_offset = d->chroma_qp_index_offset;
  366.    h->second_chroma_qp_index_offset = d->second_chroma_qp_index_offset;
  367.    h->weighted_bipred_idc = d->weighted_bipred_idc;
  368.    h->tmp_idx = 0; // set in h264_vp_refs below
  369.    h->fifo_dec_index = 0; // always set to 0 to be fifo compatible with other codecs
  370.    h->frame_number = d->frame_num;
  371.    h->u34_3030 = h->u34_3131 = 0;
  372.    h->field_order_cnt[0] = d->field_order_cnt[0];
  373.    h->field_order_cnt[1] = d->field_order_cnt[1];
  374.    memset(h->refs, 0, sizeof(h->refs));
  375.    memcpy(h->m4x4, d->scaling_lists_4x4, sizeof(h->m4x4) + sizeof(h->m8x8));
  376.    h->u220 = 0;
  377.    for (i = 0; i < d->num_ref_frames; ++i) {
  378.       if (!d->ref[i])
  379.          break;
  380.       refs[j] = (struct nvc0_video_buffer *)d->ref[i];
  381.       h->refs[j].fifo_idx = j + 1;
  382.       h->refs[j].tmp_idx = refs[j]->valid_ref;
  383.       h->refs[j].field_order_cnt[0] = d->field_order_cnt_list[i][0];
  384.       h->refs[j].field_order_cnt[1] = d->field_order_cnt_list[i][1];
  385.       h->refs[j].frame_idx = d->frame_num_list[i];
  386.       if (!dec->refs[refs[j]->valid_ref].field_pic_flag) {
  387.          h->refs[j].unk12 = d->top_is_reference[i];
  388.          h->refs[j].unk13 = d->bottom_is_reference[i];
  389.       }
  390.       h->refs[j].unk14 = 0;
  391.       h->refs[j].notseenyet = 0;
  392.       h->refs[j].unk16 = dec->refs[refs[j]->valid_ref].field_pic_flag;
  393.       h->refs[j].unk17 = dec->refs[refs[j]->valid_ref].decoded_top &&
  394.                          d->top_is_reference[i];
  395.       h->refs[j].unk21 = dec->refs[refs[j]->valid_ref].decoded_bottom &&
  396.                          d->bottom_is_reference[i];
  397.       h->refs[j].pad = 0;
  398.       assert(!d->is_long_term[i]);
  399.       j++;
  400.    }
  401.    for (; i < 16; ++i)
  402.       assert(!d->ref[i]);
  403.    assert(d->num_ref_frames <= dec->base.max_references);
  404.  
  405.    for (; i < d->num_ref_frames; ++i)
  406.       h->refs[j].unk16 = d->field_pic_flag;
  407.    *(struct h264_picparm_vp *)map = *h;
  408.  
  409.    return 0x1113;
  410. }
  411.  
  412. static void
  413. nvc0_decoder_fill_picparm_h264_vp_refs(struct nvc0_decoder *dec,
  414.                                        struct pipe_h264_picture_desc *d,
  415.                                        struct nvc0_video_buffer *refs[16],
  416.                                        struct nvc0_video_buffer *target,
  417.                                        char *map)
  418. {
  419.    struct h264_picparm_vp *h = (struct h264_picparm_vp *)map;
  420.    assert(dec->refs[target->valid_ref].vidbuf == target);
  421. //    debug_printf("Target: %p\n", target);
  422.  
  423.    h->tmp_idx = target->valid_ref;
  424.    dec->refs[target->valid_ref].field_pic_flag = d->field_pic_flag;
  425.    if (!d->field_pic_flag || d->bottom_field_flag)
  426.       dec->refs[target->valid_ref].decoded_bottom = 1;
  427.    if (!d->field_pic_flag || !d->bottom_field_flag)
  428.       dec->refs[target->valid_ref].decoded_top = 1;
  429. }
  430.  
  431. static uint32_t
  432. nvc0_decoder_fill_picparm_vc1_vp(struct nvc0_decoder *dec,
  433.                                  struct pipe_vc1_picture_desc *d,
  434.                                  struct nvc0_video_buffer *refs[16],
  435.                                  unsigned *is_ref,
  436.                                  char *map)
  437. {
  438.    struct vc1_picparm_vp *vc = (struct vc1_picparm_vp *)map;
  439.    unsigned ring;
  440.    assert(dec->base.profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE);
  441.    *is_ref = d->picture_type <= 1;
  442.  
  443.    nvc0_decoder_ycbcr_offsets(dec, &vc->ofs[1], &vc->ofs[3], &vc->ofs[4]);
  444.    vc->ofs[5] = vc->ofs[3];
  445.    vc->ofs[0] = vc->ofs[2] = 0;
  446.    vc->width = dec->base.width;
  447.    vc->height = mb(dec->base.height)<<4;
  448.    vc->unk0c = vc->unk10 = mb(dec->base.width)<<4; // Stride
  449.    vc->pad = vc->pad2 = 0;
  450.    nvc0_decoder_inter_sizes(dec, 1, &ring, &vc->bucket_size, &vc->inter_ring_data_size);
  451.    vc->profile = dec->base.profile - PIPE_VIDEO_PROFILE_VC1_SIMPLE;
  452.    vc->loopfilter = d->loopfilter;
  453.    vc->fastuvmc = d->fastuvmc;
  454.    vc->dquant = d->dquant;
  455.    vc->overlap = d->overlap;
  456.    vc->quantizer = d->quantizer;
  457.    vc->u36 = 0; // ? No idea what this one is..
  458.    refs[0] = (struct nvc0_video_buffer *)d->ref[0];
  459.    refs[!!refs[0]] = (struct nvc0_video_buffer *)d->ref[1];
  460.    return 0x12;
  461. }
  462.  
  463. #if NVC0_DEBUG_FENCE
  464. static void dump_comm_vp(struct nvc0_decoder *dec, struct comm *comm, u32 comm_seq,
  465.                          struct nouveau_bo *inter_bo, unsigned slice_size)
  466. {
  467.         unsigned i, idx = comm->pvp_cur_index & 0xf;
  468.         debug_printf("Status: %08x, stage: %08x\n", comm->status_vp[idx], comm->pvp_stage);
  469. #if 0
  470.         debug_printf("Acked byte ofs: %x, bsp byte ofs: %x\n", comm->acked_byte_ofs, comm->byte_ofs);
  471.         debug_printf("Irq/parse indexes: %i %i\n", comm->irq_index, comm->parse_endpos_index);
  472.  
  473.         for (i = 0; i != comm->irq_index; ++i)
  474.                 debug_printf("irq[%i] = { @ %08x -> %04x }\n", i, comm->irq_pos[i], comm->irq_470[i]);
  475.         for (i = 0; i != comm->parse_endpos_index; ++i)
  476.                 debug_printf("parse_endpos[%i] = { @ %08x}\n", i, comm->parse_endpos[i]);
  477. #endif
  478.         debug_printf("mb_y = %u\n", comm->mb_y[idx]);
  479.         if (comm->status_vp[idx] == 1)
  480.                 return;
  481.  
  482.         if ((comm->pvp_stage & 0xff) != 0xff) {
  483.                 unsigned *map;
  484.                 assert(nouveau_bo_map(inter_bo, NOUVEAU_BO_RD|NOUVEAU_BO_NOBLOCK, dec->client) >= 0);
  485.                 map = inter_bo->map;
  486.                 for (i = 0; i < comm->byte_ofs + slice_size; i += 0x10) {
  487.                         debug_printf("%05x: %08x %08x %08x %08x\n", i, map[i/4], map[i/4+1], map[i/4+2], map[i/4+3]);
  488.                 }
  489.                 munmap(inter_bo->map, inter_bo->size);
  490.                 inter_bo->map = NULL;
  491.         }
  492.         assert((comm->pvp_stage & 0xff) == 0xff);
  493. }
  494. #endif
  495.  
  496. void nvc0_decoder_vp_caps(struct nvc0_decoder *dec, union pipe_desc desc,
  497.                           struct nvc0_video_buffer *target, unsigned comm_seq,
  498.                           unsigned *caps, unsigned *is_ref,
  499.                           struct nvc0_video_buffer *refs[16])
  500. {
  501.    struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NVC0_VIDEO_QDEPTH];
  502.    enum pipe_video_codec codec = u_reduce_video_profile(dec->base.profile);
  503.    char *vp = bsp_bo->map + VP_OFFSET;
  504.  
  505.    switch (codec){
  506.    case PIPE_VIDEO_CODEC_MPEG12:
  507.       *caps = nvc0_decoder_fill_picparm_mpeg12_vp(dec, desc.mpeg12, refs, is_ref, vp);
  508.       nvc0_decoder_handle_references(dec, refs, dec->fence_seq, target);
  509.       return;
  510.    case PIPE_VIDEO_CODEC_MPEG4:
  511.       *caps = nvc0_decoder_fill_picparm_mpeg4_vp(dec, desc.mpeg4, refs, is_ref, vp);
  512.       nvc0_decoder_handle_references(dec, refs, dec->fence_seq, target);
  513.       return;
  514.    case PIPE_VIDEO_CODEC_VC1: {
  515.       *caps = nvc0_decoder_fill_picparm_vc1_vp(dec, desc.vc1, refs, is_ref, vp);
  516.       nvc0_decoder_handle_references(dec, refs, dec->fence_seq, target);
  517.       return;
  518.    }
  519.    case PIPE_VIDEO_CODEC_MPEG4_AVC: {
  520.       *caps = nvc0_decoder_fill_picparm_h264_vp(dec, desc.h264, refs, is_ref, vp);
  521.       nvc0_decoder_handle_references(dec, refs, dec->fence_seq, target);
  522.       nvc0_decoder_fill_picparm_h264_vp_refs(dec, desc.h264, refs, target, vp);
  523.       return;
  524.    }
  525.    default: assert(0); return;
  526.    }
  527. }
  528.  
  529. void
  530. nvc0_decoder_vp(struct nvc0_decoder *dec, union pipe_desc desc,
  531.                 struct nvc0_video_buffer *target, unsigned comm_seq,
  532.                 unsigned caps, unsigned is_ref,
  533.                 struct nvc0_video_buffer *refs[16])
  534. {
  535.    struct nouveau_pushbuf *push = dec->pushbuf[1];
  536.    uint32_t bsp_addr, comm_addr, inter_addr, ucode_addr, pic_addr[17], last_addr, null_addr;
  537.    uint32_t slice_size, bucket_size, ring_size, i;
  538.    enum pipe_video_codec codec = u_reduce_video_profile(dec->base.profile);
  539.    struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NVC0_VIDEO_QDEPTH];
  540.    struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
  541.    u32 fence_extra = 0, codec_extra = 0;
  542.    struct nouveau_pushbuf_refn bo_refs[] = {
  543.       { inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
  544.       { dec->ref_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
  545.       { bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
  546. #ifdef NVC0_DEBUG_FENCE
  547.       { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART },
  548. #endif
  549.       { dec->fw_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
  550.    };
  551.    int num_refs = sizeof(bo_refs)/sizeof(*bo_refs) - !dec->fw_bo;
  552.  
  553. #if NVC0_DEBUG_FENCE
  554.    fence_extra = 4;
  555. #endif
  556.  
  557.    if (codec == PIPE_VIDEO_CODEC_MPEG4_AVC) {
  558.       nvc0_decoder_inter_sizes(dec, desc.h264->slice_count, &slice_size, &bucket_size, &ring_size);
  559.       codec_extra += 2;
  560.    } else
  561.       nvc0_decoder_inter_sizes(dec, 1, &slice_size, &bucket_size, &ring_size);
  562.  
  563.    if (dec->base.max_references > 2)
  564.       codec_extra += 1 + (dec->base.max_references - 2);
  565.  
  566.    pic_addr[16] = nvc0_video_addr(dec, target) >> 8;
  567.    last_addr = null_addr = nvc0_video_addr(dec, NULL) >> 8;
  568.  
  569.    for (i = 0; i < dec->base.max_references; ++i) {
  570.       if (!refs[i])
  571.          pic_addr[i] = last_addr;
  572.       else if (dec->refs[refs[i]->valid_ref].vidbuf == refs[i])
  573.          last_addr = pic_addr[i] = nvc0_video_addr(dec, refs[i]) >> 8;
  574.       else
  575.          pic_addr[i] = null_addr;
  576.    }
  577.    if (!is_ref)
  578.       nvc0_decoder_kick_ref(dec, target);
  579.  
  580.    nouveau_pushbuf_space(push, 8 + 3 * (codec != PIPE_VIDEO_CODEC_MPEG12) +
  581.               6 + codec_extra + fence_extra + 2, num_refs, 0);
  582.  
  583.    nouveau_pushbuf_refn(push, bo_refs, num_refs);
  584.  
  585.    bsp_addr = bsp_bo->offset >> 8;
  586. #if NVC0_DEBUG_FENCE
  587.    comm_addr = (dec->fence_bo->offset + COMM_OFFSET)>>8;
  588. #else
  589.    comm_addr = bsp_addr + (COMM_OFFSET>>8);
  590. #endif
  591.    inter_addr = inter_bo->offset >> 8;
  592.    if (dec->fw_bo)
  593.       ucode_addr = dec->fw_bo->offset >> 8;
  594.    else
  595.       ucode_addr = 0;
  596.  
  597.    BEGIN_NVC0(push, SUBC_VP(0x700), 7);
  598.    PUSH_DATA (push, caps); // 700
  599.    PUSH_DATA (push, comm_seq); // 704
  600.    PUSH_DATA (push, 0); // 708 fuc targets, ignored for nvc0
  601.    PUSH_DATA (push, dec->fw_sizes); // 70c
  602.    PUSH_DATA (push, bsp_addr+(VP_OFFSET>>8)); // 710 picparm_addr
  603.    PUSH_DATA (push, inter_addr); // 714 inter_parm
  604.    PUSH_DATA (push, inter_addr + slice_size + bucket_size); // 718 inter_data_ofs
  605.  
  606.    if (bucket_size) {
  607.       uint64_t tmpimg_addr = dec->ref_bo->offset + dec->ref_stride * (dec->base.max_references+2);
  608.  
  609.       BEGIN_NVC0(push, SUBC_VP(0x71c), 2);
  610.       PUSH_DATA (push, tmpimg_addr >> 8); // 71c
  611.       PUSH_DATA (push, inter_addr + slice_size); // 720 bucket_ofs
  612.    }
  613.  
  614.    BEGIN_NVC0(push, SUBC_VP(0x724), 5);
  615.    PUSH_DATA (push, comm_addr); // 724
  616.    PUSH_DATA (push, ucode_addr); // 728
  617.    PUSH_DATA (push, pic_addr[16]); // 734
  618.    PUSH_DATA (push, pic_addr[0]); // 72c
  619.    PUSH_DATA (push, pic_addr[1]); // 730
  620.  
  621.    if (dec->base.max_references > 2) {
  622.       int i;
  623.  
  624.       BEGIN_NVC0(push, SUBC_VP(0x400), dec->base.max_references - 2);
  625.       for (i = 2; i < dec->base.max_references; ++i) {
  626.          assert(0x400 + (i - 2) * 4 < 0x438);
  627.          PUSH_DATA (push, pic_addr[i]);
  628.       }
  629.    }
  630.  
  631.    if (codec == PIPE_VIDEO_CODEC_MPEG4_AVC) {
  632.       BEGIN_NVC0(push, SUBC_VP(0x438), 1);
  633.       PUSH_DATA (push, desc.h264->slice_count);
  634.    }
  635.  
  636.    //debug_printf("Decoding %08lx with %08lx and %08lx\n", pic_addr[16], pic_addr[0], pic_addr[1]);
  637.  
  638. #if NVC0_DEBUG_FENCE
  639.    BEGIN_NVC0(push, SUBC_VP(0x240), 3);
  640.    PUSH_DATAh(push, (dec->fence_bo->offset + 0x10));
  641.    PUSH_DATA (push, (dec->fence_bo->offset + 0x10));
  642.    PUSH_DATA (push, dec->fence_seq);
  643.  
  644.    BEGIN_NVC0(push, SUBC_VP(0x300), 1);
  645.    PUSH_DATA (push, 1);
  646.    PUSH_KICK(push);
  647.  
  648.    {
  649.       unsigned spin = 0;
  650.       do {
  651.          usleep(100);
  652.          if ((spin++ & 0xff) == 0xff) {
  653.             debug_printf("v%u: %u\n", dec->fence_seq, dec->fence_map[4]);
  654.             dump_comm_vp(dec, dec->comm, comm_seq, inter_bo, slice_size << 8);
  655.          }
  656.       } while (dec->fence_seq > dec->fence_map[4]);
  657.    }
  658.    dump_comm_vp(dec, dec->comm, comm_seq, inter_bo, slice_size << 8);
  659. #else
  660.    BEGIN_NVC0(push, SUBC_VP(0x300), 1);
  661.    PUSH_DATA (push, 0);
  662.    PUSH_KICK (push);
  663. #endif
  664. }
  665.