Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright 2013 Ilia Mirkin
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice shall be included in
  12.  * all copies or substantial portions of the Software.
  13.  *
  14.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18.  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19.  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20.  * OTHER DEALINGS IN THE SOFTWARE.
  21.  */
  22.  
  23. #include "nv84_video.h"
  24.  
  25. struct iparm {
  26.    struct iseqparm {
  27.       uint32_t chroma_format_idc; // 00
  28.       uint32_t pad[(0x128 - 0x4) / 4];
  29.       uint32_t log2_max_frame_num_minus4; // 128
  30.       uint32_t pic_order_cnt_type; // 12c
  31.       uint32_t log2_max_pic_order_cnt_lsb_minus4; // 130
  32.       uint32_t delta_pic_order_always_zero_flag; // 134
  33.       uint32_t num_ref_frames; // 138
  34.       uint32_t pic_width_in_mbs_minus1; // 13c
  35.       uint32_t pic_height_in_map_units_minus1; // 140
  36.       uint32_t frame_mbs_only_flag; // 144
  37.       uint32_t mb_adaptive_frame_field_flag; // 148
  38.       uint32_t direct_8x8_inference_flag; // 14c
  39.    } iseqparm; // 000
  40.    struct ipicparm {
  41.       uint32_t entropy_coding_mode_flag; // 00
  42.       uint32_t pic_order_present_flag; // 04
  43.       uint32_t num_slice_groups_minus1; // 08
  44.       uint32_t slice_group_map_type; // 0c
  45.       uint32_t pad1[0x60 / 4];
  46.       uint32_t u70; // 70
  47.       uint32_t u74; // 74
  48.       uint32_t u78; // 78
  49.       uint32_t num_ref_idx_l0_active_minus1; // 7c
  50.       uint32_t num_ref_idx_l1_active_minus1; // 80
  51.       uint32_t weighted_pred_flag; // 84
  52.       uint32_t weighted_bipred_idc; // 88
  53.       uint32_t pic_init_qp_minus26; // 8c
  54.       uint32_t chroma_qp_index_offset; // 90
  55.       uint32_t deblocking_filter_control_present_flag; // 94
  56.       uint32_t constrained_intra_pred_flag; // 98
  57.       uint32_t redundant_pic_cnt_present_flag; // 9c
  58.       uint32_t transform_8x8_mode_flag; // a0
  59.       uint32_t pad2[(0x1c8 - 0xa0 - 4) / 4];
  60.       uint32_t second_chroma_qp_index_offset; // 1c8
  61.       uint32_t u1cc; // 1cc
  62.       uint32_t curr_pic_order_cnt; // 1d0
  63.       uint32_t field_order_cnt[2]; // 1d4
  64.       uint32_t curr_mvidx; // 1dc
  65.       struct iref {
  66.          uint32_t u00; // 00
  67.          uint32_t field_is_ref; // 04 // bit0: top, bit1: bottom
  68.          uint8_t is_long_term; // 08
  69.          uint8_t non_existing; // 09
  70.          uint32_t frame_idx; // 0c
  71.          uint32_t field_order_cnt[2]; // 10
  72.          uint32_t mvidx; // 18
  73.          uint8_t field_pic_flag; // 1c
  74.          // 20
  75.       } refs[0x10]; // 1e0
  76.    } ipicparm; // 150
  77. };
  78.  
  79. int
  80. nv84_decoder_bsp(struct nv84_decoder *dec,
  81.                  struct pipe_h264_picture_desc *desc,
  82.                  unsigned num_buffers,
  83.                  const void *const *data,
  84.                  const unsigned *num_bytes,
  85.                  struct nv84_video_buffer *dest)
  86. {
  87.    struct iparm params;
  88.    uint32_t more_params[0x44 / 4] = {0};
  89.    unsigned total_bytes = 0;
  90.    int i;
  91.    static const uint32_t end[] = {0x0b010000, 0, 0x0b010000, 0};
  92.    char indexes[17] = {0};
  93.    struct nouveau_pushbuf *push = dec->bsp_pushbuf;
  94.    struct nouveau_pushbuf_refn bo_refs[] = {
  95.       { dec->vpring, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
  96.       { dec->mbring, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
  97.       { dec->bitstream, NOUVEAU_BO_RDWR | NOUVEAU_BO_GART },
  98.       { dec->fence, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
  99.    };
  100.  
  101.    nouveau_bo_wait(dec->fence, NOUVEAU_BO_RDWR, dec->client);
  102.  
  103.    STATIC_ASSERT(sizeof(struct iparm) == 0x530);
  104.  
  105.    memset(&params, 0, sizeof(params));
  106.  
  107.    dest->frame_num = dest->frame_num_max = desc->frame_num;
  108.  
  109.    for (i = 0; i < 16; i++) {
  110.       struct iref *ref = &params.ipicparm.refs[i];
  111.       struct nv84_video_buffer *frame = (struct nv84_video_buffer *)desc->ref[i];
  112.       if (!frame) break;
  113.       /* The frame index is relative to the last IDR frame. So once the frame
  114.        * num goes back to 0, previous reference frames need to have a negative
  115.        * index.
  116.        */
  117.       if (desc->frame_num >= frame->frame_num_max) {
  118.          frame->frame_num_max = desc->frame_num;
  119.       } else {
  120.          frame->frame_num -= frame->frame_num_max + 1;
  121.          frame->frame_num_max = desc->frame_num;
  122.       }
  123.       ref->non_existing = 0;
  124.       ref->field_is_ref = (desc->top_is_reference[i] ? 1 : 0) |
  125.          (desc->bottom_is_reference[i] ? 2 : 0);
  126.       ref->is_long_term = desc->is_long_term[i];
  127.       ref->field_order_cnt[0] = desc->field_order_cnt_list[i][0];
  128.       ref->field_order_cnt[1] = desc->field_order_cnt_list[i][1];
  129.       ref->frame_idx = frame->frame_num;
  130.       ref->u00 = ref->mvidx = frame->mvidx;
  131.       ref->field_pic_flag = desc->field_pic_flag;
  132.       indexes[frame->mvidx] = 1;
  133.    }
  134.  
  135.    /* Needs to be adjusted if we ever support non-4:2:0 videos */
  136.    params.iseqparm.chroma_format_idc = 1;
  137.  
  138.    params.iseqparm.pic_width_in_mbs_minus1 = mb(dec->base.width) - 1;
  139.    if (desc->field_pic_flag || desc->mb_adaptive_frame_field_flag)
  140.       params.iseqparm.pic_height_in_map_units_minus1 = mb_half(dec->base.height) - 1;
  141.    else
  142.       params.iseqparm.pic_height_in_map_units_minus1 = mb(dec->base.height) - 1;
  143.  
  144.    if (desc->bottom_field_flag)
  145.       params.ipicparm.curr_pic_order_cnt = desc->field_order_cnt[1];
  146.    else
  147.       params.ipicparm.curr_pic_order_cnt = desc->field_order_cnt[0];
  148.    params.ipicparm.field_order_cnt[0] = desc->field_order_cnt[0];
  149.    params.ipicparm.field_order_cnt[1] = desc->field_order_cnt[1];
  150.    if (desc->is_reference) {
  151.       if (dest->mvidx < 0) {
  152.          for (i = 0; i < desc->num_ref_frames + 1; i++) {
  153.             if (!indexes[i]) {
  154.                dest->mvidx = i;
  155.                break;
  156.             }
  157.          }
  158.          assert(i != desc->num_ref_frames + 1);
  159.       }
  160.  
  161.       params.ipicparm.u1cc = params.ipicparm.curr_mvidx = dest->mvidx;
  162.    }
  163.  
  164.    params.iseqparm.num_ref_frames = desc->num_ref_frames;
  165.    params.iseqparm.mb_adaptive_frame_field_flag = desc->mb_adaptive_frame_field_flag;
  166.    params.ipicparm.constrained_intra_pred_flag = desc->constrained_intra_pred_flag;
  167.    params.ipicparm.weighted_pred_flag = desc->weighted_pred_flag;
  168.    params.ipicparm.weighted_bipred_idc = desc->weighted_bipred_idc;
  169.    params.iseqparm.frame_mbs_only_flag = desc->frame_mbs_only_flag;
  170.    params.ipicparm.transform_8x8_mode_flag = desc->transform_8x8_mode_flag;
  171.    params.ipicparm.chroma_qp_index_offset = desc->chroma_qp_index_offset;
  172.    params.ipicparm.second_chroma_qp_index_offset = desc->second_chroma_qp_index_offset;
  173.    params.ipicparm.pic_init_qp_minus26 = desc->pic_init_qp_minus26;
  174.    params.ipicparm.num_ref_idx_l0_active_minus1 = desc->num_ref_idx_l0_active_minus1;
  175.    params.ipicparm.num_ref_idx_l1_active_minus1 = desc->num_ref_idx_l1_active_minus1;
  176.    params.iseqparm.log2_max_frame_num_minus4 = desc->log2_max_frame_num_minus4;
  177.    params.iseqparm.pic_order_cnt_type = desc->pic_order_cnt_type;
  178.    params.iseqparm.log2_max_pic_order_cnt_lsb_minus4 = desc->log2_max_pic_order_cnt_lsb_minus4;
  179.    params.iseqparm.delta_pic_order_always_zero_flag = desc->delta_pic_order_always_zero_flag;
  180.    params.iseqparm.direct_8x8_inference_flag = desc->direct_8x8_inference_flag;
  181.    params.ipicparm.entropy_coding_mode_flag = desc->entropy_coding_mode_flag;
  182.    params.ipicparm.pic_order_present_flag = desc->pic_order_present_flag;
  183.    params.ipicparm.deblocking_filter_control_present_flag = desc->deblocking_filter_control_present_flag;
  184.    params.ipicparm.redundant_pic_cnt_present_flag = desc->redundant_pic_cnt_present_flag;
  185.  
  186.    memcpy(dec->bitstream->map, &params, sizeof(params));
  187.    for (i = 0; i < num_buffers; i++) {
  188.       assert(total_bytes + num_bytes[i] < dec->bitstream->size / 2 - 0x700);
  189.       memcpy(dec->bitstream->map + 0x700 + total_bytes, data[i], num_bytes[i]);
  190.       total_bytes += num_bytes[i];
  191.    }
  192.    memcpy(dec->bitstream->map + 0x700 + total_bytes, end, sizeof(end));
  193.    total_bytes += sizeof(end);
  194.    more_params[1] = total_bytes;
  195.    memcpy(dec->bitstream->map + 0x600, more_params, sizeof(more_params));
  196.  
  197.    PUSH_SPACE(push, 5 + 21 + 3 + 2 + 4 + 2);
  198.    nouveau_pushbuf_refn(push, bo_refs, sizeof(bo_refs)/sizeof(bo_refs[0]));
  199.  
  200.    /* Wait for the fence = 1 */
  201.    BEGIN_NV04(push, SUBC_BSP(0x10), 4);
  202.    PUSH_DATAh(push, dec->fence->offset);
  203.    PUSH_DATA (push, dec->fence->offset);
  204.    PUSH_DATA (push, 1);
  205.    PUSH_DATA (push, 1);
  206.  
  207.    /* TODO: Use both halves of bitstream/vpring for alternating frames */
  208.  
  209.    /* Kick off the BSP */
  210.    BEGIN_NV04(push, SUBC_BSP(0x400), 20);
  211.    PUSH_DATA (push, dec->bitstream->offset >> 8);
  212.    PUSH_DATA (push, (dec->bitstream->offset >> 8) + 7);
  213.    PUSH_DATA (push, dec->bitstream->size / 2 - 0x700);
  214.    PUSH_DATA (push, (dec->bitstream->offset >> 8) + 6);
  215.    PUSH_DATA (push, 1);
  216.    PUSH_DATA (push, dec->mbring->offset >> 8);
  217.    PUSH_DATA (push, dec->frame_size);
  218.    PUSH_DATA (push, (dec->mbring->offset + dec->frame_size) >> 8);
  219.    PUSH_DATA (push, dec->vpring->offset >> 8);
  220.    PUSH_DATA (push, dec->vpring->size / 2);
  221.    PUSH_DATA (push, dec->vpring_residual);
  222.    PUSH_DATA (push, dec->vpring_ctrl);
  223.    PUSH_DATA (push, 0);
  224.    PUSH_DATA (push, dec->vpring_residual);
  225.    PUSH_DATA (push, dec->vpring_residual + dec->vpring_ctrl);
  226.    PUSH_DATA (push, dec->vpring_deblock);
  227.    PUSH_DATA (push, (dec->vpring->offset + dec->vpring_ctrl +
  228.                      dec->vpring_residual + dec->vpring_deblock) >> 8);
  229.    PUSH_DATA (push, 0x654321);
  230.    PUSH_DATA (push, 0);
  231.    PUSH_DATA (push, 0x100008);
  232.  
  233.    BEGIN_NV04(push, SUBC_BSP(0x620), 2);
  234.    PUSH_DATA (push, 0);
  235.    PUSH_DATA (push, 0);
  236.  
  237.    BEGIN_NV04(push, SUBC_BSP(0x300), 1);
  238.    PUSH_DATA (push, 0);
  239.  
  240.    /* Write fence = 2, intr */
  241.    BEGIN_NV04(push, SUBC_BSP(0x610), 3);
  242.    PUSH_DATAh(push, dec->fence->offset);
  243.    PUSH_DATA (push, dec->fence->offset);
  244.    PUSH_DATA (push, 2);
  245.  
  246.    BEGIN_NV04(push, SUBC_BSP(0x304), 1);
  247.    PUSH_DATA (push, 0x101);
  248.    PUSH_KICK (push);
  249.    return 0;
  250. }
  251.