Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright (c) 2007-2013 Intel Corporation. All Rights Reserved.
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the
  6.  * "Software"), to deal in the Software without restriction, including
  7.  * without limitation the rights to use, copy, modify, merge, publish,
  8.  * distribute, sub license, and/or sell copies of the Software, and to
  9.  * permit persons to whom the Software is furnished to do so, subject to
  10.  * the following conditions:
  11.  *
  12.  * The above copyright notice and this permission notice (including the
  13.  * next paragraph) shall be included in all copies or substantial portions
  14.  * of the Software.
  15.  *
  16.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  17.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  18.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  19.  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
  20.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  21.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  22.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23.  */
  24. #include "sysdeps.h"
  25. #include <stdio.h>
  26. #include <string.h>
  27. #include <stdlib.h>
  28. #include <getopt.h>
  29. #include <unistd.h>
  30. #include <sys/types.h>
  31. #include <sys/stat.h>
  32. #include <sys/time.h>
  33. #include <sys/mman.h>
  34. #include <fcntl.h>
  35. #include <assert.h>
  36. #include <pthread.h>
  37. #include <errno.h>
  38. #include <math.h>
  39. #include <va/va.h>
  40. #include <va/va_enc_h264.h>
  41. #include "va_display.h"
  42.  
  /*
   * Terminate the program when a libva call did not succeed.
   *   va_status: status value compared against VA_STATUS_SUCCESS
   *   func:      string naming the call; printed in the diagnostic
   * On failure a message with the calling function, func and the line
   * number goes to stderr and the process exits with status 1.
   *
   * The body is wrapped in do { ... } while (0) so that the macro expands to
   * exactly one statement and can be used safely in an unbraced if/else.
   * Invoke it with a trailing semicolon.
   */
  #define CHECK_VASTATUS(va_status,func)                                  \
      do {                                                                \
          if ((va_status) != VA_STATUS_SUCCESS) {                         \
              fprintf(stderr,"%s:%s (%d) failed,exit\n", __func__, (func), __LINE__); \
              exit(1);                                                    \
          }                                                               \
      } while (0)
  48.  
  49. #include "../loadsurface.h"
  50.  
  /* nal_ref_idc values handed to nal_header() */
  #define NAL_REF_IDC_NONE        0
  #define NAL_REF_IDC_LOW         1
  #define NAL_REF_IDC_MEDIUM      2
  #define NAL_REF_IDC_HIGH        3

  /* nal_unit_type values handed to nal_header() */
  #define NAL_NON_IDR             1
  #define NAL_IDR                 5
  #define NAL_SPS                 7
  #define NAL_PPS                 8
  #define NAL_SEI                 6

  /* slice_type codes and the predicates that test for them */
  #define SLICE_TYPE_P            0
  #define SLICE_TYPE_B            1
  #define SLICE_TYPE_I            2
  #define IS_P_SLICE(type) (SLICE_TYPE_P == (type))
  #define IS_B_SLICE(type) (SLICE_TYPE_B == (type))
  #define IS_I_SLICE(type) (SLICE_TYPE_I == (type))


  #define ENTROPY_MODE_CAVLC      0
  #define ENTROPY_MODE_CABAC      1

  /* profile_idc values written into the SPS */
  #define PROFILE_IDC_BASELINE    66
  #define PROFILE_IDC_MAIN        77
  #define PROFILE_IDC_HIGH        100

  /* Size, in 32-bit words, of the initial bitstream buffer and of every growth step */
  #define BITSTREAM_ALLOCATE_STEPPING     4096

  /*
   * 16 surfaces for source YUV and 16 surfaces for reference; the same
   * count also sizes the coded buffer array. Defined once instead of twice.
   */
  #define SURFACE_NUM 16
  81. static  VADisplay va_dpy;
  82. static  VAProfile h264_profile = ~0;
  83. static  VAConfigAttrib attrib[VAConfigAttribTypeMax];
  84. static  VAConfigAttrib config_attrib[VAConfigAttribTypeMax];
  85. static  int config_attrib_num = 0, enc_packed_header_idx;
  86. static  VASurfaceID src_surface[SURFACE_NUM];
  87. static  VABufferID  coded_buf[SURFACE_NUM];
  88. static  VASurfaceID ref_surface[SURFACE_NUM];
  89. static  VAConfigID config_id;
  90. static  VAContextID context_id;
  91. static  VAEncSequenceParameterBufferH264 seq_param;
  92. static  VAEncPictureParameterBufferH264 pic_param;
  93. static  VAEncSliceParameterBufferH264 slice_param;
  94. static  VAPictureH264 CurrentCurrPic;
  95. static  VAPictureH264 ReferenceFrames[16], RefPicList0_P[32], RefPicList0_B[32], RefPicList1_B[32];
  96.  
  /*
   * Upper limits for frame_num / pic_order_cnt_lsb and their base-2
   * logarithms. Each limit must equal 1 << (its logarithm); the earlier
   * literals (2<<16) and (2<<8) were twice that, so a counter wrapped at
   * those limits would not fit in the 16 and 8 bits named next to them.
   * NOTE(review): the code that consumes these values is outside this
   * excerpt - confirm nothing relied on the doubled limits.
   */
  static  unsigned int MaxFrameNum = (1u << 16);
  static  unsigned int MaxPicOrderCntLsb = (1u << 8);
  static  unsigned int Log2MaxFrameNum = 16;
  static  unsigned int Log2MaxPicOrderCntLsb = 8;
  101.  
  102. static  unsigned int num_ref_frames = 2;
  103. static  unsigned int numShortTerm = 0;
  104. static  int constraint_set_flag = 0;
  105. static  int h264_packedheader = 0; /* support pack header? */
  106. static  int h264_maxref = (1<<16|1);
  107. static  int h264_entropy_mode = 1; /* cabac */
  108.  
  109. static  char *coded_fn = NULL, *srcyuv_fn = NULL, *recyuv_fn = NULL;
  110. static  FILE *coded_fp = NULL, *srcyuv_fp = NULL, *recyuv_fp = NULL;
  111. static  unsigned long long srcyuv_frames = 0;
  112. static  int srcyuv_fourcc = VA_FOURCC_NV12;
  113. static  int calc_psnr = 0;
  114.  
  115. static  int frame_width = 176;
  116. static  int frame_height = 144;
  117. static  int frame_width_mbaligned;
  118. static  int frame_height_mbaligned;
  119. static  int frame_rate = 30;
  120. static  unsigned int frame_count = 60;
  121. static  unsigned int frame_coded = 0;
  122. static  unsigned int frame_bitrate = 0;
  123. static  unsigned int frame_slices = 1;
  124. static  double frame_size = 0;
  125. static  int initial_qp = 26;
  126. static  int minimal_qp = 0;
  127. static  int intra_period = 30;
  128. static  int intra_idr_period = 60;
  129. static  int ip_period = 1;
  130. static  int rc_mode = -1;
  131. static  int rc_default_modes[] = {
  132.     VA_RC_VBR,
  133.     VA_RC_CQP,
  134.     VA_RC_VBR_CONSTRAINED,
  135.     VA_RC_CBR,
  136.     VA_RC_VCM,
  137.     VA_RC_NONE,
  138. };
  139. static  unsigned long long current_frame_encoding = 0;
  140. static  unsigned long long current_frame_display = 0;
  141. static  unsigned long long current_IDR_display = 0;
  142. static  unsigned int current_frame_num = 0;
  143. static  int current_frame_type;
  144. #define current_slot (current_frame_display % SURFACE_NUM)
  145.  
  146. static  int misc_priv_type = 0;
  147. static  int misc_priv_value = 0;
  148.  
  149. #define MIN(a, b) ((a)>(b)?(b):(a))
  150. #define MAX(a, b) ((a)>(b)?(a):(b))
  151.  
  152. /* thread to save coded data/upload source YUV */
  153. struct storage_task_t {
  154.     void *next;
  155.     unsigned long long display_order;
  156.     unsigned long long encode_order;
  157. };
  158. static  struct storage_task_t *storage_task_header = NULL, *storage_task_tail = NULL;
  159. #define SRC_SURFACE_IN_ENCODING 0
  160. #define SRC_SURFACE_IN_STORAGE  1
  161. static  int srcsurface_status[SURFACE_NUM];
  162. static  int encode_syncmode = 0;
  163. static  pthread_mutex_t encode_mutex = PTHREAD_MUTEX_INITIALIZER;
  164. static  pthread_cond_t  encode_cond = PTHREAD_COND_INITIALIZER;
  165. static  pthread_t encode_thread;
  166.    
  167. /* for performance profiling */
  168. static unsigned int UploadPictureTicks=0;
  169. static unsigned int BeginPictureTicks=0;
  170. static unsigned int RenderPictureTicks=0;
  171. static unsigned int EndPictureTicks=0;
  172. static unsigned int SyncPictureTicks=0;
  173. static unsigned int SavePictureTicks=0;
  174. static unsigned int TotalTicks=0;
  175.  
  /*
   * Growable bit writer.
   *   buffer:            storage, handled as 32-bit words
   *   bit_offset:        number of bits written so far
   *   max_size_in_dword: current capacity of buffer, in words
   */
  typedef struct __bitstream {
      unsigned int *buffer;
      int bit_offset;
      int max_size_in_dword;
  } bitstream;
  182.  
  183.  
  /*
   * Reassemble val from its bytes in memory order, first byte most
   * significant. On a little-endian host this reverses the byte order; on a
   * big-endian host the value comes back unchanged.
   *
   * Each byte is widened to unsigned int before shifting: an unsigned char
   * promotes to (signed) int, and shifting a byte >= 0x80 left by 24 would
   * overflow that int, which is undefined behaviour.
   */
  static unsigned int
  va_swap32(unsigned int val)
  {
      const unsigned char *pval = (const unsigned char *)&val;

      return (((unsigned int)pval[0] << 24) |
              ((unsigned int)pval[1] << 16) |
              ((unsigned int)pval[2] << 8)  |
              ((unsigned int)pval[3] << 0));
  }
  194.  
  195. static void
  196. bitstream_start(bitstream *bs)
  197. {
  198.     bs->max_size_in_dword = BITSTREAM_ALLOCATE_STEPPING;
  199.     bs->buffer = calloc(bs->max_size_in_dword * sizeof(int), 1);
  200.     bs->bit_offset = 0;
  201. }
  202.  
  203. static void
  204. bitstream_end(bitstream *bs)
  205. {
  206.     int pos = (bs->bit_offset >> 5);
  207.     int bit_offset = (bs->bit_offset & 0x1f);
  208.     int bit_left = 32 - bit_offset;
  209.  
  210.     if (bit_offset) {
  211.         bs->buffer[pos] = va_swap32((bs->buffer[pos] << bit_left));
  212.     }
  213. }
  214.  
  215. static void
  216. bitstream_put_ui(bitstream *bs, unsigned int val, int size_in_bits)
  217. {
  218.     int pos = (bs->bit_offset >> 5);
  219.     int bit_offset = (bs->bit_offset & 0x1f);
  220.     int bit_left = 32 - bit_offset;
  221.  
  222.     if (!size_in_bits)
  223.         return;
  224.  
  225.     bs->bit_offset += size_in_bits;
  226.  
  227.     if (bit_left > size_in_bits) {
  228.         bs->buffer[pos] = (bs->buffer[pos] << size_in_bits | val);
  229.     } else {
  230.         size_in_bits -= bit_left;
  231.         bs->buffer[pos] = (bs->buffer[pos] << bit_left) | (val >> size_in_bits);
  232.         bs->buffer[pos] = va_swap32(bs->buffer[pos]);
  233.  
  234.         if (pos + 1 == bs->max_size_in_dword) {
  235.             bs->max_size_in_dword += BITSTREAM_ALLOCATE_STEPPING;
  236.             bs->buffer = realloc(bs->buffer, bs->max_size_in_dword * sizeof(unsigned int));
  237.         }
  238.  
  239.         bs->buffer[pos + 1] = val;
  240.     }
  241. }
  242.  
  243. static void
  244. bitstream_put_ue(bitstream *bs, unsigned int val)
  245. {
  246.     int size_in_bits = 0;
  247.     int tmp_val = ++val;
  248.  
  249.     while (tmp_val) {
  250.         tmp_val >>= 1;
  251.         size_in_bits++;
  252.     }
  253.  
  254.     bitstream_put_ui(bs, 0, size_in_bits - 1); // leading zero
  255.     bitstream_put_ui(bs, val, size_in_bits);
  256. }
  257.  
  258. static void
  259. bitstream_put_se(bitstream *bs, int val)
  260. {
  261.     unsigned int new_val;
  262.  
  263.     if (val <= 0)
  264.         new_val = -2 * val;
  265.     else
  266.         new_val = 2 * val - 1;
  267.  
  268.     bitstream_put_ue(bs, new_val);
  269. }
  270.  
  271. static void
  272. bitstream_byte_aligning(bitstream *bs, int bit)
  273. {
  274.     int bit_offset = (bs->bit_offset & 0x7);
  275.     int bit_left = 8 - bit_offset;
  276.     int new_val;
  277.  
  278.     if (!bit_offset)
  279.         return;
  280.  
  281.     assert(bit == 0 || bit == 1);
  282.  
  283.     if (bit)
  284.         new_val = (1 << bit_left) - 1;
  285.     else
  286.         new_val = 0;
  287.  
  288.     bitstream_put_ui(bs, new_val, bit_left);
  289. }
  290.  
  291. static void
  292. rbsp_trailing_bits(bitstream *bs)
  293. {
  294.     bitstream_put_ui(bs, 1, 1);
  295.     bitstream_byte_aligning(bs, 0);
  296. }
  297.  
  298. static void nal_start_code_prefix(bitstream *bs)
  299. {
  300.     bitstream_put_ui(bs, 0x00000001, 32);
  301. }
  302.  
  303. static void nal_header(bitstream *bs, int nal_ref_idc, int nal_unit_type)
  304. {
  305.     bitstream_put_ui(bs, 0, 1);                /* forbidden_zero_bit: 0 */
  306.     bitstream_put_ui(bs, nal_ref_idc, 2);
  307.     bitstream_put_ui(bs, nal_unit_type, 5);
  308. }
  309.  
  310. static void sps_rbsp(bitstream *bs)
  311. {
  312.     int profile_idc = PROFILE_IDC_BASELINE;
  313.  
  314.     if (h264_profile  == VAProfileH264High)
  315.         profile_idc = PROFILE_IDC_HIGH;
  316.     else if (h264_profile  == VAProfileH264Main)
  317.         profile_idc = PROFILE_IDC_MAIN;
  318.  
  319.     bitstream_put_ui(bs, profile_idc, 8);               /* profile_idc */
  320.     bitstream_put_ui(bs, !!(constraint_set_flag & 1), 1);                         /* constraint_set0_flag */
  321.     bitstream_put_ui(bs, !!(constraint_set_flag & 2), 1);                         /* constraint_set1_flag */
  322.     bitstream_put_ui(bs, !!(constraint_set_flag & 4), 1);                         /* constraint_set2_flag */
  323.     bitstream_put_ui(bs, !!(constraint_set_flag & 8), 1);                         /* constraint_set3_flag */
  324.     bitstream_put_ui(bs, 0, 4);                         /* reserved_zero_4bits */
  325.     bitstream_put_ui(bs, seq_param.level_idc, 8);      /* level_idc */
  326.     bitstream_put_ue(bs, seq_param.seq_parameter_set_id);      /* seq_parameter_set_id */
  327.  
  328.     if ( profile_idc == PROFILE_IDC_HIGH) {
  329.         bitstream_put_ue(bs, 1);        /* chroma_format_idc = 1, 4:2:0 */
  330.         bitstream_put_ue(bs, 0);        /* bit_depth_luma_minus8 */
  331.         bitstream_put_ue(bs, 0);        /* bit_depth_chroma_minus8 */
  332.         bitstream_put_ui(bs, 0, 1);     /* qpprime_y_zero_transform_bypass_flag */
  333.         bitstream_put_ui(bs, 0, 1);     /* seq_scaling_matrix_present_flag */
  334.     }
  335.  
  336.     bitstream_put_ue(bs, seq_param.seq_fields.bits.log2_max_frame_num_minus4); /* log2_max_frame_num_minus4 */
  337.     bitstream_put_ue(bs, seq_param.seq_fields.bits.pic_order_cnt_type);        /* pic_order_cnt_type */
  338.  
  339.     if (seq_param.seq_fields.bits.pic_order_cnt_type == 0)
  340.         bitstream_put_ue(bs, seq_param.seq_fields.bits.log2_max_pic_order_cnt_lsb_minus4);     /* log2_max_pic_order_cnt_lsb_minus4 */
  341.     else {
  342.         assert(0);
  343.     }
  344.  
  345.     bitstream_put_ue(bs, seq_param.max_num_ref_frames);        /* num_ref_frames */
  346.     bitstream_put_ui(bs, 0, 1);                                 /* gaps_in_frame_num_value_allowed_flag */
  347.  
  348.     bitstream_put_ue(bs, seq_param.picture_width_in_mbs - 1);  /* pic_width_in_mbs_minus1 */
  349.     bitstream_put_ue(bs, seq_param.picture_height_in_mbs - 1); /* pic_height_in_map_units_minus1 */
  350.     bitstream_put_ui(bs, seq_param.seq_fields.bits.frame_mbs_only_flag, 1);    /* frame_mbs_only_flag */
  351.  
  352.     if (!seq_param.seq_fields.bits.frame_mbs_only_flag) {
  353.         assert(0);
  354.     }
  355.  
  356.     bitstream_put_ui(bs, seq_param.seq_fields.bits.direct_8x8_inference_flag, 1);      /* direct_8x8_inference_flag */
  357.     bitstream_put_ui(bs, seq_param.frame_cropping_flag, 1);            /* frame_cropping_flag */
  358.  
  359.     if (seq_param.frame_cropping_flag) {
  360.         bitstream_put_ue(bs, seq_param.frame_crop_left_offset);        /* frame_crop_left_offset */
  361.         bitstream_put_ue(bs, seq_param.frame_crop_right_offset);       /* frame_crop_right_offset */
  362.         bitstream_put_ue(bs, seq_param.frame_crop_top_offset);         /* frame_crop_top_offset */
  363.         bitstream_put_ue(bs, seq_param.frame_crop_bottom_offset);      /* frame_crop_bottom_offset */
  364.     }
  365.    
  366.     //if ( frame_bit_rate < 0 ) { //TODO EW: the vui header isn't correct
  367.     if ( 1 ) {
  368.         bitstream_put_ui(bs, 0, 1); /* vui_parameters_present_flag */
  369.     } else {
  370.         bitstream_put_ui(bs, 1, 1); /* vui_parameters_present_flag */
  371.         bitstream_put_ui(bs, 0, 1); /* aspect_ratio_info_present_flag */
  372.         bitstream_put_ui(bs, 0, 1); /* overscan_info_present_flag */
  373.         bitstream_put_ui(bs, 0, 1); /* video_signal_type_present_flag */
  374.         bitstream_put_ui(bs, 0, 1); /* chroma_loc_info_present_flag */
  375.         bitstream_put_ui(bs, 1, 1); /* timing_info_present_flag */
  376.         {
  377.             bitstream_put_ui(bs, 15, 32);
  378.             bitstream_put_ui(bs, 900, 32);
  379.             bitstream_put_ui(bs, 1, 1);
  380.         }
  381.         bitstream_put_ui(bs, 1, 1); /* nal_hrd_parameters_present_flag */
  382.         {
  383.             // hrd_parameters
  384.             bitstream_put_ue(bs, 0);    /* cpb_cnt_minus1 */
  385.             bitstream_put_ui(bs, 4, 4); /* bit_rate_scale */
  386.             bitstream_put_ui(bs, 6, 4); /* cpb_size_scale */
  387.            
  388.             bitstream_put_ue(bs, frame_bitrate - 1); /* bit_rate_value_minus1[0] */
  389.             bitstream_put_ue(bs, frame_bitrate*8 - 1); /* cpb_size_value_minus1[0] */
  390.             bitstream_put_ui(bs, 1, 1);  /* cbr_flag[0] */
  391.  
  392.             bitstream_put_ui(bs, 23, 5);   /* initial_cpb_removal_delay_length_minus1 */
  393.             bitstream_put_ui(bs, 23, 5);   /* cpb_removal_delay_length_minus1 */
  394.             bitstream_put_ui(bs, 23, 5);   /* dpb_output_delay_length_minus1 */
  395.             bitstream_put_ui(bs, 23, 5);   /* time_offset_length  */
  396.         }
  397.         bitstream_put_ui(bs, 0, 1);   /* vcl_hrd_parameters_present_flag */
  398.         bitstream_put_ui(bs, 0, 1);   /* low_delay_hrd_flag */
  399.  
  400.         bitstream_put_ui(bs, 0, 1); /* pic_struct_present_flag */
  401.         bitstream_put_ui(bs, 0, 1); /* bitstream_restriction_flag */
  402.     }
  403.  
  404.     rbsp_trailing_bits(bs);     /* rbsp_trailing_bits */
  405. }
  406.  
  407.  
  408. static void pps_rbsp(bitstream *bs)
  409. {
  410.     bitstream_put_ue(bs, pic_param.pic_parameter_set_id);      /* pic_parameter_set_id */
  411.     bitstream_put_ue(bs, pic_param.seq_parameter_set_id);      /* seq_parameter_set_id */
  412.  
  413.     bitstream_put_ui(bs, pic_param.pic_fields.bits.entropy_coding_mode_flag, 1);  /* entropy_coding_mode_flag */
  414.  
  415.     bitstream_put_ui(bs, 0, 1);                         /* pic_order_present_flag: 0 */
  416.  
  417.     bitstream_put_ue(bs, 0);                            /* num_slice_groups_minus1 */
  418.  
  419.     bitstream_put_ue(bs, pic_param.num_ref_idx_l0_active_minus1);      /* num_ref_idx_l0_active_minus1 */
  420.     bitstream_put_ue(bs, pic_param.num_ref_idx_l1_active_minus1);      /* num_ref_idx_l1_active_minus1 1 */
  421.  
  422.     bitstream_put_ui(bs, pic_param.pic_fields.bits.weighted_pred_flag, 1);     /* weighted_pred_flag: 0 */
  423.     bitstream_put_ui(bs, pic_param.pic_fields.bits.weighted_bipred_idc, 2);     /* weighted_bipred_idc: 0 */
  424.  
  425.     bitstream_put_se(bs, pic_param.pic_init_qp - 26);  /* pic_init_qp_minus26 */
  426.     bitstream_put_se(bs, 0);                            /* pic_init_qs_minus26 */
  427.     bitstream_put_se(bs, 0);                            /* chroma_qp_index_offset */
  428.  
  429.     bitstream_put_ui(bs, pic_param.pic_fields.bits.deblocking_filter_control_present_flag, 1); /* deblocking_filter_control_present_flag */
  430.     bitstream_put_ui(bs, 0, 1);                         /* constrained_intra_pred_flag */
  431.     bitstream_put_ui(bs, 0, 1);                         /* redundant_pic_cnt_present_flag */
  432.    
  433.     /* more_rbsp_data */
  434.     bitstream_put_ui(bs, pic_param.pic_fields.bits.transform_8x8_mode_flag, 1);    /*transform_8x8_mode_flag */
  435.     bitstream_put_ui(bs, 0, 1);                         /* pic_scaling_matrix_present_flag */
  436.     bitstream_put_se(bs, pic_param.second_chroma_qp_index_offset );    /*second_chroma_qp_index_offset */
  437.  
  438.     rbsp_trailing_bits(bs);
  439. }
  440.  
  441. static void slice_header(bitstream *bs)
  442. {
  443.     int first_mb_in_slice = slice_param.macroblock_address;
  444.  
  445.     bitstream_put_ue(bs, first_mb_in_slice);        /* first_mb_in_slice: 0 */
  446.     bitstream_put_ue(bs, slice_param.slice_type);   /* slice_type */
  447.     bitstream_put_ue(bs, slice_param.pic_parameter_set_id);        /* pic_parameter_set_id: 0 */
  448.     bitstream_put_ui(bs, pic_param.frame_num, seq_param.seq_fields.bits.log2_max_frame_num_minus4 + 4); /* frame_num */
  449.  
  450.     /* frame_mbs_only_flag == 1 */
  451.     if (!seq_param.seq_fields.bits.frame_mbs_only_flag) {
  452.         /* FIXME: */
  453.         assert(0);
  454.     }
  455.  
  456.     if (pic_param.pic_fields.bits.idr_pic_flag)
  457.         bitstream_put_ue(bs, slice_param.idr_pic_id);           /* idr_pic_id: 0 */
  458.  
  459.     if (seq_param.seq_fields.bits.pic_order_cnt_type == 0) {
  460.         bitstream_put_ui(bs, pic_param.CurrPic.TopFieldOrderCnt, seq_param.seq_fields.bits.log2_max_pic_order_cnt_lsb_minus4 + 4);
  461.         /* pic_order_present_flag == 0 */
  462.     } else {
  463.         /* FIXME: */
  464.         assert(0);
  465.     }
  466.  
  467.     /* redundant_pic_cnt_present_flag == 0 */
  468.     /* slice type */
  469.     if (IS_P_SLICE(slice_param.slice_type)) {
  470.         bitstream_put_ui(bs, slice_param.num_ref_idx_active_override_flag, 1);            /* num_ref_idx_active_override_flag: */
  471.  
  472.         if (slice_param.num_ref_idx_active_override_flag)
  473.             bitstream_put_ue(bs, slice_param.num_ref_idx_l0_active_minus1);
  474.  
  475.         /* ref_pic_list_reordering */
  476.         bitstream_put_ui(bs, 0, 1);            /* ref_pic_list_reordering_flag_l0: 0 */
  477.     } else if (IS_B_SLICE(slice_param.slice_type)) {
  478.         bitstream_put_ui(bs, slice_param.direct_spatial_mv_pred_flag, 1);            /* direct_spatial_mv_pred: 1 */
  479.  
  480.         bitstream_put_ui(bs, slice_param.num_ref_idx_active_override_flag, 1);       /* num_ref_idx_active_override_flag: */
  481.  
  482.         if (slice_param.num_ref_idx_active_override_flag) {
  483.             bitstream_put_ue(bs, slice_param.num_ref_idx_l0_active_minus1);
  484.             bitstream_put_ue(bs, slice_param.num_ref_idx_l1_active_minus1);
  485.         }
  486.  
  487.         /* ref_pic_list_reordering */
  488.         bitstream_put_ui(bs, 0, 1);            /* ref_pic_list_reordering_flag_l0: 0 */
  489.         bitstream_put_ui(bs, 0, 1);            /* ref_pic_list_reordering_flag_l1: 0 */
  490.     }
  491.  
  492.     if ((pic_param.pic_fields.bits.weighted_pred_flag &&
  493.          IS_P_SLICE(slice_param.slice_type)) ||
  494.         ((pic_param.pic_fields.bits.weighted_bipred_idc == 1) &&
  495.          IS_B_SLICE(slice_param.slice_type))) {
  496.         /* FIXME: fill weight/offset table */
  497.         assert(0);
  498.     }
  499.  
  500.     /* dec_ref_pic_marking */
  501.     if (pic_param.pic_fields.bits.reference_pic_flag) {     /* nal_ref_idc != 0 */
  502.         unsigned char no_output_of_prior_pics_flag = 0;
  503.         unsigned char long_term_reference_flag = 0;
  504.         unsigned char adaptive_ref_pic_marking_mode_flag = 0;
  505.  
  506.         if (pic_param.pic_fields.bits.idr_pic_flag) {
  507.             bitstream_put_ui(bs, no_output_of_prior_pics_flag, 1);            /* no_output_of_prior_pics_flag: 0 */
  508.             bitstream_put_ui(bs, long_term_reference_flag, 1);            /* long_term_reference_flag: 0 */
  509.         } else {
  510.             bitstream_put_ui(bs, adaptive_ref_pic_marking_mode_flag, 1);            /* adaptive_ref_pic_marking_mode_flag: 0 */
  511.         }
  512.     }
  513.  
  514.     if (pic_param.pic_fields.bits.entropy_coding_mode_flag &&
  515.         !IS_I_SLICE(slice_param.slice_type))
  516.         bitstream_put_ue(bs, slice_param.cabac_init_idc);               /* cabac_init_idc: 0 */
  517.  
  518.     bitstream_put_se(bs, slice_param.slice_qp_delta);                   /* slice_qp_delta: 0 */
  519.  
  520.     /* ignore for SP/SI */
  521.  
  522.     if (pic_param.pic_fields.bits.deblocking_filter_control_present_flag) {
  523.         bitstream_put_ue(bs, slice_param.disable_deblocking_filter_idc);           /* disable_deblocking_filter_idc: 0 */
  524.  
  525.         if (slice_param.disable_deblocking_filter_idc != 1) {
  526.             bitstream_put_se(bs, slice_param.slice_alpha_c0_offset_div2);          /* slice_alpha_c0_offset_div2: 2 */
  527.             bitstream_put_se(bs, slice_param.slice_beta_offset_div2);              /* slice_beta_offset_div2: 2 */
  528.         }
  529.     }
  530.  
  531.     if (pic_param.pic_fields.bits.entropy_coding_mode_flag) {
  532.         bitstream_byte_aligning(bs, 1);
  533.     }
  534. }
  535.  
  536. static int
  537. build_packed_pic_buffer(unsigned char **header_buffer)
  538. {
  539.     bitstream bs;
  540.  
  541.     bitstream_start(&bs);
  542.     nal_start_code_prefix(&bs);
  543.     nal_header(&bs, NAL_REF_IDC_HIGH, NAL_PPS);
  544.     pps_rbsp(&bs);
  545.     bitstream_end(&bs);
  546.  
  547.     *header_buffer = (unsigned char *)bs.buffer;
  548.     return bs.bit_offset;
  549. }
  550.  
  551. static int
  552. build_packed_seq_buffer(unsigned char **header_buffer)
  553. {
  554.     bitstream bs;
  555.  
  556.     bitstream_start(&bs);
  557.     nal_start_code_prefix(&bs);
  558.     nal_header(&bs, NAL_REF_IDC_HIGH, NAL_SPS);
  559.     sps_rbsp(&bs);
  560.     bitstream_end(&bs);
  561.  
  562.     *header_buffer = (unsigned char *)bs.buffer;
  563.     return bs.bit_offset;
  564. }
  565.  
  566. static int
  567. build_packed_sei_buffer_timing(unsigned int init_cpb_removal_length,
  568.                                 unsigned int init_cpb_removal_delay,
  569.                                 unsigned int init_cpb_removal_delay_offset,
  570.                                 unsigned int cpb_removal_length,
  571.                                 unsigned int cpb_removal_delay,
  572.                                 unsigned int dpb_output_length,
  573.                                 unsigned int dpb_output_delay,
  574.                                 unsigned char **sei_buffer)
  575. {
  576.     unsigned char *byte_buf;
  577.     int bp_byte_size, i, pic_byte_size;
  578.  
  579.     bitstream nal_bs;
  580.     bitstream sei_bp_bs, sei_pic_bs;
  581.  
  582.     bitstream_start(&sei_bp_bs);
  583.     bitstream_put_ue(&sei_bp_bs, 0);       /*seq_parameter_set_id*/
  584.     bitstream_put_ui(&sei_bp_bs, init_cpb_removal_delay, cpb_removal_length);
  585.     bitstream_put_ui(&sei_bp_bs, init_cpb_removal_delay_offset, cpb_removal_length);
  586.     if ( sei_bp_bs.bit_offset & 0x7) {
  587.         bitstream_put_ui(&sei_bp_bs, 1, 1);
  588.     }
  589.     bitstream_end(&sei_bp_bs);
  590.     bp_byte_size = (sei_bp_bs.bit_offset + 7) / 8;
  591.    
  592.     bitstream_start(&sei_pic_bs);
  593.     bitstream_put_ui(&sei_pic_bs, cpb_removal_delay, cpb_removal_length);
  594.     bitstream_put_ui(&sei_pic_bs, dpb_output_delay, dpb_output_length);
  595.     if ( sei_pic_bs.bit_offset & 0x7) {
  596.         bitstream_put_ui(&sei_pic_bs, 1, 1);
  597.     }
  598.     bitstream_end(&sei_pic_bs);
  599.     pic_byte_size = (sei_pic_bs.bit_offset + 7) / 8;
  600.    
  601.     bitstream_start(&nal_bs);
  602.     nal_start_code_prefix(&nal_bs);
  603.     nal_header(&nal_bs, NAL_REF_IDC_NONE, NAL_SEI);
  604.  
  605.         /* Write the SEI buffer period data */    
  606.     bitstream_put_ui(&nal_bs, 0, 8);
  607.     bitstream_put_ui(&nal_bs, bp_byte_size, 8);
  608.    
  609.     byte_buf = (unsigned char *)sei_bp_bs.buffer;
  610.     for(i = 0; i < bp_byte_size; i++) {
  611.         bitstream_put_ui(&nal_bs, byte_buf[i], 8);
  612.     }
  613.     free(byte_buf);
  614.         /* write the SEI timing data */
  615.     bitstream_put_ui(&nal_bs, 0x01, 8);
  616.     bitstream_put_ui(&nal_bs, pic_byte_size, 8);
  617.    
  618.     byte_buf = (unsigned char *)sei_pic_bs.buffer;
  619.     for(i = 0; i < pic_byte_size; i++) {
  620.         bitstream_put_ui(&nal_bs, byte_buf[i], 8);
  621.     }
  622.     free(byte_buf);
  623.  
  624.     rbsp_trailing_bits(&nal_bs);
  625.     bitstream_end(&nal_bs);
  626.  
  627.     *sei_buffer = (unsigned char *)nal_bs.buffer;
  628.    
  629.     return nal_bs.bit_offset;
  630. }
  631.  
  632. static int build_packed_slice_buffer(unsigned char **header_buffer)
  633. {
  634.     bitstream bs;
  635.     int is_idr = !!pic_param.pic_fields.bits.idr_pic_flag;
  636.     int is_ref = !!pic_param.pic_fields.bits.reference_pic_flag;
  637.  
  638.     bitstream_start(&bs);
  639.     nal_start_code_prefix(&bs);
  640.  
  641.     if (IS_I_SLICE(slice_param.slice_type)) {
  642.         nal_header(&bs, NAL_REF_IDC_HIGH, is_idr ? NAL_IDR : NAL_NON_IDR);
  643.     } else if (IS_P_SLICE(slice_param.slice_type)) {
  644.         nal_header(&bs, NAL_REF_IDC_MEDIUM, NAL_NON_IDR);
  645.     } else {
  646.         assert(IS_B_SLICE(slice_param.slice_type));
  647.         nal_header(&bs, is_ref ? NAL_REF_IDC_LOW : NAL_REF_IDC_NONE, NAL_NON_IDR);
  648.     }
  649.  
  650.     slice_header(&bs);
  651.     bitstream_end(&bs);
  652.  
  653.     *header_buffer = (unsigned char *)bs.buffer;
  654.     return bs.bit_offset;
  655. }
  656.  
  657.  
  /*
   * Helper function for profiling purposes.
   * Returns the gettimeofday() wall-clock time in milliseconds, reduced to
   * the range of unsigned int, or 0 when gettimeofday() fails. Use it only
   * for differences between two nearby readings.
   * The sum is formed in unsigned long long so that tv_sec * 1000 cannot
   * overflow a 32-bit signed long.
   */
  static unsigned int GetTickCount(void)
  {
      struct timeval tv;

      if (gettimeofday(&tv, NULL))
          return 0;

      return (unsigned int)((unsigned long long)tv.tv_usec / 1000ULL +
                            (unsigned long long)tv.tv_sec * 1000ULL);
  }
  668.  
  669. /*
  670.   Assume frame sequence is: Frame#0,#1,#2,...,#M,...,#X,... (encoding order)
  671.   1) period between Frame #X and Frame #N = #X - #N
  672.   2) 0 means infinite for intra_period/intra_idr_period, and 0 is invalid for ip_period
  673.   3) intra_idr_period % intra_period (intra_period > 0) and intra_period % ip_period must be 0
  674.   4) intra_period and intra_idr_period take precedence over ip_period
  675.   5) if ip_period > 1, intra_period and intra_idr_period are not the strict periods
  676.      of I/IDR frames; see the examples below
  677.   -------------------------------------------------------------------
  678.   intra_period intra_idr_period ip_period frame sequence (intra_period/intra_idr_period/ip_period)
  679.   0            ignored          1          IDRPPPPPPP ...     (No IDR/I any more)
  680.   0            ignored        >=2          IDR(PBB)(PBB)...   (No IDR/I any more)
  681.   1            0                ignored    IDRIIIIIII...      (No IDR any more)
  682.   1            1                ignored    IDR IDR IDR IDR...
  683.   1            >=2              ignored    IDRII IDRII IDR... (1/3/ignore)
  684.   >=2          0                1          IDRPPP IPPP I...   (3/0/1)
  685.   >=2          0              >=2          IDR(PBB)(PBB)(IBB) (6/0/3)
  686.                                               (PBB)(IBB)(PBB)(IBB)...
  687.   >=2          >=2              1          IDRPPPPP IPPPPP IPPPPP (6/18/1)
  688.                                            IDRPPPPP IPPPPP IPPPPP...
  689.   >=2          >=2              >=2        {IDR(PBB)(PBB)(IBB)(PBB)(IBB)(PBB)} (6/18/3)
  690.                                            {IDR(PBB)(PBB)(IBB)(PBB)(IBB)(PBB)}...
  691.                                            {IDR(PBB)(PBB)(IBB)(PBB)}           (6/12/3)
  692.                                            {IDR(PBB)(PBB)(IBB)(PBB)}...
  693.                                            {IDR(PBB)(PBB)}                     (6/6/3)
  694.                                            {IDR(PBB)(PBB)}.
  695. */
  696.  
  /*
   * Map a frame's position in encoding order to its display position and
   * frame type for the given periods.
   *   encoding_order:   0-based position in encoding order
   *   intra_period:     distance between I frames; 0 = no further I frames,
   *                     1 = every frame is I/IDR
   *   intra_idr_period: distance between IDR frames; 0 = only the first
   *                     frame is IDR (forced to 0 when intra_period is 0)
   *   ip_period:        distance between anchor (I/P) frames; 1 = no B
   *                     frames. Values below 1 are treated as 1.
   *   displaying_order: receives the display position
   *   frame_type:       receives FRAME_IDR, FRAME_I, FRAME_P or FRAME_B
   *
   * Two inputs used to divide by zero and are now defined: ip_period == 0,
   * and an intra_period smaller than ip_period (every anchor frame after the
   * first P then becomes an I frame).
   */
  #define FRAME_P 0
  #define FRAME_B 1
  #define FRAME_I 2
  #define FRAME_IDR 7
  void encoding2display_order(
      unsigned long long encoding_order,int intra_period,
      int intra_idr_period,int ip_period,
      unsigned long long *displaying_order,
      int *frame_type)
  {
      /* long long, so a long run without IDR frames is not cut down to int */
      long long encoding_order_gop = 0;
      int anchors_per_intra;

      if (intra_period == 1) { /* all are I/IDR frames */
          *displaying_order = encoding_order;
          if (intra_idr_period == 0)
              *frame_type = (encoding_order == 0)?FRAME_IDR:FRAME_I;
          else
              *frame_type = (encoding_order % intra_idr_period == 0)?FRAME_IDR:FRAME_I;
          return;
      }

      if (intra_period == 0)
          intra_idr_period = 0;

      /* ip_period is used as a divisor below */
      if (ip_period < 1)
          ip_period = 1;

      /* number of anchor frames per intra period; also a divisor */
      anchors_per_intra = intra_period / ip_period;
      if (anchors_per_intra == 0)
          anchors_per_intra = 1;

      /* new sequence like
       * IDR PPPPP IPPPPP
       * IDR (PBB)(PBB)(IBB)(PBB)
       */
      encoding_order_gop = (intra_idr_period == 0)? (long long)encoding_order:
          (long long)(encoding_order % (intra_idr_period + ((ip_period == 1)?0:1)));

      if (encoding_order_gop == 0) { /* the first frame */
          *frame_type = FRAME_IDR;
          *displaying_order = encoding_order;
      } else if (((encoding_order_gop - 1) % ip_period) != 0) { /* B frames */
          *frame_type = FRAME_B;
          *displaying_order = encoding_order - 1;
      } else if ((intra_period != 0) && /* have I frames */
                 (encoding_order_gop >= 2) &&
                 ((ip_period == 1 && encoding_order_gop % intra_period == 0) || /* for IDR PPPPP IPPPP */
                  /* for IDR (PBB)(PBB)(IBB) */
                  (ip_period >= 2 && ((encoding_order_gop - 1) / ip_period % anchors_per_intra) == 0))) {
          *frame_type = FRAME_I;
          *displaying_order = encoding_order + ip_period - 1;
      } else {
          *frame_type = FRAME_P;
          *displaying_order = encoding_order + ip_period - 1;
      }
  }
  751.  
  752.  
  753. static char *fourcc_to_string(int fourcc)
  754. {
  755.     switch (fourcc) {
  756.     case VA_FOURCC_NV12:
  757.         return "NV12";
  758.     case VA_FOURCC_IYUV:
  759.         return "IYUV";
  760.     case VA_FOURCC_YV12:
  761.         return "YV12";
  762.     case VA_FOURCC_UYVY:
  763.         return "UYVY";
  764.     default:
  765.         return "Unknown";
  766.     }
  767. }
  768.  
  769. static int string_to_fourcc(char *str)
  770. {
  771.     int fourcc;
  772.    
  773.     if (!strncmp(str, "NV12", 4))
  774.         fourcc = VA_FOURCC_NV12;
  775.     else if (!strncmp(str, "IYUV", 4))
  776.         fourcc = VA_FOURCC_IYUV;
  777.     else if (!strncmp(str, "YV12", 4))
  778.         fourcc = VA_FOURCC_YV12;
  779.     else if (!strncmp(str, "UYVY", 4))
  780.         fourcc = VA_FOURCC_UYVY;
  781.     else {
  782.         printf("Unknow FOURCC\n");
  783.         fourcc = -1;
  784.     }
  785.     return fourcc;
  786. }
  787.  
  788.  
  789. static char *rc_to_string(int rcmode)
  790. {
  791.     switch (rc_mode) {
  792.     case VA_RC_NONE:
  793.         return "NONE";
  794.     case VA_RC_CBR:
  795.         return "CBR";
  796.     case VA_RC_VBR:
  797.         return "VBR";
  798.     case VA_RC_VCM:
  799.         return "VCM";
  800.     case VA_RC_CQP:
  801.         return "CQP";
  802.     case VA_RC_VBR_CONSTRAINED:
  803.         return "VBR_CONSTRAINED";
  804.     default:
  805.         return "Unknown";
  806.     }
  807. }
  808.  
  809. static int string_to_rc(char *str)
  810. {
  811.     int rc_mode;
  812.    
  813.     if (!strncmp(str, "NONE", 4))
  814.         rc_mode = VA_RC_NONE;
  815.     else if (!strncmp(str, "CBR", 3))
  816.         rc_mode = VA_RC_CBR;
  817.     else if (!strncmp(str, "VBR", 3))
  818.         rc_mode = VA_RC_VBR;
  819.     else if (!strncmp(str, "VCM", 3))
  820.         rc_mode = VA_RC_VCM;
  821.     else if (!strncmp(str, "CQP", 3))
  822.         rc_mode = VA_RC_CQP;
  823.     else if (!strncmp(str, "VBR_CONSTRAINED", 15))
  824.         rc_mode = VA_RC_VBR_CONSTRAINED;
  825.     else {
  826.         printf("Unknown RC mode\n");
  827.         rc_mode = -1;
  828.     }
  829.     return rc_mode;
  830. }
  831.  
  832.  
  833. static int print_help(void)
  834. {
  835.     printf("./h264encode <options>\n");
  836.     printf("   -w <width> -h <height>\n");
  837.     printf("   -framecount <frame number>\n");
  838.     printf("   -n <frame number>\n");
  839.     printf("      if set to 0 and srcyuv is set, the frame count is from srcuv file\n");
  840.     printf("   -o <coded file>\n");
  841.     printf("   -f <frame rate>\n");
  842.     printf("   --intra_period <number>\n");
  843.     printf("   --idr_period <number>\n");
  844.     printf("   --ip_period <number>\n");
  845.     printf("   --bitrate <bitrate>\n");
  846.     printf("   --initialqp <number>\n");
  847.     printf("   --minqp <number>\n");
  848.     printf("   --rcmode <NONE|CBR|VBR|VCM|CQP|VBR_CONTRAINED>\n");
  849.     printf("   --syncmode: sequentially upload source, encoding, save result, no multi-thread\n");
  850.     printf("   --srcyuv <filename> load YUV from a file\n");
  851.     printf("   --fourcc <NV12|IYUV|YV12> source YUV fourcc\n");
  852.     printf("   --recyuv <filename> save reconstructed YUV into a file\n");
  853.     printf("   --enablePSNR calculate PSNR of recyuv vs. srcyuv\n");
  854.     printf("   --entropy <0|1>, 1 means cabac, 0 cavlc\n");
  855.     printf("   --profile <BP|MP|HP>\n");
  856.     return 0;
  857. }
  858.  
  859. static int process_cmdline(int argc, char *argv[])
  860. {
  861.     char c;
  862.     const struct option long_opts[] = {
  863.         {"help", no_argument, NULL, 0 },
  864.         {"bitrate", required_argument, NULL, 1 },
  865.         {"minqp", required_argument, NULL, 2 },
  866.         {"initialqp", required_argument, NULL, 3 },
  867.         {"intra_period", required_argument, NULL, 4 },
  868.         {"idr_period", required_argument, NULL, 5 },
  869.         {"ip_period", required_argument, NULL, 6 },
  870.         {"rcmode", required_argument, NULL, 7 },
  871.         {"srcyuv", required_argument, NULL, 9 },
  872.         {"recyuv", required_argument, NULL, 10 },
  873.         {"fourcc", required_argument, NULL, 11 },
  874.         {"syncmode", no_argument, NULL, 12 },
  875.         {"enablePSNR", no_argument, NULL, 13 },
  876.         {"prit", required_argument, NULL, 14 },
  877.         {"priv", required_argument, NULL, 15 },
  878.         {"framecount", required_argument, NULL, 16 },
  879.         {"entropy", required_argument, NULL, 17 },
  880.         {"profile", required_argument, NULL, 18 },
  881.         {NULL, no_argument, NULL, 0 }};
  882.     int long_index;
  883.    
  884.     while ((c =getopt_long_only(argc,argv,"w:h:n:f:o:?",long_opts,&long_index)) != EOF) {
  885.         switch (c) {
  886.         case 'w':
  887.             frame_width = atoi(optarg);
  888.             break;
  889.         case 'h':
  890.             frame_height = atoi(optarg);
  891.             break;
  892.         case 'n':
  893.         case 16:
  894.             frame_count = atoi(optarg);
  895.             break;
  896.         case 'f':
  897.             frame_rate = atoi(optarg);
  898.             break;
  899.         case 'o':
  900.             coded_fn = strdup(optarg);
  901.             break;
  902.         case 0:
  903.             print_help();
  904.             exit(0);
  905.         case 1:
  906.             frame_bitrate = atoi(optarg);
  907.             break;
  908.         case 2:
  909.             minimal_qp = atoi(optarg);
  910.             break;
  911.         case 3:
  912.             initial_qp = atoi(optarg);
  913.             break;
  914.         case 4:
  915.             intra_period = atoi(optarg);
  916.             break;
  917.         case 5:
  918.             intra_idr_period = atoi(optarg);
  919.             break;
  920.         case 6:
  921.             ip_period = atoi(optarg);
  922.             break;
  923.         case 7:
  924.             rc_mode = string_to_rc(optarg);
  925.             if (rc_mode < 0) {
  926.                 print_help();
  927.                 exit(1);
  928.             }
  929.             break;
  930.         case 9:
  931.             srcyuv_fn = strdup(optarg);
  932.             break;
  933.         case 10:
  934.             recyuv_fn = strdup(optarg);
  935.             break;
  936.         case 11:
  937.             srcyuv_fourcc = string_to_fourcc(optarg);
  938.             if (srcyuv_fourcc <= 0) {
  939.                 print_help();
  940.                 exit(1);
  941.             }
  942.             break;
  943.         case 12:
  944.             encode_syncmode = 1;
  945.             break;
  946.         case 13:
  947.             calc_psnr = 1;
  948.             break;
  949.         case 14:
  950.             misc_priv_type = strtol(optarg, NULL, 0);
  951.             break;
  952.         case 15:
  953.             misc_priv_value = strtol(optarg, NULL, 0);
  954.             break;
  955.         case 17:
  956.             h264_entropy_mode = atoi(optarg) ? 1: 0;
  957.             break;
  958.         case 18:
  959.             if (strncmp(optarg, "BP", 2) == 0)
  960.                 h264_profile = VAProfileH264Baseline;
  961.             else if (strncmp(optarg, "MP", 2) == 0)
  962.                 h264_profile = VAProfileH264Main;
  963.             else if (strncmp(optarg, "HP", 2) == 0)
  964.                 h264_profile = VAProfileH264High;
  965.             else
  966.                 h264_profile = 0;
  967.             break;
  968.         case ':':
  969.         case '?':
  970.             print_help();
  971.             exit(0);
  972.         }
  973.     }
  974.  
  975.     if (ip_period < 1) {
  976.         printf(" ip_period must be greater than 0\n");
  977.         exit(0);
  978.     }
  979.     if (intra_period != 1 && intra_period % ip_period != 0) {
  980.         printf(" intra_period must be a multiplier of ip_period\n");
  981.         exit(0);        
  982.     }
  983.     if (intra_period != 0 && intra_idr_period % intra_period != 0) {
  984.         printf(" intra_idr_period must be a multiplier of intra_period\n");
  985.         exit(0);        
  986.     }
  987.  
  988.     if (frame_bitrate == 0)
  989.         frame_bitrate = frame_width * frame_height * 12 * frame_rate / 50;
  990.        
  991.     /* open source file */
  992.     if (srcyuv_fn) {
  993.         srcyuv_fp = fopen(srcyuv_fn,"r");
  994.    
  995.         if (srcyuv_fp == NULL)
  996.             printf("Open source YUV file %s failed, use auto-generated YUV data\n", srcyuv_fn);
  997.         else {
  998.             struct stat tmp;
  999.  
  1000.             fstat(fileno(srcyuv_fp), &tmp);
  1001.             srcyuv_frames = tmp.st_size / (frame_width * frame_height * 1.5);
  1002.             printf("Source YUV file %s with %llu frames\n", srcyuv_fn, srcyuv_frames);
  1003.  
  1004.             if (frame_count == 0)
  1005.                 frame_count = srcyuv_frames;
  1006.         }
  1007.     }
  1008.  
  1009.     /* open source file */
  1010.     if (recyuv_fn) {
  1011.         recyuv_fp = fopen(recyuv_fn,"w+");
  1012.    
  1013.         if (recyuv_fp == NULL)
  1014.             printf("Open reconstructed YUV file %s failed\n", recyuv_fn);
  1015.     }
  1016.    
  1017.     if (coded_fn == NULL) {
  1018.         struct stat buf;
  1019.         if (stat("/tmp", &buf) == 0)
  1020.             coded_fn = strdup("/tmp/test.264");
  1021.         else if (stat("/sdcard", &buf) == 0)
  1022.             coded_fn = strdup("/sdcard/test.264");
  1023.         else
  1024.             coded_fn = strdup("./test.264");
  1025.     }
  1026.    
  1027.     /* store coded data into a file */
  1028.     coded_fp = fopen(coded_fn,"w+");
  1029.     if (coded_fp == NULL) {
  1030.         printf("Open file %s failed, exit\n", coded_fn);
  1031.         exit(1);
  1032.     }
  1033.  
  1034.     frame_width_mbaligned = (frame_width + 15) & (~15);
  1035.     frame_height_mbaligned = (frame_height + 15) & (~15);
  1036.     if (frame_width != frame_width_mbaligned ||
  1037.         frame_height != frame_height_mbaligned) {
  1038.         printf("Source frame is %dx%d and will code clip to %dx%d with crop\n",
  1039.                frame_width, frame_height,
  1040.                frame_width_mbaligned, frame_height_mbaligned
  1041.                );
  1042.     }
  1043.    
  1044.     return 0;
  1045. }
  1046.  
  1047. static int init_va(void)
  1048. {
  1049.     VAProfile profile_list[]={VAProfileH264High,VAProfileH264Main,VAProfileH264Baseline,VAProfileH264ConstrainedBaseline};
  1050.     VAEntrypoint *entrypoints;
  1051.     int num_entrypoints, slice_entrypoint;
  1052.     int support_encode = 0;    
  1053.     int major_ver, minor_ver;
  1054.     VAStatus va_status;
  1055.     unsigned int i;
  1056.  
  1057.     va_dpy = va_open_display();
  1058.     va_status = vaInitialize(va_dpy, &major_ver, &minor_ver);
  1059.     CHECK_VASTATUS(va_status, "vaInitialize");
  1060.  
  1061.     num_entrypoints = vaMaxNumEntrypoints(va_dpy);
  1062.     entrypoints = malloc(num_entrypoints * sizeof(*entrypoints));
  1063.     if (!entrypoints) {
  1064.         fprintf(stderr, "error: failed to initialize VA entrypoints array\n");
  1065.         exit(1);
  1066.     }
  1067.  
  1068.     /* use the highest profile */
  1069.     for (i = 0; i < sizeof(profile_list)/sizeof(profile_list[0]); i++) {
  1070.         if ((h264_profile != ~0) && h264_profile != profile_list[i])
  1071.             continue;
  1072.        
  1073.         h264_profile = profile_list[i];
  1074.         vaQueryConfigEntrypoints(va_dpy, h264_profile, entrypoints, &num_entrypoints);
  1075.         for (slice_entrypoint = 0; slice_entrypoint < num_entrypoints; slice_entrypoint++) {
  1076.             if (entrypoints[slice_entrypoint] == VAEntrypointEncSlice) {
  1077.                 support_encode = 1;
  1078.                 break;
  1079.             }
  1080.         }
  1081.         if (support_encode == 1)
  1082.             break;
  1083.     }
  1084.    
  1085.     if (support_encode == 0) {
  1086.         printf("Can't find VAEntrypointEncSlice for H264 profiles\n");
  1087.         exit(1);
  1088.     } else {
  1089.         switch (h264_profile) {
  1090.             case VAProfileH264Baseline:
  1091.                 printf("Use profile VAProfileH264Baseline\n");
  1092.                 ip_period = 1;
  1093.                 constraint_set_flag |= (1 << 0); /* Annex A.2.1 */
  1094.                 h264_entropy_mode = 0;
  1095.                 break;
  1096.             case VAProfileH264ConstrainedBaseline:
  1097.                 printf("Use profile VAProfileH264ConstrainedBaseline\n");
  1098.                 constraint_set_flag |= (1 << 0 | 1 << 1); /* Annex A.2.2 */
  1099.                 ip_period = 1;
  1100.                 break;
  1101.  
  1102.             case VAProfileH264Main:
  1103.                 printf("Use profile VAProfileH264Main\n");
  1104.                 constraint_set_flag |= (1 << 1); /* Annex A.2.2 */
  1105.                 break;
  1106.  
  1107.             case VAProfileH264High:
  1108.                 constraint_set_flag |= (1 << 3); /* Annex A.2.4 */
  1109.                 printf("Use profile VAProfileH264High\n");
  1110.                 break;
  1111.             default:
  1112.                 printf("unknow profile. Set to Baseline");
  1113.                 h264_profile = VAProfileH264Baseline;
  1114.                 ip_period = 1;
  1115.                 constraint_set_flag |= (1 << 0); /* Annex A.2.1 */
  1116.                 break;
  1117.         }
  1118.     }
  1119.  
  1120.     /* find out the format for the render target, and rate control mode */
  1121.     for (i = 0; i < VAConfigAttribTypeMax; i++)
  1122.         attrib[i].type = i;
  1123.  
  1124.     va_status = vaGetConfigAttributes(va_dpy, h264_profile, VAEntrypointEncSlice,
  1125.                                       &attrib[0], VAConfigAttribTypeMax);
  1126.     CHECK_VASTATUS(va_status, "vaGetConfigAttributes");
  1127.     /* check the interested configattrib */
  1128.     if ((attrib[VAConfigAttribRTFormat].value & VA_RT_FORMAT_YUV420) == 0) {
  1129.         printf("Not find desired YUV420 RT format\n");
  1130.         exit(1);
  1131.     } else {
  1132.         config_attrib[config_attrib_num].type = VAConfigAttribRTFormat;
  1133.         config_attrib[config_attrib_num].value = VA_RT_FORMAT_YUV420;
  1134.         config_attrib_num++;
  1135.     }
  1136.    
  1137.     if (attrib[VAConfigAttribRateControl].value != VA_ATTRIB_NOT_SUPPORTED) {
  1138.         int tmp = attrib[VAConfigAttribRateControl].value;
  1139.  
  1140.         printf("Support rate control mode (0x%x):", tmp);
  1141.        
  1142.         if (tmp & VA_RC_NONE)
  1143.             printf("NONE ");
  1144.         if (tmp & VA_RC_CBR)
  1145.             printf("CBR ");
  1146.         if (tmp & VA_RC_VBR)
  1147.             printf("VBR ");
  1148.         if (tmp & VA_RC_VCM)
  1149.             printf("VCM ");
  1150.         if (tmp & VA_RC_CQP)
  1151.             printf("CQP ");
  1152.         if (tmp & VA_RC_VBR_CONSTRAINED)
  1153.             printf("VBR_CONSTRAINED ");
  1154.  
  1155.         printf("\n");
  1156.  
  1157.         if (rc_mode == -1 || !(rc_mode & tmp))  {
  1158.             if (rc_mode != -1) {
  1159.                 printf("Warning: Don't support the specified RateControl mode: %s!!!, switch to ", rc_to_string(rc_mode));
  1160.             }
  1161.  
  1162.             for (i = 0; i < sizeof(rc_default_modes) / sizeof(rc_default_modes[0]); i++) {
  1163.                 if (rc_default_modes[i] & tmp) {
  1164.                     rc_mode = rc_default_modes[i];
  1165.                     break;
  1166.                 }
  1167.             }
  1168.  
  1169.             printf("RateControl mode: %s\n", rc_to_string(rc_mode));
  1170.         }
  1171.  
  1172.         config_attrib[config_attrib_num].type = VAConfigAttribRateControl;
  1173.         config_attrib[config_attrib_num].value = rc_mode;
  1174.         config_attrib_num++;
  1175.     }
  1176.    
  1177.  
  1178.     if (attrib[VAConfigAttribEncPackedHeaders].value != VA_ATTRIB_NOT_SUPPORTED) {
  1179.         int tmp = attrib[VAConfigAttribEncPackedHeaders].value;
  1180.  
  1181.         printf("Support VAConfigAttribEncPackedHeaders\n");
  1182.        
  1183.         h264_packedheader = 1;
  1184.         config_attrib[config_attrib_num].type = VAConfigAttribEncPackedHeaders;
  1185.         config_attrib[config_attrib_num].value = VA_ENC_PACKED_HEADER_NONE;
  1186.        
  1187.         if (tmp & VA_ENC_PACKED_HEADER_SEQUENCE) {
  1188.             printf("Support packed sequence headers\n");
  1189.             config_attrib[config_attrib_num].value |= VA_ENC_PACKED_HEADER_SEQUENCE;
  1190.         }
  1191.        
  1192.         if (tmp & VA_ENC_PACKED_HEADER_PICTURE) {
  1193.             printf("Support packed picture headers\n");
  1194.             config_attrib[config_attrib_num].value |= VA_ENC_PACKED_HEADER_PICTURE;
  1195.         }
  1196.        
  1197.         if (tmp & VA_ENC_PACKED_HEADER_SLICE) {
  1198.             printf("Support packed slice headers\n");
  1199.             config_attrib[config_attrib_num].value |= VA_ENC_PACKED_HEADER_SLICE;
  1200.         }
  1201.        
  1202.         if (tmp & VA_ENC_PACKED_HEADER_MISC) {
  1203.             printf("Support packed misc headers\n");
  1204.             config_attrib[config_attrib_num].value |= VA_ENC_PACKED_HEADER_MISC;
  1205.         }
  1206.        
  1207.         enc_packed_header_idx = config_attrib_num;
  1208.         config_attrib_num++;
  1209.     }
  1210.  
  1211.     if (attrib[VAConfigAttribEncInterlaced].value != VA_ATTRIB_NOT_SUPPORTED) {
  1212.         int tmp = attrib[VAConfigAttribEncInterlaced].value;
  1213.        
  1214.         printf("Support VAConfigAttribEncInterlaced\n");
  1215.  
  1216.         if (tmp & VA_ENC_INTERLACED_FRAME)
  1217.             printf("support VA_ENC_INTERLACED_FRAME\n");
  1218.         if (tmp & VA_ENC_INTERLACED_FIELD)
  1219.             printf("Support VA_ENC_INTERLACED_FIELD\n");
  1220.         if (tmp & VA_ENC_INTERLACED_MBAFF)
  1221.             printf("Support VA_ENC_INTERLACED_MBAFF\n");
  1222.         if (tmp & VA_ENC_INTERLACED_PAFF)
  1223.             printf("Support VA_ENC_INTERLACED_PAFF\n");
  1224.        
  1225.         config_attrib[config_attrib_num].type = VAConfigAttribEncInterlaced;
  1226.         config_attrib[config_attrib_num].value = VA_ENC_PACKED_HEADER_NONE;
  1227.         config_attrib_num++;
  1228.     }
  1229.    
  1230.     if (attrib[VAConfigAttribEncMaxRefFrames].value != VA_ATTRIB_NOT_SUPPORTED) {
  1231.         h264_maxref = attrib[VAConfigAttribEncMaxRefFrames].value;
  1232.        
  1233.         printf("Support %d RefPicList0 and %d RefPicList1\n",
  1234.                h264_maxref & 0xffff, (h264_maxref >> 16) & 0xffff );
  1235.     }
  1236.  
  1237.     if (attrib[VAConfigAttribEncMaxSlices].value != VA_ATTRIB_NOT_SUPPORTED)
  1238.         printf("Support %d slices\n", attrib[VAConfigAttribEncMaxSlices].value);
  1239.  
  1240.     if (attrib[VAConfigAttribEncSliceStructure].value != VA_ATTRIB_NOT_SUPPORTED) {
  1241.         int tmp = attrib[VAConfigAttribEncSliceStructure].value;
  1242.        
  1243.         printf("Support VAConfigAttribEncSliceStructure\n");
  1244.  
  1245.         if (tmp & VA_ENC_SLICE_STRUCTURE_ARBITRARY_ROWS)
  1246.             printf("Support VA_ENC_SLICE_STRUCTURE_ARBITRARY_ROWS\n");
  1247.         if (tmp & VA_ENC_SLICE_STRUCTURE_POWER_OF_TWO_ROWS)
  1248.             printf("Support VA_ENC_SLICE_STRUCTURE_POWER_OF_TWO_ROWS\n");
  1249.         if (tmp & VA_ENC_SLICE_STRUCTURE_ARBITRARY_MACROBLOCKS)
  1250.             printf("Support VA_ENC_SLICE_STRUCTURE_ARBITRARY_MACROBLOCKS\n");
  1251.     }
  1252.     if (attrib[VAConfigAttribEncMacroblockInfo].value != VA_ATTRIB_NOT_SUPPORTED) {
  1253.         printf("Support VAConfigAttribEncMacroblockInfo\n");
  1254.     }
  1255.  
  1256.     free(entrypoints);
  1257.     return 0;
  1258. }
  1259.  
  1260. static int setup_encode()
  1261. {
  1262.     VAStatus va_status;
  1263.     VASurfaceID *tmp_surfaceid;
  1264.     int codedbuf_size, i;
  1265.    
  1266.     va_status = vaCreateConfig(va_dpy, h264_profile, VAEntrypointEncSlice,
  1267.             &config_attrib[0], config_attrib_num, &config_id);
  1268.     CHECK_VASTATUS(va_status, "vaCreateConfig");
  1269.  
  1270.     /* create source surfaces */
  1271.     va_status = vaCreateSurfaces(va_dpy,
  1272.                                  VA_RT_FORMAT_YUV420, frame_width_mbaligned, frame_height_mbaligned,
  1273.                                  &src_surface[0], SURFACE_NUM,
  1274.                                  NULL, 0);
  1275.     CHECK_VASTATUS(va_status, "vaCreateSurfaces");
  1276.  
  1277.     /* create reference surfaces */
  1278.     va_status = vaCreateSurfaces(
  1279.         va_dpy,
  1280.         VA_RT_FORMAT_YUV420, frame_width_mbaligned, frame_height_mbaligned,
  1281.         &ref_surface[0], SURFACE_NUM,
  1282.         NULL, 0
  1283.         );
  1284.     CHECK_VASTATUS(va_status, "vaCreateSurfaces");
  1285.  
  1286.     tmp_surfaceid = calloc(2 * SURFACE_NUM, sizeof(VASurfaceID));
  1287.     memcpy(tmp_surfaceid, src_surface, SURFACE_NUM * sizeof(VASurfaceID));
  1288.     memcpy(tmp_surfaceid + SURFACE_NUM, ref_surface, SURFACE_NUM * sizeof(VASurfaceID));
  1289.    
  1290.     /* Create a context for this encode pipe */
  1291.     va_status = vaCreateContext(va_dpy, config_id,
  1292.                                 frame_width_mbaligned, frame_height_mbaligned,
  1293.                                 VA_PROGRESSIVE,
  1294.                                 tmp_surfaceid, 2 * SURFACE_NUM,
  1295.                                 &context_id);
  1296.     CHECK_VASTATUS(va_status, "vaCreateContext");
  1297.     free(tmp_surfaceid);
  1298.  
  1299.     codedbuf_size = (frame_width_mbaligned * frame_height_mbaligned * 400) / (16*16);
  1300.  
  1301.     for (i = 0; i < SURFACE_NUM; i++) {
  1302.         /* create coded buffer once for all
  1303.          * other VA buffers which won't be used again after vaRenderPicture.
  1304.          * so APP can always vaCreateBuffer for every frame
  1305.          * but coded buffer need to be mapped and accessed after vaRenderPicture/vaEndPicture
  1306.          * so VA won't maintain the coded buffer
  1307.          */
  1308.         va_status = vaCreateBuffer(va_dpy,context_id,VAEncCodedBufferType,
  1309.                 codedbuf_size, 1, NULL, &coded_buf[i]);
  1310.         CHECK_VASTATUS(va_status,"vaCreateBuffer");
  1311.     }
  1312.    
  1313.     return 0;
  1314. }
  1315.  
  1316.  
  1317.  
  1318. #define partition(ref, field, key, ascending)   \
  1319.     while (i <= j) {                            \
  1320.         if (ascending) {                        \
  1321.             while (ref[i].field < key)          \
  1322.                 i++;                            \
  1323.             while (ref[j].field > key)          \
  1324.                 j--;                            \
  1325.         } else {                                \
  1326.             while (ref[i].field > key)          \
  1327.                 i++;                            \
  1328.             while (ref[j].field < key)          \
  1329.                 j--;                            \
  1330.         }                                       \
  1331.         if (i <= j) {                           \
  1332.             tmp = ref[i];                       \
  1333.             ref[i] = ref[j];                    \
  1334.             ref[j] = tmp;                       \
  1335.             i++;                                \
  1336.             j--;                                \
  1337.         }                                       \
  1338.     }                                           \
  1339.  
  1340. static void sort_one(VAPictureH264 ref[], int left, int right,
  1341.                      int ascending, int frame_idx)
  1342. {
  1343.     int i = left, j = right;
  1344.     unsigned int key;
  1345.     VAPictureH264 tmp;
  1346.  
  1347.     if (frame_idx) {
  1348.         key = ref[(left + right) / 2].frame_idx;
  1349.         partition(ref, frame_idx, key, ascending);
  1350.     } else {
  1351.         key = ref[(left + right) / 2].TopFieldOrderCnt;
  1352.         partition(ref, TopFieldOrderCnt, (signed int)key, ascending);
  1353.     }
  1354.    
  1355.     /* recursion */
  1356.     if (left < j)
  1357.         sort_one(ref, left, j, ascending, frame_idx);
  1358.    
  1359.     if (i < right)
  1360.         sort_one(ref, i, right, ascending, frame_idx);
  1361. }
  1362.  
  1363. static void sort_two(VAPictureH264 ref[], int left, int right, unsigned int key, unsigned int frame_idx,
  1364.                      int partition_ascending, int list0_ascending, int list1_ascending)
  1365. {
  1366.     int i = left, j = right;
  1367.     VAPictureH264 tmp;
  1368.  
  1369.     if (frame_idx) {
  1370.         partition(ref, frame_idx, key, partition_ascending);
  1371.     } else {
  1372.         partition(ref, TopFieldOrderCnt, (signed int)key, partition_ascending);
  1373.     }
  1374.    
  1375.  
  1376.     sort_one(ref, left, i-1, list0_ascending, frame_idx);
  1377.     sort_one(ref, j+1, right, list1_ascending, frame_idx);
  1378. }
  1379.  
  1380. static int update_ReferenceFrames(void)
  1381. {
  1382.     int i;
  1383.    
  1384.     if (current_frame_type == FRAME_B)
  1385.         return 0;
  1386.  
  1387.     CurrentCurrPic.flags = VA_PICTURE_H264_SHORT_TERM_REFERENCE;
  1388.     numShortTerm++;
  1389.     if (numShortTerm > num_ref_frames)
  1390.         numShortTerm = num_ref_frames;
  1391.     for (i=numShortTerm-1; i>0; i--)
  1392.         ReferenceFrames[i] = ReferenceFrames[i-1];
  1393.     ReferenceFrames[0] = CurrentCurrPic;
  1394.    
  1395.     if (current_frame_type != FRAME_B)
  1396.         current_frame_num++;
  1397.     if (current_frame_num > MaxFrameNum)
  1398.         current_frame_num = 0;
  1399.    
  1400.     return 0;
  1401. }
  1402.  
  1403.  
  1404. static int update_RefPicList(void)
  1405. {
  1406.     unsigned int current_poc = CurrentCurrPic.TopFieldOrderCnt;
  1407.    
  1408.     if (current_frame_type == FRAME_P) {
  1409.         memcpy(RefPicList0_P, ReferenceFrames, numShortTerm * sizeof(VAPictureH264));
  1410.         sort_one(RefPicList0_P, 0, numShortTerm-1, 0, 1);
  1411.     }
  1412.    
  1413.     if (current_frame_type == FRAME_B) {
  1414.         memcpy(RefPicList0_B, ReferenceFrames, numShortTerm * sizeof(VAPictureH264));
  1415.         sort_two(RefPicList0_B, 0, numShortTerm-1, current_poc, 0,
  1416.                  1, 0, 1);
  1417.  
  1418.         memcpy(RefPicList1_B, ReferenceFrames, numShortTerm * sizeof(VAPictureH264));
  1419.         sort_two(RefPicList1_B, 0, numShortTerm-1, current_poc, 0,
  1420.                  0, 1, 0);
  1421.     }
  1422.    
  1423.     return 0;
  1424. }
  1425.  
  1426.  
  1427. static int render_sequence(void)
  1428. {
  1429.     VABufferID seq_param_buf, rc_param_buf, misc_param_tmpbuf, render_id[2];
  1430.     VAStatus va_status;
  1431.     VAEncMiscParameterBuffer *misc_param, *misc_param_tmp;
  1432.     VAEncMiscParameterRateControl *misc_rate_ctrl;
  1433.    
  1434.     seq_param.level_idc = 41 /*SH_LEVEL_3*/;
  1435.     seq_param.picture_width_in_mbs = frame_width_mbaligned / 16;
  1436.     seq_param.picture_height_in_mbs = frame_height_mbaligned / 16;
  1437.     seq_param.bits_per_second = frame_bitrate;
  1438.  
  1439.     seq_param.intra_period = intra_period;
  1440.     seq_param.intra_idr_period = intra_idr_period;
  1441.     seq_param.ip_period = ip_period;
  1442.  
  1443.     seq_param.max_num_ref_frames = num_ref_frames;
  1444.     seq_param.seq_fields.bits.frame_mbs_only_flag = 1;
  1445.     seq_param.time_scale = 900;
  1446.     seq_param.num_units_in_tick = 15; /* Tc = num_units_in_tick / time_sacle */
  1447.     seq_param.seq_fields.bits.log2_max_pic_order_cnt_lsb_minus4 = Log2MaxPicOrderCntLsb - 4;
  1448.     seq_param.seq_fields.bits.log2_max_frame_num_minus4 = Log2MaxFrameNum - 4;;
  1449.     seq_param.seq_fields.bits.frame_mbs_only_flag = 1;
  1450.     seq_param.seq_fields.bits.chroma_format_idc = 1;
  1451.     seq_param.seq_fields.bits.direct_8x8_inference_flag = 1;
  1452.    
  1453.     if (frame_width != frame_width_mbaligned ||
  1454.         frame_height != frame_height_mbaligned) {
  1455.         seq_param.frame_cropping_flag = 1;
  1456.         seq_param.frame_crop_left_offset = 0;
  1457.         seq_param.frame_crop_right_offset = (frame_width_mbaligned - frame_width)/2;
  1458.         seq_param.frame_crop_top_offset = 0;
  1459.         seq_param.frame_crop_bottom_offset = (frame_height_mbaligned - frame_height)/2;
  1460.     }
  1461.    
  1462.     va_status = vaCreateBuffer(va_dpy, context_id,
  1463.                                VAEncSequenceParameterBufferType,
  1464.                                sizeof(seq_param),1,&seq_param,&seq_param_buf);
  1465.     CHECK_VASTATUS(va_status,"vaCreateBuffer");
  1466.    
  1467.     va_status = vaCreateBuffer(va_dpy, context_id,
  1468.                                VAEncMiscParameterBufferType,
  1469.                                sizeof(VAEncMiscParameterBuffer) + sizeof(VAEncMiscParameterRateControl),
  1470.                                1,NULL,&rc_param_buf);
  1471.     CHECK_VASTATUS(va_status,"vaCreateBuffer");
  1472.    
  1473.     vaMapBuffer(va_dpy, rc_param_buf,(void **)&misc_param);
  1474.     misc_param->type = VAEncMiscParameterTypeRateControl;
  1475.     misc_rate_ctrl = (VAEncMiscParameterRateControl *)misc_param->data;
  1476.     memset(misc_rate_ctrl, 0, sizeof(*misc_rate_ctrl));
  1477.     misc_rate_ctrl->bits_per_second = frame_bitrate;
  1478.     misc_rate_ctrl->target_percentage = 66;
  1479.     misc_rate_ctrl->window_size = 1000;
  1480.     misc_rate_ctrl->initial_qp = initial_qp;
  1481.     misc_rate_ctrl->min_qp = minimal_qp;
  1482.     misc_rate_ctrl->basic_unit_size = 0;
  1483.     vaUnmapBuffer(va_dpy, rc_param_buf);
  1484.  
  1485.     render_id[0] = seq_param_buf;
  1486.     render_id[1] = rc_param_buf;
  1487.    
  1488.     va_status = vaRenderPicture(va_dpy,context_id, &render_id[0], 2);
  1489.     CHECK_VASTATUS(va_status,"vaRenderPicture");;
  1490.  
  1491.     if (misc_priv_type != 0) {
  1492.         va_status = vaCreateBuffer(va_dpy, context_id,
  1493.                                    VAEncMiscParameterBufferType,
  1494.                                    sizeof(VAEncMiscParameterBuffer),
  1495.                                    1, NULL, &misc_param_tmpbuf);
  1496.         CHECK_VASTATUS(va_status,"vaCreateBuffer");
  1497.         vaMapBuffer(va_dpy, misc_param_tmpbuf,(void **)&misc_param_tmp);
  1498.         misc_param_tmp->type = misc_priv_type;
  1499.         misc_param_tmp->data[0] = misc_priv_value;
  1500.         vaUnmapBuffer(va_dpy, misc_param_tmpbuf);
  1501.    
  1502.         va_status = vaRenderPicture(va_dpy,context_id, &misc_param_tmpbuf, 1);
  1503.     }
  1504.    
  1505.     return 0;
  1506. }
  1507.  
  1508. static int calc_poc(int pic_order_cnt_lsb)
  1509. {
  1510.     static int PicOrderCntMsb_ref = 0, pic_order_cnt_lsb_ref = 0;
  1511.     int prevPicOrderCntMsb, prevPicOrderCntLsb;
  1512.     int PicOrderCntMsb, TopFieldOrderCnt;
  1513.    
  1514.     if (current_frame_type == FRAME_IDR)
  1515.         prevPicOrderCntMsb = prevPicOrderCntLsb = 0;
  1516.     else {
  1517.         prevPicOrderCntMsb = PicOrderCntMsb_ref;
  1518.         prevPicOrderCntLsb = pic_order_cnt_lsb_ref;
  1519.     }
  1520.    
  1521.     if ((pic_order_cnt_lsb < prevPicOrderCntLsb) &&
  1522.         ((prevPicOrderCntLsb - pic_order_cnt_lsb) >= (int)(MaxPicOrderCntLsb / 2)))
  1523.         PicOrderCntMsb = prevPicOrderCntMsb + MaxPicOrderCntLsb;
  1524.     else if ((pic_order_cnt_lsb > prevPicOrderCntLsb) &&
  1525.              ((pic_order_cnt_lsb - prevPicOrderCntLsb) > (int)(MaxPicOrderCntLsb / 2)))
  1526.         PicOrderCntMsb = prevPicOrderCntMsb - MaxPicOrderCntLsb;
  1527.     else
  1528.         PicOrderCntMsb = prevPicOrderCntMsb;
  1529.    
  1530.     TopFieldOrderCnt = PicOrderCntMsb + pic_order_cnt_lsb;
  1531.  
  1532.     if (current_frame_type != FRAME_B) {
  1533.         PicOrderCntMsb_ref = PicOrderCntMsb;
  1534.         pic_order_cnt_lsb_ref = pic_order_cnt_lsb;
  1535.     }
  1536.    
  1537.     return TopFieldOrderCnt;
  1538. }
  1539.  
  1540. static int render_picture(void)
  1541. {
  1542.     VABufferID pic_param_buf;
  1543.     VAStatus va_status;
  1544.     int i = 0;
  1545.  
  1546.     pic_param.CurrPic.picture_id = ref_surface[current_slot];
  1547.     pic_param.CurrPic.frame_idx = current_frame_num;
  1548.     pic_param.CurrPic.flags = 0;
  1549.     pic_param.CurrPic.TopFieldOrderCnt = calc_poc((current_frame_display - current_IDR_display) % MaxPicOrderCntLsb);
  1550.     pic_param.CurrPic.BottomFieldOrderCnt = pic_param.CurrPic.TopFieldOrderCnt;
  1551.     CurrentCurrPic = pic_param.CurrPic;
  1552.  
  1553.     if (getenv("TO_DEL")) { /* set RefPicList into ReferenceFrames */
  1554.         update_RefPicList(); /* calc RefPicList */
  1555.         memset(pic_param.ReferenceFrames, 0xff, 16 * sizeof(VAPictureH264)); /* invalid all */
  1556.         if (current_frame_type == FRAME_P) {
  1557.             pic_param.ReferenceFrames[0] = RefPicList0_P[0];
  1558.         } else if (current_frame_type == FRAME_B) {
  1559.             pic_param.ReferenceFrames[0] = RefPicList0_B[0];
  1560.             pic_param.ReferenceFrames[1] = RefPicList1_B[0];
  1561.         }
  1562.     } else {
  1563.         memcpy(pic_param.ReferenceFrames, ReferenceFrames, numShortTerm*sizeof(VAPictureH264));
  1564.         for (i = numShortTerm; i < SURFACE_NUM; i++) {
  1565.             pic_param.ReferenceFrames[i].picture_id = VA_INVALID_SURFACE;
  1566.             pic_param.ReferenceFrames[i].flags = VA_PICTURE_H264_INVALID;
  1567.         }
  1568.     }
  1569.    
  1570.     pic_param.pic_fields.bits.idr_pic_flag = (current_frame_type == FRAME_IDR);
  1571.     pic_param.pic_fields.bits.reference_pic_flag = (current_frame_type != FRAME_B);
  1572.     pic_param.pic_fields.bits.entropy_coding_mode_flag = h264_entropy_mode;
  1573.     pic_param.pic_fields.bits.deblocking_filter_control_present_flag = 1;
  1574.     pic_param.frame_num = current_frame_num;
  1575.     pic_param.coded_buf = coded_buf[current_slot];
  1576.     pic_param.last_picture = (current_frame_encoding == frame_count);
  1577.     pic_param.pic_init_qp = initial_qp;
  1578.  
  1579.     va_status = vaCreateBuffer(va_dpy, context_id,VAEncPictureParameterBufferType,
  1580.                                sizeof(pic_param),1,&pic_param, &pic_param_buf);
  1581.     CHECK_VASTATUS(va_status,"vaCreateBuffer");;
  1582.  
  1583.     va_status = vaRenderPicture(va_dpy,context_id, &pic_param_buf, 1);
  1584.     CHECK_VASTATUS(va_status,"vaRenderPicture");
  1585.  
  1586.     return 0;
  1587. }
  1588.  
  1589. static int render_packedsequence(void)
  1590. {
  1591.     VAEncPackedHeaderParameterBuffer packedheader_param_buffer;
  1592.     VABufferID packedseq_para_bufid, packedseq_data_bufid, render_id[2];
  1593.     unsigned int length_in_bits;
  1594.     unsigned char *packedseq_buffer = NULL;
  1595.     VAStatus va_status;
  1596.  
  1597.     length_in_bits = build_packed_seq_buffer(&packedseq_buffer);
  1598.    
  1599.     packedheader_param_buffer.type = VAEncPackedHeaderSequence;
  1600.    
  1601.     packedheader_param_buffer.bit_length = length_in_bits; /*length_in_bits*/
  1602.     packedheader_param_buffer.has_emulation_bytes = 0;
  1603.     va_status = vaCreateBuffer(va_dpy,
  1604.                                context_id,
  1605.                                VAEncPackedHeaderParameterBufferType,
  1606.                                sizeof(packedheader_param_buffer), 1, &packedheader_param_buffer,
  1607.                                &packedseq_para_bufid);
  1608.     CHECK_VASTATUS(va_status,"vaCreateBuffer");
  1609.  
  1610.     va_status = vaCreateBuffer(va_dpy,
  1611.                                context_id,
  1612.                                VAEncPackedHeaderDataBufferType,
  1613.                                (length_in_bits + 7) / 8, 1, packedseq_buffer,
  1614.                                &packedseq_data_bufid);
  1615.     CHECK_VASTATUS(va_status,"vaCreateBuffer");
  1616.  
  1617.     render_id[0] = packedseq_para_bufid;
  1618.     render_id[1] = packedseq_data_bufid;
  1619.     va_status = vaRenderPicture(va_dpy,context_id, render_id, 2);
  1620.     CHECK_VASTATUS(va_status,"vaRenderPicture");
  1621.  
  1622.     free(packedseq_buffer);
  1623.    
  1624.     return 0;
  1625. }
  1626.  
  1627.  
  1628. static int render_packedpicture(void)
  1629. {
  1630.     VAEncPackedHeaderParameterBuffer packedheader_param_buffer;
  1631.     VABufferID packedpic_para_bufid, packedpic_data_bufid, render_id[2];
  1632.     unsigned int length_in_bits;
  1633.     unsigned char *packedpic_buffer = NULL;
  1634.     VAStatus va_status;
  1635.  
  1636.     length_in_bits = build_packed_pic_buffer(&packedpic_buffer);
  1637.     packedheader_param_buffer.type = VAEncPackedHeaderPicture;
  1638.     packedheader_param_buffer.bit_length = length_in_bits;
  1639.     packedheader_param_buffer.has_emulation_bytes = 0;
  1640.  
  1641.     va_status = vaCreateBuffer(va_dpy,
  1642.                                context_id,
  1643.                                VAEncPackedHeaderParameterBufferType,
  1644.                                sizeof(packedheader_param_buffer), 1, &packedheader_param_buffer,
  1645.                                &packedpic_para_bufid);
  1646.     CHECK_VASTATUS(va_status,"vaCreateBuffer");
  1647.  
  1648.     va_status = vaCreateBuffer(va_dpy,
  1649.                                context_id,
  1650.                                VAEncPackedHeaderDataBufferType,
  1651.                                (length_in_bits + 7) / 8, 1, packedpic_buffer,
  1652.                                &packedpic_data_bufid);
  1653.     CHECK_VASTATUS(va_status,"vaCreateBuffer");
  1654.  
  1655.     render_id[0] = packedpic_para_bufid;
  1656.     render_id[1] = packedpic_data_bufid;
  1657.     va_status = vaRenderPicture(va_dpy,context_id, render_id, 2);
  1658.     CHECK_VASTATUS(va_status,"vaRenderPicture");
  1659.  
  1660.     free(packedpic_buffer);
  1661.    
  1662.     return 0;
  1663. }
  1664.  
  1665. static void render_packedsei(void)
  1666. {
  1667.     VAEncPackedHeaderParameterBuffer packed_header_param_buffer;
  1668.     VABufferID packed_sei_header_param_buf_id, packed_sei_buf_id, render_id[2];
  1669.     unsigned int length_in_bits /*offset_in_bytes*/;
  1670.     unsigned char *packed_sei_buffer = NULL;
  1671.     VAStatus va_status;
  1672.     int init_cpb_size, target_bit_rate, i_initial_cpb_removal_delay_length, i_initial_cpb_removal_delay;
  1673.     int i_cpb_removal_delay, i_dpb_output_delay_length, i_cpb_removal_delay_length;
  1674.  
  1675.     /* it comes for the bps defined in SPS */
  1676.     target_bit_rate = frame_bitrate;
  1677.     init_cpb_size = (target_bit_rate * 8) >> 10;
  1678.     i_initial_cpb_removal_delay = init_cpb_size * 0.5 * 1024 / target_bit_rate * 90000;
  1679.  
  1680.     i_cpb_removal_delay = 2;
  1681.     i_initial_cpb_removal_delay_length = 24;
  1682.     i_cpb_removal_delay_length = 24;
  1683.     i_dpb_output_delay_length = 24;
  1684.    
  1685.  
  1686.     length_in_bits = build_packed_sei_buffer_timing(
  1687.         i_initial_cpb_removal_delay_length,
  1688.         i_initial_cpb_removal_delay,
  1689.         0,
  1690.         i_cpb_removal_delay_length,
  1691.         i_cpb_removal_delay * current_frame_encoding,
  1692.         i_dpb_output_delay_length,
  1693.         0,
  1694.         &packed_sei_buffer);
  1695.  
  1696.     //offset_in_bytes = 0;
  1697.     packed_header_param_buffer.type = VAEncPackedHeaderH264_SEI;
  1698.     packed_header_param_buffer.bit_length = length_in_bits;
  1699.     packed_header_param_buffer.has_emulation_bytes = 0;
  1700.  
  1701.     va_status = vaCreateBuffer(va_dpy,
  1702.                                context_id,
  1703.                                VAEncPackedHeaderParameterBufferType,
  1704.                                sizeof(packed_header_param_buffer), 1, &packed_header_param_buffer,
  1705.                                &packed_sei_header_param_buf_id);
  1706.     CHECK_VASTATUS(va_status,"vaCreateBuffer");
  1707.  
  1708.     va_status = vaCreateBuffer(va_dpy,
  1709.                                context_id,
  1710.                                VAEncPackedHeaderDataBufferType,
  1711.                                (length_in_bits + 7) / 8, 1, packed_sei_buffer,
  1712.                                &packed_sei_buf_id);
  1713.     CHECK_VASTATUS(va_status,"vaCreateBuffer");
  1714.  
  1715.  
  1716.     render_id[0] = packed_sei_header_param_buf_id;
  1717.     render_id[1] = packed_sei_buf_id;
  1718.     va_status = vaRenderPicture(va_dpy,context_id, render_id, 2);
  1719.     CHECK_VASTATUS(va_status,"vaRenderPicture");
  1720.  
  1721.    
  1722.     free(packed_sei_buffer);
  1723.        
  1724.     return;
  1725. }
  1726.  
  1727.  
  1728. static int render_hrd(void)
  1729. {
  1730.     VABufferID misc_parameter_hrd_buf_id;
  1731.     VAStatus va_status;
  1732.     VAEncMiscParameterBuffer *misc_param;
  1733.     VAEncMiscParameterHRD *misc_hrd_param;
  1734.    
  1735.     va_status = vaCreateBuffer(va_dpy, context_id,
  1736.                    VAEncMiscParameterBufferType,
  1737.                    sizeof(VAEncMiscParameterBuffer) + sizeof(VAEncMiscParameterHRD),
  1738.                    1,
  1739.                    NULL,
  1740.                    &misc_parameter_hrd_buf_id);
  1741.     CHECK_VASTATUS(va_status, "vaCreateBuffer");
  1742.  
  1743.     vaMapBuffer(va_dpy,
  1744.                 misc_parameter_hrd_buf_id,
  1745.                 (void **)&misc_param);
  1746.     misc_param->type = VAEncMiscParameterTypeHRD;
  1747.     misc_hrd_param = (VAEncMiscParameterHRD *)misc_param->data;
  1748.  
  1749.     if (frame_bitrate > 0) {
  1750.         misc_hrd_param->initial_buffer_fullness = frame_bitrate * 1024 * 4;
  1751.         misc_hrd_param->buffer_size = frame_bitrate * 1024 * 8;
  1752.     } else {
  1753.         misc_hrd_param->initial_buffer_fullness = 0;
  1754.         misc_hrd_param->buffer_size = 0;
  1755.     }
  1756.     vaUnmapBuffer(va_dpy, misc_parameter_hrd_buf_id);
  1757.  
  1758.     va_status = vaRenderPicture(va_dpy,context_id, &misc_parameter_hrd_buf_id, 1);
  1759.     CHECK_VASTATUS(va_status,"vaRenderPicture");;
  1760.  
  1761.     return 0;
  1762. }
  1763.  
  1764. static void render_packedslice()
  1765. {
  1766.     VAEncPackedHeaderParameterBuffer packedheader_param_buffer;
  1767.     VABufferID packedslice_para_bufid, packedslice_data_bufid, render_id[2];
  1768.     unsigned int length_in_bits;
  1769.     unsigned char *packedslice_buffer = NULL;
  1770.     VAStatus va_status;
  1771.  
  1772.     length_in_bits = build_packed_slice_buffer(&packedslice_buffer);
  1773.     packedheader_param_buffer.type = VAEncPackedHeaderSlice;
  1774.     packedheader_param_buffer.bit_length = length_in_bits;
  1775.     packedheader_param_buffer.has_emulation_bytes = 0;
  1776.  
  1777.     va_status = vaCreateBuffer(va_dpy,
  1778.                                context_id,
  1779.                                VAEncPackedHeaderParameterBufferType,
  1780.                                sizeof(packedheader_param_buffer), 1, &packedheader_param_buffer,
  1781.                                &packedslice_para_bufid);
  1782.     CHECK_VASTATUS(va_status,"vaCreateBuffer");
  1783.  
  1784.     va_status = vaCreateBuffer(va_dpy,
  1785.                                context_id,
  1786.                                VAEncPackedHeaderDataBufferType,
  1787.                                (length_in_bits + 7) / 8, 1, packedslice_buffer,
  1788.                                &packedslice_data_bufid);
  1789.     CHECK_VASTATUS(va_status,"vaCreateBuffer");
  1790.  
  1791.     render_id[0] = packedslice_para_bufid;
  1792.     render_id[1] = packedslice_data_bufid;
  1793.     va_status = vaRenderPicture(va_dpy,context_id, render_id, 2);
  1794.     CHECK_VASTATUS(va_status,"vaRenderPicture");
  1795.  
  1796.     free(packedslice_buffer);
  1797. }
  1798.  
  1799. static int render_slice(void)
  1800. {
  1801.     VABufferID slice_param_buf;
  1802.     VAStatus va_status;
  1803.     int i;
  1804.  
  1805.     update_RefPicList();
  1806.    
  1807.     /* one frame, one slice */
  1808.     slice_param.macroblock_address = 0;
  1809.     slice_param.num_macroblocks = frame_width_mbaligned * frame_height_mbaligned/(16*16); /* Measured by MB */
  1810.     slice_param.slice_type = (current_frame_type == FRAME_IDR)?2:current_frame_type;
  1811.     if (current_frame_type == FRAME_IDR) {
  1812.         if (current_frame_encoding != 0)
  1813.             ++slice_param.idr_pic_id;
  1814.     } else if (current_frame_type == FRAME_P) {
  1815.         int refpiclist0_max = h264_maxref & 0xffff;
  1816.         memcpy(slice_param.RefPicList0, RefPicList0_P, refpiclist0_max*sizeof(VAPictureH264));
  1817.  
  1818.         for (i = refpiclist0_max; i < 32; i++) {
  1819.             slice_param.RefPicList0[i].picture_id = VA_INVALID_SURFACE;
  1820.             slice_param.RefPicList0[i].flags = VA_PICTURE_H264_INVALID;
  1821.         }
  1822.     } else if (current_frame_type == FRAME_B) {
  1823.         int refpiclist0_max = h264_maxref & 0xffff;
  1824.         int refpiclist1_max = (h264_maxref >> 16) & 0xffff;
  1825.  
  1826.         memcpy(slice_param.RefPicList0, RefPicList0_B, refpiclist0_max*sizeof(VAPictureH264));
  1827.         for (i = refpiclist0_max; i < 32; i++) {
  1828.             slice_param.RefPicList0[i].picture_id = VA_INVALID_SURFACE;
  1829.             slice_param.RefPicList0[i].flags = VA_PICTURE_H264_INVALID;
  1830.         }
  1831.  
  1832.         memcpy(slice_param.RefPicList1, RefPicList1_B, refpiclist1_max*sizeof(VAPictureH264));
  1833.         for (i = refpiclist1_max; i < 32; i++) {
  1834.             slice_param.RefPicList1[i].picture_id = VA_INVALID_SURFACE;
  1835.             slice_param.RefPicList1[i].flags = VA_PICTURE_H264_INVALID;
  1836.         }
  1837.     }
  1838.  
  1839.     slice_param.slice_alpha_c0_offset_div2 = 0;
  1840.     slice_param.slice_beta_offset_div2 = 0;
  1841.     slice_param.direct_spatial_mv_pred_flag = 1;
  1842.     slice_param.pic_order_cnt_lsb = (current_frame_display - current_IDR_display) % MaxPicOrderCntLsb;
  1843.    
  1844.  
  1845.     if (h264_packedheader &&
  1846.         config_attrib[enc_packed_header_idx].value & VA_ENC_PACKED_HEADER_SLICE)
  1847.         render_packedslice();
  1848.  
  1849.     va_status = vaCreateBuffer(va_dpy,context_id,VAEncSliceParameterBufferType,
  1850.                                sizeof(slice_param),1,&slice_param,&slice_param_buf);
  1851.     CHECK_VASTATUS(va_status,"vaCreateBuffer");;
  1852.  
  1853.     va_status = vaRenderPicture(va_dpy,context_id, &slice_param_buf, 1);
  1854.     CHECK_VASTATUS(va_status,"vaRenderPicture");
  1855.    
  1856.     return 0;
  1857. }
  1858.  
  1859.  
  1860. static int upload_source_YUV_once_for_all()
  1861. {
  1862.     int box_width=8;
  1863.     int row_shift=0;
  1864.     int i;
  1865.  
  1866.     for (i = 0; i < SURFACE_NUM; i++) {
  1867.         printf("\rLoading data into surface %d.....", i);
  1868.         upload_surface(va_dpy, src_surface[i], box_width, row_shift, 0);
  1869.  
  1870.         row_shift++;
  1871.         if (row_shift==(2*box_width)) row_shift= 0;
  1872.     }
  1873.     printf("Complete surface loading\n");
  1874.  
  1875.     return 0;
  1876. }
  1877.  
  1878. static int load_surface(VASurfaceID surface_id, unsigned long long display_order)
  1879. {
  1880.     unsigned char *srcyuv_ptr = NULL, *src_Y = NULL, *src_U = NULL, *src_V = NULL;
  1881.     unsigned long long frame_start, mmap_start;
  1882.     char *mmap_ptr = NULL;
  1883.     int frame_size, mmap_size;
  1884.    
  1885.     if (srcyuv_fp == NULL)
  1886.         return 0;
  1887.    
  1888.     /* allow encoding more than srcyuv_frames */    
  1889.     display_order = display_order % srcyuv_frames;
  1890.     frame_size = frame_width * frame_height * 3 / 2; /* for YUV420 */
  1891.     frame_start = display_order * frame_size;
  1892.    
  1893.     mmap_start = frame_start & (~0xfff);
  1894.     mmap_size = (frame_size + (frame_start & 0xfff) + 0xfff) & (~0xfff);
  1895.     mmap_ptr = mmap(0, mmap_size, PROT_READ, MAP_SHARED,
  1896.                     fileno(srcyuv_fp), mmap_start);
  1897.     if (mmap_ptr == MAP_FAILED) {
  1898.         printf("Failed to mmap YUV file (%s)\n", strerror(errno));
  1899.         return 1;
  1900.     }
  1901.     srcyuv_ptr = (unsigned char *)mmap_ptr +  (frame_start & 0xfff);
  1902.     if (srcyuv_fourcc == VA_FOURCC_NV12) {
  1903.         src_Y = srcyuv_ptr;
  1904.         src_U = src_Y + frame_width * frame_height;
  1905.         src_V = NULL;
  1906.     } else if (srcyuv_fourcc == VA_FOURCC_IYUV ||
  1907.         srcyuv_fourcc == VA_FOURCC_YV12) {
  1908.         src_Y = srcyuv_ptr;
  1909.         if (srcyuv_fourcc == VA_FOURCC_IYUV) {
  1910.             src_U = src_Y + frame_width * frame_height;
  1911.             src_V = src_U + (frame_width/2) * (frame_height/2);
  1912.         } else { /* YV12 */
  1913.             src_V = src_Y + frame_width * frame_height;
  1914.             src_U = src_V + (frame_width/2) * (frame_height/2);
  1915.         }
  1916.     } else {
  1917.         printf("Unsupported source YUV format\n");
  1918.         exit(1);
  1919.     }
  1920.    
  1921.     upload_surface_yuv(va_dpy, surface_id,
  1922.                        srcyuv_fourcc, frame_width, frame_height,
  1923.                        src_Y, src_U, src_V);
  1924.     if (mmap_ptr)
  1925.         munmap(mmap_ptr, mmap_size);
  1926.  
  1927.     return 0;
  1928. }
  1929.  
  1930.  
  1931. static int save_recyuv(VASurfaceID surface_id,
  1932.                        unsigned long long display_order,
  1933.                        unsigned long long encode_order)
  1934. {
  1935.     unsigned char *dst_Y = NULL, *dst_U = NULL, *dst_V = NULL;
  1936.  
  1937.     if (recyuv_fp == NULL)
  1938.         return 0;
  1939.  
  1940.     if (srcyuv_fourcc == VA_FOURCC_NV12) {
  1941.         int uv_size = 2 * (frame_width/2) * (frame_height/2);
  1942.         dst_Y = malloc(2*uv_size);
  1943.         dst_U = malloc(uv_size);
  1944.     } else if (srcyuv_fourcc == VA_FOURCC_IYUV ||
  1945.                srcyuv_fourcc == VA_FOURCC_YV12) {
  1946.         int uv_size = (frame_width/2) * (frame_height/2);
  1947.         dst_Y = malloc(4*uv_size);
  1948.         dst_U = malloc(uv_size);
  1949.         dst_V = malloc(uv_size);
  1950.     } else {
  1951.         printf("Unsupported source YUV format\n");
  1952.         exit(1);
  1953.     }
  1954.    
  1955.     download_surface_yuv(va_dpy, surface_id,
  1956.                          srcyuv_fourcc, frame_width, frame_height,
  1957.                          dst_Y, dst_U, dst_V);
  1958.     fseek(recyuv_fp, display_order * frame_width * frame_height * 1.5, SEEK_SET);
  1959.  
  1960.     if (srcyuv_fourcc == VA_FOURCC_NV12) {
  1961.         int uv_size = 2 * (frame_width/2) * (frame_height/2);
  1962.         fwrite(dst_Y, uv_size * 2, 1, recyuv_fp);
  1963.         fwrite(dst_U, uv_size, 1, recyuv_fp);
  1964.     } else if (srcyuv_fourcc == VA_FOURCC_IYUV ||
  1965.                srcyuv_fourcc == VA_FOURCC_YV12) {
  1966.         int uv_size = (frame_width/2) * (frame_height/2);
  1967.         fwrite(dst_Y, uv_size * 4, 1, recyuv_fp);
  1968.        
  1969.         if (srcyuv_fourcc == VA_FOURCC_IYUV) {
  1970.             fwrite(dst_U, uv_size, 1, recyuv_fp);
  1971.             fwrite(dst_V, uv_size, 1, recyuv_fp);
  1972.         } else {
  1973.             fwrite(dst_V, uv_size, 1, recyuv_fp);
  1974.             fwrite(dst_U, uv_size, 1, recyuv_fp);
  1975.         }
  1976.     } else {
  1977.         printf("Unsupported YUV format\n");
  1978.         exit(1);
  1979.     }
  1980.    
  1981.     if (dst_Y)
  1982.         free(dst_Y);
  1983.     if (dst_U)
  1984.         free(dst_U);
  1985.     if (dst_V)
  1986.         free(dst_V);
  1987.  
  1988.     fflush(recyuv_fp);
  1989.  
  1990.     return 0;
  1991. }
  1992.  
  1993.  
  1994. static int save_codeddata(unsigned long long display_order, unsigned long long encode_order)
  1995. {    
  1996.     VACodedBufferSegment *buf_list = NULL;
  1997.     VAStatus va_status;
  1998.     unsigned int coded_size = 0;
  1999.  
  2000.     va_status = vaMapBuffer(va_dpy,coded_buf[display_order % SURFACE_NUM],(void **)(&buf_list));
  2001.     CHECK_VASTATUS(va_status,"vaMapBuffer");
  2002.     while (buf_list != NULL) {
  2003.         coded_size += fwrite(buf_list->buf, 1, buf_list->size, coded_fp);
  2004.         buf_list = (VACodedBufferSegment *) buf_list->next;
  2005.  
  2006.         frame_size += coded_size;
  2007.     }
  2008.     vaUnmapBuffer(va_dpy,coded_buf[display_order % SURFACE_NUM]);
  2009.  
  2010.     printf("\r      "); /* return back to startpoint */
  2011.     switch (encode_order % 4) {
  2012.         case 0:
  2013.             printf("|");
  2014.             break;
  2015.         case 1:
  2016.             printf("/");
  2017.             break;
  2018.         case 2:
  2019.             printf("-");
  2020.             break;
  2021.         case 3:
  2022.             printf("\\");
  2023.             break;
  2024.     }
  2025.     printf("%08lld", encode_order);
  2026.     printf("(%06d bytes coded)",coded_size);
  2027.  
  2028.     fflush(coded_fp);
  2029.    
  2030.     return 0;
  2031. }
  2032.  
  2033.  
  2034. static struct storage_task_t * storage_task_dequeue(void)
  2035. {
  2036.     struct storage_task_t *header;
  2037.  
  2038.     pthread_mutex_lock(&encode_mutex);
  2039.  
  2040.     header = storage_task_header;    
  2041.     if (storage_task_header != NULL) {
  2042.         if (storage_task_tail == storage_task_header)
  2043.             storage_task_tail = NULL;
  2044.         storage_task_header = header->next;
  2045.     }
  2046.    
  2047.     pthread_mutex_unlock(&encode_mutex);
  2048.    
  2049.     return header;
  2050. }
  2051.  
  2052. static int storage_task_queue(unsigned long long display_order, unsigned long long encode_order)
  2053. {
  2054.     struct storage_task_t *tmp;
  2055.  
  2056.     tmp = calloc(1, sizeof(struct storage_task_t));
  2057.     tmp->display_order = display_order;
  2058.     tmp->encode_order = encode_order;
  2059.  
  2060.     pthread_mutex_lock(&encode_mutex);
  2061.    
  2062.     if (storage_task_header == NULL) {
  2063.         storage_task_header = tmp;
  2064.         storage_task_tail = tmp;
  2065.     } else {
  2066.         storage_task_tail->next = tmp;
  2067.         storage_task_tail = tmp;
  2068.     }
  2069.  
  2070.     srcsurface_status[display_order % SURFACE_NUM] = SRC_SURFACE_IN_STORAGE;
  2071.     pthread_cond_signal(&encode_cond);
  2072.    
  2073.     pthread_mutex_unlock(&encode_mutex);
  2074.    
  2075.     return 0;
  2076. }
  2077.  
  2078. static void storage_task(unsigned long long display_order, unsigned long long encode_order)
  2079. {
  2080.     unsigned int tmp;
  2081.     VAStatus va_status;
  2082.    
  2083.     tmp = GetTickCount();
  2084.     va_status = vaSyncSurface(va_dpy, src_surface[display_order % SURFACE_NUM]);
  2085.     CHECK_VASTATUS(va_status,"vaSyncSurface");
  2086.     SyncPictureTicks += GetTickCount() - tmp;
  2087.     tmp = GetTickCount();
  2088.     save_codeddata(display_order, encode_order);
  2089.     SavePictureTicks += GetTickCount() - tmp;
  2090.  
  2091.     save_recyuv(ref_surface[display_order % SURFACE_NUM], display_order, encode_order);
  2092.  
  2093.     /* reload a new frame data */
  2094.     tmp = GetTickCount();
  2095.     if (srcyuv_fp != NULL)
  2096.         load_surface(src_surface[display_order % SURFACE_NUM], display_order + SURFACE_NUM);
  2097.     UploadPictureTicks += GetTickCount() - tmp;
  2098.  
  2099.     pthread_mutex_lock(&encode_mutex);
  2100.     srcsurface_status[display_order % SURFACE_NUM] = SRC_SURFACE_IN_ENCODING;
  2101.     pthread_mutex_unlock(&encode_mutex);
  2102. }
  2103.  
  2104.        
  2105. static void * storage_task_thread(void *t)
  2106. {
  2107.     while (1) {
  2108.         struct storage_task_t *current;
  2109.        
  2110.         current = storage_task_dequeue();
  2111.         if (current == NULL) {
  2112.             pthread_mutex_lock(&encode_mutex);
  2113.             pthread_cond_wait(&encode_cond, &encode_mutex);
  2114.             pthread_mutex_unlock(&encode_mutex);
  2115.             continue;
  2116.         }
  2117.        
  2118.         storage_task(current->display_order, current->encode_order);
  2119.        
  2120.         free(current);
  2121.  
  2122.         /* all frames are saved, exit the thread */
  2123.         if (++frame_coded >= frame_count)
  2124.             break;
  2125.     }
  2126.  
  2127.     return 0;
  2128. }
  2129.  
  2130.  
  2131. static int encode_frames(void)
  2132. {
  2133.     unsigned int i, tmp;
  2134.     VAStatus va_status;
  2135.     //VASurfaceStatus surface_status;
  2136.  
  2137.     /* upload RAW YUV data into all surfaces */
  2138.     tmp = GetTickCount();
  2139.     if (srcyuv_fp != NULL) {
  2140.         for (i = 0; i < SURFACE_NUM; i++)
  2141.             load_surface(src_surface[i], i);
  2142.     } else
  2143.         upload_source_YUV_once_for_all();
  2144.     UploadPictureTicks += GetTickCount() - tmp;
  2145.    
  2146.     /* ready for encoding */
  2147.     memset(srcsurface_status, SRC_SURFACE_IN_ENCODING, sizeof(srcsurface_status));
  2148.    
  2149.     memset(&seq_param, 0, sizeof(seq_param));
  2150.     memset(&pic_param, 0, sizeof(pic_param));
  2151.     memset(&slice_param, 0, sizeof(slice_param));
  2152.  
  2153.     if (encode_syncmode == 0)
  2154.         pthread_create(&encode_thread, NULL, storage_task_thread, NULL);
  2155.    
  2156.     for (current_frame_encoding = 0; current_frame_encoding < frame_count; current_frame_encoding++) {
  2157.         encoding2display_order(current_frame_encoding, intra_period, intra_idr_period, ip_period,
  2158.                                &current_frame_display, &current_frame_type);
  2159.         if (current_frame_type == FRAME_IDR) {
  2160.             numShortTerm = 0;
  2161.             current_frame_num = 0;
  2162.             current_IDR_display = current_frame_display;
  2163.         }
  2164.  
  2165.         /* check if the source frame is ready */
  2166.         while (srcsurface_status[current_slot] != SRC_SURFACE_IN_ENCODING) {
  2167.             usleep(1);
  2168.         }
  2169.        
  2170.         tmp = GetTickCount();
  2171.         va_status = vaBeginPicture(va_dpy, context_id, src_surface[current_slot]);
  2172.         CHECK_VASTATUS(va_status,"vaBeginPicture");
  2173.         BeginPictureTicks += GetTickCount() - tmp;
  2174.        
  2175.         tmp = GetTickCount();
  2176.         if (current_frame_type == FRAME_IDR) {
  2177.             render_sequence();
  2178.             render_picture();            
  2179.             if (h264_packedheader) {
  2180.                 render_packedsequence();
  2181.                 render_packedpicture();
  2182.             }
  2183.             //if (rc_mode == VA_RC_CBR)
  2184.             //    render_packedsei();
  2185.             //render_hrd();
  2186.         } else {
  2187.             //render_sequence();
  2188.             render_picture();
  2189.             //if (rc_mode == VA_RC_CBR)
  2190.             //    render_packedsei();
  2191.             //render_hrd();
  2192.         }
  2193.         render_slice();
  2194.         RenderPictureTicks += GetTickCount() - tmp;
  2195.        
  2196.         tmp = GetTickCount();
  2197.         va_status = vaEndPicture(va_dpy,context_id);
  2198.         CHECK_VASTATUS(va_status,"vaEndPicture");;
  2199.         EndPictureTicks += GetTickCount() - tmp;
  2200.  
  2201.         if (encode_syncmode)
  2202.             storage_task(current_frame_display, current_frame_encoding);
  2203.         else /* queue the storage task queue */
  2204.             storage_task_queue(current_frame_display, current_frame_encoding);
  2205.        
  2206.         update_ReferenceFrames();        
  2207.     }
  2208.  
  2209.     if (encode_syncmode == 0) {
  2210.         int ret;
  2211.         pthread_join(encode_thread, (void **)&ret);
  2212.     }
  2213.    
  2214.     return 0;
  2215. }
  2216.  
  2217.  
  2218. static int release_encode()
  2219. {
  2220.     int i;
  2221.    
  2222.     vaDestroySurfaces(va_dpy,&src_surface[0],SURFACE_NUM);
  2223.     vaDestroySurfaces(va_dpy,&ref_surface[0],SURFACE_NUM);
  2224.  
  2225.     for (i = 0; i < SURFACE_NUM; i++)
  2226.         vaDestroyBuffer(va_dpy,coded_buf[i]);
  2227.    
  2228.     vaDestroyContext(va_dpy,context_id);
  2229.     vaDestroyConfig(va_dpy,config_id);
  2230.  
  2231.     return 0;
  2232. }
  2233.  
  2234. static int deinit_va()
  2235. {
  2236.     vaTerminate(va_dpy);
  2237.  
  2238.     va_close_display(va_dpy);
  2239.  
  2240.     return 0;
  2241. }
  2242.  
  2243.  
  2244. static int print_input()
  2245. {
  2246.     printf("\n\nINPUT:Try to encode H264...\n");
  2247.     if (rc_mode != -1)
  2248.         printf("INPUT: RateControl  : %s\n", rc_to_string(rc_mode));
  2249.     printf("INPUT: Resolution   : %dx%d, %d frames\n",
  2250.            frame_width, frame_height, frame_count);
  2251.     printf("INPUT: FrameRate    : %d\n", frame_rate);
  2252.     printf("INPUT: Bitrate      : %d\n", frame_bitrate);
  2253.     printf("INPUT: Slieces      : %d\n", frame_slices);
  2254.     printf("INPUT: IntraPeriod  : %d\n", intra_period);
  2255.     printf("INPUT: IDRPeriod    : %d\n", intra_idr_period);
  2256.     printf("INPUT: IpPeriod     : %d\n", ip_period);
  2257.     printf("INPUT: Initial QP   : %d\n", initial_qp);
  2258.     printf("INPUT: Min QP       : %d\n", minimal_qp);
  2259.     printf("INPUT: Source YUV   : %s", srcyuv_fp?"FILE":"AUTO generated");
  2260.     if (srcyuv_fp)
  2261.         printf(":%s (fourcc %s)\n", srcyuv_fn, fourcc_to_string(srcyuv_fourcc));
  2262.     else
  2263.         printf("\n");
  2264.     printf("INPUT: Coded Clip   : %s\n", coded_fn);
  2265.     if (recyuv_fp == NULL)
  2266.         printf("INPUT: Rec   Clip   : %s\n", "Not save reconstructed frame");
  2267.     else
  2268.         printf("INPUT: Rec   Clip   : Save reconstructed frame into %s (fourcc %s)\n", recyuv_fn,
  2269.                fourcc_to_string(srcyuv_fourcc));
  2270.    
  2271.     printf("\n\n"); /* return back to startpoint */
  2272.    
  2273.     return 0;
  2274. }
  2275.  
  2276. static int calc_PSNR(double *psnr)
  2277. {
  2278.     char *srcyuv_ptr = NULL, *recyuv_ptr = NULL, tmp;
  2279.     unsigned long long min_size;
  2280.     unsigned long long i, sse=0;
  2281.     double ssemean;
  2282.     int fourM = 0x400000; /* 4M */
  2283.  
  2284.     min_size = MIN(srcyuv_frames, frame_count) * frame_width * frame_height * 1.5;
  2285.     for (i=0; i<min_size; i++) {
  2286.         unsigned long long j = i % fourM;
  2287.        
  2288.         if ((i % fourM) == 0) {
  2289.             if (srcyuv_ptr)
  2290.                 munmap(srcyuv_ptr, fourM);
  2291.             if (recyuv_ptr)
  2292.                 munmap(recyuv_ptr, fourM);
  2293.            
  2294.             srcyuv_ptr = mmap(0, fourM, PROT_READ, MAP_SHARED, fileno(srcyuv_fp), i);
  2295.             recyuv_ptr = mmap(0, fourM, PROT_READ, MAP_SHARED, fileno(recyuv_fp), i);
  2296.             if ((srcyuv_ptr == MAP_FAILED) || (recyuv_ptr == MAP_FAILED)) {
  2297.                 printf("Failed to mmap YUV files\n");
  2298.                 return 1;
  2299.             }
  2300.         }
  2301.         tmp = srcyuv_ptr[j] - recyuv_ptr[j];
  2302.         sse += tmp * tmp;
  2303.     }
  2304.     ssemean = (double)sse/(double)min_size;
  2305.     *psnr = 20.0*log10(255) - 10.0*log10(ssemean);
  2306.  
  2307.     if (srcyuv_ptr)
  2308.         munmap(srcyuv_ptr, fourM);
  2309.     if (recyuv_ptr)
  2310.         munmap(recyuv_ptr, fourM);
  2311.    
  2312.     return 0;
  2313. }
  2314.  
  2315. static int print_performance(unsigned int PictureCount)
  2316. {
  2317.     unsigned int psnr_ret = 1, others = 0;
  2318.     double psnr = 0, total_size = frame_width * frame_height * 1.5 * frame_count;
  2319.  
  2320.     if (calc_psnr && srcyuv_fp && recyuv_fp)
  2321.         psnr_ret = calc_PSNR(&psnr);
  2322.    
  2323.     others = TotalTicks - UploadPictureTicks - BeginPictureTicks
  2324.         - RenderPictureTicks - EndPictureTicks - SyncPictureTicks - SavePictureTicks;
  2325.  
  2326.     printf("\n\n");
  2327.  
  2328.     printf("PERFORMANCE:   Frame Rate           : %.2f fps (%d frames, %d ms (%.2f ms per frame))\n",
  2329.            (double) 1000*PictureCount / TotalTicks, PictureCount,
  2330.            TotalTicks, ((double)  TotalTicks) / (double) PictureCount);
  2331.     printf("PERFORMANCE:   Compression ratio    : %d:1\n", (unsigned int)(total_size / frame_size));
  2332.     if (psnr_ret == 0)
  2333.         printf("PERFORMANCE:   PSNR                 : %.2f (%lld frames calculated)\n",
  2334.                psnr, MIN(frame_count, srcyuv_frames));
  2335.  
  2336.     printf("PERFORMANCE:     UploadPicture      : %d ms (%.2f, %.2f%% percent)\n",
  2337.            (int) UploadPictureTicks, ((double)  UploadPictureTicks) / (double) PictureCount,
  2338.            UploadPictureTicks/(double) TotalTicks/0.01);
  2339.     printf("PERFORMANCE:     vaBeginPicture     : %d ms (%.2f, %.2f%% percent)\n",
  2340.            (int) BeginPictureTicks, ((double)  BeginPictureTicks) / (double) PictureCount,
  2341.            BeginPictureTicks/(double) TotalTicks/0.01);
  2342.     printf("PERFORMANCE:     vaRenderHeader     : %d ms (%.2f, %.2f%% percent)\n",
  2343.            (int) RenderPictureTicks, ((double)  RenderPictureTicks) / (double) PictureCount,
  2344.            RenderPictureTicks/(double) TotalTicks/0.01);
  2345.     printf("PERFORMANCE:     vaEndPicture       : %d ms (%.2f, %.2f%% percent)\n",
  2346.            (int) EndPictureTicks, ((double)  EndPictureTicks) / (double) PictureCount,
  2347.            EndPictureTicks/(double) TotalTicks/0.01);
  2348.     printf("PERFORMANCE:     vaSyncSurface      : %d ms (%.2f, %.2f%% percent)\n",
  2349.            (int) SyncPictureTicks, ((double) SyncPictureTicks) / (double) PictureCount,
  2350.            SyncPictureTicks/(double) TotalTicks/0.01);
  2351.     printf("PERFORMANCE:     SavePicture        : %d ms (%.2f, %.2f%% percent)\n",
  2352.            (int) SavePictureTicks, ((double)  SavePictureTicks) / (double) PictureCount,
  2353.            SavePictureTicks/(double) TotalTicks/0.01);
  2354.     printf("PERFORMANCE:     Others             : %d ms (%.2f, %.2f%% percent)\n",
  2355.            (int) others, ((double) others) / (double) PictureCount,
  2356.            others/(double) TotalTicks/0.01);
  2357.  
  2358.     if (encode_syncmode == 0)
  2359.         printf("(Multithread enabled, the timing is only for reference)\n");
  2360.    
  2361.     return 0;
  2362. }
  2363.  
  2364.  
  2365. int main(int argc,char **argv)
  2366. {
  2367.     unsigned int start;
  2368.    
  2369.     process_cmdline(argc, argv);
  2370.  
  2371.     print_input();
  2372.    
  2373.     start = GetTickCount();
  2374.    
  2375.     init_va();
  2376.     setup_encode();
  2377.    
  2378.     encode_frames();
  2379.  
  2380.     release_encode();
  2381.     deinit_va();
  2382.  
  2383.     TotalTicks += GetTickCount() - start;
  2384.     print_performance(frame_count);
  2385.    
  2386.     return 0;
  2387. }
  2388.