Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © <2010>, Intel Corporation.
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the
  6.  * "Software"), to deal in the Software without restriction, including
  7.  * without limitation the rights to use, copy, modify, merge, publish,
  8.  * distribute, sub license, and/or sell copies of the Software, and to
  9.  * permit persons to whom the Software is furnished to do so, subject to
  10.  * the following conditions:
  11.  *
  12.  * The above copyright notice and this permission notice (including the
  13.  * next paragraph) shall be included in all copies or substantial portions
  14.  * of the Software.
  15.  *
  16.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  17.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  18.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  19.  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
  20.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  21.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  22.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23.  * Authors: Zhao Yakui <yakui.zhao@intel.com>
  24.  *
  25.  */
  26. // Module name: InterFrame_ivy.asm
  27. //
  28. // Perform inter prediction estimation for inter frames on Ivy
  29. //
  30.  
  31. //
  32. //  Now, begin source code....
  33. //
  34. /* Subroutine linkage. SAVE_RET stores ip + 32 in RETURN_REG; the call sites visible in this file follow it directly with a jmpi to the subroutine. RETURN copies RETURN_REG back into ip. NOTE(review): the offset 32 presumably makes the return land on the instruction after that jmpi -- confirm against the ISA's ip semantics before adding or removing instructions between SAVE_RET and the jmpi. */
  35. #define SAVE_RET        add (1) RETURN_REG<1>:ud   ip:ud        32:ud
  36. #define RETURN          mov (1) ip:ud   RETURN_REG<0,1,0>:ud
  37.  
  38. /*
  39.  * __INTER_START: kernel entry. Clears the tmp_reg scratch registers, fills in the headers used by the edge-pixel reads, the VME message and the output write, fetches the neighbour edge pixels, then decides whether to look up neighbour motion vectors.
  40.  */
  41. __INTER_START:
  42. mov  (16) tmp_reg0.0<1>:UD      0x0:UD {align1};
  43. mov  (16) tmp_reg2.0<1>:UD      0x0:UD {align1};
  44. mov  (16) tmp_reg4.0<1>:UD      0x0:UD {align1} ;
  45. mov  (16) tmp_reg6.0<1>:UD      0x0:UD {align1} ;
  46. /* Header for the ROW read: origin (x * 16 - 8, y * 16 - 1), block size BLOCK_32X1 */
  47. shl  (2) read0_header.0<1>:D    orig_xy_ub<2,2,1>:UB 4:UW {align1};    /* (x, y) * 16 */
  48. add  (1) read0_header.0<1>:D    read0_header.0<0,1,0>:D -8:W {align1};     /* X offset */
  49. add  (1) read0_header.4<1>:D    read0_header.4<0,1,0>:D -1:W {align1};     /* Y offset */
  50. mov  (1) read0_header.8<1>:UD   BLOCK_32X1 {align1};
  51. mov  (1) read0_header.20<1>:UB  thread_id_ub {align1};                  /* dispatch id */
  52. /* Header for the COL read: origin (x * 16 - 4, y * 16), block size BLOCK_4X16 */
  53. shl  (2) read1_header.0<1>:D    orig_xy_ub<2,2,1>:UB 4:UW {align1};    /* (x, y) * 16 */
  54. add  (1) read1_header.0<1>:D    read1_header.0<0,1,0>:D -4:W {align1};     /* X offset */
  55. mov  (1) read1_header.8<1>:UD   BLOCK_4X16 {align1};
  56. mov  (1) read1_header.20<1>:UB  thread_id_ub {align1};                  /* dispatch id */
  57. /* VME m0: the (x, y) * 16 origin is stored as two words at byte offset 8 */
  58. shl  (2) vme_m0.8<1>:UW         orig_xy_ub<2,2,1>:UB 4:UW {align1};    /* (x, y) * 16 */
  59. mov  (1) vme_m0.20<1>:UB        thread_id_ub {align1};                  /* dispatch id */
  60. /* Output write header: offset = (y * w_in_mb + x) * INTER_VME_OUTPUT_IN_OWS */
  61. mul  (1) obw_m0.8<1>:UD         w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1};
  62. add  (1) obw_m0.8<1>:UD         obw_m0.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1};
  63. mul  (1) obw_m0.8<1>:UD         obw_m0.8<0,1,0>:UD INTER_VME_OUTPUT_IN_OWS:UD {align1};
  64. mov  (1) obw_m0.20<1>:UB        thread_id_ub {align1};                  /* dispatch id */
  65.        
  66. /*
  67.  * Media Read Message -- fetch Luma neighbor edge pixels
  68.  */
  69. /* ROW */
  70. mov  (8) msg_reg0.0<1>:UD       read0_header.0<8,8,1>:UD {align1};        
  71. send (8) msg_ind INEP_ROW<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 1 {align1};
  72.  
  73. /* COL */
  74. mov  (8) msg_reg0.0<1>:UD       read1_header.0<8,8,1>:UD {align1};                
  75. send (8) msg_ind INEP_COL0<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 2 {align1};
  76. /* Start with a zero MV predictor and a zero reference-window offset */
  77. mov  (8) mb_mvp_ref.0<1>:ud     0:ud            {align1};
  78. mov  (8) mb_ref_win.0<1>:ud     0:ud            {align1};
  79. and.z.f0.0 (1)          null:uw mb_hwdep<0,1,0>:uw              0x04:uw   {align1};
  80. (f0.0) jmpi (1) __mb_hwdep_end;    /* bit 0x04 of mb_hwdep is clear: skip the neighbour MV lookup, the predictor stays zero */
  81. /* read back the data for MB A (the MB at x - 1 in the same row) */
  82. /* the layout of MB result is: rx.0(Available). rx.4(MVa), rX.8(MVb), rX.16(Pred_L0 flag),
  83. *  rX.18 (Pred_L1 flag), rX.20(Forward reference ID), rX.22(Backward reference ID)
  84. */
  85. mov  (8) mba_result.0<1>:ud     0x0:ud          {align1};
  86. mov  (8) mbb_result.0<1>:ud     0x0:ud          {align1};
  87. mov  (8) mbc_result.0<1>:ud     0x0:ud          {align1};
  88. mba_start:
  89. mov  (8) mb_msg0.0<1>:ud        0:ud            {align1};
  90. and.z.f0.0 (1)          null:uw input_mb_intra_ub<0,1,0>:ub     INTRA_PRED_AVAIL_FLAG_AE:uw   {align1};
  91. /* MB A doesn't exist. Zero MV. mba_flag is zero and ref ID = -1 */
  92. (f0.0)  mov  (2)        mba_result.20<1>:w      -1:w    {align1};
  93. (f0.0)  jmpi (1)        mbb_start;
  94. mov  (1) mba_result.0<1>:d      MB_AVAIL                {align1};      
  95. mov  (2) tmp_reg0.0<1>:UW       orig_xy_ub<2,2,1>:UB    {align1};
  96. add  (1) tmp_reg0.0<1>:w        tmp_reg0.0<0,1,0>:w     -1:w    {align1};    /* x - 1 */
  97. mul  (1) mb_msg0.8<1>:UD       w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1};
  98. add  (1) mb_msg0.8<1>:UD       mb_msg0.8<0,1,0>:UD   tmp_reg0.0<0,1,0>:uw {align1};
  99. mul  (1) mb_msg0.8<1>:UD       mb_msg0.8<0,1,0>:UD INTER_VME_OUTPUT_IN_OWS:UD {align1};    /* (y * w_in_mb + (x - 1)) * INTER_VME_OUTPUT_IN_OWS */
  100. mov  (1) mb_msg0.20<1>:UB        thread_id_ub {align1};                  /* dispatch id */
  101. mov  (1) mb_msg_tmp.8<1>:ud     mb_msg0.8<0,1,0>:ud     {align1};    /* keep the MB's base offset for the MV read below */
  102. /* First read: at base + INTER_VME_OUTPUT_MV_IN_OWS, into mb_wb, to inspect the stored MB mode */
  103. add  (1) mb_msg0.8<1>:UD       mb_msg0.8<0,1,0>:UD      INTER_VME_OUTPUT_MV_IN_OWS:UD {align1};
  104. /* bind index 3, read 1 oword (16bytes), msg type: 0(OWord Block Read) */
  105. send (16)
  106.        mb_ind
  107.        mb_wb.0<1>:ud
  108.         NULL
  109.        data_port(
  110.                OBR_CACHE_TYPE,
  111.                OBR_MESSAGE_TYPE,
  112.                OBR_CONTROL_0,
  113.                OBR_BIND_IDX,
  114.                OBR_WRITE_COMMIT_CATEGORY,
  115.                OBR_HEADER_PRESENT
  116.        )
  117.        mlen 1
  118.        rlen 1
  119.        {align1};
  120.  
  121. /* TODO: RefID is required after multi-references are added */
  122. and.z.f0.0      (1)     null<1>:ud        mb_mode_wb.0<0,1,0>:ud     INTRAMBFLAG_MASK:ud {align1} ;
  123. (-f0.0)   mov (2)       mba_result.20<1>:w                      -1:w    {align1};
  124. (-f0.0)   jmpi  (1)     mbb_start;    /* an INTRAMBFLAG_MASK bit is set: no MV to use, ref ID = -1 */
  125.  
  126. mov   (1) mb_msg0.8<1>:UD       mb_msg_tmp.8<0,1,0>:ud  {align1};    /* back to the MB's base offset */
  127. /* Read MV for MB A */
  128. /* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */
  129. send (16)
  130.        mb_ind
  131.        mb_mv0.0<1>:ud
  132.         NULL
  133.        data_port(
  134.                OBR_CACHE_TYPE,
  135.                OBR_MESSAGE_TYPE,
  136.                OBR_CONTROL_8,
  137.                OBR_BIND_IDX,
  138.                OBR_WRITE_COMMIT_CATEGORY,
  139.                OBR_HEADER_PRESENT
  140.        )
  141.        mlen 1
  142.        rlen 4
  143.        {align1};
  144. /* TODO: RefID is required after multi-references are added */
  145. /* MV: two dwords from mb_mv1.8 are copied into mba_result.4 and mba_result.8 */
  146. mov        (2)          mba_result.4<1>:ud              mb_mv1.8<2,2,1>:ud      {align1};
  147. mov        (1)          mba_result.16<1>:w              MB_PRED_FLAG            {align1};
  148.  
  149. mbb_start:    /* neighbour MB B: the MB at (x, y - 1) */
  150. mov  (8) mb_msg0.0<1>:ud        0:ud            {align1};
  151. and.z.f0.0 (1)          null:uw input_mb_intra_ub<0,1,0>:ub     INTRA_PRED_AVAIL_FLAG_B:uw   {align1};
  152. /* MB B doesn't exist. Zero MV. mbb_result.0 (available) stays zero and ref ID = -1 */
  153. /* If MB B doesn't exist, neither MB C nor D exists */
  154. (f0.0)  mov  (2)        mbb_result.20<1>:w      -1:w            {align1};
  155. (f0.0)  mov  (2)        mbc_result.20<1>:w      -1:w            {align1};
  156. (f0.0)  jmpi (1)        mb_mvp_start;
  157. mov  (1) mbb_result.0<1>:d      MB_AVAIL                {align1};      
  158. mov  (2) tmp_reg0.0<1>:UW       orig_xy_ub<2,2,1>:UB    {align1};
  159. add  (1) tmp_reg0.2<1>:w        tmp_reg0.2<0,1,0>:w     -1:w    {align1};    /* y - 1 */
  160. mul  (1) mb_msg0.8<1>:UD       w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1};
  161. add  (1) mb_msg0.8<1>:UD       mb_msg0.8<0,1,0>:UD   tmp_reg0.0<0,1,0>:uw {align1};
  162. mul  (1) mb_msg0.8<1>:UD       mb_msg0.8<0,1,0>:UD INTER_VME_OUTPUT_IN_OWS:UD {align1};    /* ((y - 1) * w_in_mb + x) * INTER_VME_OUTPUT_IN_OWS */
  163. mov  (1) mb_msg0.20<1>:UB        thread_id_ub {align1};                  /* dispatch id */
  164. mov  (1) mb_msg_tmp.8<1>:ud     mb_msg0.8<0,1,0>:ud     {align1};    /* keep the MB's base offset for the MV read below */
  165.  
  166. add  (1) mb_msg0.8<1>:UD       mb_msg0.8<0,1,0>:UD      INTER_VME_OUTPUT_MV_IN_OWS:UD {align1};
  167.  
  168. /* bind index 3, msg type: 0(OWord Block Read). NOTE(review): this comment used to say "read 4 oword (64bytes)", but the message is the same OBR_CONTROL_0 / rlen 1 read that the MB A path documents as 1 oword (16 bytes) -- confirm */
  169. send (16)
  170.        mb_ind
  171.        mb_wb.0<1>:ud
  172.         NULL
  173.        data_port(
  174.                OBR_CACHE_TYPE,
  175.                OBR_MESSAGE_TYPE,
  176.                OBR_CONTROL_0,
  177.                OBR_BIND_IDX,
  178.                OBR_WRITE_COMMIT_CATEGORY,
  179.                OBR_HEADER_PRESENT
  180.        )
  181.        mlen 1
  182.        rlen 1
  183.        {align1};
  184.  
  185. /* TODO: RefID is required after multi-references are added */
  186. and.z.f0.0      (1)     null<1>:ud        mb_mode_wb.0<0,1,0>:ud     INTRAMBFLAG_MASK:ud {align1} ;
  187. (-f0.0)   mov (2)       mbb_result.20<1>:w                      -1:w    {align1};
  188. (-f0.0)   jmpi  (1)     mbc_start;    /* an INTRAMBFLAG_MASK bit is set: no MV to use, ref ID = -1 */
  189.  
  190. mov   (1) mb_msg0.8<1>:UD       mb_msg_tmp.8<0,1,0>:ud  {align1};    /* back to the MB's base offset */
  191. /* Read MV for MB B */
  192. /* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */
  193. send (16)
  194.        mb_ind
  195.        mb_mv0.0<1>:ud
  196.         NULL
  197.        data_port(
  198.                OBR_CACHE_TYPE,
  199.                OBR_MESSAGE_TYPE,
  200.                OBR_CONTROL_8,
  201.                OBR_BIND_IDX,
  202.                OBR_WRITE_COMMIT_CATEGORY,
  203.                OBR_HEADER_PRESENT
  204.        )
  205.        mlen 1
  206.        rlen 4
  207.        {align1};
  208. /* TODO: RefID is required after multi-references are added */
  209. mov        (2)          mbb_result.4<1>:ud              mb_mv2.16<2,2,1>:ud     {align1};    /* two dwords from mb_mv2.16 into mbb_result.4 and .8 */
  210. mov        (1)          mbb_result.16<1>:w              MB_PRED_FLAG            {align1};
  211.  
  212. mbc_start:    /* neighbour MB C: the MB at (x + 1, y - 1) */
  213. mov  (8) mb_msg0.0<1>:ud        0:ud            {align1};
  214. and.z.f0.0 (1)          null:uw input_mb_intra_ub<0,1,0>:ub     INTRA_PRED_AVAIL_FLAG_C:uw   {align1};
  215. /* MB C doesn't exist: try MB D instead (mbd_start fills mbc_result) */
  216. /* Based on h264 spec the MB D will be replaced if MB C doesn't exist */
  217. (f0.0)  jmpi (1)        mbd_start;
  218. mov  (1) mbc_result.0<1>:d      MB_AVAIL                {align1};      
  219. mov  (2) tmp_reg0.0<1>:UW       orig_xy_ub<2,2,1>:UB    {align1};
  220. add  (1) tmp_reg0.2<1>:w        tmp_reg0.2<0,1,0>:w     -1:w    {align1};    /* y - 1 */
  221. add  (1) tmp_reg0.0<1>:w        tmp_reg0.0<0,1,0>:w     1:w     {align1};    /* x + 1 */
  222. mul  (1) mb_msg0.8<1>:UD       w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1};
  223. add  (1) mb_msg0.8<1>:UD       mb_msg0.8<0,1,0>:UD   tmp_reg0.0<0,1,0>:uw {align1};
  224. mul  (1) mb_msg0.8<1>:UD       mb_msg0.8<0,1,0>:UD INTER_VME_OUTPUT_IN_OWS:UD {align1};    /* ((y - 1) * w_in_mb + (x + 1)) * INTER_VME_OUTPUT_IN_OWS */
  225. mov  (1) mb_msg0.20<1>:UB        thread_id_ub {align1};                  /* dispatch id */
  226.  
  227. mov  (1) mb_msg_tmp.8<1>:ud     mb_msg0.8<0,1,0>:ud     {align1};    /* keep the MB's base offset for the MV read below */
  228.  
  229. add  (1) mb_msg0.8<1>:UD       mb_msg0.8<0,1,0>:UD      INTER_VME_OUTPUT_MV_IN_OWS:UD {align1};
  230. /* bind index 3, msg type: 0(OWord Block Read). NOTE(review): this comment used to say "read 4 oword (64bytes)", but the message is the same OBR_CONTROL_0 / rlen 1 read that the MB A path documents as 1 oword (16 bytes) -- confirm */
  231. send (16)
  232.        mb_ind
  233.        mb_wb.0<1>:ud
  234.         NULL
  235.        data_port(
  236.                OBR_CACHE_TYPE,
  237.                OBR_MESSAGE_TYPE,
  238.                OBR_CONTROL_0,
  239.                OBR_BIND_IDX,
  240.                OBR_WRITE_COMMIT_CATEGORY,
  241.                OBR_HEADER_PRESENT
  242.        )
  243.        mlen 1
  244.        rlen 1
  245.        {align1};
  246.  
  247. /* TODO: RefID is required after multi-references are added */
  248. and.z.f0.0      (1)     null<1>:ud        mb_mode_wb.0<0,1,0>:ud     INTRAMBFLAG_MASK:ud {align1} ;
  249. (-f0.0)   mov (2)       mbc_result.20<1>:w                      -1:w    {align1};
  250. (-f0.0)   jmpi  (1)     mb_mvp_start;    /* an INTRAMBFLAG_MASK bit is set: no MV to use, ref ID = -1 */
  251. mov   (1) mb_msg0.8<1>:UD       mb_msg_tmp.8<0,1,0>:ud {align1};    /* back to the MB's base offset */
  252. /* Read MV for MB C */
  253. /* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */
  254. send (16)
  255.        mb_ind
  256.        mb_mv0.0<1>:ud
  257.         NULL
  258.        data_port(
  259.                OBR_CACHE_TYPE,
  260.                OBR_MESSAGE_TYPE,
  261.                OBR_CONTROL_8,
  262.                OBR_BIND_IDX,
  263.                OBR_WRITE_COMMIT_CATEGORY,
  264.                OBR_HEADER_PRESENT
  265.        )
  266.        mlen 1
  267.        rlen 4
  268.        {align1};
  269. /* TODO: RefID is required after multi-references are added */
  270. /* Forward MV: two dwords from mb_mv2.16 into mbc_result.4 and .8 */
  271. mov        (2)          mbc_result.4<1>:ud              mb_mv2.16<2,2,1>:ud     {align1};
  272. mov        (1)          mbc_result.16<1>:w              MB_PRED_FLAG            {align1};
  273.  
  274. jmpi   (1)    mb_mvp_start;    /* MB C was used: skip the MB D fallback */
  275. mbd_start:    /* neighbour MB D at (x - 1, y - 1); reached only when MB C is unavailable, results go into mbc_result */
  276. mov  (8) mb_msg0.0<1>:ud        0:ud            {align1};
  277. and.z.f0.0 (1)          null:uw input_mb_intra_ub<0,1,0>:ub     INTRA_PRED_AVAIL_FLAG_D:uw   {align1};
       /* Neither MB C nor MB D exists: mark the slot with ref ID = -1, as the missing-A and missing-B paths do, so that mb_median_start does not count it as a neighbour with ref ID 0. The predicated mov leaves f0.0 untouched for the jmpi. */
       (f0.0)  mov  (2)        mbc_result.20<1>:w      -1:w            {align1};
  278. (f0.0)  jmpi (1)        mb_mvp_start;
  279. mov  (1) mbc_result.0<1>:d      MB_AVAIL                {align1};      
  280. mov  (2) tmp_reg0.0<1>:UW       orig_xy_ub<2,2,1>:UB    {align1};
  281. add  (2) tmp_reg0.0<1>:w        tmp_reg0.0<2,2,1>:w     -1:w    {align1};    /* (x - 1, y - 1) */
  282. mul  (1) mb_msg0.8<1>:UD       w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1};
  283. add  (1) mb_msg0.8<1>:UD       mb_msg0.8<0,1,0>:UD   tmp_reg0.0<0,1,0>:uw {align1};
  284.  
  285. mul  (1) mb_msg0.8<1>:UD       mb_msg0.8<0,1,0>:UD INTER_VME_OUTPUT_IN_OWS:UD {align1};    /* ((y - 1) * w_in_mb + (x - 1)) * INTER_VME_OUTPUT_IN_OWS */
  286. mov  (1) mb_msg0.20<1>:UB        thread_id_ub {align1};                  /* dispatch id */
  287. mov  (1) mb_msg_tmp.8<1>:ud     mb_msg0.8<0,1,0>:ud     {align1};    /* keep the MB's base offset for the MV read below */
  288.  
  289. add  (1) mb_msg0.8<1>:UD       mb_msg0.8<0,1,0>:UD      INTER_VME_OUTPUT_MV_IN_OWS:UD {align1};
  290. /* bind index 3, msg type: 0(OWord Block Read). NOTE(review): this comment used to say "read 4 oword (64bytes)", but the message is the same OBR_CONTROL_0 / rlen 1 read that the MB A path documents as 1 oword (16 bytes) -- confirm */
  291. send (16)
  292.        mb_ind
  293.        mb_wb.0<1>:ud
  294.         NULL
  295.        data_port(
  296.                OBR_CACHE_TYPE,
  297.                OBR_MESSAGE_TYPE,
  298.                OBR_CONTROL_0,
  299.                OBR_BIND_IDX,
  300.                OBR_WRITE_COMMIT_CATEGORY,
  301.                OBR_HEADER_PRESENT
  302.        )
  303.        mlen 1
  304.        rlen 1
  305.        {align1};
  306.  
  307. and.z.f0.0      (1)     null<1>:ud        mb_mode_wb.0<0,1,0>:ud     INTRAMBFLAG_MASK:ud {align1} ;
  308. (-f0.0)   mov (2)       mbc_result.20<1>:w                      -1:w    {align1};
  309. (-f0.0)   jmpi  (1)     mb_mvp_start;    /* an INTRAMBFLAG_MASK bit is set: no MV to use, ref ID = -1 */
  310.  
  311. mov   (1) mb_msg0.8<1>:UD       mb_msg_tmp.8<0,1,0>:ud  {align1};    /* back to the MB's base offset */
  312. /* Read MV for MB D */
  313. /* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */
  314. send (16)
  315.        mb_ind
  316.        mb_mv0.0<1>:ud
  317.         NULL
  318.        data_port(
  319.                OBR_CACHE_TYPE,
  320.                OBR_MESSAGE_TYPE,
  321.                OBR_CONTROL_8,
  322.                OBR_BIND_IDX,
  323.                OBR_WRITE_COMMIT_CATEGORY,
  324.                OBR_HEADER_PRESENT
  325.        )
  326.        mlen 1
  327.        rlen 4
  328.        {align1};
  329.  
  330. /* TODO: RefID is required after multi-references are added */
  331.  
  332. /* Forward MV: two dwords from mb_mv3.24 into mbc_result.4 and .8 */
  333. mov        (2)          mbc_result.4<1>:ud              mb_mv3.24<2,2,1>:ud     {align1};
  334. mov        (1)          mbc_result.16<1>:w              MB_PRED_FLAG            {align1};
  335.        
  336. mb_mvp_start:    /* choose the MV predictor (mb_mvp_ref) from the A / B / C results gathered above */
  337. /*TODO: Add the skip prediction */
  338. /* Check whether both MB B and C are unavailable (sum of their "available" fields is zero) */
  339. add     (1)     tmp_reg0.0<1>:d         mbb_result.0<0,1,0>:d   mbc_result.0<0,1,0>:d   {align1};
  340. cmp.z.f0.0 (1)  null:d                  tmp_reg0.0<0,1,0>:d     0:d     {align1};
  341. (-f0.0) jmpi (1)        mb_median_start;    /* at least one of B / C is available */
  342. cmp.nz.f0.0 (1) null:d  mba_result.0<0,1,0>:d           0:d             {align1};    /* f0.0 set when MB A is available */
  343. (f0.0)  mov     (1)     mbb_result.4<1>:ud              mba_result.4<0,1,0>:ud  {align1};      
  344. (f0.0)  mov     (1)     mbc_result.4<1>:ud              mba_result.4<0,1,0>:ud  {align1};      
  345. (f0.0)  mov     (1)     mbb_result.20<1>:uw             mba_result.20<0,1,0>:uw {align1};      
  346. (f0.0)  mov     (1)     mbc_result.20<1>:uw             mba_result.20<0,1,0>:uw {align1};      
  347. (f0.0)  mov     (1)     mb_mvp_ref.0<1>:ud              mba_result.4<0,1,0>:ud  {align1};    /* only A available: the predictor is A's MV */
  348. (-f0.0) mov     (1)     mb_mvp_ref.0<1>:ud              0:ud                    {align1};    /* no neighbour available: zero predictor */
  349. jmpi    (1)     __mb_hwdep_end;
  350.        
  351. mb_median_start:
  352. /* check whether only one neighbour MB has the same ref ID with the current MB (each neighbour's ref ID word at .20 is compared against 0; tmp_reg0.0 counts the matches, tmp_reg0.4 holds the MV of the last match) */
  353. mov (8) tmp_reg0.0<1>:ud                0:ud            {align1};
  354. cmp.z.f0.0      (1)     null:d  mba_result.20<1>:w      0:w     {align1};
  355. (f0.0)  add     (1)     tmp_reg0.0<1>:w         tmp_reg0.0<1>:w 1:w     {align1};
  356. (f0.0)  mov     (1)     tmp_reg0.4<1>:ud        mba_result.4<0,1,0>:ud  {align1};
  357. cmp.z.f0.0      (1)     null:d  mbb_result.20<1>:w      0:w     {align1};
  358. (f0.0)  add     (1)     tmp_reg0.0<1>:w         tmp_reg0.0<1>:w 1:w     {align1};
  359. (f0.0)  mov     (1)     tmp_reg0.4<1>:ud        mbb_result.4<0,1,0>:ud  {align1};
  360. cmp.z.f0.0      (1)     null:d  mbc_result.20<1>:w      0:w     {align1};
  361. (f0.0)  add     (1)     tmp_reg0.0<1>:w         tmp_reg0.0<1>:w 1:w     {align1};
  362. (f0.0)  mov     (1)     tmp_reg0.4<1>:ud        mbc_result.4<0,1,0>:ud  {align1};
  363. cmp.e.f0.0      (1)     null:d  tmp_reg0.0<1>:w  1:w    {align1};
  364. (f0.0)  mov     (1)     mb_mvp_ref.0<1>:ud      tmp_reg0.4<0,1,0>:ud    {align1};    /* exactly one match: use that neighbour's MV */
  365. (f0.0)  jmpi (1)  __mb_hwdep_end;
  366. /* Otherwise take the median of the A / B / C MVs per component: the words at .4 first, then the words at .6 (presumably x then y -- confirm) */
  367. mov     (1)     INPUT_ARG0.0<1>:w       mba_result.4<0,1,0>:w   {align1};
  368. mov     (1)     INPUT_ARG0.4<1>:w       mbb_result.4<0,1,0>:w   {align1};
  369. mov     (1)     INPUT_ARG0.8<1>:w       mbc_result.4<0,1,0>:w   {align1};
  370. SAVE_RET        {align1};    /* call word_imedian; the result comes back in RET_ARG */
  371.  jmpi   (1)     word_imedian;
  372. mov     (1)     mb_mvp_ref.0<1>:w               RET_ARG<0,1,0>:w        {align1};
  373. mov     (1)     INPUT_ARG0.0<1>:w       mba_result.6<0,1,0>:w   {align1};
  374. mov     (1)     INPUT_ARG0.4<1>:w       mbb_result.6<0,1,0>:w   {align1};
  375. mov     (1)     INPUT_ARG0.8<1>:w       mbc_result.6<0,1,0>:w   {align1};
  376. SAVE_RET        {align1};    /* second call, for the word at .6 */
  377. jmpi    (1)     word_imedian;
  378. mov     (1)     mb_mvp_ref.2<1>:w               RET_ARG<0,1,0>:w        {align1};
  379.  
  380. __mb_hwdep_end:    /* derive the reference-window offset from the predictor: mb_ref_win.16 = ((mb_mvp_ref >> 2) + 3) & 0xFFFC for both words */
  381. asr     (2)     mb_ref_win.0<1>:w       mb_mvp_ref.0<2,2,1>:w   2:w     {align1};
  382. add     (2)     mb_ref_win.8<1>:w       mb_ref_win.0<2,2,1>:w   3:w     {align1};
  383. and     (2)     mb_ref_win.16<1>:uw     mb_ref_win.8<2,2,1>:uw  0xFFFC:uw {align1};
  384.  
  385. /* m2 */        
  386. mov  (8) vme_msg_2<1>:UD        0x0:UD {align1};
  387.  
  388. /* m3: the ROW edge pixels, with dword 0 cleared and only the top byte of dword 1 kept */
  389. mov  (1) INEP_ROW.0<1>:UD       0x0:UD {align1};
  390. and  (1) INEP_ROW.4<1>:UD       INEP_ROW.4<0,1,0>:UD            0xFF000000:UD {align1};
  391. mov  (8) vme_msg_3<1>:UD        INEP_ROW.0<8,8,1>:UD {align1};        
  392.  
  393. /* m4: 16 bytes gathered from the COL read (byte 3 of every dword), then the intra predictor mode at offset 16 */
  394. mov  (8) vme_msg_4<1>:UD        0x0 {align1};
  395. mov (16) vme_msg_4.0<1>:UB      INEP_COL0.3<32,8,4>:UB {align1};
  396. mov  (1) vme_msg_4.16<1>:UD     INTRA_PREDICTORE_MODE {align1};
  397.  
  398.  
  399. /* m1 */
  400. mov  (8) vme_m1.0<1>:ud         0x0:ud  {align1};
  401. and.z.f0.0 (1) null<1>:UW transform_8x8_ub<0,1,0>:UB 1:UW {align1};
  402. (f0.0) mov  (1) intra_part_mask_ub<1>:UB  LUMA_INTRA_8x8_DISABLE:uw {align1};    /* bit 0 of transform_8x8_ub is clear: write LUMA_INTRA_8x8_DISABLE into the intra partition mask */
  403.  
  404. /* assign MB intra struct from the thread payload*/
  405. mov (1) mb_intra_struct_ub<1>:UB input_mb_intra_ub<0,1,0>:UB {align1};
  406.  
  407.  
  408. /* M0 */
  409. /* IME search */
  410. cmp.z.f0.0 (1)          null<1>:uw      quality_level_ub<0,1,0>:ub              LOW_QUALITY_LEVEL:uw   {align1};
  411. (f0.0) jmpi (1) __low_quality_search;    /* quality_level_ub == LOW_QUALITY_LEVEL; otherwise fall through to __high_quality_search */
  412.  
  413. __high_quality_search:
  414. mov  (1) vme_m0.12<1>:UD   SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR + SUB_PEL_MODE_QUARTER:UD {align1};  
  415. /* 16x16 Source, 1/4 pixel, Haar */
  416. mov  (1) vme_m0.22<1>:UW        REF_REGION_SIZE {align1};         /* Reference Width&Height, 48x40 */
  417. /* Start offset (-16, -12) relative to the MB origin */
  418. mov  (1) vme_m0.0<1>:W          -16:W                   {align1};
  419. mov  (1) vme_m0.2<1>:W          -12:W                   {align1};
  420. /* Neighbour A / B unavailable: move the start offset by +12 in x / +8 in y (giving -4 / -4) */
  421. and.z.f0.0 (1)          null:uw input_mb_intra_ub<0,1,0>:ub     INTRA_PRED_AVAIL_FLAG_AE:uw   {align1};
  422. (f0.0)  add     (1)     vme_m0.0<1>:w   vme_m0.0<0,1,0>:w       12:w    {align1};
  423. and.z.f0.0 (1)          null:uw input_mb_intra_ub<0,1,0>:ub     INTRA_PRED_AVAIL_FLAG_B:uw   {align1};
  424. (f0.0)  add     (1)     vme_m0.2<1>:w   vme_m0.2<0,1,0>:w       8:w     {align1};
  425.  
  426. jmpi __vme_msg;
  427.  
  428. __low_quality_search:
  429. mov  (1) vme_m0.12<1>:UD   SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR + SUB_PEL_MODE_HALF:UD {align1};  
  430. /* 16x16 Source, 1/2 pixel, Haar */
  431. mov  (1) vme_m0.22<1>:UW        MIN_REF_REGION_SIZE {align1};         /* Reference Width&Height, 32x32 */
  432. /* Start offset (-8, -8) relative to the MB origin */
  433. mov  (1) vme_m0.0<1>:W          -8:W                    {align1};
  434. mov  (1) vme_m0.2<1>:W          -8:W                    {align1};
  435. /* Neighbour A / B unavailable: move the start offset by +4 in x / +4 in y (giving -4 / -4) */
  436. and.z.f0.0 (1)          null:uw input_mb_intra_ub<0,1,0>:ub     INTRA_PRED_AVAIL_FLAG_AE:uw   {align1};
  437. (f0.0)  add     (1)     vme_m0.0<1>:w   vme_m0.0<0,1,0>:w       4:w     {align1};
  438. and.z.f0.0 (1)          null:uw input_mb_intra_ub<0,1,0>:ub     INTRA_PRED_AVAIL_FLAG_B:uw   {align1};
  439. (f0.0)  add     (1)     vme_m0.2<1>:w   vme_m0.2<0,1,0>:w       4:w     {align1};
  440.  
  441. __vme_msg:    /* finish m0 / m1 and send the VME request; the answer arrives in vme_wb */
  442. mov  (1) vme_m0.4<1>:UD         vme_m0.0<0,1,0>:UD      {align1};    /* duplicate the start offset into dword 1 */
  443. add  (2) vme_m0.0<1>:w          vme_m0.0<2,2,1>:w       mb_ref_win.16<2,2,1>:w  {align1};    /* shift both copies by the reference-window offset */
  444. add  (2) vme_m0.4<1>:w          vme_m0.4<2,2,1>:w       mb_ref_win.16<2,2,1>:w  {align1};
  445. mov  (8) vme_msg_0.0<1>:UD      vme_m0.0<8,8,1>:UD {align1};
  446.  
  447. /* m1 */
  448.  
  449. mov  (1) vme_m1.0<1>:UD         ADAPTIVE_SEARCH_ENABLE:ud {align1} ;
  450. /* MV num is passed by constant buffer. R4.28 */
  451. mov  (1) vme_m1.4<1>:UB         r4.28<0,1,0>:UB {align1};
  452. add  (1) vme_m1.4<1>:UD         vme_m1.4<0,1,0>:UD      FB_PRUNING_DISABLE:UD {align1};
  453. mov  (1) vme_m1.8<1>:UD         START_CENTER + SEARCH_PATH_LEN:UD {align1};
  454.  
  455. /* Set the MV cost center: the predictor is written to both dword 4 and dword 5 of m1 */
  456. mov  (1) vme_m1.16<1>:ud        mb_mvp_ref.0<0,1,0>:ud  {align1};
  457. mov  (1) vme_m1.20<1>:ud        mb_mvp_ref.0<0,1,0>:ud  {align1};
  458. mov  (8) vme_msg_1.0<1>:UD      vme_m1.0<8,8,1>:UD {align1};
  459.  
  460.  
  461. send (8)
  462.        vme_msg_ind
  463.        vme_wb
  464.        null
  465.        vme(
  466.                BIND_IDX_VME,
  467.                0,
  468.                0,
  469.                VME_MESSAGE_TYPE_MIXED
  470.        )
  471.        mlen vme_msg_length
  472.        rlen vme_inter_wb_length
  473.        {align1};
  474.  
  475. and.z.f0.0      (1)     null<1>:ud              vme_wb0.0<0,1,0>:ud     INTRAMBFLAG_MASK:ud {align1} ;
  476.  
  477. (-f0.0)jmpi     (1)     __INTRA_INFO ;    /* an INTRAMBFLAG_MASK bit is set in the result: emit the intra record; otherwise fall through to __INTER_INFO */
  478.  
  479. __INTER_INFO:    /* inter result: write the MV registers, then build the summary record in msg_reg1 */
  480. /* Write MV pairs */   
  481. mov  (8) msg_reg0.0<1>:UD       obw_m0.0<8,8,1>:UD {align1};
  482.  
  483. mov  (8) msg_reg1.0<1>:UD       vme_wb1.0<8,8,1>:UD   {align1};
  484.  
  485. mov  (8) msg_reg2.0<1>:UD       vme_wb2.0<8,8,1>:UD   {align1};
  486.  
  487. mov  (8) msg_reg3.0<1>:UD       vme_wb3.0<8,8,1>:UD   {align1};
  488.  
  489. mov  (8) msg_reg4.0<1>:UD       vme_wb4.0<8,8,1>:UD   {align1};                
  490. /* bind index 3, write  8 oword (128 bytes), msg type: 8(OWord Block Write) */
  491. send (16)
  492.        msg_ind
  493.        obw_wb
  494.        null
  495.        data_port(
  496.                OBW_CACHE_TYPE,
  497.                OBW_MESSAGE_TYPE,
  498.                OBW_CONTROL_8,
  499.                OBW_BIND_IDX,
  500.                OBW_WRITE_COMMIT_CATEGORY,
  501.                OBW_HEADER_PRESENT
  502.        )
  503.        mlen 5
  504.        rlen obw_wb_length
  505.        {align1};
  506. /* bits = (vme_wb0.2 & MV32_BIT_MASK) >> MV32_BIT_SHIFT; tmp_ud1 = bits * 96 + 32; tmp_uw1 = (bits << MFC_MV32_BIT_SHIFT) + MVSIZE_UW_BASE + CBP_DC_YUV_UW */
  507. mov             (1)     tmp_uw1<1>:uw           0:uw {align1} ;
  508. mov             (1)     tmp_ud1<1>:ud           0:ud {align1} ;
  509. and             (1)     tmp_uw1<1>:uw           vme_wb0.2<0,1,0>:uw     MV32_BIT_MASK:uw {align1} ;
  510. shr             (1)     tmp_uw1<1>:uw           tmp_uw1<1>:uw           MV32_BIT_SHIFT:uw {align1} ;
  511. mul             (1)     tmp_ud1<1>:ud           tmp_uw1<0,1,0>:uw       96:uw {align1} ;
  512. add             (1)     tmp_ud1<1>:ud           tmp_ud1<0,1,0>:ud       32:uw {align1} ;
  513. shl       (1)     tmp_uw1<1>:uw           tmp_uw1<0,1,0>:uw       MFC_MV32_BIT_SHIFT:uw {align1} ;
  514. add       (1)     tmp_uw1<1>:uw           tmp_uw1<0,1,0>:uw       MVSIZE_UW_BASE:uw {align1} ;
  515. add             (1)     tmp_uw1<1>:uw           tmp_uw1<0,1,0>:uw       CBP_DC_YUV_UW:uw {align1} ;
  516. /* Summary record; the write itself happens at __OUTPUT_INFO */
  517. mov             (1)     msg_reg1.0<1>:uw        vme_wb0.0<0,1,0>:uw     {align1} ;
  518. mov             (1)     msg_reg1.2<1>:uw        tmp_uw1<0,1,0>:uw       {align1} ;
  519. mov             (1)     msg_reg1.4<1>:UD        vme_wb0.28<0,1,0>:UD    {align1};
  520. mov             (1)     msg_reg1.8<1>:ud        tmp_ud1<0,1,0>:ud       {align1} ;
  521. mov             (1)     msg_reg1.12<1>:ud        vme_wb0.0<0,1,0>:ud     {align1} ;
  522. mov             (1)     msg_reg1.16<1>:ud        0x25:ud     {align1} ;    /* NOTE(review): the intra path stores 0x35 here; the meaning of the constant is not visible in this file */
  523. jmpi            (1)     __OUTPUT_INFO;
  524.  
  525. __INTRA_INFO:    /* intra result: build the summary record from dwords 0, 4, 5 and 6 of vme_wb */
  526. mov             (1)     msg_reg1.0<1>:UD        vme_wb.0<0,1,0>:UD      {align1};
  527. mov             (1)     msg_reg1.4<1>:UD        vme_wb.16<0,1,0>:UD     {align1};
  528. mov             (1)     msg_reg1.8<1>:UD        vme_wb.20<0,1,0>:UD     {align1};
  529. mov             (1)     msg_reg1.12<1>:UD       vme_wb.24<0,1,0>:UD     {align1};
  530. mov             (1)     msg_reg1.16<1>:ud        0x35:ud     {align1} ;    /* NOTE(review): the inter path stores 0x25 here; the meaning of the constant is not visible in this file */
  531.  
  532. __OUTPUT_INFO:    /* common tail: write the summary record in msg_reg1, then fence */
  533.  
  534. mov     (1)     msg_reg1.20<1>:ud       obw_m0.8<0,1,0>:ud      {align1};    /* record the MB's base output offset in the record itself */
  535. add     (1)     obw_m0.8<1>:UD       obw_m0.8<0,1,0>:UD      INTER_VME_OUTPUT_MV_IN_OWS:UD {align1};    /* the record is written at base + INTER_VME_OUTPUT_MV_IN_OWS */
  536. mov     (8)     msg_reg0.0<1>:ud        obw_m0.0<8,8,1>:ud      {align1};
  537.  
  538.  
  539. /* bind index 3, msg type: 8(OWord Block Write). NOTE(review): this comment used to say "write 1 oword", but the message uses OBW_CONTROL_2 and the record filled above extends to byte 23 of msg_reg1 -- confirm the intended size */
  540. send (16)
  541.        msg_ind
  542.        obw_wb
  543.        null
  544.        data_port(
  545.                OBW_CACHE_TYPE,
  546.                OBW_MESSAGE_TYPE,
  547.                OBW_CONTROL_2,
  548.                OBW_BIND_IDX,
  549.                OBW_WRITE_COMMIT_CATEGORY,
  550.                OBW_HEADER_PRESENT
  551.        )
  552.        mlen 2
  553.        rlen obw_wb_length
  554.        {align1};
  555.  
  556. /* Issue message fence so that the previous write message is committed */
  557. send (16)
  558.        mb_ind
  559.        mb_wb.0<1>:ud
  560.         NULL
  561.        data_port(
  562.                OBR_CACHE_TYPE,
  563.                OBR_MESSAGE_FENCE,
  564.                OBR_MF_COMMIT,
  565.                OBR_BIND_IDX,
  566.                OBR_WRITE_COMMIT_CATEGORY,
  567.                OBR_HEADER_PRESENT
  568.        )
  569.        mlen 1
  570.        rlen 1
  571.         {align1};
  572.  
  573. __EXIT:
  574. /*
  575. * kill thread: copy r0 into the message register and send it to the thread spawner with EOT set
  576. */        
  577. mov  (8) ts_msg_reg0<1>:UD         r0<8,8,1>:UD {align1};
  578. send (16) ts_msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT};
  579.  
  580. /* Two nops separate the EOT send from the subroutines below, which are entered only through jmpi */
  581.         nop             ;
  582.         nop             ;
  583. /* Compare three word data to get the min value. In: signed words at INPUT_ARG0.0, INPUT_ARG0.4 and INPUT_ARG0.8. Out: RET_ARG. Overwrites TEMP_VAR0.0 and f0.0. Returns through RETURN, so the caller must have executed SAVE_RET. */
  584. word_imin:
  585.         cmp.le.f0.0 (1)         null:w          INPUT_ARG0.0<0,1,0>:w   INPUT_ARG0.4<0,1,0>:w {align1};
  586.         (f0.0) mov  (1)         TEMP_VAR0.0<1>:w INPUT_ARG0.0<0,1,0>:w                    {align1};    /* TEMP_VAR0.0 = min(arg0, arg1) */
  587.         (-f0.0) mov (1)         TEMP_VAR0.0<1>:w INPUT_ARG0.4<0,1,0>:w                    {align1};
  588.         cmp.le.f0.0 (1)         null:w          TEMP_VAR0.0<0,1,0>:w    INPUT_ARG0.8<0,1,0>:w {align1};
  589.         (f0.0) mov  (1)         RET_ARG<1>:w TEMP_VAR0.0<0,1,0>:w                         {align1};    /* RET_ARG = min(TEMP_VAR0.0, arg2) */
  590.         (-f0.0) mov (1)         RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w                        {align1};
  591.         RETURN          {align1};      
  592.        
  593. /* Compare three word data to get the max value. In: signed words at INPUT_ARG0.0, INPUT_ARG0.4 and INPUT_ARG0.8. Out: RET_ARG. Overwrites TEMP_VAR0.0 and f0.0. Returns through RETURN, so the caller must have executed SAVE_RET. */
  594. word_imax:
  595.         cmp.ge.f0.0 (1)         null:w          INPUT_ARG0.0<0,1,0>:w   INPUT_ARG0.4<0,1,0>:w {align1};
  596.         (f0.0) mov  (1)         TEMP_VAR0.0<1>:w INPUT_ARG0.0<0,1,0>:w                    {align1};    /* TEMP_VAR0.0 = max(arg0, arg1) */
  597.         (-f0.0) mov (1)         TEMP_VAR0.0<1>:w INPUT_ARG0.4<0,1,0>:w                    {align1};
  598.         cmp.ge.f0.0 (1)         null:w          TEMP_VAR0.0<0,1,0>:w    INPUT_ARG0.8<0,1,0>:w {align1};
  599.         (f0.0) mov  (1)         RET_ARG<1>:w TEMP_VAR0.0<0,1,0>:w                         {align1};    /* RET_ARG = max(TEMP_VAR0.0, arg2) */
  600.         (-f0.0) mov (1)         RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w                        {align1};
  601.         RETURN          {align1};      
  602. /* Median of three signed words. In: a = INPUT_ARG0.0, b = INPUT_ARG0.4, c = INPUT_ARG0.8. Out: RET_ARG. Overwrites f0.0. Returns through RETURN, so the caller must have executed SAVE_RET. */
  603. word_imedian:
  604.         cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.4<0,1,0>:w {align1};
  605.         (f0.0)  jmpi (1) cmp_a_ge_b;
  606.         cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1};    /* here a < b */
  607.         (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1};    /* c <= a < b: median is a */
  608.         (f0.0) jmpi (1) cmp_end;
  609.         cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1};    /* a is the smallest: median is min(b, c) */
  610.         (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1};
  611.         (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1};
  612.         jmpi (1) cmp_end;
  613. cmp_a_ge_b:
  614.         cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1};    /* here a >= b */
  615.         (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1};    /* a >= b >= c: median is b */
  616.         (f0.0) jmpi (1) cmp_end;
  617.         cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1};    /* b is the smallest: median is min(a, c) */
  618.         (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1};
  619.         (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1};
  620. cmp_end:
  621.         RETURN  {align1};
  622.  
  623.