Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © <2010>, Intel Corporation.
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the
  6.  * "Software"), to deal in the Software without restriction, including
  7.  * without limitation the rights to use, copy, modify, merge, publish,
  8.  * distribute, sub license, and/or sell copies of the Software, and to
  9.  * permit persons to whom the Software is furnished to do so, subject to
  10.  * the following conditions:
  11.  *
  12.  * The above copyright notice and this permission notice (including the
  13.  * next paragraph) shall be included in all copies or substantial portions
  14.  * of the Software.
  15.  *
  16.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  17.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  18.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  19.  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
  20.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  21.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  22.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23.  * Authors: Zhao Yakui <yakui.zhao@intel.com>
  24.  */
  25. // Module name: Inter_bframe_haswell.asm
  26. //
  27. // Performs inter-prediction estimation for inter-coded B-frames
  28. //
  29.  
  30. //
  31. //  Now, begin source code....
  32. //
  33. /* SAVE_RET stores ip + 32 in RETURN_REG; RETURN moves RETURN_REG back into ip. In the code visible here each SAVE_RET is immediately followed by a single jmpi, so the saved address presumably points just past that jmpi (assumes 16-byte instructions -- confirm). */
  34. #define SAVE_RET        add (1) RETURN_REG<1>:ud   ip:ud        32:ud
  35. #define RETURN          mov (1) ip:ud   RETURN_REG<0,1,0>:ud
  36.  
  37. /*
  38.  * __INTER_START -- kernel entry: clears scratch registers, builds the read/VME/output message headers, fetches the luma and chroma neighbour edge pixels, then skips the MV-predictor derivation when bit 0x04 of mb_hwdep is clear
  39.  */
  40. __INTER_START:
  41. mov  (16) tmp_reg0.0<1>:UD      0x0:UD {align1};
  42. mov  (16) tmp_reg2.0<1>:UD      0x0:UD {align1};
  43. mov  (16) tmp_reg4.0<1>:UD      0x0:UD {align1} ;
  44. mov  (16) tmp_reg6.0<1>:UD      0x0:UD {align1} ;
  45. /* read0 header: BLOCK_32X1 at (x*16 - 8, y*16 - 1); used for the luma ROW read below */
  46. shl  (2) read0_header.0<1>:D    orig_xy_ub<2,2,1>:UB 4:UW {align1};    /* (x, y) * 16 */
  47. add  (1) read0_header.0<1>:D    read0_header.0<0,1,0>:D -8:W {align1};     /* X offset */
  48. add  (1) read0_header.4<1>:D    read0_header.4<0,1,0>:D -1:W {align1};     /* Y offset */
  49. mov  (1) read0_header.8<1>:UD   BLOCK_32X1 {align1};
  50. mov  (1) read0_header.20<1>:UB  thread_id_ub {align1};                  /* dispatch id */
  51. /* read1 header: BLOCK_4X16 at (x*16 - 4, y*16); used for the luma COL read below */
  52. shl  (2) read1_header.0<1>:D    orig_xy_ub<2,2,1>:UB 4:UW {align1};    /* (x, y) * 16 */
  53. add  (1) read1_header.0<1>:D    read1_header.0<0,1,0>:D -4:W {align1};     /* X offset */
  54. mov  (1) read1_header.8<1>:UD   BLOCK_4X16 {align1};
  55. mov  (1) read1_header.20<1>:UB  thread_id_ub {align1};                  /* dispatch id */
  56. /* VME m0: (x*16, y*16) written as two UW starting at byte offset 8 */
  57. shl  (2) vme_m0.8<1>:UW         orig_xy_ub<2,2,1>:UB 4:UW {align1};    /* (x, y) * 16 */
  58. mov  (1) vme_m0.20<1>:UB        thread_id_ub {align1};                  /* dispatch id */
  59. /* output header: obw_m0.8 = (y * w_in_mb + x) * 24 -- presumably this MB's offset in the result buffer (units not visible here; confirm) */
  60. mul  (1) obw_m0.8<1>:UD         w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1};
  61. add  (1) obw_m0.8<1>:UD         obw_m0.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1};
  62. mul  (1) obw_m0.8<1>:UD         obw_m0.8<0,1,0>:UD 24:UD {align1};
  63. mov  (1) obw_m0.20<1>:UB        thread_id_ub {align1};                  /* dispatch id */
  64.        
  65. /*
  66.  * Media Read Message -- fetch Luma neighbor edge pixels
  67.  */
  68. /* ROW: one response register into INEP_ROW */
  69. mov  (8) msg_reg0.0<1>:UD       read0_header.0<8,8,1>:UD {align1};        
  70. send (8) msg_ind INEP_ROW<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 1 {align1};
  71.  
  72. /* COL: two response registers starting at INEP_COL0 */
  73. mov  (8) msg_reg0.0<1>:UD       read1_header.0<8,8,1>:UD {align1};                
  74. send (8) msg_ind INEP_COL0<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 2 {align1};
  75.        
  76. /*
  77.  * Media Read Message -- fetch Chroma neighbor edge pixels
  78.  */
  79. /* ROW: header moved to (x*16 - 8, y*8 - 1); the block-size field read0_header.8 is left as set above */
  80. shl  (2) read0_header.0<1>:D    orig_xy_ub<2,2,1>:UB 3:UW {align1};    /* x * 16 , y * 8 */
  81. mul  (1) read0_header.0<1>:D    read0_header.0<0,1,0>:D  2:W {align1};
  82. add  (1) read0_header.0<1>:D    read0_header.0<0,1,0>:D -8:W {align1};     /* X offset */
  83. add  (1) read0_header.4<1>:D    read0_header.4<0,1,0>:D -1:W {align1};     /* Y offset */
  84. mov  (8) msg_reg0.0<1>:UD       read0_header.0<8,8,1>:UD {align1};        
  85. send (8) msg_ind CHROMA_ROW<1>:UB null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen 1 {align1};
  86.  
  87. /* COL: header moved to (x*16 - 4, y*8) with block size BLOCK_8X4 */
  88. shl  (2) read1_header.0<1>:D    orig_xy_ub<2,2,1>:UB 3:UW {align1};    /* x * 16, y * 8 */
  89. mul  (1) read1_header.0<1>:D    read1_header.0<0,1,0>:D  2:W {align1};
  90. add  (1) read1_header.0<1>:D    read1_header.0<0,1,0>:D -4:W {align1};     /* X offset */
  91. mov  (1) read1_header.8<1>:UD   BLOCK_8X4 {align1};
  92. mov  (8) msg_reg0.0<1>:UD       read1_header.0<8,8,1>:UD {align1};                
  93. send (8) msg_ind CHROMA_COL<1>:UB null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen 1 {align1};
  94. /* clear VME m1, the MV predictor and the reference window before the optional MV-predictor derivation */
  95. mov  (8) vme_m1.0<1>:ud         0:ud            {align1};
  96. mov  (8) mb_mvp_ref.0<1>:ud     0:ud            {align1};
  97. mov  (8) mb_ref_win.0<1>:ud     0:ud            {align1};
  98. and.z.f0.0 (1)          null:uw mb_hwdep<0,1,0>:uw              0x04:uw   {align1};   /* f0.0 is set when bit 0x04 of mb_hwdep is clear */
  99. (f0.0) jmpi (1) __mb_hwdep_end;   /* ...in which case the predictor stays zero and the neighbour reads are skipped */
  100.  
  101. /* read back the data for MB A */
  102. /* the layout of MB result is: rx.0(Available). rx.4(MVa), rX.8(MVb), rX.16(Pred_L0 flag),
  103. *  rX.18 (Pred_L1 flag), rX.20(Forward reference ID), rX.22(Backward reference ID)
  104. */
  105. mov  (8) mba_result.0<1>:ud     0x0:ud          {align1};
  106. mov  (8) mbb_result.0<1>:ud     0x0:ud          {align1};
  107. mov  (8) mbc_result.0<1>:ud     0x0:ud          {align1};
  108. mba_start:        /* neighbour MB A at MB position (x - 1, y); result goes to mba_result */
  109. mov  (8) mb_msg0.0<1>:ud        0:ud            {align1};
  110. and.z.f0.0 (1)          null:uw input_mb_intra_ub<0,1,0>:ub     INTRA_PRED_AVAIL_FLAG_AE:uw   {align1};
  111. /* MB A doesn't exist. Zero MV. mba_flag is zero and ref ID = -1 */
  112. (f0.0)  mov  (2)        mba_result.20<1>:w      -1:w    {align1};
  113. (f0.0)  jmpi (1)        mbb_start;
  114. mov  (1) mba_result.0<1>:d      MB_AVAIL                {align1};      /* mark MB A as available */
  115. mov  (2) tmp_reg0.0<1>:UW       orig_xy_ub<2,2,1>:UB    {align1};
  116. add  (1) tmp_reg0.0<1>:w        tmp_reg0.0<0,1,0>:w     -1:w    {align1};      /* x - 1 */
  117. mul  (1) mb_msg0.8<1>:UD       w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1};
  118. add  (1) mb_msg0.8<1>:UD       mb_msg0.8<0,1,0>:UD   tmp_reg0.0<0,1,0>:uw {align1};
  119. mul  (1) mb_msg0.8<1>:UD       mb_msg0.8<0,1,0>:UD 24:UD {align1};      /* (y * w_in_mb + (x - 1)) * 24 */
  120. mov  (1) mb_msg0.20<1>:UB        thread_id_ub {align1};                  /* dispatch id */
  121.  
  122. /* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */
  123. send (16)
  124.        mb_ind
  125.        mb_wb.0<1>:ud
  126.         NULL
  127.        data_port(
  128.                OBR_CACHE_TYPE,
  129.                OBR_MESSAGE_TYPE,
  130.                OBR_CONTROL_4,
  131.                OBR_BIND_IDX,
  132.                OBR_WRITE_COMMIT_CATEGORY,
  133.                OBR_HEADER_PRESENT
  134.        )
  135.        mlen 1
  136.        rlen 2
  137.        {align1};
  138. /* when mb_intra_wb.16 < mb_inter_wb.8, MB A contributes no MV: ref IDs = -1 (presumably the intra cost beat the inter cost -- confirm field meanings) */
  139. /* TODO: RefID is required after multi-references are added */
  140. cmp.l.f0.0 (1)          null:w  mb_intra_wb.16<0,1,0>:uw        mb_inter_wb.8<0,1,0>:uw {align1};
  141. (f0.0)   mov (2)        mba_result.20<1>:w                      -1:w    {align1};
  142. (f0.0)   jmpi   (1)     mbb_start;
  143. /* advance the read offset by 3 for the second read (presumably past the header part of the MB record to its MVs -- confirm) */
  144. add   (1) mb_msg0.8<1>:UD       mb_msg0.8<0,1,0>:ud     3:ud {align1};
  145. /* Read MV for MB A */
  146. /* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */
  147. send (16)
  148.        mb_ind
  149.        mb_mv0.0<1>:ud
  150.         NULL
  151.        data_port(
  152.                OBR_CACHE_TYPE,
  153.                OBR_MESSAGE_TYPE,
  154.                OBR_CONTROL_8,
  155.                OBR_BIND_IDX,
  156.                OBR_WRITE_COMMIT_CATEGORY,
  157.                OBR_HEADER_PRESENT
  158.        )
  159.        mlen 1
  160.        rlen 4
  161.        {align1};
  162. /* TODO: RefID is required after multi-references are added */
  163. /* MV: copy two dwords from mb_mv1.8 into mba_result.4/.8, default both ref IDs to -1, then call mb_pred_func for block INTER_BLOCK1 */
  164. mov     (2)     mba_result.4<1>:ud              mb_mv1.8<2,2,1>:ud      {align1};
  165. mov     (2)     mba_result.20<1>:w              -1:w            {align1};
  166. mov     (1)     INPUT_ARG0.0<1>:ud      mb_inter_wb.4<0,1,0>:ud {align1};
  167. mov     (1)     INPUT_ARG0.4<1>:ud      mb_inter_wb.0<0,1,0>:ud {align1};
  168. mov     (1)     INPUT_ARG0.8<1>:ud      INTER_BLOCK1:ud {align1};
  169. SAVE_RET        {align1};
  170. jmpi    (1)     mb_pred_func;
  171. mov     (1)     mb_pred_mode.0<1>:uw    RET_ARG<0,1,0>:uw       {align1};      /* value left in RET_ARG by mb_pred_func (defined outside this view) */
  172. cmp.e.f0.0 (1)  null:uw         mb_pred_mode.0<0,1,0>:uw        PRED_L0 {align1};      /* PRED_L0: L0 flag, forward ref ID = 0 */
  173. (f0.0)  mov   (1)       mba_result.16<1>:uw             MB_PRED_FLAG            {align1};
  174. (f0.0)  mov   (1)       mba_result.20<1>:w              0:w             {align1};
  175. (f0.0)  jmpi    (1) mbb_start;
  176. cmp.e.f0.0 (1)  null:uw         mb_pred_mode.0<0,1,0>:uw        PRED_L1 {align1};      /* PRED_L1: L1 flag, backward ref ID = 0 */
  177. (f0.0)  mov   (1)       mba_result.18<1>:uw             MB_PRED_FLAG            {align1};
  178. (f0.0)  mov   (1)       mba_result.22<1>:w              0:w             {align1};
  179. (f0.0)  jmpi    (1) mbb_start;
  180. mov   (2)       mba_result.16<1>:uw             MB_PRED_FLAG            {align1};      /* any other mode: both flags and both ref IDs */
  181. mov   (2)       mba_result.20<1>:w              0:w             {align1};
  182.  
  183. mbb_start:        /* neighbour MB B at MB position (x, y - 1); result goes to mbb_result */
  184. mov  (8) mb_msg0.0<1>:ud        0:ud            {align1};
  185. and.z.f0.0 (1)          null:uw input_mb_intra_ub<0,1,0>:ub     INTRA_PRED_AVAIL_FLAG_B:uw   {align1};
  186. /* MB B doesn't exist. Zero MV, availability stays zero and ref IDs = -1 */
  187. /* If MB B doesn't exist, neither MB C nor D exists */
  188. (f0.0)  mov  (2)        mbb_result.20<1>:w      -1:w            {align1};
  189. (f0.0)  mov  (2)        mbc_result.20<1>:w      -1:w            {align1};
  190. (f0.0)  jmpi (1)        mb_mvp_start;
  191. mov  (1) mbb_result.0<1>:d      MB_AVAIL                {align1};      /* mark MB B as available */
  192. mov  (2) tmp_reg0.0<1>:UW       orig_xy_ub<2,2,1>:UB    {align1};
  193. add  (1) tmp_reg0.2<1>:w        tmp_reg0.2<0,1,0>:w     -1:w    {align1};      /* y - 1 */
  194. mul  (1) mb_msg0.8<1>:UD       w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1};
  195. add  (1) mb_msg0.8<1>:UD       mb_msg0.8<0,1,0>:UD   tmp_reg0.0<0,1,0>:uw {align1};
  196. mul  (1) mb_msg0.8<1>:UD       mb_msg0.8<0,1,0>:UD 24:UD {align1};      /* ((y - 1) * w_in_mb + x) * 24 */
  197. mov  (1) mb_msg0.20<1>:UB        thread_id_ub {align1};                  /* dispatch id */
  198.  
  199. /* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */
  200. send (16)
  201.        mb_ind
  202.        mb_wb.0<1>:ud
  203.         NULL
  204.        data_port(
  205.                OBR_CACHE_TYPE,
  206.                OBR_MESSAGE_TYPE,
  207.                OBR_CONTROL_4,
  208.                OBR_BIND_IDX,
  209.                OBR_WRITE_COMMIT_CATEGORY,
  210.                OBR_HEADER_PRESENT
  211.        )
  212.        mlen 1
  213.        rlen 2
  214.        {align1};
  215. /* when mb_intra_wb.16 < mb_inter_wb.8, MB B contributes no MV: ref IDs = -1 and continue with MB C (presumably intra cost beat inter cost -- confirm) */
  216. /* TODO: RefID is required after multi-references are added */
  217. cmp.l.f0.0 (1)          null:w  mb_intra_wb.16<0,1,0>:uw        mb_inter_wb.8<0,1,0>:uw {align1};
  218. (f0.0)   mov (2)        mbb_result.20<1>:w                      -1:w    {align1};
  219. (f0.0)   jmpi   (1)     mbc_start;
  220. add   (1) mb_msg0.8<1>:UD       mb_msg0.8<0,1,0>:ud     3:ud {align1};      /* read offset + 3 for the MV read below */
  221. /* Read MV for MB B */
  222. /* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */
  223. send (16)
  224.        mb_ind
  225.        mb_mv0.0<1>:ud
  226.         NULL
  227.        data_port(
  228.                OBR_CACHE_TYPE,
  229.                OBR_MESSAGE_TYPE,
  230.                OBR_CONTROL_8,
  231.                OBR_BIND_IDX,
  232.                OBR_WRITE_COMMIT_CATEGORY,
  233.                OBR_HEADER_PRESENT
  234.        )
  235.        mlen 1
  236.        rlen 4
  237.        {align1};
  238. /* TODO: RefID is required after multi-references are added */
  239. mov        (2)          mbb_result.4<1>:ud              mb_mv2.16<2,2,1>:ud     {align1};      /* two MV dwords from mb_mv2.16 */
  240. mov        (2)          mbb_result.20<1>:w              -1:w    {align1};      /* default both ref IDs to -1 */
  241. mov     (1)     INPUT_ARG0.0<1>:ud      mb_inter_wb.4<0,1,0>:ud {align1};
  242. mov     (1)     INPUT_ARG0.4<1>:ud      mb_inter_wb.0<0,1,0>:ud {align1};
  243. mov     (1)     INPUT_ARG0.8<1>:ud      INTER_BLOCK2:ud {align1};
  244. SAVE_RET        {align1};
  245. jmpi    (1)     mb_pred_func;
  246. mov     (1)     mb_pred_mode.0<1>:uw    RET_ARG<0,1,0>:uw       {align1};      /* value left in RET_ARG by mb_pred_func (defined outside this view) */
  247. cmp.e.f0.0 (1)  null:uw         mb_pred_mode.0<0,1,0>:uw        PRED_L0 {align1};      /* PRED_L0: L0 flag, forward ref ID = 0 */
  248. (f0.0)  mov   (1)       mbb_result.16<1>:uw             MB_PRED_FLAG            {align1};
  249. (f0.0)  mov   (1)       mbb_result.20<1>:w              0:w             {align1};
  250. (f0.0)  jmpi    (1) mbc_start;
  251. cmp.e.f0.0 (1)  null:uw         mb_pred_mode.0<0,1,0>:uw        PRED_L1 {align1};      /* PRED_L1: L1 flag, backward ref ID = 0 */
  252. (f0.0)  mov   (1)       mbb_result.18<1>:uw             MB_PRED_FLAG            {align1};
  253. (f0.0)  mov   (1)       mbb_result.22<1>:w              0:w             {align1};
  254. (f0.0)  jmpi    (1) mbc_start;
  255. mov   (2)       mbb_result.16<1>:uw             MB_PRED_FLAG            {align1};      /* any other mode: both flags and both ref IDs */
  256. mov   (2)       mbb_result.20<1>:w              0:w             {align1};
  257.  
  258. mbc_start:        /* neighbour MB C at MB position (x + 1, y - 1); result goes to mbc_result */
  259. mov  (8) mb_msg0.0<1>:ud        0:ud            {align1};
  260. and.z.f0.0 (1)          null:uw input_mb_intra_ub<0,1,0>:ub     INTRA_PRED_AVAIL_FLAG_C:uw   {align1};
  261. /* MB C doesn't exist: mbc_result is left untouched here and MB D is tried instead */
  262. /* Based on h264 spec the MB D will be replaced if MB C doesn't exist */
  263. (f0.0)  jmpi (1)        mbd_start;
  264. mov  (1) mbc_result.0<1>:d      MB_AVAIL                {align1};      /* mark MB C as available */
  265. mov  (2) tmp_reg0.0<1>:UW       orig_xy_ub<2,2,1>:UB    {align1};
  266. add  (1) tmp_reg0.2<1>:w        tmp_reg0.2<0,1,0>:w     -1:w    {align1};      /* y - 1 */
  267. add  (1) tmp_reg0.0<1>:w        tmp_reg0.0<0,1,0>:w     1:w     {align1};      /* x + 1 */
  268. mul  (1) mb_msg0.8<1>:UD       w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1};
  269. add  (1) mb_msg0.8<1>:UD       mb_msg0.8<0,1,0>:UD   tmp_reg0.0<0,1,0>:uw {align1};
  270. mul  (1) mb_msg0.8<1>:UD       mb_msg0.8<0,1,0>:UD 24:UD {align1};      /* ((y - 1) * w_in_mb + (x + 1)) * 24 */
  271. mov  (1) mb_msg0.20<1>:UB        thread_id_ub {align1};                  /* dispatch id */
  272.  
  273. /* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */
  274. send (16)
  275.        mb_ind
  276.        mb_wb.0<1>:ud
  277.         NULL
  278.        data_port(
  279.                OBR_CACHE_TYPE,
  280.                OBR_MESSAGE_TYPE,
  281.                OBR_CONTROL_4,
  282.                OBR_BIND_IDX,
  283.                OBR_WRITE_COMMIT_CATEGORY,
  284.                OBR_HEADER_PRESENT
  285.        )
  286.        mlen 1
  287.        rlen 2
  288.        {align1};
  289. /* when mb_intra_wb.16 < mb_inter_wb.8, MB C contributes no MV: ref IDs = -1 (presumably intra cost beat inter cost -- confirm) */
  290. /* TODO: RefID is required after multi-references are added */
  291. cmp.l.f0.0 (1)          null:w  mb_intra_wb.16<0,1,0>:uw        mb_inter_wb.8<0,1,0>:uw {align1};
  292. (f0.0)   mov (2)        mbc_result.20<1>:w                      -1:w    {align1};
  293. (f0.0)   jmpi   (1)     mb_mvp_start;
  294. add   (1) mb_msg0.8<1>:UD       mb_msg0.8<0,1,0>:ud     3:ud {align1};      /* read offset + 3 for the MV read below */
  295. /* Read MV for MB C */
  296. /* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */
  297. send (16)
  298.        mb_ind
  299.        mb_mv0.0<1>:ud
  300.         NULL
  301.        data_port(
  302.                OBR_CACHE_TYPE,
  303.                OBR_MESSAGE_TYPE,
  304.                OBR_CONTROL_8,
  305.                OBR_BIND_IDX,
  306.                OBR_WRITE_COMMIT_CATEGORY,
  307.                OBR_HEADER_PRESENT
  308.        )
  309.        mlen 1
  310.        rlen 4
  311.        {align1};
  312. /* TODO: RefID is required after multi-references are added */
  313. /* MV: two dwords from mb_mv2.16 go to mbc_result.4/.8; both ref IDs default to -1 before the mode check */
  314. mov        (2)          mbc_result.4<1>:ud              mb_mv2.16<2,2,1>:ud     {align1};
  315. mov        (2)          mbc_result.20<1>:w              -1:w    {align1};
  316. mov     (1)     INPUT_ARG0.0<1>:ud      mb_inter_wb.4<0,1,0>:ud {align1};
  317. mov     (1)     INPUT_ARG0.4<1>:ud      mb_inter_wb.0<0,1,0>:ud {align1};
  318. mov     (1)     INPUT_ARG0.8<1>:ud      INTER_BLOCK2:ud {align1};
  319. SAVE_RET        {align1};
  320. jmpi    (1)     mb_pred_func;
  321. mov     (1)     mb_pred_mode.0<1>:uw    RET_ARG<0,1,0>:uw       {align1};      /* value left in RET_ARG by mb_pred_func (defined outside this view) */
  322. cmp.e.f0.0 (1)  null:uw         mb_pred_mode.0<0,1,0>:uw        PRED_L0 {align1};      /* PRED_L0: L0 flag, forward ref ID = 0 */
  323. (f0.0)  mov    (1) mbc_result.16<1>:uw          MB_PRED_FLAG            {align1};
  324. (f0.0)  mov    (1) mbc_result.20<1>:w           0:w             {align1};
  325. (f0.0)  jmpi   (1) mb_mvp_start;
  326. cmp.e.f0.0 (1)  null:uw         mb_pred_mode.0<0,1,0>:uw        PRED_L1 {align1};      /* PRED_L1: L1 flag, backward ref ID = 0 */
  327. (f0.0)  mov   (1)       mbc_result.18<1>:uw             MB_PRED_FLAG            {align1};
  328. (f0.0)  mov    (1) mbc_result.22<1>:w           0:w             {align1};
  329. (f0.0)  jmpi  (1) mb_mvp_start;
  330. mov  (2)  mbc_result.16<1>:uw           MB_PRED_FLAG            {align1};      /* any other mode: both flags and both ref IDs */
  331. mov  (2)  mbc_result.20<1>:w            0:w     {align1};
  332. /* MB C handled: jump over the MB D fallback */
  333. jmpi   (1)    mb_mvp_start;
  334. mbd_start:        /* fallback when MB C is unavailable: neighbour MB D at MB position (x - 1, y - 1); result goes to mbc_result */
  335. mov  (8) mb_msg0.0<1>:ud        0:ud            {align1};
  336. and.z.f0.0 (1)          null:uw input_mb_intra_ub<0,1,0>:ub     INTRA_PRED_AVAIL_FLAG_D:uw   {align1};
       /* MB D doesn't exist either. Zero MV, availability stays zero and ref IDs = -1 (same as the MB A / MB B unavailable paths), so the median stage does not count the zero-initialised slot as a ref-ID-0 neighbour */
       (f0.0)  mov  (2)        mbc_result.20<1>:w      -1:w            {align1};
  337. (f0.0)  jmpi (1)        mb_mvp_start;
  338. mov  (1) mbc_result.0<1>:d      MB_AVAIL                {align1};      /* mark the C/D slot as available */
  339. mov  (2) tmp_reg0.0<1>:UW       orig_xy_ub<2,2,1>:UB    {align1};
  340. add  (2) tmp_reg0.0<1>:w        tmp_reg0.0<2,2,1>:w     -1:w    {align1};      /* (x - 1, y - 1) */
  341. mul  (1) mb_msg0.8<1>:UD       w_in_mb_uw<0,1,0>:UW tmp_reg0.2<0,1,0>:UW {align1};
  342. add  (1) mb_msg0.8<1>:UD       mb_msg0.8<0,1,0>:UD   tmp_reg0.0<0,1,0>:uw {align1};
  343. mul  (1) mb_msg0.8<1>:UD       mb_msg0.8<0,1,0>:UD 24:UD {align1};      /* ((y - 1) * w_in_mb + (x - 1)) * 24 */
  344. mov  (1) mb_msg0.20<1>:UB        thread_id_ub {align1};                  /* dispatch id */
  345.  
  346. /* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */
  347. send (16)
  348.        mb_ind
  349.        mb_wb.0<1>:ud
  350.         NULL
  351.        data_port(
  352.                OBR_CACHE_TYPE,
  353.                OBR_MESSAGE_TYPE,
  354.                OBR_CONTROL_4,
  355.                OBR_BIND_IDX,
  356.                OBR_WRITE_COMMIT_CATEGORY,
  357.                OBR_HEADER_PRESENT
  358.        )
  359.        mlen 1
  360.        rlen 2
  361.        {align1};
  362. /* when mb_intra_wb.16 < mb_inter_wb.8, MB D contributes no MV: ref IDs = -1 (presumably intra cost beat inter cost -- confirm) */
  363. cmp.l.f0.0 (1)          null:w  mb_intra_wb.16<0,1,0>:uw        mb_inter_wb.8<0,1,0>:uw {align1};
  364. (f0.0)   mov (2)        mbc_result.20<1>:w                      -1:w    {align1};
  365. (f0.0)   jmpi   (1)     mb_mvp_start;
  366.  
  367. add   (1) mb_msg0.8<1>:UD       mb_msg0.8<0,1,0>:ud     3:ud {align1};      /* read offset + 3 for the MV read below */
  368. /* Read MV for MB D */
  369. /* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */
  370. send (16)
  371.        mb_ind
  372.        mb_mv0.0<1>:ud
  373.         NULL
  374.        data_port(
  375.                OBR_CACHE_TYPE,
  376.                OBR_MESSAGE_TYPE,
  377.                OBR_CONTROL_8,
  378.                OBR_BIND_IDX,
  379.                OBR_WRITE_COMMIT_CATEGORY,
  380.                OBR_HEADER_PRESENT
  381.        )
  382.        mlen 1
  383.        rlen 4
  384.        {align1};
  385.  
  386. /* TODO: RefID is required after multi-references are added */
  387.  
  388. /* MV: two dwords from mb_mv3.24 go to mbc_result.4/.8; both ref IDs default to -1 before the mode check */
  389. mov        (2)          mbc_result.4<1>:ud              mb_mv3.24<2,2,1>:ud     {align1};
  390. mov        (2)          mbc_result.20<1>:w              -1:w    {align1};
  391. mov     (1)     INPUT_ARG0.0<1>:ud      mb_inter_wb.4<0,1,0>:ud {align1};
  392. mov     (1)     INPUT_ARG0.4<1>:ud      mb_inter_wb.0<0,1,0>:ud {align1};
  393. mov     (1)     INPUT_ARG0.8<1>:ud      INTER_BLOCK3:ud {align1};
  394. SAVE_RET        {align1};
  395. jmpi    (1)     mb_pred_func;
  396. mov     (1)     mb_pred_mode.0<1>:uw    RET_ARG<0,1,0>:uw       {align1};      /* value left in RET_ARG by mb_pred_func (defined outside this view) */
  397. /* prediction flags are written only per mode below, exactly as for MB A/B/C; no unconditional Pred_L1 flag */
  398. cmp.e.f0.0 (1)  null:uw         mb_pred_mode.0<0,1,0>:uw        PRED_L0 {align1};      /* PRED_L0: L0 flag, forward ref ID = 0 */
  399. (f0.0)  mov    (1) mbc_result.16<1>:uw          MB_PRED_FLAG            {align1};
  400. (f0.0)  mov    (1) mbc_result.20<1>:w           0:w             {align1};
  401. (f0.0)  jmpi    (1) mb_mvp_start;
  402. cmp.e.f0.0 (1)  null:uw         mb_pred_mode.0<0,1,0>:uw        PRED_L1 {align1};      /* PRED_L1: L1 flag, backward ref ID = 0 */
  403. (f0.0)  mov   (1)       mbc_result.18<1>:uw             MB_PRED_FLAG            {align1};
  404. (f0.0)  mov    (1) mbc_result.22<1>:w           0:w             {align1};
  405. (f0.0)  jmpi  (1) mb_mvp_start;
  406. mov  (2)  mbc_result.16<1>:uw           MB_PRED_FLAG            {align1};      /* any other mode: both flags and both ref IDs */
  407. mov  (2)  mbc_result.20<1>:w            0:w     {align1};
  408.        
  409. mb_mvp_start:        /* choose the MV predictor from the A / B / C(or D) results gathered above */
  410. /*TODO: Add the skip prediction */
  411. /* Check whether both MB B and C are unavailable (sum of the two availability dwords is zero) */
  412. add     (1)     tmp_reg0.0<1>:d         mbb_result.0<0,1,0>:d   mbc_result.0<0,1,0>:d   {align1};
  413. cmp.z.f0.0 (1)  null:d                  tmp_reg0.0<0,1,0>:d     0:d     {align1};
  414. (-f0.0) jmpi (1)        mb_median_start;      /* at least one of B / C is available: use the ref-ID / median rule */
  415. cmp.nz.f0.0 (1) null:d  mba_result.0<0,1,0>:d           0:d             {align1};      /* only A can contribute: is A available? */
  416. (f0.0)  mov     (2)     mbb_result.4<1>:ud              mba_result.4<2,2,1>:ud  {align1};      /* yes: B and C inherit A's two MV dwords and ref IDs */
  417. (f0.0)  mov     (2)     mbc_result.4<1>:ud              mba_result.4<2,2,1>:ud  {align1};      
  418. (f0.0)  mov     (2)     mbb_result.20<1>:uw             mba_result.20<2,2,1>:uw {align1};      
  419. (f0.0)  mov     (2)     mbc_result.20<1>:uw             mba_result.20<2,2,1>:uw {align1};      
  420. (f0.0)  mov     (2)     mb_mvp_ref.0<1>:ud              mba_result.4<2,2,1>:ud  {align1};      /* predictor pair = A's MV pair */
  421. (-f0.0) mov     (2)     mb_mvp_ref.0<1>:ud              0:ud                    {align1};      /* no neighbour at all: zero predictor pair */
  422. jmpi    (1)     __mb_hwdep_end;
  423.        
  424. mb_median_start:        /* forward predictor -> mb_mvp_ref.0 (dword, or words .0 and .2 on the median path) */
  425. /* forward_MVP */
  426. /* check whether only one neighbour MB has the same ref ID as the current MB (the code compares the forward ref ID at .20 against 0); tmp_reg0.0 counts the matches and tmp_reg0.4 keeps the last matching MV */
  427. mov (8) tmp_reg0.0<1>:ud                0:ud            {align1};
  428. cmp.z.f0.0      (1)     null:d  mba_result.20<1>:w      0:w     {align1};
  429. (f0.0)  add     (1)     tmp_reg0.0<1>:w         tmp_reg0.0<1>:w 1:w     {align1};
  430. (f0.0)  mov     (1)     tmp_reg0.4<1>:ud        mba_result.4<0,1,0>:ud  {align1};
  431. cmp.z.f0.0      (1)     null:d  mbb_result.20<1>:w      0:w     {align1};
  432. (f0.0)  add     (1)     tmp_reg0.0<1>:w         tmp_reg0.0<1>:w 1:w     {align1};
  433. (f0.0)  mov     (1)     tmp_reg0.4<1>:ud        mbb_result.4<0,1,0>:ud  {align1};
  434. cmp.z.f0.0      (1)     null:d  mbc_result.20<1>:w      0:w     {align1};
  435. (f0.0)  add     (1)     tmp_reg0.0<1>:w         tmp_reg0.0<1>:w 1:w     {align1};
  436. (f0.0)  mov     (1)     tmp_reg0.4<1>:ud        mbc_result.4<0,1,0>:ud  {align1};
  437. cmp.e.f0.0      (1)     null:d  tmp_reg0.0<1>:w  1:w    {align1};      /* exactly one match? */
  438. (f0.0)  mov     (1)     mb_mvp_ref.0<1>:ud      tmp_reg0.4<0,1,0>:ud    {align1};      /* then that neighbour's MV is the predictor */
  439. (f0.0)  jmpi (1)  mvp_backward;
  440. /* otherwise call word_imedian (defined outside this view) on the A/B/C words at .4, then on the words at .6 (presumably the x and y MV components -- confirm) */
  441. mov     (1)     INPUT_ARG0.0<1>:w       mba_result.4<0,1,0>:w   {align1};
  442. mov     (1)     INPUT_ARG0.4<1>:w       mbb_result.4<0,1,0>:w   {align1};
  443. mov     (1)     INPUT_ARG0.8<1>:w       mbc_result.4<0,1,0>:w   {align1};
  444. SAVE_RET        {align1};
  445.  jmpi   (1)     word_imedian;
  446. mov     (1)     mb_mvp_ref.0<1>:w               RET_ARG<0,1,0>:w        {align1};
  447. mov     (1)     INPUT_ARG0.0<1>:w       mba_result.6<0,1,0>:w   {align1};
  448. mov     (1)     INPUT_ARG0.4<1>:w       mbb_result.6<0,1,0>:w   {align1};
  449. mov     (1)     INPUT_ARG0.8<1>:w       mbc_result.6<0,1,0>:w   {align1};
  450. SAVE_RET        {align1};
  451. jmpi    (1)     word_imedian;
  452. mov     (1)     mb_mvp_ref.2<1>:w               RET_ARG<0,1,0>:w        {align1};
  453.  
  454.  
  455. mvp_backward:        /* backward predictor -> mb_mvp_ref.4 (dword, or words .4 and .6 on the median path); same rule as the forward case, using ref ID .22 and the MV dword at .8 */
  456. /* check whether only one neighbour MB has the same ref ID as the current MB (the code compares the backward ref ID at .22 against 0) */
  457. mov (8) tmp_reg0.0<1>:ud                0:ud            {align1};
  458. cmp.z.f0.0      (1)     null:d  mba_result.22<1>:w      0:w     {align1};
  459. (f0.0)  add     (1)     tmp_reg0.0<1>:w         tmp_reg0.0<1>:w 1:w     {align1};
  460. (f0.0)  mov     (1)     tmp_reg0.4<1>:ud        mba_result.8<0,1,0>:ud  {align1};
  461. cmp.z.f0.0      (1)     null:d  mbb_result.22<1>:w      0:w     {align1};
  462. (f0.0)  add     (1)     tmp_reg0.0<1>:w         tmp_reg0.0<1>:w 1:w     {align1};
  463. (f0.0)  mov     (1)     tmp_reg0.4<1>:ud        mbb_result.8<0,1,0>:ud  {align1};
  464. cmp.z.f0.0      (1)     null:d  mbc_result.22<1>:w      0:w     {align1};
  465. (f0.0)  add     (1)     tmp_reg0.0<1>:w         tmp_reg0.0<1>:w 1:w     {align1};
  466. (f0.0)  mov     (1)     tmp_reg0.4<1>:ud        mbc_result.8<0,1,0>:ud  {align1};
  467. cmp.e.f0.0      (1)     null:d  tmp_reg0.0<1>:w  1:w    {align1};      /* exactly one match? */
  468. (f0.0)  mov     (1)     mb_mvp_ref.4<1>:ud      tmp_reg0.4<0,1,0>:ud    {align1};      /* then that neighbour's MV is the predictor */
  469. (f0.0)  jmpi (1) __mb_hwdep_end;
  470. /* otherwise call word_imedian (defined outside this view) on the A/B/C words at .8, then on the words at .10 */
  471. mov     (1)     INPUT_ARG0.0<1>:w       mba_result.8<0,1,0>:w   {align1};
  472. mov     (1)     INPUT_ARG0.4<1>:w       mbb_result.8<0,1,0>:w   {align1};
  473. mov     (1)     INPUT_ARG0.8<1>:w       mbc_result.8<0,1,0>:w   {align1};
  474. SAVE_RET        {align1};
  475.  jmpi   (1)     word_imedian;
  476. mov     (1)     mb_mvp_ref.4<1>:w               RET_ARG<0,1,0>:w        {align1};
  477. mov     (1)     INPUT_ARG0.0<1>:w       mba_result.10<0,1,0>:w  {align1};
  478. mov     (1)     INPUT_ARG0.4<1>:w       mbb_result.10<0,1,0>:w  {align1};
  479. mov     (1)     INPUT_ARG0.8<1>:w       mbc_result.10<0,1,0>:w  {align1};
  480. SAVE_RET        {align1};
  481. jmpi    (1)     word_imedian;
  482. mov     (1)     mb_mvp_ref.6<1>:w               RET_ARG<0,1,0>:w        {align1};
  483.  
  484. __mb_hwdep_end:
  485. asr     (4)     mb_ref_win.0<1>:w       mb_mvp_ref.0<4,4,1>:w   2:w     {align1};
  486. add     (4)     mb_ref_win.8<1>:w       mb_ref_win.0<4,4,1>:w   3:w     {align1};
  487. and     (4)     mb_ref_win.16<1>:uw     mb_ref_win.8<4,4,1>:uw  0xFFFC:uw {align1};
  488. /* m2, get the MV/Mb cost passed from constant buffer when
  489. spawning thread by MEDIA_OBJECT */      
  490. mov (8) vme_m2<1>:UD            r1.0<8,8,1>:UD {align1};
  491.  
  492. mov (8) vme_msg_2<1>:UD         vme_m2.0<8,8,1>:UD {align1};
  493.  
  494. /* m3 */
  495. mov (8) vme_msg_3<1>:UD         0x0:UD {align1};               
  496.  
  497. /* m4 */
  498. mov  (1) INEP_ROW.0<1>:UD       0x0:UD {align1};
  499. and  (1) INEP_ROW.4<1>:UD       INEP_ROW.4<0,1,0>:UD            0xFF000000:UD {align1};
  500. mov  (8) vme_msg_4<1>:UD         INEP_ROW.0<8,8,1>:UD {align1};
  501.  
  502. /* m5 */        
  503. mov  (8) vme_msg_5<1>:UD         0x0:UD {align1};
  504. mov (16) vme_msg_5.0<1>:UB       INEP_COL0.3<32,8,4>:UB {align1};
  505. mov  (1) vme_msg_5.16<1>:UD      INTRA_PREDICTORE_MODE {align1};
  506.  
  507. /* the penalty for Intra mode */
  508. mov  (1) vme_msg_5.28<1>:UD     0x010101:UD {align1};
  509. mov  (1) vme_msg_5.20<1>:UW      CHROMA_ROW.6<0,1,0>:UW {align1};
  510.  
  511.  
  512. /* m6 */
  513.  
  514. mov  (4) vme_msg_6.16<1>:UD      CHROMA_ROW.8<4,4,1>:UD {align1};
  515. mov  (8) vme_msg_6.0<1>:UW       CHROMA_COL.2<16,8,2>:UW {align1};
  516.  
  517. /*
  518. * SIC VME message
  519. */
  520. /* m0 */        
  521. mov  (8) vme_msg_0.0<1>:UD      vme_m0.0<8,8,1>:UD {align1};
  522. mov  (1) tmp_reg0.0<1>:UW       LUMA_INTRA_MODE:UW {align1};
  523. /* Use the Luma mode */
  524. mov  (1) vme_msg_4.5<1>:UB      tmp_reg0.0<0,1,0>:UB {align1};
  525.  
  526. /* m1 */
  527. mov  (1) intra_flag<1>:UW       0x0:UW {align1}                     ;
  528. and.z.f0.0 (1) null<1>:UW transform_8x8_ub<0,1,0>:UB 1:UW {align1};
  529. (f0.0) mov  (1) intra_part_mask_ub<1>:UB  LUMA_INTRA_8x8_DISABLE {align1};
  530.  
  531. /* assign MB intra struct from the thread payload*/
  532. mov (1) mb_intra_struct_ub<1>:UB input_mb_intra_ub<0,1,0>:UB {align1};
  533.  
  534. /* Disable DC HAAR component when calculating HAAR SATD block */
  535. mov  (1) tmp_reg0.0<1>:UW       DC_HARR_DISABLE:UW              {align1};
  536. mov  (1) vme_m1.30<1>:UB        tmp_reg0.0<0,1,0>:UB  {align1};
  537.  
  538. mov  (1) vme_m0.12<1>:UD        INTRA_SAD_HAAR:UD {align1};    /* 16x16 Source, Intra_harr */
  539. /* m0 */        
  540. mov  (8) vme_msg_0.0<1>:UD      vme_m0.0<8,8,1>:UD {align1};
  541. mov  (8) vme_msg_1<1>:UD        vme_m1.0<8,8,1>:UD {align1};
  542.  
  543. /* after verification it will be passed by using payload */
  544. send (8)
  545.        vme_msg_ind
  546.        vme_wb<1>:UD
  547.        null
  548.        cre(
  549.                BIND_IDX_VME,
  550.                VME_SIC_MESSAGE_TYPE
  551.        )
  552.        mlen sic_vme_msg_length
  553.        rlen vme_wb_length
  554.        {align1};
  555. /*
  556. * Oword Block Write message
  557. */
  558. mov  (8) msg_reg0.0<1>:UD       obw_m0<8,8,1>:UD {align1};
  559.        
  560. mov  (1) msg_reg1.0<1>:UD       vme_wb.0<0,1,0>:UD      {align1};
  561. mov  (1) msg_reg1.4<1>:UD       vme_wb.16<0,1,0>:UD     {align1};
  562. mov  (1) msg_reg1.8<1>:UD       vme_wb.20<0,1,0>:UD     {align1};
  563. mov  (1) msg_reg1.12<1>:UD      vme_wb.24<0,1,0>:UD     {align1};
  564.  
  565. /* Distortion, Intra (17-16), */
  566. mov  (1) msg_reg1.16<1>:UW      vme_wb.12<0,1,0>:UW     {align1};
  567.  
  568. mov  (1) msg_reg1.20<1>:UD      vme_wb.8<0,1,0>:UD     {align1};
  569. /* VME clock counts */
  570. mov  (1) msg_reg1.24<1>:UD      vme_wb.28<0,1,0>:UD     {align1};
  571.  
  572. mov  (1) msg_reg1.28<1>:UD      obw_m0.8<0,1,0>:UD     {align1};
  573.  
  574. /* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */
  575. send (16)
  576.        msg_ind
  577.        obw_wb
  578.        null
  579.        data_port(
  580.                OBW_CACHE_TYPE,
  581.                OBW_MESSAGE_TYPE,
  582.                OBW_CONTROL_2,
  583.                OBW_BIND_IDX,
  584.                OBW_WRITE_COMMIT_CATEGORY,
  585.                OBW_HEADER_PRESENT
  586.        )
  587.        mlen 2
  588.        rlen obw_wb_length
  589.        {align1};
  590.  
  591. /* IME search */
  592. mov  (1) vme_m0.12<1>:UD        SEARCH_CTRL_DUAL_REFERENCE + INTER_PART_MASK + INTER_SAD_HAAR:UD {align1};    /* 16x16 Source, harr */
  593. mov  (1) vme_m0.22<1>:UW        DREF_REGION_SIZE {align1};        
  594. /* Dual Reference Width&Height,32x32 */
  595.  
  596. mov  (1) vme_m0.0<1>:UD         vme_m0.8<0,1,0>:UD      {align1};
  597.  
  598. /* Reference = (x-8,y-8)-(x+8,y+8) */
  599. add  (1) vme_m0.0<1>:W          vme_m0.0<0,1,0>:W -8:W {align1};               
  600. add  (1) vme_m0.2<1>:W          vme_m0.2<0,1,0>:W -8:W {align1};
  601.  
  602. mov  (1) vme_m0.0<1>:W          -8:W                    {align1};
  603. mov  (1) vme_m0.2<1>:W          -8:W                    {align1};
  604.  
  605. mov  (1) vme_m0.4<1>:UD         vme_m0.0<0,1,0>:UD      {align1};
  606. and.z.f0.0 (1)          null:uw input_mb_intra_ub<0,1,0>:ub     INTRA_PRED_AVAIL_FLAG_AE:uw   {align1};
  607. (f0.0)  add     (1)     vme_m0.0<1>:w   vme_m0.0<0,1,0>:w       4:w     {align1};
  608. (f0.0)  add     (1)     vme_m0.4<1>:w   vme_m0.4<0,1,0>:w       4:w     {align1};
  609. and.z.f0.0 (1)          null:uw input_mb_intra_ub<0,1,0>:ub     INTRA_PRED_AVAIL_FLAG_B:uw   {align1};
  610. (f0.0)  add     (1)     vme_m0.2<1>:w   vme_m0.2<0,1,0>:w       4:w     {align1};
  611. (f0.0)  add     (1)     vme_m0.6<1>:w   vme_m0.6<0,1,0>:w       4:w     {align1};
  612.  
  613. add  (2) vme_m0.0<1>:w          vme_m0.0<2,2,1>:w       mb_ref_win.16<2,2,1>:w  {align1};
  614. add  (2) vme_m0.4<1>:w          vme_m0.4<2,2,1>:w       mb_ref_win.20<2,2,1>:w  {align1};
  615.  
  616. mov  (8) vme_msg_0.0<1>:UD      vme_m0.0<8,8,1>:UD {align1};
  617.  
  618. mov  (1) vme_m1.0<1>:UD         ADAPTIVE_SEARCH_ENABLE:ud {align1} ;
  619. /* the Max MV number is passed by constant buffer */
  620. mov  (1) vme_m1.4<1>:UB         r4.28<0,1,0>:UB {align1};          
  621. mov  (1) vme_m1.8<1>:UD         DSTART_CENTER + DSEARCH_PATH_LEN:UD {align1};
  622. /* Set the MV cost center */
  623. mov  (1) vme_m1.16<1>:ud        mb_mvp_ref.0<0,1,0>:ud  {align1};
  624. mov  (1) vme_m1.20<1>:ud        mb_mvp_ref.4<0,1,0>:ud  {align1};
  625. mov  (8) vme_msg_1.0<1>:UD      vme_m1.0<8,8,1>:UD {align1};
  626.  
  627. mov (8) vme_msg_2<1>:UD         vme_m2.0<8,8,1>:UD {align1};
  628. /* M3/M4 search path */
  629.  
  630. mov  (1) vme_msg_3.0<1>:UD      0x10010101:UD {align1};
  631. mov  (1) vme_msg_3.4<1>:UD      0x100F0F0F:UD {align1};
  632. mov  (1) vme_msg_3.8<1>:UD      0x10010101:UD {align1};
  633. mov  (1) vme_msg_3.12<1>:UD     0x000F0F0F:UD {align1};
  634.  
  635. mov  (4) vme_msg_3.16<1>:UD     0x0:UD {align1};
  636. mov  (8) vme_msg_4.16<1>:UD     0x0:UD {align1};
  637.  
  638. send (8)
  639.        vme_msg_ind
  640.        vme_wb<1>:UD
  641.        null
  642.        vme(
  643.                BIND_IDX_VME,
  644.                0,
  645.                0,
  646.                VME_IME_MESSAGE_TYPE
  647.        )
  648.        mlen ime_vme_msg_length
  649.        rlen vme_wb_length {align1};
  650.  
  651. /* Set Macroblock-shape/mode for FBR */
  652.  
  653. mov  (1) vme_m2.20<1>:UD        0x0:UD {align1};
  654. mov  (1) vme_m2.21<1>:UB        vme_wb.25<0,1,0>:UB     {align1};
  655. mov  (1) vme_m2.22<1>:UB        vme_wb.26<0,1,0>:UB     {align1};
  656.  
  657. and  (1) tmp_reg0.0<1>:UW       vme_wb.0<0,1,0>:UW      0x03:UW {align1};
  658. mov  (1) vme_m2.20<1>:UB        tmp_reg0.0<0,1,0>:UB    {align1};
  659.  
  660. /* Send FBR message into CRE */
  661.  
  662. mov  (8) vme_msg_3.0<1>:UD       vme_wb1.0<8,8,1>:UD {align1};
  663. mov  (8) vme_msg_4.0<1>:ud       vme_wb2.0<8,8,1>:ud {align1};
  664. mov  (8) vme_msg_5.0<1>:ud       vme_wb3.0<8,8,1>:ud {align1};
  665. mov  (8) vme_msg_6.0<1>:ud       vme_wb4.0<8,8,1>:ud {align1};                
  666.  
  667. /* 16x16 Source, 1/4 pixel, Haar, BME ENABLE */
  668. mov  (1) vme_m0.12<1>:UD        INTER_SAD_HAAR + SUB_PEL_MODE_QUARTER + FBR_BME_ENABLE:UD {align1};
  669.  
  670. mov  (8) vme_msg_0.0<1>:UD      vme_m0.0<8,8,1>:UD  {align1};
  671.  
  672. mov  (1) tmp_reg0.0<1>:uw       BI_WEIGHT       {align1};
  673. mov  (1) vme_m1.6<1>:UB         tmp_reg0.0<0,1,0>:ub    {align1};
  674. mov  (8) vme_msg_1.0<1>:UD      vme_m1.0<8,8,1>:UD  {align1};
  675.  
  676. mov  (8) vme_msg_2.0<1>:UD              vme_m2.0<8,8,1>:UD      {align1};
  677.  
  678. /* after verification it will be passed by using payload */
  679. send (8)
  680.        vme_msg_ind
  681.        vme_wb<1>:UD
  682.        null
  683.        cre(
  684.                BIND_IDX_VME,
  685.                VME_FBR_MESSAGE_TYPE
  686.        )
  687.        mlen fbr_vme_msg_length
  688.        rlen vme_wb_length
  689.        {align1};
  690.  
  691. add  (1) obw_m0.8<1>:UD         obw_m0.8<0,1,0>:UD 0x02:UD {align1};
  692. mov  (8) msg_reg0.0<1>:UD       obw_m0<8,8,1>:UD {align1};
  693. /* write FME info */
  694. mov  (1) msg_reg1.0<1>:UD       vme_wb.0<0,1,0>:UD      {align1};
  695.  
  696. mov  (1) msg_reg1.4<1>:UD       vme_wb.24<0,1,0>:UD     {align1};
  697. /* Inter distortion of FME */
  698. mov  (1) msg_reg1.8<1>:UD       vme_wb.8<0,1,0>:UD     {align1};
  699.  
  700. mov  (1) msg_reg1.12<1>:UD      vme_m2.20<0,1,0>:UD {align1};
  701.  
  702. /* bind index 3, write 1 oword (16 bytes), msg type: 8(OWord Block Write) */
  703. send (16)
  704.        msg_ind
  705.        obw_wb
  706.        null
  707.        data_port(
  708.                OBW_CACHE_TYPE,
  709.                OBW_MESSAGE_TYPE,
  710.                OBW_CONTROL_0,
  711.                OBW_BIND_IDX,
  712.                OBW_WRITE_COMMIT_CATEGORY,
  713.                OBW_HEADER_PRESENT
  714.        )
  715.        mlen 2
  716.        rlen obw_wb_length
  717.        {align1};
  718.  
  719. /* Write FME/BME MV */
  720. add  (1) obw_m0.8<1>:UD         obw_m0.8<0,1,0>:UD 0x01:UD {align1};
  721. mov  (8) msg_reg0.0<1>:UD       obw_m0.0<8,8,1>:UD {align1};
  722.  
  723.  
  724. mov  (8) msg_reg1.0<1>:UD       vme_wb1.0<8,8,1>:UD {align1};
  725. mov  (8) msg_reg2.0<1>:ud       vme_wb2.0<8,8,1>:ud {align1};
  726. mov  (8) msg_reg3.0<1>:ud       vme_wb3.0<8,8,1>:ud {align1};
  727. mov  (8) msg_reg4.0<1>:ud       vme_wb4.0<8,8,1>:ud {align1};                
  728. /* bind index 3, write  8 oword (128 bytes), msg type: 8(OWord Block Write) */
  729. send (16)
  730.        msg_ind
  731.        obw_wb
  732.        null
  733.        data_port(
  734.                OBW_CACHE_TYPE,
  735.                OBW_MESSAGE_TYPE,
  736.                OBW_CONTROL_8,
  737.                OBW_BIND_IDX,
  738.                OBW_WRITE_COMMIT_CATEGORY,
  739.                OBW_HEADER_PRESENT
  740.        )
  741.        mlen 5
  742.        rlen obw_wb_length
  743.        {align1};
  744.  
  745. /* Write FME/BME RefID */
  746. add  (1) obw_m0.8<1>:UD         obw_m0.8<0,1,0>:UD 0x08:UD {align1};
  747. mov  (8) msg_reg0.0<1>:UD       obw_m0<8,8,1>:UD {align1};
  748.  
  749. mov  (8) msg_reg1.0<1>:UD       vme_wb6.0<8,8,1>:UD {align1};
  750.  
  751. /* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */
  752. send (16)
  753.        msg_ind
  754.        obw_wb
  755.        null
  756.        data_port(
  757.                OBW_CACHE_TYPE,
  758.                OBW_MESSAGE_TYPE,
  759.                OBW_CONTROL_2,
  760.                OBW_BIND_IDX,
  761.                OBW_WRITE_COMMIT_CATEGORY,
  762.                OBW_HEADER_PRESENT
  763.        )
  764.        mlen 2
  765.        rlen obw_wb_length
  766.        {align1};
  767.  
  768.  
  769. /* Issue message fence so that the previous write message is committed */
  770. send (16)
  771.        mb_ind
  772.        obw_wb
  773.         NULL
  774.        data_port(
  775.                OBR_CACHE_TYPE,
  776.                OBR_MESSAGE_FENCE,
  777.                OBR_MF_COMMIT,
  778.                OBR_BIND_IDX,
  779.                OBR_WRITE_COMMIT_CATEGORY,
  780.                OBR_HEADER_PRESENT
  781.        )
  782.        mlen 1
  783.        rlen 1
  784.        {align1};
  785.  
  786. __EXIT:
  787. /*
  788. * Kill thread: copy r0 into ts_msg_reg0, then issue a one-register (mlen 1), no-response (rlen 0) send to the thread spawner with EOT set.
  789. */        
  790. mov  (8) ts_msg_reg0<1>:UD         r0<8,8,1>:UD {align1};      /* all 8 dwords of r0 go into the message register */
  791. send (16) ts_msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT};      /* EOT marks this as the thread's final message */
  792.  
  793.  
  794.         nop             ;      /* NOTE(review): two nops sit between the EOT send and the helper routines; presumably padding -- confirm */
  795.         nop             ;
  796. /* word_imin: RET_ARG = minimum of three signed words a = INPUT_ARG0.0, b = INPUT_ARG0.4, c = INPUT_ARG0.8. Overwrites f0.0 and TEMP_VAR0.0; leaves through RETURN (ip = RETURN_REG). */
  797. word_imin:
  798.         cmp.le.f0.0 (1)         null:w          INPUT_ARG0.0<0,1,0>:w   INPUT_ARG0.4<0,1,0>:w {align1};      /* f0.0 = (a <= b) */
  799.         (f0.0) mov  (1)         TEMP_VAR0.0<1>:w INPUT_ARG0.0<0,1,0>:w                    {align1};      /* a <= b: TEMP_VAR0.0 = a */
  800.         (-f0.0) mov (1)         TEMP_VAR0.0<1>:w INPUT_ARG0.4<0,1,0>:w                    {align1};      /* a >  b: TEMP_VAR0.0 = b */
  801.         cmp.le.f0.0 (1)         null:w          TEMP_VAR0.0<0,1,0>:w    INPUT_ARG0.8<0,1,0>:w {align1};      /* f0.0 = (min(a,b) <= c) */
  802.         (f0.0) mov  (1)         RET_ARG<1>:w TEMP_VAR0.0<0,1,0>:w                         {align1};      /* result = min(a,b) */
  803.         (-f0.0) mov (1)         RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w                        {align1};      /* result = c */
  804.         RETURN          {align1};      /* jump back to the address held in RETURN_REG */
  805.        
  806. /* word_imax: RET_ARG = maximum of three signed words a = INPUT_ARG0.0, b = INPUT_ARG0.4, c = INPUT_ARG0.8. Overwrites f0.0 and TEMP_VAR0.0; leaves through RETURN (ip = RETURN_REG). */
  807. word_imax:
  808.         cmp.ge.f0.0 (1)         null:w          INPUT_ARG0.0<0,1,0>:w   INPUT_ARG0.4<0,1,0>:w {align1};      /* f0.0 = (a >= b) */
  809.         (f0.0) mov  (1)         TEMP_VAR0.0<1>:w INPUT_ARG0.0<0,1,0>:w                    {align1};      /* a >= b: TEMP_VAR0.0 = a */
  810.         (-f0.0) mov (1)         TEMP_VAR0.0<1>:w INPUT_ARG0.4<0,1,0>:w                    {align1};      /* a <  b: TEMP_VAR0.0 = b */
  811.         cmp.ge.f0.0 (1)         null:w          TEMP_VAR0.0<0,1,0>:w    INPUT_ARG0.8<0,1,0>:w {align1};      /* f0.0 = (max(a,b) >= c) */
  812.         (f0.0) mov  (1)         RET_ARG<1>:w TEMP_VAR0.0<0,1,0>:w                         {align1};      /* result = max(a,b) */
  813.         (-f0.0) mov (1)         RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w                        {align1};      /* result = c */
  814.         RETURN          {align1};      /* jump back to the address held in RETURN_REG */
  815.        
  816. word_imedian:      /* RET_ARG = median of three signed words a = INPUT_ARG0.0, b = INPUT_ARG0.4, c = INPUT_ARG0.8. Overwrites f0.0; leaves through RETURN at cmp_end. */
  817.         cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.4<0,1,0>:w {align1};      /* f0.0 = (a >= b) */
  818.         (f0.0)  jmpi (1) cmp_a_ge_b;
  819.         cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1};      /* here a < b; f0.0 = (a >= c) */
  820.         (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1};      /* c <= a < b: median is a */
  821.         (f0.0) jmpi (1) cmp_end;
  822.         cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1};      /* here a < b and a < c; f0.0 = (b >= c) */
  823.         (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1};      /* a < c <= b: median is c */
  824.         (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1};      /* a < b < c: median is b */
  825.         jmpi (1) cmp_end;
  826. cmp_a_ge_b:
  827.         cmp.ge.f0.0 (1) null:w INPUT_ARG0.4<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1};      /* here a >= b; f0.0 = (b >= c) */
  828.         (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.4<0,1,0>:w {align1};      /* a >= b >= c: median is b */
  829.         (f0.0) jmpi (1) cmp_end;
  830.         cmp.ge.f0.0 (1) null:w INPUT_ARG0.0<0,1,0>:w INPUT_ARG0.8<0,1,0>:w {align1};      /* here b < c; f0.0 = (a >= c) */
  831.         (f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.8<0,1,0>:w {align1};      /* a >= c > b: median is c */
  832.         (-f0.0) mov (1) RET_ARG<1>:w INPUT_ARG0.0<0,1,0>:w {align1};      /* c > a >= b: median is a */
  833. cmp_end:
  834.         RETURN  {align1};      /* jump back to the address held in RETURN_REG */
  835.  
  836. mb_pred_func:      /* RET_ARG:uw = prediction-mode field of one block, masked with PRED_MASK. In: INPUT_ARG0.2 (ub) packed prediction bits, INPUT_ARG0.4 (uw) MB mode word (masked with INTER_MASK here), INPUT_ARG0.8 (uw) block index. Overwrites all 8 dwords of TEMP_VAR0 and f0.0; leaves through RETURN at end_mb_pred. Scalar sources use the <0,1,0> region like the rest of the file. */
  837.         mov     (8)     TEMP_VAR0.0<1>:ud       0:ud    {align1};      /* clear the scratch register */
  838.         mov     (1)     TEMP_VAR0.0<1>:ub       INPUT_ARG0.2<0,1,0>:ub  {align1};      /* TEMP_VAR0.0 = packed prediction bits, upper byte stays zero */
  839.         and     (1)   TEMP_VAR0.4<1>:uw   INPUT_ARG0.4<0,1,0>:uw        INTER_MASK:uw   {align1};      /* TEMP_VAR0.4 = inter partition mode */
  840.         /* INTER16x16 mode. The bit1-0 is the prediction mode */
  841.         cmp.e.f0.0 (1) null:uw  TEMP_VAR0.4<0,1,0>:uw       INTER_16X16MODE:uw      {align1};
  842.         (f0.0)  and (1) RET_ARG<1>:uw   TEMP_VAR0.0<0,1,0>:uw   PRED_MASK {align1};
  843.         (f0.0)  jmpi (1) end_mb_pred;
  844.         /* Check whether it is INTER8x8 mode. */
  845.         cmp.e.f0.0 (1) null:uw  TEMP_VAR0.4<0,1,0>:uw       INTER_8X8MODE:uw        {align1};
  846.         (f0.0)  jmpi (1) mb_pred_func_8;
  847.        
  848.         /* Check whether it is INTER16x8 mode. */
  849.         cmp.e.f0.0 (1) null:uw  TEMP_VAR0.4<0,1,0>:uw       INTER_16X8MODE:uw       {align1};
  850.         (f0.0)  jmpi (1) mb_pred_func_168;
  851. mb_pred_func_816:      /* fall-through: none of the 16x16, 8x8 or 16x8 modes matched */
  852.         /* Block 0/2 uses the bit1-0. Block 1/3 uses the bit3-2 */
  853.         mov     (1) TEMP_VAR0.8<1>:uw INPUT_ARG0.8<0,1,0>:uw    {align1};      /* TEMP_VAR0.8 = block index */
  854.         and.z.f0.0 (1)  null:uw TEMP_VAR0.8<0,1,0>:uw       INTER_BLOCK1:uw {align1};      /* f0.0 = ((block index & INTER_BLOCK1) == 0) */
  855.         (f0.0)  and     (1) RET_ARG<1>:uw       TEMP_VAR0.0<0,1,0>:uw   PRED_MASK {align1};
  856.         (f0.0)  jmpi (1) end_mb_pred;
  857.         shr     (1)  TEMP_VAR0.16<1>:uw TEMP_VAR0.0<0,1,0>:uw   2:uw {align1};      /* drop bit1-0 so bit3-2 become the low bits */
  858.         and     (1)  RET_ARG<1>:uw      TEMP_VAR0.16<0,1,0>:uw      PRED_MASK  {align1};   
  859.         jmpi    (1) end_mb_pred;
  860.  
  861. mb_pred_func_168:
  862.         /* Block 0/1 uses the bit1-0. Block 2/3 uses the bit3-2 */
  863.         mov     (1) TEMP_VAR0.8<1>:uw INPUT_ARG0.8<0,1,0>:uw    {align1};      /* TEMP_VAR0.8 = block index */
  864.         cmp.l.f0.0 (1)  null:uw TEMP_VAR0.8<0,1,0>:uw       INTER_BLOCK2:uw {align1};      /* f0.0 = (block index < INTER_BLOCK2) */
  865.         (f0.0)  and  (1) RET_ARG<1>:uw  TEMP_VAR0.0<0,1,0>:uw   PRED_MASK {align1};
  866.         (f0.0)  jmpi (1) end_mb_pred;
  867.         shr     (1)  TEMP_VAR0.16<1>:uw TEMP_VAR0.0<0,1,0>:uw   2:uw {align1};      /* drop bit1-0 so bit3-2 become the low bits */
  868.         and     (1)  RET_ARG<1>:uw      TEMP_VAR0.16<0,1,0>:uw      PRED_MASK  {align1};   
  869.         jmpi    (1) end_mb_pred;
  870.  
  871. mb_pred_func_8:
  872.         /* 8X8 mode. Every block uses two bits as the prediction mode. */
  873.         mul     (1)  TEMP_VAR0.8<1>:uw  INPUT_ARG0.8<0,1,0>:uw  2:uw {align1};      /* shift count = 2 * block index */
  874.         shr     (1)  TEMP_VAR0.16<1>:uw TEMP_VAR0.0<0,1,0>:uw   TEMP_VAR0.8<0,1,0>:uw {align1};      /* bring this block's field down to the low bits */
  875.         and     (1)  RET_ARG<1>:uw      TEMP_VAR0.16<0,1,0>:uw      PRED_MASK  {align1};   
  876. end_mb_pred:
  877.         RETURN  {align1};      /* jump back to the address held in RETURN_REG */
  878.        
  879.