Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Decode Intra_4x4 macroblock
  3.  * Copyright © <2010>, Intel Corporation.
  4.  *
  5.  * Permission is hereby granted, free of charge, to any person obtaining a
  6.  * copy of this software and associated documentation files (the
  7.  * "Software"), to deal in the Software without restriction, including
  8.  * without limitation the rights to use, copy, modify, merge, publish,
  9.  * distribute, sub license, and/or sell copies of the Software, and to
  10.  * permit persons to whom the Software is furnished to do so, subject to
  11.  * the following conditions:
  12.  *
  13.  * The above copyright notice and this permission notice (including the
  14.  * next paragraph) shall be included in all copies or substantial portions
  15.  * of the Software.
  16.  *
  17.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  19.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  20.  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
  21.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  22.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  23.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  24.  *
  25.  * This file was originally licensed under the following license
  26.  *
  27.  *  Licensed under the Apache License, Version 2.0 (the "License");
  28.  *  you may not use this file except in compliance with the License.
  29.  *  You may obtain a copy of the License at
  30.  *
  31.  *      http://www.apache.org/licenses/LICENSE-2.0
  32.  *
  33.  *  Unless required by applicable law or agreed to in writing, software
  34.  *  distributed under the License is distributed on an "AS IS" BASIS,
  35.  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  36.  *  See the License for the specific language governing permissions and
  37.  *  limitations under the License.
  38.  *
  39.  */
  40. // Kernel name: Intra_4x4.asm
  41. //
  42. // Decoding of Intra_4x4 macroblock
  43. //
  44. //  $Revision: 12 $
  45. //  $Date: 10/18/06 4:10p $
  46. //
  47.  
  48. // ----------------------------------------------------
  49. //  Main: Intra_4x4
  50. // ----------------------------------------------------
  51.  
  52. #define INTRA_4X4
  53. // INTRA_4X4 above is defined but not tested in the visible part of this file (presumably consumed by the included modules -- confirm). Kernel flow: setup, optional SW-scoreboard wait, luma sub-macroblocks 0..3 (predict, then pack), save Y, decode chroma, end thread.
  54. .kernel Intra_4x4
  55. INTRA_4x4:
  56.  
  57. #ifdef _DEBUG
  58. // Workaround (WA) for FULSIM: load a recognizable marker constant into acc0 so we'll know which kernel is being debugged
  59. mov (1) acc0:ud 0x02aa55a5:ud
  60. #endif
  61.  
  62. #include "SetupForHWMC.asm"
  63. // PPREDBUF_Y is re-bound for this kernel: any earlier definition is dropped and the name now refers to a0.3 (used below as the base of r[...] indirect addressing).
  64. #undef          PPREDBUF_Y
  65. #define     PPREDBUF_Y          a0.3    // Pointer to predicted Y picture
  66. // Local aliases for temporary registers. REG_INTRA_PRED_AVAIL holds the neighbor-availability bits that are updated between sub-macroblocks; REG_INTRA_4X4_PRED is not referenced in the visible part of this file.
  67. #define         REG_INTRA_PRED_AVAIL    REG_INTRA_TEMP_4
  68. #define         REG_INTRA_4X4_PRED              REG_INTRA_TEMP_7                // Store predicted Intra_4x4 data
  69.  
  70. // Offset where 4x4 predicted data blocks are stored: block n sits n*GRFWIB past the prediction pointer (GRFWIB is defined elsewhere; presumably the GRF width in bytes -- confirm)
  71. #define PREDSUBBLK0             0*GRFWIB
  72. #define PREDSUBBLK1             1*GRFWIB
  73. #define PREDSUBBLK2             2*GRFWIB
  74. #define PREDSUBBLK3             3*GRFWIB
  75. #define PREDSUBBLK4             4*GRFWIB
  76. #define PREDSUBBLK5             5*GRFWIB
  77. #define PREDSUBBLK6             6*GRFWIB
  78. #define PREDSUBBLK7             7*GRFWIB
  79. #define PREDSUBBLK8             8*GRFWIB
  80. #define PREDSUBBLK9             9*GRFWIB
  81. #define PREDSUBBLK10    10*GRFWIB
  82. #define PREDSUBBLK11    11*GRFWIB
  83. #define PREDSUBBLK12    12*GRFWIB
  84. #define PREDSUBBLK13    13*GRFWIB
  85. #define PREDSUBBLK14    14*GRFWIB
  86. #define PREDSUBBLK15    15*GRFWIB
  87.  
  88. // 4x4 error block byte offset within the 8x8 error block (ERRBLKn are not referenced in the visible part of this file; presumably used by the called prediction routine -- confirm)
  89. #define ERRBLK0         0
  90. #define ERRBLK1         8
  91. #define ERRBLK2         64
  92. #define ERRBLK3         72
  93.  
  94. #ifdef SW_SCOREBOARD    
  95.    CALL(scoreboard_start_intra,1)
  96. #endif
  97.  
  98. #ifdef SW_SCOREBOARD    
  99.         wait    n0:ud           //      Now wait for scoreboard to respond
  100. #endif
  101.  
  102. //
  103. //  Decode Y blocks
  104. //
  105. //      Load reference data from neighboring macroblocks
  106.    CALL(load_Intra_Ref_Y,1)
  107. //      Initialise the error-data and prediction-buffer pointers, then derive the neighbor-availability words used by the sub-macroblock steps below.
  108.         mov     (1)     PERROR<1>:w     ERRBUF*GRFWIB:w                 // Pointer to macroblock error data
  109.         mov     (1)     PPREDBUF_Y<1>:w PREDBUF*GRFWIB:w        // Pointer to predicted data
  110.         shr (2) REG_INTRA_PRED_AVAIL<1>:w       REG_INTRA_PRED_AVAIL_FLAG_BYTE<0;1,0>:ub        0x40:v        // Writes two words from the same flag byte; NOTE(review): the packed :v immediate presumably supplies per-element shift counts 0 and 4 -- confirm against the ISA reference
  111.     and.nz.f0.0 (8)     NULLREG REG_INTRA_PRED_AVAIL_FLAG_BYTE<0;1,0>:ub        4:w     // Top-right macroblock available for intra prediction? (tests mask 0x4 of the flag byte; result only sets f0.0)
  112.         (-f0.0.any8h) mov (8)   INTRA_REF_TOP(0,16)<1>  INTRA_REF_TOP(0,15)REGION(1,0)  // Extend right boundary of MB B to C: runs only when the test above found the bit clear; copies from top-reference element 15 into 8 elements starting at 16
  113.  
  114. //      Intra predict Intra_4x4 luma blocks
  115. //      Each sub-macroblock step stages top/left reference data and prediction modes, then calls the shared routine intra_Pred_4x4_Y_4.
  116. //      Sub-macroblock 0 *****************
  117.         mov     (16)    REF_TOP0(0)<1>  INTRA_REF_TOP0(0)       // Top reference data
  118.         mov     (8)             REF_LEFT(0)<1>  INTRA_REF_LEFT(0)REGION(8,4)    // Left reference data
  119.         shr     (4)             PRED_MODE<1>:w  INTRA_PRED_MODE(0)<1;2,0>       0x4040:v        // Expand IntraPredMode to 1 byte/block (the <1;2,0> region reads each source byte twice; NOTE(review): the :v immediate presumably shifts by 0 and 4 alternately, giving one 4-bit mode per destination word -- confirm)
  120.         CALL(intra_Pred_4x4_Y_4,1)
  121.     add (1)             PERROR<1>:w     PERROR<0;1,0>:w 0x0080:w        // Pointer to next error block (advance by 0x80)
  122.  
  123.         or  (1) REG_INTRA_PRED_AVAIL<1>:w       REG_INTRA_PRED_AVAIL<0;1,0>:w   0x1:w   // Left neighbor is available now
  124.  
  125. //      Sub-macroblock 1 *****************
  126. //      The left reference comes from the sub-macroblock 0 output still held in word format at PPREDBUF_Y (offset 6 with an 8-byte step in blocks 1 and 3).
  127.         mov     (16)    REF_TOP0(0)<1>  INTRA_REF_TOP0(0,8)     // Top reference data
  128.         mov     (4)             REF_LEFT(0)<1>  r[PPREDBUF_Y,PREDSUBBLK1+6]<8;1,0>:ub   // Left reference data (top half)
  129.         mov     (4)             REF_LEFT(0,4)<1>        r[PPREDBUF_Y,PREDSUBBLK3+6]<8;1,0>:ub   // Left reference data (bottom half)
  130.         shr     (4)             PRED_MODE<1>:w  INTRA_PRED_MODE(0,2)<1;2,0>     0x4040:v        // Expand IntraPredMode to 1 byte/block
  131.         add     (1)             PPREDBUF_Y<1>:w PPREDBUF_Y<0;1,0>:w     4*GRFWIB:w      // Pointer to predicted sub-macroblock 1
  132.         CALL(intra_Pred_4x4_Y_4,1)
  133.     add (1)             PERROR<1>:w     PERROR<0;1,0>:w 0x0080:w        // Pointer to next error block (advance by 0x80)
  134.  
  135.         or  (1) REG_INTRA_PRED_AVAIL<1>:w       REG_INTRA_PRED_AVAIL.1<0;1,0>:w 0x2:w   // Top neighbor is available now (word 0 is rebuilt from word 1 of REG_INTRA_PRED_AVAIL, not from its own previous value)
  136. //      Packed layout produced below, relative to PPREDBUF_Y: -4*GRFWIB holds blocks 0,1; -3*GRFWIB blocks 4,5; -2*GRFWIB blocks 2,3; -1*GRFWIB blocks 6,7 (second block of each pair at +16 bytes).
  137. //      Pack constructed data from word-aligned to byte-aligned format
  138. //      to speed up save_4x4_Y module later (the <32;16,2>:ub source region picks every second byte of 16 words)
  139. //      PPREDBUF_Y now points to sub-block #4
  140.         mov (16)        r[PPREDBUF_Y,-PREDSUBBLK4]<1>:ub        r[PPREDBUF_Y,-PREDSUBBLK4]<32;16,2>:ub          // Sub-block 0
  141.         mov (16)        r[PPREDBUF_Y,0-PREDSUBBLK4+16]<1>:ub    r[PPREDBUF_Y,-PREDSUBBLK3]<32;16,2>:ub  // Sub-block 1
  142.         mov (16)        r[PPREDBUF_Y,-PREDSUBBLK2]<1>:ub        r[PPREDBUF_Y,-PREDSUBBLK2]<32;16,2>:ub          // Sub-block 2
  143.         mov (16)        r[PPREDBUF_Y,0-PREDSUBBLK2+16]<1>:ub    r[PPREDBUF_Y,-PREDSUBBLK1]<32;16,2>:ub  // Sub-block 3
  144. //      Keep this order: the moves below overwrite the word-format storage of blocks 1 and 3, which the moves above have already read.
  145.         mov (16)        r[PPREDBUF_Y,-PREDSUBBLK3]<1>:ub        r[PPREDBUF_Y]<32;16,2>:ub                               // Sub-block 4
  146.         mov (16)        r[PPREDBUF_Y,0-PREDSUBBLK3+16]<1>:ub    r[PPREDBUF_Y,PREDSUBBLK1]<32;16,2>:ub   // Sub-block 5
  147.         mov (16)        r[PPREDBUF_Y,-PREDSUBBLK1]<1>:ub        r[PPREDBUF_Y,PREDSUBBLK2]<32;16,2>:ub           // Sub-block 6
  148.         mov (16)        r[PPREDBUF_Y,0-PREDSUBBLK1+16]<1>:ub    r[PPREDBUF_Y,PREDSUBBLK3]<32;16,2>:ub   // Sub-block 7
  149.  
  150. //      Sub-macroblock 2 *****************
  151. //      The top reference row is rebuilt from the packed (byte-format) blocks just above PPREDBUF_Y: bytes 12..15 of each 16-byte block.
  152.         mov     (4)             REF_TOP0(0)<1>          INTRA_REF_LEFT0(0,28)REGION(4,1)                // Top-left reference data
  153.         mov     (8)             REF_TOP0(0,4)<1>        r[PPREDBUF_Y,0-2*GRFWIB+12]<16;4,1>:ub  // Top reference data from SB 2,3
  154.         mov     (8)             REF_TOP0(0,12)<1>       r[PPREDBUF_Y,0-GRFWIB+12]<16;4,1>:ub    // Top reference data from SB 6,7
  155.         mov     (8)             REF_TOP0(0,20)<1>       r[PPREDBUF_Y,0-GRFWIB+31]<0;1,0>:ub             // Top-right reference data (the single byte at offset 31 is replicated 8 times by the <0;1,0> region)
  156.         mov     (16)    REG_INTRA_REF_TOP<1>:w  REF_TOP_W(0)            // Store top reference data for SubMB #2 and #3.
  157.         mov     (8)             REF_LEFT(0)<1>          INTRA_REF_LEFT(1)REGION(8,4)    // Left reference data
  158.         shr     (4)             PRED_MODE<1>:w          INTRA_PRED_MODE(0,4)<1;2,0>     0x4040:v        // Expand IntraPredMode to 1 byte/block
  159.         CALL(intra_Pred_4x4_Y_4,1)
  160.     add (1)             PERROR<1>:w     PERROR<0;1,0>:w 0x0080:w        // Pointer to next error block (advance by 0x80)
  161.  
  162.         or  (1) REG_INTRA_PRED_AVAIL<1>:w       REG_INTRA_PRED_AVAIL<0;1,0>:w   0x1:w   // Left neighbor is available now
  163.  
  164. //      Sub-macroblock 3 *****************
  165. //      NOTE(review): INTRA_REF_TOP0 presumably reads back the row stored into REG_INTRA_REF_TOP during sub-macroblock 2 -- confirm against the macro definitions. The left reference is taken from word-format blocks at PPREDBUF_Y before the pointer is advanced.
  166.         mov     (16)    REF_TOP0(0)<1>  INTRA_REF_TOP0(0,8)             // Top reference data
  167.         mov     (8)             REF_TOP0(0,16)<1>       INTRA_REF_TOP0(0,24)REGION(8,1) // Top reference data
  168.         mov     (4)             REF_LEFT(0)<1>  r[PPREDBUF_Y,PREDSUBBLK1+6]<8;1,0>:ub   // Left reference data (top half)
  169.         mov     (4)             REF_LEFT(0,4)<1>        r[PPREDBUF_Y,PREDSUBBLK3+6]<8;1,0>:ub   // Left reference data (bottom half)
  170.         shr     (4)             PRED_MODE<1>:w  INTRA_PRED_MODE(0,6)<1;2,0>     0x4040:v        // Expand IntraPredMode to 1 byte/block
  171.         add     (1)             PPREDBUF_Y<1>:w PPREDBUF_Y<0;1,0>:w     4*GRFWIB:w      // Pointer to predicted sub-macroblock 3
  172.         CALL(intra_Pred_4x4_Y_4,1)
  173.  
  174. //      Pack constructed data from word-aligned to byte-aligned format
  175. //      to speed up save_4x4_Y module later (same move pattern as the packing of sub-blocks 0-7)
  176. //      PPREDBUF_Y now points to sub-block #12
  177.         mov (16)        r[PPREDBUF_Y,-PREDSUBBLK4]<1>:ub        r[PPREDBUF_Y,-PREDSUBBLK4]<32;16,2>:ub          // Sub-block 8
  178.         mov (16)        r[PPREDBUF_Y,0-PREDSUBBLK4+16]<1>:ub    r[PPREDBUF_Y,-PREDSUBBLK3]<32;16,2>:ub  // Sub-block 9
  179.         mov (16)        r[PPREDBUF_Y,-PREDSUBBLK2]<1>:ub        r[PPREDBUF_Y,-PREDSUBBLK2]<32;16,2>:ub          // Sub-block 10
  180.         mov (16)        r[PPREDBUF_Y,0-PREDSUBBLK2+16]<1>:ub    r[PPREDBUF_Y,-PREDSUBBLK1]<32;16,2>:ub  // Sub-block 11
  181. //      Keep this order: the moves below overwrite the word-format storage of blocks 9 and 11, which the moves above have already read.
  182.         mov (16)        r[PPREDBUF_Y,-PREDSUBBLK3]<1>:ub        r[PPREDBUF_Y]<32;16,2>:ub                               // Sub-block 12
  183.         mov (16)        r[PPREDBUF_Y,0-PREDSUBBLK3+16]<1>:ub    r[PPREDBUF_Y,PREDSUBBLK1]<32;16,2>:ub   // Sub-block 13
  184.         mov (16)        r[PPREDBUF_Y,-PREDSUBBLK1]<1>:ub        r[PPREDBUF_Y,PREDSUBBLK2]<32;16,2>:ub           // Sub-block 14
  185.         mov (16)        r[PPREDBUF_Y,0-PREDSUBBLK1+16]<1>:ub    r[PPREDBUF_Y,PREDSUBBLK3]<32;16,2>:ub   // Sub-block 15
  186.  
  187. //      All 4 sub-macroblocks (each containing 4 intra_4x4 blocks) have been constructed
  188. //      Save constructed Y picture
  189.         CALL(save_4x4_Y,1)              // Save Intra_4x4 predicted luma data.
  190. //
  191. //  Decode U/V blocks
  192. //
  193. //      Note: The decoding for chroma blocks will be the same for all intra prediction modes
  194. //
  195.         CALL(decode_Chroma_Intra,1)
  196.  
  197. #ifdef SW_SCOREBOARD
  198.    #include "scoreboard_update.asm"
  199. #endif
  200.  
  201. // Terminate the thread (the terminating code is pulled in from EndIntraThread.asm)
  202. //
  203.    #include "EndIntraThread.asm"
  204.  
  205. // End of Intra_4x4
  206.