Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Decode Intra_4x4 macroblock
  3.  * Copyright © <2010>, Intel Corporation.
  4.  *
  5.  * This program is licensed under the terms and conditions of the
  6.  * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at
  7.  * http://www.opensource.org/licenses/eclipse-1.0.php.
  8.  *
  9.  */
  10. // Kernel name: Intra_4x4.asm
  11. //
  12. // Decoding of Intra_4x4 macroblock
  13. //
  14. //  $Revision: 12 $
  15. //  $Date: 10/18/06 4:10p $
  16. //
  17.  
  18. // ----------------------------------------------------
  19. //  Main: Intra_4x4
  20. // ----------------------------------------------------
  21. // Flow of this kernel as written below: shared setup include, load neighbor reference data, call intra_Pred_4x4_Y_4 once for each of the four sub-macroblocks (packing the result after every second call), save luma, decode chroma, end the thread.
  22. #define INTRA_4X4        // Build marker for this kernel; presumably tested by the included/called modules (not visible here) - verify
  23.  
  24. .kernel Intra_4x4
  25. INTRA_4x4:
  26.  
  27. #ifdef _DEBUG
  28. // Workaround (WA) for FULSIM: write a fixed marker value into acc0 so we'll know which kernel is being debugged
  29. mov (1) acc0:ud 0x02aa55a5:ud
  30. #endif
  31. // Shared setup code comes from the include below; its contents are not visible in this file.
  32. #include "SetupForHWMC.asm"
  33. // Rebind PPREDBUF_Y for this kernel: drop any earlier definition, then map it to address sub-register a0.3 so it can be used as the index in r[PPREDBUF_Y,offset] operands below.
  34. #undef          PPREDBUF_Y
  35. #define     PPREDBUF_Y          a0.3    // Pointer to predicted Y picture
  36.  
  37. #define         REG_INTRA_PRED_AVAIL    REG_INTRA_TEMP_4        // Working copy of the neighbor-availability flags; filled by the shr (2) below and updated after each sub-macroblock
  38. #define         REG_INTRA_4X4_PRED              REG_INTRA_TEMP_7                // Store predicted Intra_4x4 data (not referenced elsewhere in this listing)
  39.  
  40. // Offset where 4x4 predicted data blocks are stored: PREDSUBBLKn = n*GRFWIB, used below as byte offsets relative to PPREDBUF_Y (GRFWIB is defined elsewhere, presumably the GRF register width in bytes - verify)
  41. #define PREDSUBBLK0             0*GRFWIB
  42. #define PREDSUBBLK1             1*GRFWIB
  43. #define PREDSUBBLK2             2*GRFWIB
  44. #define PREDSUBBLK3             3*GRFWIB
  45. #define PREDSUBBLK4             4*GRFWIB
  46. #define PREDSUBBLK5             5*GRFWIB
  47. #define PREDSUBBLK6             6*GRFWIB
  48. #define PREDSUBBLK7             7*GRFWIB
  49. #define PREDSUBBLK8             8*GRFWIB
  50. #define PREDSUBBLK9             9*GRFWIB
  51. #define PREDSUBBLK10    10*GRFWIB
  52. #define PREDSUBBLK11    11*GRFWIB
  53. #define PREDSUBBLK12    12*GRFWIB
  54. #define PREDSUBBLK13    13*GRFWIB
  55. #define PREDSUBBLK14    14*GRFWIB
  56. #define PREDSUBBLK15    15*GRFWIB
  57.  
  58. // 4x4 error block byte offset within the 8x8 error block (these four names are not referenced elsewhere in this listing)
  59. #define ERRBLK0         0
  60. #define ERRBLK1         8
  61. #define ERRBLK2         64
  62. #define ERRBLK3         72
  63.  
  64. #ifdef SW_SCOREBOARD    
  65.    CALL(scoreboard_start_intra,1)       // Software-scoreboard builds only; routine and CALL macro are defined elsewhere
  66. #endif
  67.  
  68. #ifdef SW_SCOREBOARD    
  69.         wait    n0:ud           //      Now wait on notification register n0 for the scoreboard to respond
  70. #endif
  71.  
  72. //
  73. //  Decode Y blocks
  74. //
  75. //      Load reference data from neighboring macroblocks
  76.    CALL(load_Intra_Ref_Y,1)
  77.  
  78.         mov     (1)     PERROR<1>:w     ERRBUF*GRFWIB:w                 // Pointer to macroblock error data
  79.         mov     (1)     PPREDBUF_Y<1>:w PREDBUF*GRFWIB:w        // Pointer to predicted data
  80.         shr (2) REG_INTRA_PRED_AVAIL<1>:w       REG_INTRA_PRED_AVAIL_FLAG_BYTE<0;1,0>:ub        0x40:v  // Word 0 = flag byte >> 0, word 1 = flag byte >> 4 (0x40:v supplies the per-channel shift counts 0 and 4)
  81.     and.nz.f0.0 (8)     NULLREG REG_INTRA_PRED_AVAIL_FLAG_BYTE<0;1,0>:ub        4:w     // Top-right macroblock available for intra prediction? (tests bit 2 of the flag byte; only flag f0.0 is updated, the result goes to NULLREG)
  82.         (-f0.0.any8h) mov (8)   INTRA_REF_TOP(0,16)<1>  INTRA_REF_TOP(0,15)REGION(1,0)  // If not available: extend right boundary of MB B to C by copying top-reference element 15 into the 8 channels starting at element 16 (assumes REGION(1,0) replicates a single element - verify)
  83.  
  84. //      Intra predict Intra_4x4 luma blocks
  85. //      Each pass below stages top reference, left reference and prediction modes, then calls intra_Pred_4x4_Y_4 (defined elsewhere).
  86. //      Sub-macroblock 0 *****************
  87.         mov     (16)    REF_TOP0(0)<1>  INTRA_REF_TOP0(0)       // Top reference data
  88.         mov     (8)             REF_LEFT(0)<1>  INTRA_REF_LEFT(0)REGION(8,4)    // Left reference data
  89.         shr     (4)             PRED_MODE<1>:w  INTRA_PRED_MODE(0)<1;2,0>       0x4040:v        // Expand IntraPredMode to 1 byte/block: region <1;2,0> reads each source element twice and 0x4040:v shifts the copies right by 0 and by 4
  90.         CALL(intra_Pred_4x4_Y_4,1)
  91.     add (1)             PERROR<1>:w     PERROR<0;1,0>:w 0x0080:w        // Pointers to next error block (advance by 0x80 = 128 bytes)
  92.  
  93.         or  (1) REG_INTRA_PRED_AVAIL<1>:w       REG_INTRA_PRED_AVAIL<0;1,0>:w   0x1:w   // Left neighbor is available now (set bit 0 of word 0)
  94.  
  95. //      Sub-macroblock 1 *****************
  96.  
  97.         mov     (16)    REF_TOP0(0)<1>  INTRA_REF_TOP0(0,8)     // Top reference data
  98.         mov     (4)             REF_LEFT(0)<1>  r[PPREDBUF_Y,PREDSUBBLK1+6]<8;1,0>:ub   // Left reference data (top half): bytes 6, 14, 22, 30 of predicted sub-block 1 (one byte every 8)
  99.         mov     (4)             REF_LEFT(0,4)<1>        r[PPREDBUF_Y,PREDSUBBLK3+6]<8;1,0>:ub   // Left reference data (bottom half): same byte pattern taken from predicted sub-block 3
  100.         shr     (4)             PRED_MODE<1>:w  INTRA_PRED_MODE(0,2)<1;2,0>     0x4040:v        // Expand IntraPredMode to 1 byte/block
  101.         add     (1)             PPREDBUF_Y<1>:w PPREDBUF_Y<0;1,0>:w     4*GRFWIB:w      // Pointer to predicted sub-macroblock 1 (advanced only after the left-reference reads above, which index from the old pointer)
  102.         CALL(intra_Pred_4x4_Y_4,1)
  103.     add (1)             PERROR<1>:w     PERROR<0;1,0>:w 0x0080:w        // Pointers to next error block (advance by 0x80 = 128 bytes)
  104.  
  105.         or  (1) REG_INTRA_PRED_AVAIL<1>:w       REG_INTRA_PRED_AVAIL.1<0;1,0>:w 0x2:w   // Top neighbor is available now: source is sub-register .1 (presumably the flag byte >> 4 word written by the shr (2) above - verify), result with bit 1 set is written to word 0
  106.  
  107. //      Pack constructed data from word-aligned to byte-aligned format
  108. //      to speed up save_4x4_Y module later
  109. //      PPREDBUF_Y now points to sub-block #4, so the negative offsets below reach sub-blocks 0-3
  110.         mov (16)        r[PPREDBUF_Y,-PREDSUBBLK4]<1>:ub        r[PPREDBUF_Y,-PREDSUBBLK4]<32;16,2>:ub          // Sub-block 0 (source stride 2 keeps every other byte; packed in place into bytes 0-15)
  111.         mov (16)        r[PPREDBUF_Y,0-PREDSUBBLK4+16]<1>:ub    r[PPREDBUF_Y,-PREDSUBBLK3]<32;16,2>:ub  // Sub-block 1 (packed into bytes 16-31 next to sub-block 0)
  112.         mov (16)        r[PPREDBUF_Y,-PREDSUBBLK2]<1>:ub        r[PPREDBUF_Y,-PREDSUBBLK2]<32;16,2>:ub          // Sub-block 2
  113.         mov (16)        r[PPREDBUF_Y,0-PREDSUBBLK2+16]<1>:ub    r[PPREDBUF_Y,-PREDSUBBLK1]<32;16,2>:ub  // Sub-block 3
  114. // The moves below write to offsets -PREDSUBBLK3 and -PREDSUBBLK1, which were the sources of sub-blocks 1 and 3 above, so the order of these eight moves must be kept.
  115.         mov (16)        r[PPREDBUF_Y,-PREDSUBBLK3]<1>:ub        r[PPREDBUF_Y]<32;16,2>:ub                               // Sub-block 4
  116.         mov (16)        r[PPREDBUF_Y,0-PREDSUBBLK3+16]<1>:ub    r[PPREDBUF_Y,PREDSUBBLK1]<32;16,2>:ub   // Sub-block 5
  117.         mov (16)        r[PPREDBUF_Y,-PREDSUBBLK1]<1>:ub        r[PPREDBUF_Y,PREDSUBBLK2]<32;16,2>:ub           // Sub-block 6
  118.         mov (16)        r[PPREDBUF_Y,0-PREDSUBBLK1+16]<1>:ub    r[PPREDBUF_Y,PREDSUBBLK3]<32;16,2>:ub   // Sub-block 7
  119.  
  120. //      Sub-macroblock 2 *****************
  121. // Top reference is rebuilt here from the packed sub-blocks 2,3,6,7 produced above; PPREDBUF_Y is not advanced before this call.
  122.         mov     (4)             REF_TOP0(0)<1>          INTRA_REF_LEFT0(0,28)REGION(4,1)                // Top-left reference data
  123.         mov     (8)             REF_TOP0(0,4)<1>        r[PPREDBUF_Y,0-2*GRFWIB+12]<16;4,1>:ub  // Top reference data from SB 2,3: bytes 12-15 and 28-31 of the packed data two GRFs below PPREDBUF_Y
  124.         mov     (8)             REF_TOP0(0,12)<1>       r[PPREDBUF_Y,0-GRFWIB+12]<16;4,1>:ub    // Top reference data from SB 6,7: bytes 12-15 and 28-31 of the packed data one GRF below PPREDBUF_Y
  125.         mov     (8)             REF_TOP0(0,20)<1>       r[PPREDBUF_Y,0-GRFWIB+31]<0;1,0>:ub             // Top-right reference data: byte 31 of that same GRF replicated into 8 channels
  126.         mov     (16)    REG_INTRA_REF_TOP<1>:w  REF_TOP_W(0)            // Store top reference data for SubMB #2 and #3.
  127.         mov     (8)             REF_LEFT(0)<1>          INTRA_REF_LEFT(1)REGION(8,4)    // Left reference data
  128.         shr     (4)             PRED_MODE<1>:w          INTRA_PRED_MODE(0,4)<1;2,0>     0x4040:v        // Expand IntraPredMode to 1 byte/block
  129.         CALL(intra_Pred_4x4_Y_4,1)
  130.     add (1)             PERROR<1>:w     PERROR<0;1,0>:w 0x0080:w        // Pointers to next error block (advance by 0x80 = 128 bytes)
  131.  
  132.         or  (1) REG_INTRA_PRED_AVAIL<1>:w       REG_INTRA_PRED_AVAIL<0;1,0>:w   0x1:w   // Left neighbor is available now (set bit 0 of word 0)
  133.  
  134. //      Sub-macroblock 3 *****************
  135.  
  136.         mov     (16)    REF_TOP0(0)<1>  INTRA_REF_TOP0(0,8)             // Top reference data
  137.         mov     (8)             REF_TOP0(0,16)<1>       INTRA_REF_TOP0(0,24)REGION(8,1) // Top reference data (8 further elements placed at REF_TOP0 offset 16)
  138.         mov     (4)             REF_LEFT(0)<1>  r[PPREDBUF_Y,PREDSUBBLK1+6]<8;1,0>:ub   // Left reference data (top half): bytes 6, 14, 22, 30 at offset PREDSUBBLK1 from the not-yet-advanced pointer
  139.         mov     (4)             REF_LEFT(0,4)<1>        r[PPREDBUF_Y,PREDSUBBLK3+6]<8;1,0>:ub   // Left reference data (bottom half): same byte pattern at offset PREDSUBBLK3
  140.         shr     (4)             PRED_MODE<1>:w  INTRA_PRED_MODE(0,6)<1;2,0>     0x4040:v        // Expand IntraPredMode to 1 byte/block
  141.         add     (1)             PPREDBUF_Y<1>:w PPREDBUF_Y<0;1,0>:w     4*GRFWIB:w      // Pointer to predicted sub-macroblock 3 (advanced only after the left-reference reads above)
  142.         CALL(intra_Pred_4x4_Y_4,1)
  143.  
  144. //      Pack constructed data from word-aligned to byte-aligned format
  145. //      to speed up save_4x4_Y module later (same eight-move sequence as after sub-macroblock 1, and the same ordering constraint applies)
  146. //      PPREDBUF_Y now points to sub-block #12
  147.         mov (16)        r[PPREDBUF_Y,-PREDSUBBLK4]<1>:ub        r[PPREDBUF_Y,-PREDSUBBLK4]<32;16,2>:ub          // Sub-block 8
  148.         mov (16)        r[PPREDBUF_Y,0-PREDSUBBLK4+16]<1>:ub    r[PPREDBUF_Y,-PREDSUBBLK3]<32;16,2>:ub  // Sub-block 9
  149.         mov (16)        r[PPREDBUF_Y,-PREDSUBBLK2]<1>:ub        r[PPREDBUF_Y,-PREDSUBBLK2]<32;16,2>:ub          // Sub-block 10
  150.         mov (16)        r[PPREDBUF_Y,0-PREDSUBBLK2+16]<1>:ub    r[PPREDBUF_Y,-PREDSUBBLK1]<32;16,2>:ub  // Sub-block 11
  151.  
  152.         mov (16)        r[PPREDBUF_Y,-PREDSUBBLK3]<1>:ub        r[PPREDBUF_Y]<32;16,2>:ub                               // Sub-block 12
  153.         mov (16)        r[PPREDBUF_Y,0-PREDSUBBLK3+16]<1>:ub    r[PPREDBUF_Y,PREDSUBBLK1]<32;16,2>:ub   // Sub-block 13
  154.         mov (16)        r[PPREDBUF_Y,-PREDSUBBLK1]<1>:ub        r[PPREDBUF_Y,PREDSUBBLK2]<32;16,2>:ub           // Sub-block 14
  155.         mov (16)        r[PPREDBUF_Y,0-PREDSUBBLK1+16]<1>:ub    r[PPREDBUF_Y,PREDSUBBLK3]<32;16,2>:ub   // Sub-block 15
  156.  
  157. //      All 4 sub-macroblocks (each containing 4 intra_4x4 blocks) have been constructed
  158. //      Save constructed Y picture
  159.         CALL(save_4x4_Y,1)              // Save Intra_4x4 predicted luma data.
  160. //
  161. //  Decode U/V blocks
  162. //
  163. //      Note: The decoding for chroma blocks will be the same for all intra prediction mode
  164. //
  165.         CALL(decode_Chroma_Intra,1)
  166.  
  167. #ifdef SW_SCOREBOARD
  168.    #include "scoreboard_update.asm"
  169. #endif
  170.  
  171. // Terminate the thread (the terminating code itself lives in the include below, not in this file)
  172. //
  173.    #include "EndIntraThread.asm"
  174.  
  175. // End of Intra_4x4
  176.