Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Intra predict 16x16 luma block
  3.  * Copyright © <2010>, Intel Corporation.
  4.  *
  5.  * This program is licensed under the terms and conditions of the
  6.  * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at
  7.  * http://www.opensource.org/licenses/eclipse-1.0.php.
  8.  *
  9.  */
  10. // Module name: intra_Pred_16x16_Y.asm
  11. //
  12. // Intra predict 16x16 luma block
  13. //
  14.         and     (1)     PINTRAPRED_Y<1>:w       INTRA_PRED_MODE(0)REGION(1,0)   0x0F:w  // Keep only the low 4 bits of the prediction-mode field
  15.         // WA for "jmpi" restriction: the byte fetched below is first widened into a dword temp, and the jump uses that temp
  16.         mov (1) REG_INTRA_TEMP_1<1>:ud  r[PINTRAPRED_Y, INTRA_16X16_OFFSET]:ub  // Register-indirect byte read at PINTRAPRED_Y + INTRA_16X16_OFFSET (presumably a per-mode jump-offset table - TODO confirm)
  17.         jmpi (1) REG_INTRA_TEMP_1<0;1,0>:d      // Indirect jump by the fetched value; presumably lands on one of the INTRA_16x16_* labels below
  18.  
  19. // Mode 0: vertical prediction - the top reference row is copied into every destination of the predicted block
  20. INTRA_16x16_VERTICAL:
  21.     $for(0; <16; 2) {
  22.         mov (32)        PRED_YW(%1)<1>  INTRA_REF_TOP(0) {Compr}        // 32 words per compressed mov; %1 advances by 2 (0, 2, ..., 14)
  23.         }
  24.         jmpi (1) End_intra_Pred_16x16_Y  // Done - skip the code of the other modes
  25.  
  26. // Mode 1: horizontal prediction - destinations are filled with a single broadcast byte of the left reference
  27. INTRA_16x16_HORIZONTAL:
  28.         mov (1)         PREF_LEFT_UD<1>:ud      INTRA_REF_LEFT_ID*GRFWIB*0x00010001+0x00040000:ud       // Set address registers for instruction compression: low word = left-reference base, high word = base + 4
  29.     $for(0,0; <16; 2,8) {
  30.         mov (32)        PRED_YW(%1)<1>  r[PREF_LEFT,%2+3]<0;1,0>:ub {Compr}     // Actual left column reference data start at offset 3; <0;1,0> broadcasts one byte, %2 advances by 8 per iteration
  31.         }
  32.         jmpi (1) End_intra_Pred_16x16_Y  // Done - skip the code of the other modes
  33.  
  34. // Mode 2: DC prediction - every predicted pixel is (sum of top and left reference samples + 16) >> 5
  35. INTRA_16x16_DC:
  36.     and.nz.f0.0 (8)     NULLREG         REG_INTRA_PRED_AVAIL_FLAG       INTRA_PRED_UP_AVAIL_FLAG:ud     // Top macroblock available for intra prediction? (f0.0 set if yes)
  37.     and (8)                     acc0<1>:ud      REG_INTRA_PRED_AVAIL_FLAG       INTRA_PRED_LEFT_TH_AVAIL_FLAG+INTRA_PRED_LEFT_BH_AVAIL_FLAG:ud  // Isolate the two left-half (top half / bottom half) availability bits
  38.     xor.z.f0.1 (8)      NULLREG         acc0:ud INTRA_PRED_LEFT_TH_AVAIL_FLAG+INTRA_PRED_LEFT_BH_AVAIL_FLAG:ud  // f0.1 set only when BOTH left halves are available
  39. // Rearrange reference samples for unified DC prediction code
  40. // An unavailable edge is replaced by the other edge; if neither is available both edges end up as 0x80 (128)
  41.         (-f0.0.any8h)   mov (8) INTRA_REF_TOP_W(0)<1>   0x8080:uw       // Top missing: preset top row to 128 per byte
  42.         (-f0.1.any8h)   mov (8) INTRA_REF_LEFT(0)<4>    INTRA_REF_TOP(0)REGION(8,1)     // Left missing: left column = top row (first 8)
  43.         (-f0.1.any8h)   mov (8) INTRA_REF_LEFT(1)<4>    INTRA_REF_TOP(0,8)REGION(8,1)   // Left missing: left column = top row (last 8)
  44.         (-f0.0.any8h)   mov (8) INTRA_REF_TOP(0)<1>             INTRA_REF_LEFT(0)REGION(8,4)    // Top missing: top row = left column (first 8)
  45.         (-f0.0.any8h)   mov (8) INTRA_REF_TOP(0,8)<1>   INTRA_REF_LEFT(1)REGION(8,4)    // Split due to HW limitation
  46. // Perform DC prediction
  47. // Tree-reduce the 32 reference samples to one sum, using PRED_YW(15) as scratch
  48.         add (16)        PRED_YW(15)<1>  INTRA_REF_LEFT(0)REGION(8,4)    INTRA_REF_TOP(0)REGION(16,1)    // 16 partial sums: left[i] + top[i]
  49.         add (8)         PRED_YW(15)<1>  PRED_YW(15)REGION(8,1)  PRED_YW(15,8)REGION(8,1)        // 16 -> 8
  50.         add (4)         PRED_YW(15)<1>  PRED_YW(15)REGION(4,1)  PRED_YW(15,4)REGION(4,1)        // 8 -> 4
  51.         add (2)         PRED_YW(15)<1>  PRED_YW(15)REGION(2,1)  PRED_YW(15,2)REGION(2,1)        // 4 -> 2
  52.         add (32)        acc0<1>:w               PRED_YW(15)REGION(1,0)  PRED_YW(15,1)REGION(1,0) {Compr}        // Set up both acc0 and acc1 with the total sum in every lane
  53.         add     (32)    acc0<1>:w               acc0:w  16:w {Compr}    // Add the rounding term for the >>5 below
  54.  
  55.     $for(0; <16; 2) {
  56.         shr (32)        PRED_YW(%1)<1>  acc0:w  5:w {Compr}     // Write (sum + 16) >> 5 to 32 words per iteration; %1 = 0, 2, ..., 14
  57.         }
  58.         jmpi (1) End_intra_Pred_16x16_Y  // Done - skip the code of the other modes
  59.  
  60. // Mode 3: plane prediction - pred[x,y] = sat((a + 16 + b*(x-7) + c*(y-7)) >> 5)
  61. INTRA_16x16_PLANE:
  62. // Refer to H.264/AVC spec Section 8.3.3.4
  63.  
  64. #define A               REG_INTRA_TEMP_2.0              // All are WORD type. Holds a+16
  65. #define B               REG_INTRA_TEMP_3.0              // Coefficient b
  66. #define C               REG_INTRA_TEMP_3.1              // Coefficient c (word right after B, so b and c are produced by one 2-lane instruction)
  67. #define YP              REG_INTRA_TEMP_0                // Store intermediate results of c*(y-7), even portion. Make sure it's an even GRF
  68. #define YP1             REG_INTRA_TEMP_1                // Store intermediate results of c*(y-7), odd portion. Make sure it's an odd GRF, used in {Compr}
  69. #define XP              REG_INTRA_TEMP_5                // Store intermediate results of a+b*(x-7)+16. Make sure it's an odd GRF
  70.  
  71. // First Calculate constants H and V
  72. //      H1 = sum((-x'-1)*p[8+x',-1]), x'=0,1,...7
  73. //      H2 =  sum((-x'-1)*p[6-x',-1]), x'=7,6,...0
  74. //      H = -H1 + H2
  75. //      The same calculation holds for V
  76. //      (the :v immediates below are the per-lane weights; presumably packed signed 4-bit values, lowest nibble = lane 0 - see the GEN ISA reference)
  77.         mul (8) H1(0)<1>        INTRA_REF_TOP(0,8)REGION(8,1)           0x89ABCDEF:v    // Weights -1,-2,...,-8 on top samples starting at offset 8
  78.         mul (8) H2(0)<1>        INTRA_REF_TOP(0,-1)REGION(8,1)          0xFEDCBA98:v    // Weights -8,-7,...,-1 on top samples starting at offset -1
  79.  
  80.         mul (8) V1(0)<1>        INTRA_REF_LEFT(0,8*4)REGION(8,4)        0x89ABCDEF:v    // Same as H1, on the left column (byte offset 8*4, region stride 4)
  81.         mul (8) V2(0)<1>        INTRA_REF_LEFT(0)REGION(8,4)            0x0FEDCBA9:v    // Weights -7,-6,...,-1,0 on left samples starting at offset 0
  82.         mul (1) V2(0,7)<1>      INTRA_REF_TOP(0,-1)<0;1,0>      -8:w            // Replace 0*p[-1,7] with -8*p[-1,-1]
  83.         // Now, REG_INTRA_TEMP_0 holds [H2, -H1] and REG_INTRA_TEMP_1 holds [V2, -V1]
  84.  
  85.         // Sum up [H2, -H1] and [V2, -V1] using instruction compression
  86.         // ExecSize = 16 is restricted by B-spec for instruction compression
  87.         // Actual intermediate results are in lower sub-registers after each summing step
  88.         add     (16)    H1(0)<1>        -H1(0)  H2(0)   {Compr} // Results in lower 8 WORDs
  89.         add     (16)    H1(0)<1>        H1(0)   H1(0,4) {Compr} // Results in lower 4 WORDs
  90.         add     (16)    H1(0)<1>        H1(0)   H1(0,2) {Compr} // Results in lower 2 WORDs
  91.         add     (16)    H1(0)<1>        H1(0)   H1(0,1) {Compr} // Results in lower 1 WORD
  92.  
  93. //      Calculate a, b, c and further derivations
  94.         mov     (16)    acc0<1>:w       32:w                    // acc0 = 32, rounding term for the >>6 below
  95.         mac     (2)             acc0<1>:w       H1(0)<16;1,0>   5:w     // acc0 += 5 * word 0 of two registers 16 words apart (the H and V sums)
  96.         shr     (2)             B<1>:w          acc0:w  6:w             // Done b,c
  97.         mov     (16)    acc0<1>:w       16:w                    // acc0 = 16, rounding term for the final >>5
  98.         mac     (16)    acc0<1>:w       INTRA_REF_TOP(0,15)<0;1,0>      16:w    // acc0 += 16 * top reference sample at offset 15
  99.         mac     (16)    A<1>:w          INTRA_REF_LEFT(0,15*4)<0;1,0>   16:w    // A = a+16
  100.         mac (16)        XP<1>:w         B<0;1,0>:w              XY_7<16;16,1>:b                 // XP = A+b*(x-7)
  101.         mul     (8)             YP<1>:w         C<0;1,0>:w              XY_7<16;8,2>:b                  // YP = c*(y-7), even portion
  102.         mul     (8)             YP1<1>:w        C<0;1,0>:w              XY_7_1<16;8,2>:b                // YP = c*(y-7), odd portion
  103.  
  104. //      Finally the intra_16x16 plane prediction
  105.    $for(0,0; <16; 2,1) {
  106.         add (32)        acc0<1>:w               XP<16;16,1>:w   YP.%2<16;16,0>:w {Compr}        // Set Width!= 1 to trick EU to use YP1.%2 for 2nd instruction
  107.         shr.sat (32)    PRED_Y(%1)<2>   acc0<16;16,1>:w 5:w {Compr}     // Saturating >>5, stored with destination stride 2; %1 = 0, 2, ..., 14
  108.         }
  109.  
  110. End_intra_Pred_16x16_Y:
  111. // End of intra_Pred_16x16_Y - common exit: modes 0-2 jump here, mode 3 falls through
  112.