Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Intra predict 16x16 luma block
  3.  * Copyright © <2010>, Intel Corporation.
  4.  *
  5.  * Permission is hereby granted, free of charge, to any person obtaining a
  6.  * copy of this software and associated documentation files (the
  7.  * "Software"), to deal in the Software without restriction, including
  8.  * without limitation the rights to use, copy, modify, merge, publish,
  9.  * distribute, sub license, and/or sell copies of the Software, and to
  10.  * permit persons to whom the Software is furnished to do so, subject to
  11.  * the following conditions:
  12.  *
  13.  * The above copyright notice and this permission notice (including the
  14.  * next paragraph) shall be included in all copies or substantial portions
  15.  * of the Software.
  16.  *
  17.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  19.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  20.  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
  21.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  22.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  23.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  24.  *
  25.  * This file was originally licensed under the following license
  26.  *
  27.  *  Licensed under the Apache License, Version 2.0 (the "License");
  28.  *  you may not use this file except in compliance with the License.
  29.  *  You may obtain a copy of the License at
  30.  *
  31.  *      http://www.apache.org/licenses/LICENSE-2.0
  32.  *
  33.  *  Unless required by applicable law or agreed to in writing, software
  34.  *  distributed under the License is distributed on an "AS IS" BASIS,
  35.  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  36.  *  See the License for the specific language governing permissions and
  37.  *  limitations under the License.
  38.  *
  39.  */
  40. // Module name: intra_Pred_16x16_Y.asm
  41. //
  42. // Intra predict 16x16 luma block
  43. //
      // Entry / mode dispatch:
      //   1. Keep only the low 4 bits of INTRA_PRED_MODE(0) in PINTRAPRED_Y.
      //   2. Use PINTRAPRED_Y as a register-indirect index (plus INTRA_16X16_OFFSET) to fetch
      //      one byte, widened into a dword of REG_INTRA_TEMP_1.
      //   3. jmpi by the value just fetched.
      // NOTE(review): presumably the fetched byte is a per-mode jump distance taken from a table
      // that is set up outside this file and leads to the INTRA_16x16_* labels -- confirm.
  44.         and     (1)     PINTRAPRED_Y<1>:w       INTRA_PRED_MODE(0)REGION(1,0)   0x0F:w
  45.         // WA (workaround) for "jmpi" restriction: the byte is first copied into a dword of REG_INTRA_TEMP_1 and jmpi reads it from there
  46.         mov (1) REG_INTRA_TEMP_1<1>:ud  r[PINTRAPRED_Y, INTRA_16X16_OFFSET]:ub
  47.         jmpi (1) REG_INTRA_TEMP_1<0;1,0>:d
  48.  
  49. // Mode 0: vertical prediction - the top reference row is replicated down the block
      // The loop body is expanded 8 times with %1 = 0, 2, ..., 14. Each expansion is one
      // 32-channel compressed mov from INTRA_REF_TOP(0) into PRED_YW starting at index %1.
      // NOTE(review): presumably {Compr} makes each mov cover PRED_YW(%1) and PRED_YW(%1+1),
      // so the 8 expansions fill all 16 rows -- confirm against the PRED_YW definition.
  50. INTRA_16x16_VERTICAL:
  51.     $for(0; <16; 2) {
  52.         mov (32)        PRED_YW(%1)<1>  INTRA_REF_TOP(0) {Compr}
  53.         }
  54.         jmpi (1) End_intra_Pred_16x16_Y               // Done: skip the remaining modes
  55.  
  56. // Mode 1: horizontal prediction - each destination register is filled with one broadcast byte of the left reference
      // The immediate written to PREF_LEFT_UD packs two 16-bit values into one dword:
      //   low  half = INTRA_REF_LEFT_ID*GRFWIB
      //   high half = INTRA_REF_LEFT_ID*GRFWIB + 4
      // (the *0x00010001 duplicates the value into both halves, +0x00040000 adds 4 to the upper one).
      // NOTE(review): presumably these are two consecutive address sub-registers, the second one
      // being picked up by the second half of each compressed mov (next left sample, 4 bytes on) -- confirm.
  57. INTRA_16x16_HORIZONTAL:
  58.         mov (1)         PREF_LEFT_UD<1>:ud      INTRA_REF_LEFT_ID*GRFWIB*0x00010001+0x00040000:ud       // Set address registers for instruction compression
      // Expanded 8 times: %1 = 0, 2, ..., 14 (destination index) and %2 = 0, 8, ..., 56 (byte offset into the left reference).
  59.     $for(0,0; <16; 2,8) {
  60.         mov (32)        PRED_YW(%1)<1>  r[PREF_LEFT,%2+3]<0;1,0>:ub {Compr}     // Actual left column reference data starts at offset 3; <0;1,0> broadcasts that single byte to all channels
  61.         }
  62.         jmpi (1) End_intra_Pred_16x16_Y               // Done: skip the remaining modes
  63.  
  64. // Mode 2: DC prediction - every predicted sample is (sum of 16 top + 16 left reference samples + 16) >> 5
      //
      // Availability handling (so that a single summation path can be used afterwards):
      //   f0.0 is set where REG_INTRA_PRED_AVAIL_FLAG & INTRA_PRED_UP_AVAIL_FLAG is non-zero (top available).
      //   f0.1 is set where both INTRA_PRED_LEFT_TH_AVAIL_FLAG and INTRA_PRED_LEFT_BH_AVAIL_FLAG bits are
      //        present in REG_INTRA_PRED_AVAIL_FLAG (the xor of the masked value with the mask is zero).
      //   The "-f0.x.any8h" predicated moves below run only when the corresponding flag is NOT set:
      //     top missing            -> top reference is first preset to 0x80 in every byte,
      //     left missing           -> left reference is overwritten with the (possibly preset) top reference,
      //     top missing            -> top reference is overwritten with the left reference.
      //   Net effect: if only one neighbour exists it is counted twice, if none exists every sample is 0x80.
  65. INTRA_16x16_DC:
  66.     and.nz.f0.0 (8)     NULLREG         REG_INTRA_PRED_AVAIL_FLAG       INTRA_PRED_UP_AVAIL_FLAG:ud     // Top macroblock available for intra prediction?
  67.     and (8)                     acc0<1>:ud      REG_INTRA_PRED_AVAIL_FLAG       INTRA_PRED_LEFT_TH_AVAIL_FLAG+INTRA_PRED_LEFT_BH_AVAIL_FLAG:ud  // Isolate the two left-half availability bits
  68.     xor.z.f0.1 (8)      NULLREG         acc0:ud INTRA_PRED_LEFT_TH_AVAIL_FLAG+INTRA_PRED_LEFT_BH_AVAIL_FLAG:ud  // f0.1 set only if BOTH left halves are available
  69. // Rearrange reference samples for unified DC prediction code
  70. //
  71.         (-f0.0.any8h)   mov (8) INTRA_REF_TOP_W(0)<1>   0x8080:uw                                       // No top: preset top reference bytes to 0x80
  72.         (-f0.1.any8h)   mov (8) INTRA_REF_LEFT(0)<4>    INTRA_REF_TOP(0)REGION(8,1)                     // No left: left[0..7]  = top[0..7]
  73.         (-f0.1.any8h)   mov (8) INTRA_REF_LEFT(1)<4>    INTRA_REF_TOP(0,8)REGION(8,1)                   // No left: left[8..15] = top[8..15]
  74.         (-f0.0.any8h)   mov (8) INTRA_REF_TOP(0)<1>             INTRA_REF_LEFT(0)REGION(8,4)            // No top: top[0..7]  = left[0..7]
  75.         (-f0.0.any8h)   mov (8) INTRA_REF_TOP(0,8)<1>   INTRA_REF_LEFT(1)REGION(8,4)    // Split due to HW limitation (No top: top[8..15] = left[8..15])
  76. // Perform DC prediction
  77. //
      // Pairwise left+top sums go to PRED_YW(15), which serves as scratch space here and is
      // overwritten with the final prediction by the last loop step below. The 16 partial sums
      // are folded 16 -> 8 -> 4 -> 2, and the last two are added into the accumulator.
  78.         add (16)        PRED_YW(15)<1>  INTRA_REF_LEFT(0)REGION(8,4)    INTRA_REF_TOP(0)REGION(16,1)
  79.         add (8)         PRED_YW(15)<1>  PRED_YW(15)REGION(8,1)  PRED_YW(15,8)REGION(8,1)
  80.         add (4)         PRED_YW(15)<1>  PRED_YW(15)REGION(4,1)  PRED_YW(15,4)REGION(4,1)
  81.         add (2)         PRED_YW(15)<1>  PRED_YW(15)REGION(2,1)  PRED_YW(15,2)REGION(2,1)
  82.         add (32)        acc0<1>:w               PRED_YW(15)REGION(1,0)  PRED_YW(15,1)REGION(1,0) {Compr}        // Set up both acc0 and acc1
      // Rounding term. The accumulator source and the immediate are two separate operands
      // ("acc0:w 16:w"), the same form as every other two-source instruction in this file.
  83.         add     (32)    acc0<1>:w               acc0:w  16:w {Compr}
  84.  
      // Expanded 8 times with %1 = 0, 2, ..., 14: store (sum + 16) >> 5 through a 32-channel compressed shr each time.
  85.     $for(0; <16; 2) {
  86.         shr (32)        PRED_YW(%1)<1>  acc0:w  5:w {Compr}
  87.         }
  88.         jmpi (1) End_intra_Pred_16x16_Y               // Done: skip the remaining mode
  89.  
  90. // Mode 3: plane prediction - pred[x,y] = clip((a + b*(x-7) + c*(y-7) + 16) >> 5)
  91. INTRA_16x16_PLANE:
  92. // Refer to H.264/AVC spec Section 8.3.3.4
  93.  
  94. #define A               REG_INTRA_TEMP_2.0              // All are WORD type
  95. #define B               REG_INTRA_TEMP_3.0
  96. #define C               REG_INTRA_TEMP_3.1
  97. #define YP              REG_INTRA_TEMP_0                // Store intermediate results of c*(y-7). Make sure it's an even GRF
  98. #define YP1             REG_INTRA_TEMP_1                // Store intermediate results of c*(y-7). Make sure it's an odd GRF, used in {Compr}
  99. #define XP              REG_INTRA_TEMP_5                // Store intermediate results of a+b*(x-7)+16. Make sure it's an odd GRF
  100.  
  101. // First Calculate constants H and V
  102. //      H1 = sum((-x'-1)*p[8+x',-1]), x'=0,1,...7
  103. //      H2 =  sum((-x'-1)*p[6-x',-1]), x'=7,6,...0
  104. //      H = -H1 + H2
  105. //      The same calculation holds for V
  106. //
      // The ":v" immediates are packed vectors of signed 4-bit weights, lowest nibble first:
      //   0x89ABCDEF:v = -1,-2,...,-8     0xFEDCBA98:v = -8,-7,...,-1     0x0FEDCBA9:v = -7,-6,...,-1,0
  107.         mul (8) H1(0)<1>        INTRA_REF_TOP(0,8)REGION(8,1)           0x89ABCDEF:v    // -(x'+1) * top[8+x']
  108.         mul (8) H2(0)<1>        INTRA_REF_TOP(0,-1)REGION(8,1)          0xFEDCBA98:v    // Weights -8..-1 applied to top[-1..6]
  109.  
  110.         mul (8) V1(0)<1>        INTRA_REF_LEFT(0,8*4)REGION(8,4)        0x89ABCDEF:v    // Same as H1, on the lower 8 left samples (4-byte stride)
  111.         mul (8) V2(0)<1>        INTRA_REF_LEFT(0)REGION(8,4)            0x0FEDCBA9:v    // Weights -7..-1,0 on the upper 8 left samples; last term patched below
  112.         mul (1) V2(0,7)<1>      INTRA_REF_TOP(0,-1)<0;1,0>      -8:w            // Replace 0*p[-1,7] with -8*p[-1,-1]
  113.         // Now, REG_INTRA_TEMP_0 holds [H2, -H1] and REG_INTRA_TEMP_1 holds [V2, -V1]
  114.  
  115.         // Sum up [H2, -H1] and [V2, -V1] using instruction compression
  116.         // ExecSize = 16 is restricted by B-spec for instruction compression
  117.         // Actual intermediate results are in lower sub-registers after each summing step
  118.         add     (16)    H1(0)<1>        -H1(0)  H2(0)   {Compr} // Results in lower 8 WORDs
  119.         add     (16)    H1(0)<1>        H1(0)   H1(0,4) {Compr} // Results in lower 4 WORDs
  120.         add     (16)    H1(0)<1>        H1(0)   H1(0,2) {Compr} // Results in lower 2 WORDs
  121.         add     (16)    H1(0)<1>        H1(0)   H1(0,1) {Compr} // Results in lower 1 WORD
  122.  
  123. //      Calculate a, b, c and further derivations
      //      b = (5*H + 32) >> 6, c = (5*V + 32) >> 6: the <16;1,0> region with ExecSize 2 picks word 0 of
      //      H1(0) and the word 16 elements further on. NOTE(review): presumably that second word is the V sum -- confirm.
  124.         mov     (16)    acc0<1>:w       32:w                                    // Rounding term for b and c
  125.         mac     (2)             acc0<1>:w       H1(0)<16;1,0>   5:w
  126.         shr     (2)             B<1>:w          acc0:w  6:w             // Done b,c
  127.         mov     (16)    acc0<1>:w       16:w                                    // Rounding term for the final >> 5
  128.         mac     (16)    acc0<1>:w       INTRA_REF_TOP(0,15)<0;1,0>      16:w    // acc0 += 16 * top[15]
  129.         mac     (16)    A<1>:w          INTRA_REF_LEFT(0,15*4)<0;1,0>   16:w    // A = a+16
  130.         mac (16)        XP<1>:w         B<0;1,0>:w              XY_7<16;16,1>:b                 // XP = A+b*(x-7)
  131.         mul     (8)             YP<1>:w         C<0;1,0>:w              XY_7<16;8,2>:b                  // YP = c*(y-7), even portion
  132.         mul     (8)             YP1<1>:w        C<0;1,0>:w              XY_7_1<16;8,2>:b                // YP = c*(y-7), odd portion
  133.  
  134. //      Finally the intra_16x16 plane prediction
      //      Expanded 8 times with %1 = 0, 2, ..., 14 (destination index) and %2 = 0, 1, ..., 7 (word of YP / YP1).
      //      Each step adds the per-row term to XP, then stores (sum >> 5) with saturation at a destination stride of 2.
  135.    $for(0,0; <16; 2,1) {
  136.         add (32)        acc0<1>:w               XP<16;16,1>:w   YP.%2<16;16,0>:w {Compr}        // Set Width!= 1 to trick EU to use YP1.%2 for 2nd instruction
  137.         shr.sat (32)    PRED_Y(%1)<2>   acc0<16;16,1>:w 5:w {Compr}
  138.         }
  139.  
  140. End_intra_Pred_16x16_Y:
  141. // End of intra_Pred_16x16_Y: common exit label; modes 0-2 reach it through jmpi, mode 3 (plane) falls through into it
  142.