Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Intra predict 8x8 chroma block
  3.  * Copyright © <2010>, Intel Corporation.
  4.  *
  5.  * Permission is hereby granted, free of charge, to any person obtaining a
  6.  * copy of this software and associated documentation files (the
  7.  * "Software"), to deal in the Software without restriction, including
  8.  * without limitation the rights to use, copy, modify, merge, publish,
  9.  * distribute, sub license, and/or sell copies of the Software, and to
  10.  * permit persons to whom the Software is furnished to do so, subject to
  11.  * the following conditions:
  12.  *
  13.  * The above copyright notice and this permission notice (including the
  14.  * next paragraph) shall be included in all copies or substantial portions
  15.  * of the Software.
  16.  *
  17.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  19.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  20.  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
  21.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  22.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  23.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  24.  *
  25.  * This file was originally licensed under the following license
  26.  *
  27.  *  Licensed under the Apache License, Version 2.0 (the "License");
  28.  *  you may not use this file except in compliance with the License.
  29.  *  You may obtain a copy of the License at
  30.  *
  31.  *      http://www.apache.org/licenses/LICENSE-2.0
  32.  *
  33.  *  Unless required by applicable law or agreed to in writing, software
  34.  *  distributed under the License is distributed on an "AS IS" BASIS,
  35.  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  36.  *  See the License for the specific language governing permissions and
  37.  *  limitations under the License.
  38.  *
  39.  */
  40. #if !defined(__INTRA_PRED_CHROMA__)             // Make sure this is only included once
  41. #define __INTRA_PRED_CHROMA__
  42.  
  43. // Module name: intra_Pred_Chroma.asm
  44. //
  45. // Intra predict 8x8 chroma block
  46. //
  47.  
  48.         shr     (1)     PINTRAPRED_UV<1>:w      REG_INTRA_CHROMA_PRED_MODE<0;1,0>:ub    INTRA_CHROMA_PRED_MODE_SHIFT:w  // Bits 1:0 = intra chroma pred mode; the shifted value is written to PINTRAPRED_UV, which indexes the register-indirect read below
  49.         // WA for "jmpi" restriction: the jump operand is first copied into a dword register, and jmpi then takes that register as its source
  50.         mov (1) REG_INTRA_TEMP_1<1>:d   r[PINTRAPRED_UV, INTRA_CHROMA_OFFSET]:b // Register-indirect byte read at PINTRAPRED_UV + INTRA_CHROMA_OFFSET, widened to a dword (presumably a per-mode jump-distance table defined elsewhere - TODO confirm)
  51.         jmpi (1) REG_INTRA_TEMP_1<0;1,0>:d      // Indirect jump to the selected mode handler (the "Mode 0".."Mode 3" labels that follow)
  52.  
  53. // Mode 0
  54. INTRA_CHROMA_DC:
  55.     and.nz.f0.0 (8)             NULLREG         REG_INTRA_PRED_AVAIL_FLAG       INTRA_PRED_UP_AVAIL_FLAG:ud     // Top macroblock available for intra prediction? Result is kept in f0.0 and reused by the predicated moves below
  56.  
  57. // Calculate DC values for sub-block 0 and 3
  58. //
  59. // Rearrange reference samples for unified DC prediction code
  60. //      Need to check INTRA_PRED_LEFT_TH_AVAIL_FLAG for blk0 and INTRA_PRED_LEFT_BH_AVAIL_FLAG for blk3
  61. //      A missing side is overwritten with a copy of the other side (or with 0x8080 when both are missing),
  62.         (-f0.0.any8h)   mov (8)         INTRA_REF_TOP_W(0)<1>   0x8080:uw       // Up not available: fill the 8 top reference words with 0x8080 (0x80 = 128 in each byte)
  63.  
  64.     and.nz.f0.1 (4)     NULLREG         REG_INTRA_PRED_AVAIL_FLAG       INTRA_PRED_LEFT_TH_AVAIL_FLAG:ud        // f0.1 = left top-half availability
  65.         (-f0.1.any4h)   mov (4)         INTRA_REF_LEFT_W(0)<2>  INTRA_REF_TOP_W(0)REGION(4,1)   // Left top half macroblock not available for intra prediction: copy top words 0-3 over it
  66.     and.nz.f0.1 (4)     NULLREG         REG_INTRA_PRED_AVAIL_FLAG       INTRA_PRED_LEFT_BH_AVAIL_FLAG:ud        // f0.1 = left bottom-half availability (still live after this block)
  67.         (-f0.1.any4h)   mov (4)         INTRA_REF_LEFT_W(0,8)<2>        INTRA_REF_TOP_W(0,4)REGION(4,1) // Left bottom half macroblock not available for intra prediction: copy top words 4-7 over it
  68.  
  69.         (-f0.0.any8h)   mov (8)         INTRA_REF_TOP_W(0)<1>   INTRA_REF_LEFT_W(0)REGION(8,2)  // Up not available: mirror the (possibly substituted) left column into the top row so that top+left below adds two copies of the same data
  70. // Calculate DC prediction
  71. //
  72.         add (16)        PRED_UVW(0)<1>  INTRA_REF_TOP(0)REGION(16,1)    INTRA_REF_LEFT_UV(0)<4;2,1>     // Sum of top and left reference
  73.         add (8)         PRED_UVW(0)<1>  PRED_UVW(0)<4;2,1>      PRED_UVW(0,2)<4;2,1>    // Sum of first half (blk #0) and second half (blk #3)
  74.  
  75.         add (8)         PRED_UVW(9)<1>  PRED_UVW(0)<0;2,1>      PRED_UVW(0,2)<0;2,1>    // Sum of blk #0 (the <0;2,1> region repeats the same word pair across all 8 lanes)
  76.         add (8)         PRED_UVW(11,8)<1>       PRED_UVW(0,4)<0;2,1>    PRED_UVW(0,6)<0;2,1>    // Sum of blk #3, stored in the upper 8 words of PRED_UVW(11)
  77.  
  78. // Calculate DC values for sub-block 1 and 2
  79. //
  80. // Rearrange reference samples for unified DC prediction code
  81. //      On entry f0.0 = up available and f0.1 = left bottom-half available (both set earlier in INTRA_CHROMA_DC)
  82.         // Blk #2: prefer the left (bottom half) reference, fall back to the top row, then to 0x8080
  83.         (-f0.0.any4h)   mov (4)         INTRA_REF_TOP_W(0)<1>   0x8080:uw       // Up not available: default top words 0-3 to 0x8080
  84.         (f0.1.any4h)    mov (4)         INTRA_REF_TOP_W(0)<1>   INTRA_REF_LEFT_W(0,8)REGION(4,2)        // Always use available left reference
  85.         (-f0.1.any4h)   mov (4)         INTRA_REF_LEFT_W(0,8)<2>        INTRA_REF_TOP_W(0)REGION(4,1)   // Left bottom half not available: copy top words 0-3 (real top or 0x8080) over it
  86.  
  87.         // Blk #1: prefer the top reference, fall back to the left (top half) column, then to 0x8080
  88.     and.nz.f0.1 (4)     NULLREG         REG_INTRA_PRED_AVAIL_FLAG       INTRA_PRED_LEFT_TH_AVAIL_FLAG:ud        // f0.1 = left top-half availability
  89.         (-f0.1.any4h)   mov (4)         INTRA_REF_LEFT_W(0)<2>  0x8080:uw       // Left top half not available: default it to 0x8080
  90.         (f0.0.any4h)    mov (4)         INTRA_REF_LEFT_W(0)<2>  INTRA_REF_TOP_W(0,4)REGION(4,1) // Always use available top reference
  91.         (-f0.0.any4h)   mov (4)         INTRA_REF_TOP_W(0,4)<1> INTRA_REF_LEFT_W(0)REGION(4,2)  // Up not available: copy left top half (real left or 0x8080) over top words 4-7
  92.  
  93. // Calculate DC prediction
  94. //
  95.         add (8) PRED_UVW(0)<1>          INTRA_REF_TOP(0)REGION(8,1)     INTRA_REF_LEFT_UV(0,16)<4;2,1>  // Sum of top and left reference for blk #2
  96.         add (8) PRED_UVW(0,8)<1>        INTRA_REF_LEFT_UV(0)<4;2,1>     INTRA_REF_TOP(0,8)REGION(8,1)   // Sum of top and left reference for blk #1
  97.         add (8) PRED_UVW(0)<1>          PRED_UVW(0)<4;2,1>              PRED_UVW(0,2)<4;2,1>    // Sum of first half (blk #2) and second half (blk #1)
  98.  
  99.         add (8) PRED_UVW(9,8)<1>        PRED_UVW(0,4)<0;2,1>    PRED_UVW(0,6)<0;2,1>    // Sum of blk #1, stored in the upper 8 words of PRED_UVW(9)
  100.         add (8) PRED_UVW(11)<1>         PRED_UVW(0)<0;2,1>              PRED_UVW(0,2)<0;2,1>    // Sum of blk #2, stored in the lower 8 words of PRED_UVW(11)
  101.  
  102. // Now, PRED_UVW(9) holds sums for blks #0 and #1 and PRED_UVW(11) holds sums for blks #2 and #3
  103. // Each sum is turned into the prediction value as (sum + 4) >> 3 and written to the output rows by the two $for loops
  104.         add (32)        acc0<1>:w       PRED_UVW(9)REGION(16,1)         4:w {Compr}             // Add rounder
  105.     $for(0; <4; 2) {
  106.         shr (32)        PRED_UVW(%1)<1> acc0:w          3:w {Compr}     // (sum + 4) >> 3 written to PRED_UVW(%1); the loop header gives %1 = 0, 2
  107.         }
  108.  
  109.         add (32)        acc0<1>:w       PRED_UVW(11)REGION(16,1)        4:w {Compr}             // Add rounder
  110.     $for(4; <8; 2) {
  111.         shr (32)        PRED_UVW(%1)<1> acc0:w          3:w {Compr}     // (sum + 4) >> 3 written to PRED_UVW(%1); the loop header gives %1 = 4, 6
  112.         }
  113.         jmpi (1)        End_of_intra_Pred_Chroma        // DC mode done; skip the other mode handlers
  114.  
  115. // Mode 1: every prediction row is filled from the left reference column, read through the address register
  116. INTRA_CHROMA_HORIZONTAL:
  117.         mov (1)         PREF_LEFT_UD<1>:ud      INTRA_REF_LEFT_ID*GRFWIB*0x00010001+0x00040000:ud       // Set address registers for instruction compression: *0x00010001 puts the same base in both 16-bit halves, +0x00040000 adds 4 to the upper half
  118.     $for(0,0; <8; 2,8) {
  119.         mov (32)        PRED_UVW(%1)<1> r[PREF_LEFT,%2+2]<0;2,1>:ub {Compr}     // Actual left column reference data start at offset 2; the <0;2,1> region repeats one byte pair across the row. %1 steps by 2 and %2 by 8 per iteration
  120.         }
  121.         jmpi (1)        End_of_intra_Pred_Chroma        // Horizontal mode done; skip the other mode handlers
  122.  
  123. // Mode 2: every prediction row is a copy of the top reference row
  124. INTRA_CHROMA_VERTICAL:
  125.     $for(0; <8; 2) {
  126.         mov (32)        PRED_UVW(%1)<1> INTRA_REF_TOP(0) {Compr}        // Same source INTRA_REF_TOP(0) for every %1 (0, 2, 4, 6 per the loop header)
  127.         }
  128.         jmpi (1)        End_of_intra_Pred_Chroma        // Vertical mode done; skip the plane handler
  129.  
  130. // Mode 3
  131. INTRA_Chroma_PLANE:
  132. // Refer to H.264/AVC spec Section 8.3.4.4
  133. // The code below evaluates sat((A + b*(x-3) + c*(y-3)) >> 5) with A = a+16, using the register aliases defined next
  134. #undef  C
  135.  
  136. #define A               REG_INTRA_TEMP_2.0              // All are WORD type
  137. #define B               REG_INTRA_TEMP_3.0              // B[U] & B[V]
  138. #define C               REG_INTRA_TEMP_3.2              // C[U] & C[V]
  139. #define YP              REG_INTRA_TEMP_0                // Store intermediate results of c*(y-3). Make sure it's an even GRF
  140. #define YP1             REG_INTRA_TEMP_1                // Store intermediate results of c*(y-3). Make sure it's an odd GRF
  141. #define XP              REG_INTRA_TEMP_5                // Store intermediate results of a+b*(x-3)+16. Make sure it's an odd GRF
  142. // NOTE(review): none of these aliases is #undef'd in the visible part of this file - confirm they cannot clash with code included afterwards
  143. // First Calculate constants H and V
  144. //      H1 = sum((x'+1)*p[4+x',-1]), x'=0,1,2,3
  145. //      H2 =  sum((-x'-1)*p[2-x',-1]), x'=3,2,1,0
  146. //      H = H1 + H2
  147. //      The same calculation holds for V
  148. //      The :v immediates are packed 4-bit signed weights, lowest nibble first (e.g. 0x44332211 = 1,1,2,2,3,3,4,4)
  149.         mul (8) H1(0)<1>        INTRA_REF_TOP(0,8)REGION(8,1)   0x44332211:v    // Weights +1,+1,+2,+2,+3,+3,+4,+4 on the top reference starting at byte offset 8
  150.         mul (8) H2(0)<1>        INTRA_REF_TOP(0,-2)REGION(8,1)  0xFFEEDDCC:v    // Weights -4,-4,-3,-3,-2,-2,-1,-1 on the top reference starting at byte offset -2
  151.  
  152.         mul (8) V1(0)<1>        INTRA_REF_LEFT_UV(0,4*4)<4;2,1> 0x44332211:v    // Same positive weights on the left reference starting at byte offset 16
  153.         mul (8) V2(0)<1>        INTRA_REF_LEFT_UV(0)<4;2,1>             0x00FFEEDD:v    // Weights -3,-3,-2,-2,-1,-1,0,0; the last pair is overwritten by the next instruction
  154.         mul (2) V2(0,6)<1>      INTRA_REF_TOP(0,-2)REGION(2,1)  -4:w            // Replace 0*p[-1,3] with -4*p[-1,-1]
  155.         // Now, REG_INTRA_TEMP_0 holds [H2, H1] and REG_INTRA_TEMP_1 holds [V2, V1]
  156.  
  157.         // Sum up [H2, H1] and [V2, V1] using instruction compression
  158.         // ExecSize = 16 is restricted by B-spec for instruction compression
  159.         // Actual intermediate results are in lower sub-registers after each summing step
  160.         add     (16)    H1(0)<1>        H1(0)   H2(0) {Compr}   // Results in lower 8 WORDs
  161.         add     (16)    H1(0)<1>        H1(0)   H1(0,4) {Compr} // Results in lower 4 WORDs
  162.         add     (16)    H1(0)<1>        H1(0)   H1(0,2) {Compr} // Results in lower 2 WORDs
  163.  
  164. //      Calculate a, b, c and further derivations
  165.         mov     (16)    acc0<1>:w       32:w    // Preload the accumulator with the rounding term 32
  166.         mac     (4)             acc0<1>:w       H1(0)<16;2,1>   34:w    // acc0 = 32 + 34*sum; the <16;2,1> region picks 2 words from each of two rows 16 words apart
  167.         shr     (4)             B<1>:w          acc0:w  6:w             // Done b,c: (34*sum + 32) >> 6, four words written from B onwards (C is the alias two sub-registers after B)
  168.         mov     (16)    acc0<1>:w       16:w    // Preload the accumulator with 16
  169.         mac     (16)    acc0<1>:w       INTRA_REF_TOP(0,7*2)<0;2,1>             16:w    // acc0 = 16 + 16 * top reference pair at byte offset 14
  170.         mac     (16)    A<1>:w          INTRA_REF_LEFT_UV(0,7*4)<0;2,1> 16:w    // A = a+16
  171.         mac (16)        XP<1>:w         B<0;2,1>:w              XY_3<1;2,0>:b           // XP = A+b*(x-3)
  172.         mul     (8)             YP<1>:w         C<0;2,1>:w              XY_3<2;2,0>:b           // YP = c*(y-3), Even portion
  173.         mul     (8)             YP1<1>:w        C<0;2,1>:w              XY_3_1<2;2,0>:b // YP = c*(y-3), Odd portion
  174.  
  175. //      Finally the intra_Chroma plane prediction
  176.    $for(0; <8; 2) {
  177.         add (32)        acc0<1>:w               XP<16;16,1>:w   YP.%1<0;2,1>:w {Compr}  // acc0 = XP + c*(y-3) for sub-register %1 of YP (presumably the compressed second half reads the matching YP1 entry - TODO confirm)
  178.         shr.sat (32)    PRED_UV(%1)<2>  acc0<16;16,1>:w 5:w {Compr}     // >> 5 with saturation, written to PRED_UV(%1) with destination stride 2
  179.         }
  180. // Common exit point: the DC, horizontal and vertical handlers jump here; the plane handler falls through
  181. End_of_intra_Pred_Chroma:
  182.  
  183. // End of intra_Pred_Chroma
  184.  
  185. #endif  // !defined(__INTRA_PRED_CHROMA__)
  186.