Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © <2010>, Intel Corporation.
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the
  6.  * "Software"), to deal in the Software without restriction, including
  7.  * without limitation the rights to use, copy, modify, merge, publish,
  8.  * distribute, sub license, and/or sell copies of the Software, and to
  9.  * permit persons to whom the Software is furnished to do so, subject to
  10.  * the following conditions:
  11.  *
  12.  * The above copyright notice and this permission notice (including the
  13.  * next paragraph) shall be included in all copies or substantial portions
  14.  * of the Software.
  15.  *
  16.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  17.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  18.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  19.  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
  20.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  21.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  22.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23.  *
  24.  * This file was originally licensed under the following license
  25.  *
  26.  *  Licensed under the Apache License, Version 2.0 (the "License");
  27.  *  you may not use this file except in compliance with the License.
  28.  *  You may obtain a copy of the License at
  29.  *
  30.  *      http://www.apache.org/licenses/LICENSE-2.0
  31.  *
  32.  *  Unless required by applicable law or agreed to in writing, software
  33.  *  distributed under the License is distributed on an "AS IS" BASIS,
  34.  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  35.  *  See the License for the specific language governing permissions and
  36.  *  limitations under the License.
  37.  *
  38.  */
  39. //////////////////////////////////////////////////////////////////////////////////////////////////////////////
  40. // AVC Child Kernel (Vertical and horizontal de-block a 4:2:0 MB UV comp)
  41. //
  42. // First, de-block vertical edges from left to right.
  43. // Second, de-block horizontal edges from top to bottom.
  44. //
  45. // For 4:2:0, chroma is always de-blocked at 8x8.
  46. // The NV12 format allows U and V to be filtered together.
  47. //
  48. //////////////////////////////////////////////////////////////////////////////////////////////////////////////
  49. #define AVC_ILDB
  50.  
  51. .kernel AVC_ILDB_CHILD_UV
  52. #if defined(COMBINED_KERNEL)
  53. ILDB_LABEL(AVC_ILDB_CHILD_UV):
  54. #endif
  55.  
  56. #include "SetupVPKernel.asm"
  57. #include "AVC_ILDB.inc"
  58.  
  59. #if defined(_DEBUG)
  60.         mov             (1)             EntrySignatureC:w                       0x9997:w        // Debug builds only: tag kernel entry with signature 0x9997
  61. #endif
  62.  
  63.         // Init local variables
  64.         shl (8)         ORIX_CUR<1>:w           ORIX<0;2,1>:w           4:w             // Scale the (x,y) origin pair by 16 (<<4); the <0;2,1> region repeats the pair 4 times across 8 words
  65.  
  66.         // Init addr register for vertical control data
  67.         mov (1)         ECM_AddrReg<1>:w                CNTRL_DATA_BASE:w               // ECM_AddrReg = CNTRL_DATA_BASE, used below for r[ECM_AddrReg, ...] indirect reads
  68.  
  69.         //=== Null Kernel ===============================================================
  70. //      jmpi ILDB_LABEL(POST_ILDB_UV_UV)        // NOTE(review): no POST_ILDB_UV_UV label in this file; presumably POST_ILDB_UV was meant
  71.         //===============================================================================
  72.  
  73. #if defined(DEV_CL)    
  74.         mov     (1)             acc0.0:w                240:w           // Preload acc0 with 240; consumed as the addend of the mac below
  75. #else
  76.         //====================================================================================
  77.         // For BearLake-C, 64 bytes are stored in memory and dataport expands to 256 bytes.  Need to use a special read command on BL-C.
  78.         // MB_offset = MBsCntX * CurRow + CurCol
  79.         // MBCntrlDataOffsetY = global_byte_offset = MB_offset * 64
  80.         mul (1) CntrlDataOffsetY:ud             MBsCntX:w                               ORIY:w
  81.         add (1) CntrlDataOffsetY:ud             CntrlDataOffsetY:ud             ORIX:w
  82.  
  83.         // Assign to MSGSRC.2:ud for memory access
  84.         // mul (1) CntrlDataOffsetY:ud          CntrlDataOffsetY:ud             64:uw
  85.         mul (1) MSGSRC.2:ud             CntrlDataOffsetY:ud             64:uw
  86.                
  87.         mov     (1)             acc0.0:w                320:w           // Preload acc0 with 320; consumed as the addend of the mac below
  88. #endif
  89.         mac (1)         URBOffsetC:w    ORIY:w                  4:w                             // URBOffsetC = ORIY * 4 + acc0 (240 or 320 from above); UV URB entries are right after Y entries
  90.  
  91.  
  92.         // Init local variables
  93. //      shl (8)         ORIX_CUR<1>:w           ORIX<0;2,1>:w           4:w             // Expand addr to bytes, repeat (x,y) 4 times
  94.         add (1)         ORIX_LEFT:w                     ORIX_LEFT:w                     -4:w    // Move the left-neighbour x origin back by 4
  95.         add (1)         ORIY_TOP:w                      ORIY_TOP:w                      -4:w    // Move the top-neighbour y origin back by 4
  96.  
  97.         // Build a ramp from 0 to 15
  98.         mov     (16)    RRampW(0)<1>            RampConstC<0;8,1>:ub    // Copy the 8 RampConstC bytes into both halves of RRampW as words
  99.         add (8)         RRampW(0,8)<1>          RRampW(0,8)                     8:w             // Add 8 to the upper 8 words, completing the 0..15 ramp
  100.  
  101.         // Load current MB control data
  102. #if defined(DEV_CL)
  103.         #if defined(_APPLE)
  104.                 #include "Load_ILDB_Cntrl_Data_22DW.asm"        // Crestline for Apple, progressive only
  105.         #else
  106.                 #include "Load_ILDB_Cntrl_Data_64DW.asm"        // Crestline
  107.         #endif 
  108. #else
  109.         #include "Load_ILDB_Cntrl_Data_16DW.asm"        // Cantiga and beyond
  110. #endif
  111.  
  112.         // Check loaded control data
  113.         #if defined(_APPLE)
  114.                 and.z.f0.1  (8) null<1>:uw      r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]<8;8,1>:uw              0xFFFF:uw               // f0.1 channel set where the edge control word is 0 (8 words from wEdgeCntlMap_IntLeftVert)
  115.                 (f0.1) and.z.f0.1 (2) null<1>:uw        r[ECM_AddrReg, wEdgeCntlMapA_ExtTopHorz0]<2;2,1>:uw             0xFFFF:uw               // Predicated on f0.1: also test 2 words from wEdgeCntlMapA_ExtTopHorz0 for 0
  116.         #else
  117.                 and.z.f0.1  (16) null<1>:uw     r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]<16;16,1>:uw    0xFFFF:uw               // f0.1 channel set where the edge control word is 0 (16 words from wEdgeCntlMap_IntLeftVert)
  118.         #endif 
  119.                
  120.         and.nz.f0.0  (1) null:w         r[ECM_AddrReg, ExtBitFlags]:ub          DISABLE_ILDB_FLAG:w             // f0.0 set when DISABLE_ILDB_FLAG is set in ExtBitFlags
  121.        
  122.         mov     (1)             GateWayOffsetC:uw       ORIY:uw         // Use row # as Gateway offset
  123.  
  124.         #if defined(_APPLE)
  125.                 (f0.1.all8h)    jmpi    ILDB_LABEL(READ_FOR_URB_UV)                             // Skip ILDB when f0.1 is set in all 8 channels (every tested word was 0)
  126.         #else
  127.                 (f0.1.all16h)   jmpi    ILDB_LABEL(READ_FOR_URB_UV)                             // Skip ILDB when f0.1 is set in all 16 channels (every tested word was 0)
  128.         #endif 
  129.  
  130.         (f0.0)                  jmpi    ILDB_LABEL(READ_FOR_URB_UV)                                     // Skip ILDB when the disable flag is set
  131.  
  132.  
  133.  
  134.         #include "load_Cur_UV_8x8T.asm"                         // Load transposed data 8x8
  135. //      #include "load_Left_UV_2x8T.asm"
  136.         #include "load_Top_UV_8x2.asm"                          // Load top MB (8x2) UV data from memory if it exists
  137.  
  138.         #include "Transpose_Cur_UV_8x8.asm"
  139. //      #include "Transpose_Left_UV_2x8.asm"
  140.  
  141.  
  142.         //---------- Perform vertical ILDB filtering on UV ----------
  143.         #include "AVC_ILDB_Filter_UV_v.asm"    
  144.         //---------------------------------------------------------
  145.  
  146.         #include "save_Left_UV_8x2T.asm"                        // Write left MB (2x8) UV data to memory if it exists
  147.         #include "Transpose_Cur_UV_8x8.asm"                     // Transpose a MB for horizontal edge de-blocking
  148.  
  149.         //---------- Perform horizontal ILDB filtering on UV ----------
  150.         #include "AVC_ILDB_Filter_UV_h.asm"    
  151.         //-----------------------------------------------------------
  152.  
  153.         #include "save_Cur_UV_8x8.asm"                          // Write 8x8
  154.         #include "save_Top_UV_8x2.asm"                          // Write top MB (8x2) if not the top row
  155.  
  156.         //---------- Write right most 4 columns of cur MB to URB ----------
  157.         // Transpose the right most 2 cols 2x8 (word) in GRF to 8x2 in BUF_D.  It is 2 left most cols in cur MB.
  158.         #include "Transpose_Cur_UV_2x8.asm"                                            
  159.                
  160. ILDB_LABEL(WRITE_URB_UV):
  161.         mov (8)         m1<1>:ud                LEFT_TEMP_D(1)<8;8,1>                   // Copy 1 GRF to 1 URB entry (U+V)
  162.        
  163.         #include "writeURB_UV_Child.asm"       
  164.         //-----------------------------------------------------------------
  165.  
  166.         //=========== Check write commit of the last write ============
  167.     mov (8)     WritebackResponse(0)<1>         WritebackResponse(0)   // Self-move; presumably stalls on the register until the write-commit response has landed
  168.  
  169. ILDB_LABEL(POST_ILDB_UV):
  170.         //---------------------------------            
  171.        
  172.         // Send notification thru Gateway to root thread, update chroma Status[CurRow]
  173.         #include "AVC_ILDB_ForwardMsg.asm"
  174.  
  175. #if !defined(GW_DCN)            // For non-ILK chipsets
  176.         //child send EOT : Request type = 1
  177.         END_CHILD_THREAD
  178. #endif  // !defined(GW_DCN)
  179.        
  180.         // The thread finishes here
  181.         //------------------------------------------------------------------------------
  182.        
  183. ILDB_LABEL(READ_FOR_URB_UV):
  184.         // Still need to prepare URB data for the right neighbor MB
  185.         #include "load_Cur_UV_Right_Most_2x8.asm"               // Load cur MB (right most 2x8) UV data from memory
  186.         #include "Transpose_Cur_UV_Right_Most_2x8.asm"                                         
  187. //      jmpi ILDB_LABEL(WRITE_URB_UV)
  188.  
  189.         mov (8)         m1<1>:ud                LEFT_TEMP_D(1)<8;8,1>                   // Copy 1 GRF to 1 URB entry (U+V)
  190.        
  191.         #include "writeURB_UV_Child.asm"       
  192.         //-----------------------------------------------------------------
  193.  
  194.         // Send notification thru Gateway to root thread, update chroma Status[CurRow]
  195.         #include "AVC_ILDB_ForwardMsg.asm"
  196.  
  197. #if !defined(GW_DCN)            // For non-ILK chipsets
  198.         //child send EOT : Request type = 1
  199.         END_CHILD_THREAD
  200. #endif  // !defined(GW_DCN)
  201.        
  202.         // The thread finishes here
  203.         //------------------------------------------------------------------------------
  204.        
  205.        
  206.         ////////////////////////////////////////////////////////////////////////////////
  207.         // Include the subroutines called by the code above
  208. //      #include "AVC_ILDB_Luma_Core.asm"
  209.         #include "AVC_ILDB_Chroma_Core.asm"
  210.  
  211.        
  212. #if !defined(COMBINED_KERNEL)           // For standalone kernel only
  213. .end_code
  214.  
  215. .end_kernel
  216. #endif
  217.