Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © <2010>, Intel Corporation.
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the
  6.  * "Software"), to deal in the Software without restriction, including
  7.  * without limitation the rights to use, copy, modify, merge, publish,
  8.  * distribute, sub license, and/or sell copies of the Software, and to
  9.  * permit persons to whom the Software is furnished to do so, subject to
  10.  * the following conditions:
  11.  *
  12.  * The above copyright notice and this permission notice (including the
  13.  * next paragraph) shall be included in all copies or substantial portions
  14.  * of the Software.
  15.  *
  16.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  17.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  18.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  19.  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
  20.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  21.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  22.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23.  *
  24.  * This file was originally licensed under the following license
  25.  *
  26.  *  Licensed under the Apache License, Version 2.0 (the "License");
  27.  *  you may not use this file except in compliance with the License.
  28.  *  You may obtain a copy of the License at
  29.  *
  30.  *      http://www.apache.org/licenses/LICENSE-2.0
  31.  *
  32.  *  Unless required by applicable law or agreed to in writing, software
  33.  *  distributed under the License is distributed on an "AS IS" BASIS,
  34.  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  35.  *  See the License for the specific language governing permissions and
  36.  *  limitations under the License.
  37.  *
  38.  */
  39. //---------- Check dependency and spawn all MBs ----------
  40.  
  41. // Launch the 1st round of child threads for Vertical ILDB
  42. #if defined(_DEBUG)
  43.         mov             (1)             EntrySignature:w                        0x3333:w        // Debug builds only: store the marker value 0x3333 in EntrySignature
  44. #endif
  45.  
  46. //=====================================================================
  47. // Jump Table 1: four dword entries, each an offset measured from the ALL_SPAWNED label
  48.         // 0 0 ---> Goto ALL_SPAWNED
  49.         // 0 1 ---> Goto ALL_SPAWNED
  50.         // 1 0 ---> Goto SLEEP_ENTRY
  51.         // 1 1 ---> Goto POST_SLEEP
  52.         mov (2)         JumpTable.0<1>:d        0:d                             { NoDDClr }     // Entries 0 and 1: offset 0
  53. #if defined(CHROMA_ROOT)               
  54.         mov (1)         JumpTable.2:d   SLEEP_ENTRY_UV_ILDB_FRAME_IP-ALL_SPAWNED_UV_ILDB_FRAME_IP:d             { NoDDClr, NoDDChk }    // Entry 2: distance from ALL_SPAWNED_UV to SLEEP_ENTRY_UV
  55.         mov (1)         JumpTable.3:d   POST_SLEEP_UV_ILDB_FRAME_IP-ALL_SPAWNED_UV_ILDB_FRAME_IP:d              { NoDDChk }     // Entry 3: distance from ALL_SPAWNED_UV to POST_SLEEP_UV
  56. #else
  57.         mov (1)         JumpTable.2:d   SLEEP_ENTRY_Y_ILDB_FRAME_IP-ALL_SPAWNED_Y_ILDB_FRAME_IP:d               { NoDDClr, NoDDChk }    // Entry 2: distance from ALL_SPAWNED_Y to SLEEP_ENTRY_Y
  58.         mov (1)         JumpTable.3:d   POST_SLEEP_Y_ILDB_FRAME_IP-ALL_SPAWNED_Y_ILDB_FRAME_IP:d                { NoDDChk }     // Entry 3: distance from ALL_SPAWNED_Y to POST_SLEEP_Y
  59. #endif
  60. //=====================================================================
  61.  
  62.         mov (2)         f0.0<1>:w               0:w             // Clear two flag words starting at f0.0 (f0.0 and f0.1)
  63.  
  64.         // Get most fields of m0 ready for the URB write
  65.         mov     (8)                     MRF0<1>:ud              MSGSRC.0<8;8,1>:ud              // Copy 8 dwords of MSGSRC into MRF0
  66.  
  67.         // Add child kernel offset
  68.         add (1)         CT_R0Hdr.2:ud                   r0.2:ud                 CHILD_OFFSET:w          // CT_R0Hdr.2 = r0.2 + CHILD_OFFSET
  69.  
  70.         // Init scan state
  71.         mov (1)         Col_Boundary:w                  2:w
  72.         mov (1)         Row_Boundary:w                  LastRow:w
  73.         mov (1)         TopRowForScan:w                 0:w
  74.         mov (2)         OutstandingThreads<1>:w 0:w             // Clears OutstandingThreads and the word right after it (presumably ProcessedMBs - confirm layout)
  75.  
  76.         // Init Scoreboard  (idle = 0x00FF, busy = 0x0000)
  77.         // Low word is saved col.  High word is busy/idle status
  78.         mov     (16)            GatewayAperture(0)<1>   0x00FF00FF:ud           // Init r6-r7
  79.         mov     (16)            GatewayAperture(2)<1>   0x00FF00FF:ud           // Init r8-r9
  80.         mov     (16)            GatewayAperture(4)<1>   0x00FF00FF:ud           // Init r10-r11
  81.         mov     (16)            GatewayAperture(6)<1>   0x00FF00FF:ud           // Init r12-r13
  82.         mov     (16)            GatewayAperture(8)<1>   0x00FF00FF:ud           // Init r14-r15
  83.  
  84.         mul     (1)                     StatusAddr:w            CurRow:w                4:w             // StatusAddr = CurRow * 4: dword index to byte offset conversion
  85.  
  86.         //=====================================================================
  87.  
  88. //SPAWN_LOOP:
  89.         //===== OutstandingThreads < ThreadLimit ? ============================
  90.         cmp.l.f0.1 (1)  null:w          OutstandingThreads:w    ThreadLimit:w           // Check the thread limit: f0.1 is set when OutstandingThreads < ThreadLimit
  91. #if defined(CHROMA_ROOT)
  92.     (f0.1) jmpi         ILDB_LABEL(POST_SLEEP_UV)               // Under the limit: skip the sleep. Otherwise fall through to SLEEP_ENTRY_UV
  93. #else   // LUMA_ROOT
  94.     (f0.1) jmpi         ILDB_LABEL(POST_SLEEP_Y)                // Under the limit: skip the sleep. Otherwise fall through to SLEEP_ENTRY_Y
  95. #endif
  96.  
  97. #if defined(CHROMA_ROOT)
  98. ILDB_LABEL(SLEEP_ENTRY_UV):
  99. #else   // LUMA_ROOT
  100. ILDB_LABEL(SLEEP_ENTRY_Y):
  101. #endif
  102.     //===== Goto Sleep ====================================================
  103.     // Reached either because the max thread limit was hit or because no child thread can be spawned due to dependency.
  104.         add     (1)                     OutstandingThreads:w    OutstandingThreads:w    -1:w // Decrement up front: doing this before the wait is faster
  105.         wait                    n0.0:d                                                                                          // Sleep until notified through n0.0, then fall through to POST_SLEEP
  106.  
  107. #if defined(CHROMA_ROOT)
  108. ILDB_LABEL(POST_SLEEP_UV):
  109. #else   // LUMA_ROOT
  110. ILDB_LABEL(POST_SLEEP_Y):
  111. #endif
  112.         //===== Luma Status[CurRow] == busy ? =====
  113.         cmp.z.f0.0 (1)  null:uw         r[StatusAddr, GatewayApertureB+ScoreBd_Idx]:uw          0:uw                    // Check west neighbor: f0.0 is set when the scoreboard word is 0 (busy)
  114.         cmp.g.f0.1 (1)  null:w          CurCol:w                LastCol:w               // Check if the curCol > LastCol
  115.  
  116. #if defined(CHROMA_ROOT)
  117.         mov     (16)            acc0.0<1>:w             URBOffsetUVBase<0;1,0>:w                        // Preload acc0 with the UV base offset (MBsCntY * URB_EBTRIES_PER_MB) for the mac below
  118.         mac (1)                 URBOffset:w             CurRow:w                        4:w                             // URBOffset = CurRow * 4 + acc0 (4 entries per row)
  119. #else
  120.         mul     (1)                     URBOffset:w             CurRow:w                        4:w                             // URBOffset = CurRow * 4 (4 entries per row)
  121. #endif
  122.  
  123. #if defined(CHROMA_ROOT)
  124.         (f0.0) jmpi             ILDB_LABEL(SLEEP_ENTRY_UV)                                                              // Current row has a child thread running, can not spawn a new child thread, go back to sleep
  125.         (f0.1) jmpi             ILDB_LABEL(NEXT_MB_UV)                                                                  // skip MB if the curCol > LastCol
  126. #else   // LUMA_ROOT
  127.         (f0.0) jmpi             ILDB_LABEL(SLEEP_ENTRY_Y)                                                               // Current row has a child thread running, can not spawn a new child thread, go back to sleep
  128.         (f0.1) jmpi             ILDB_LABEL(NEXT_MB_Y)                                                                   // skip MB if the curCol > LastCol
  129. #endif
  130.                
  131.         //========== Spawn a child thread ========================================
  132.         // Save cur col and set Status[CurRow] to busy
  133.         mov (2)                 r[StatusAddr, GatewayApertureB]<1>:uw           CurColB<2;2,1>:ub               // Store the new col (two bytes from CurColB widened into two scoreboard words)
  134.                        
  135.         // Increase OutstandingThreads and ProcessedMBs by 1
  136.         add     (2)                     OutstandingThreads<1>:w         OutstandingThreads<2;2,1>:w             1:w  
  137.  
  138.         #include "AVC_ILDB_SpawnChild.asm"
  139.  
  140.         //===== Find next MB ===================================================
  141. #if defined(CHROMA_ROOT)
  142. ILDB_LABEL(NEXT_MB_UV):
  143. #else   // LUMA_ROOT
  144. ILDB_LABEL(NEXT_MB_Y):
  145. #endif
  146.         // Check pic boundary with a 2-wide compare (CurCol and the word after it against Col_Boundary and the word after it); results are in f0.0 bit0 and bit1
  147.         cmp.ge.f0.0     (2)     null<1>:w   CurCol<2;2,1>:w     Col_Boundary<2;2,1>:w
  148.  
  149.         // Update TopRowForScan if the curCol = LastCol.  NOTE(review): the predicate is f0.1, which the compare above (writing f0.0) does not set - confirm which earlier compare it is meant to reflect
  150.         (f0.1) add (1)  TopRowForScan:w         CurRow:w                1:w    
  151.  
  152. //      cmp.l.f0.1 (1)  null<1>:w               ProcessedMBs:w          TotalBlocks:w           // Processed all blocks ?
  153.         // 2 sets compare
  154.         // ProcessedMBs:w < TotalBlocks:w               OutstandingThreads:w < ThreadLimit:w
  155.         // 0 0 ---> Goto ALL_SPAWNED
  156.         // 0 1 ---> Goto ALL_SPAWNED
  157.         // 1 0 ---> Goto SLEEP_ENTRY
  158.         // 1 1 ---> Goto POST_SLEEP
  159.         cmp.l.f0.1 (2)  null<1>:w               OutstandingThreads<2;2,1>:w     ThreadLimit<2;2,1>:w
  160.  
  161.         // Do this bookkeeping in otherwise stalled cycles
  162.         mov (1)         acc0.0:w                4:w             // acc0 = 4, so the mac below yields CurRow * 4 + 4
  163.         mac     (1)             StatusAddr:w            CurRow:w                4:w                                             // StatusAddr = (CurRow + 1) * 4: dword to bytes offset conversion for the row stepped to below
  164.         add (2)         CurCol<1>:w             CurCol<2;2,1>:w         StepToNextMB<2;2,1>:b   // CurCol -= 2 and CurRow += 1
  165.                
  166.         // Set f0.0 if turning around is needed, assuming bit 15 - 2 are zeros for correct comparison.
  167.         cmp.nz.f0.0 (1) null<1>:w       f0.0:w          0x01:w          // f0.0 becomes set when the boundary flags differ from 0x01
  168.                
  169.         mul (1)         JumpAddr:w              f0.1:w          4:w             // JumpAddr = f0.1 * 4: dword index to byte offset of the jump table entry
  170.                
  171.         // The next MB is at the row TopRowForScan
  172.         (f0.0) mul (1)          StatusAddr:w    TopRowForScan:w         4:w                             // dword to bytes offset conversion
  173.         (f0.0) mov (1)          CurRow:w                TopRowForScan:w                                                         { NoDDClr }     // Restart from the top row that has MBs not deblocked yet.
  174.         (f0.0) add (1)          CurCol:w                r[StatusAddr, GatewayApertureB]:uw              1:w             { NoDDChk }     // CurCol = column saved in that row's scoreboard entry + 1
  175.        
  176.         //===== Processed all blocks ? =========================================
  177.         // (f0.1) jmpi          SPAWN_LOOP
  178.  
  179.         jmpi    r[JumpAddr, JUMPTABLE_BASE]:d           // Indexed jump through Jump Table 1; per the table above, indexes 0 and 1 continue at ALL_SPAWNED just below
  180. //JUMP_BASE:
  181.  
  182.         //======================================================================
  183.  
  184.         // All MBs are spawned at this point; drain by waiting until the outstanding thread count reaches zero
  185. #if defined(CHROMA_ROOT)
  186. ILDB_LABEL(ALL_SPAWNED_UV):
  187. #else   // LUMA_ROOT
  188. ILDB_LABEL(ALL_SPAWNED_Y):
  189. #endif
  190.         cmp.e.f0.1 (1)  null:w          OutstandingThreads:w            0:w                     // Check before going to sleep: f0.1 is set when no child thread is outstanding
  191. #if defined(CHROMA_ROOT)
  192.         (f0.1) jmpi             ILDB_LABEL(ALL_DONE_UV)
  193. #else   // LUMA_ROOT
  194.         (f0.1) jmpi             ILDB_LABEL(ALL_DONE_Y)
  195. #endif
  196.        
  197.         wait                    n0.0:d                                                                                          // Woken up by a finished child thread
  198.         add     (1)                     OutstandingThreads:w    OutstandingThreads:w    -1:w            // One fewer child thread outstanding
  199.  
  200. #if defined(CHROMA_ROOT)
  201.         // One thread is now free: give it to luma by increasing the luma thread limit by one.
  202.         #include "AVC_ILDB_LumaThrdLimit.asm"
  203. #endif
  204.  
  205. #if defined(CHROMA_ROOT)
  206.     jmpi                        ILDB_LABEL(ALL_SPAWNED_UV)                                                      // Woken up: loop back and re-check the outstanding thread count
  207. #else   // LUMA_ROOT
  208.     jmpi                        ILDB_LABEL(ALL_SPAWNED_Y)                                                       // Woken up: loop back and re-check the outstanding thread count
  209. #endif
  210.  
  211.         // All child threads are finished at this point
  212. #if defined(CHROMA_ROOT)
  213. ILDB_LABEL(ALL_DONE_UV):
  214. #else   // LUMA_ROOT
  215. ILDB_LABEL(ALL_DONE_Y):
  216. #endif
  217.