Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * All Video Processing kernels
  3.  * Copyright © <2010>, Intel Corporation.
  4.  *
  5.  * Permission is hereby granted, free of charge, to any person obtaining a
  6.  * copy of this software and associated documentation files (the
  7.  * "Software"), to deal in the Software without restriction, including
  8.  * without limitation the rights to use, copy, modify, merge, publish,
  9.  * distribute, sub license, and/or sell copies of the Software, and to
  10.  * permit persons to whom the Software is furnished to do so, subject to
  11.  * the following conditions:
  12.  *
  13.  * The above copyright notice and this permission notice (including the
  14.  * next paragraph) shall be included in all copies or substantial portions
  15.  * of the Software.
  16.  *
  17.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  19.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  20.  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
  21.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  22.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  23.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  24.  *
  25.  * This file was originally licensed under the following license
  26.  *
  27.  *  Licensed under the Apache License, Version 2.0 (the "License");
  28.  *  you may not use this file except in compliance with the License.
  29.  *  You may obtain a copy of the License at
  30.  *
  31.  *      http://www.apache.org/licenses/LICENSE-2.0
  32.  *
  33.  *  Unless required by applicable law or agreed to in writing, software
  34.  *  distributed under the License is distributed on an "AS IS" BASIS,
  35.  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  36.  *  See the License for the specific language governing permissions and
  37.  *  limitations under the License.
  38.  *
  39.  * Authors:
  40.  *    Halley Zhao <halley.zhao@intel.com>
  41.  */
  42.  
  43. // Module name: PL16x8_PL8x4.asm
  44. //----------------------------------------------------------------
  45.  
  46. #include "RGBX_Load_16x8.inc"
  47.  
  48. #if (0)
  49.     #define nTEMP0          34        // transformation coefficient
  50.     #define nTEMP1          35        // one row of Y (first half register is used)
  51.     #define nTEMP2          36        // first  half of one row
  52.     #define nTEMP3          37        // second half of one row
  53.     #define nTEMP4          38        // mul and add
  54.     #define nTEMP5          39        // mul and add
  55.     #define nTEMP6          40        // mul and add
  56.     #define nTEMP7          41        // mul and add
  57.     #define nTEMP8          42        // sum of mul
  58.     #define nTEMP10         44
  59.     #define nTEMP12         46
  60.     #define nTEMP14         48
  61.     #define nTEMP16         50
  62.     #define nTEMP17         51
  63.     #define nTEMP18         52
  64.    
  65.     #define nTEMP24         58
  66. #endif
  67.  
  68. $for(0; <nY_NUM_OF_ROWS; 1) {
  69.     // BGRX | B | G | R | X |
  70.     // ###### do on row for Y
  71.     // #### mul and add
  72.     mul (16)  REG2(r, nTEMP4, 0)<1>:uw      r[SRC_RGBA_OFFSET_1, %1*32 +  0]<0; 16,1>:ub        ubRGB_to_Y_Coef_Fix<0;4,1>:ub
  73.     mul (16)  REG2(r, nTEMP5, 0)<1>:uw      r[SRC_RGBA_OFFSET_1, %1*32 + 16]<0; 16,1>:ub        ubRGB_to_Y_Coef_Fix<0;4,1>:ub
  74.     mul (16)  REG2(r, nTEMP6, 0)<1>:uw      r[SRC_RGBA_OFFSET_2, %1*32 +  0]<0; 16,1>:ub        ubRGB_to_Y_Coef_Fix<0;4,1>:ub
  75.     mul (16)  REG2(r, nTEMP7, 0)<1>:uw      r[SRC_RGBA_OFFSET_2, %1*32 + 16]<0; 16,1>:ub        ubRGB_to_Y_Coef_Fix<0;4,1>:ub
  76.  
  77.     add (4)   REG2(r, nTEMP4, 0)<4>:uw      REG2(r, nTEMP4, 0)<0;4,4>:uw      REG2(r, nTEMP4, 1)<0;4,4>:uw      
  78.     add (4)   REG2(r, nTEMP5, 0)<4>:uw      REG2(r, nTEMP5, 0)<0;4,4>:uw      REG2(r, nTEMP5, 1)<0;4,4>:uw      
  79.     add (4)   REG2(r, nTEMP6, 0)<4>:uw      REG2(r, nTEMP6, 0)<0;4,4>:uw      REG2(r, nTEMP6, 1)<0;4,4>:uw      
  80.     add (4)   REG2(r, nTEMP7, 0)<4>:uw      REG2(r, nTEMP7, 0)<0;4,4>:uw      REG2(r, nTEMP7, 1)<0;4,4>:uw      
  81.     add (4)   REG2(r, nTEMP4, 0)<4>:uw      REG2(r, nTEMP4, 0)<0;4,4>:uw      REG2(r, nTEMP4, 2)<0;4,4>:uw      
  82.     add (4)   REG2(r, nTEMP5, 0)<4>:uw      REG2(r, nTEMP5, 0)<0;4,4>:uw      REG2(r, nTEMP5, 2)<0;4,4>:uw      
  83.     add (4)   REG2(r, nTEMP6, 0)<4>:uw      REG2(r, nTEMP6, 0)<0;4,4>:uw      REG2(r, nTEMP6, 2)<0;4,4>:uw      
  84.     add (4)   REG2(r, nTEMP7, 0)<4>:uw      REG2(r, nTEMP7, 0)<0;4,4>:uw      REG2(r, nTEMP7, 2)<0;4,4>:uw      
  85.  
  86.     // ####  write Y to the 1 row
  87.     mov (4)  REG2(r, nTEMP8,  0)<1>:uw    REG2(r, nTEMP4, 0)<0; 4, 4>:uw
  88.     mov (4)  REG2(r, nTEMP8,  4)<1>:uw    REG2(r, nTEMP5, 0)<0; 4, 4>:uw
  89.     mov (4)  REG2(r, nTEMP8,  8)<1>:uw    REG2(r, nTEMP6, 0)<0; 4, 4>:uw
  90.     mov (4)  REG2(r, nTEMP8, 12)<1>:uw    REG2(r, nTEMP7, 0)<0; 4, 4>:uw
  91.     add (16) REG2(r, nTEMP8,  0)<1>:uw    REG2(r, nTEMP8, 0)<0; 16, 1>:uw    0x1080:uw
  92.     mov (16) REG2(r, nTEMP8,  0)<1>:ub    REG2(r, nTEMP8, 1)<0; 16, 2>:ub
  93.     mov (16) uwDEST_Y(%1)<1>  REG2(r,nTEMP8, 0)<0;16,1>:ub
  94.  
  95.     // ######  do one row for U
  96.     // #### mul and add
  97.     mul (16)  REG2(r, nTEMP4, 0)<1>:w      r[SRC_RGBA_OFFSET_1, %1*32 +  0]<0; 16,1>:ub        bRGB_to_U_Coef_Fix<0;4,1>:b
  98.     mul (16)  REG2(r, nTEMP5, 0)<1>:w      r[SRC_RGBA_OFFSET_1, %1*32 + 16]<0; 16,1>:ub        bRGB_to_U_Coef_Fix<0;4,1>:b
  99.     mul (16)  REG2(r, nTEMP6, 0)<1>:w      r[SRC_RGBA_OFFSET_2, %1*32 +  0]<0; 16,1>:ub        bRGB_to_U_Coef_Fix<0;4,1>:b
  100.     mul (16)  REG2(r, nTEMP7, 0)<1>:w      r[SRC_RGBA_OFFSET_2, %1*32 + 16]<0; 16,1>:ub        bRGB_to_U_Coef_Fix<0;4,1>:b
  101.    
  102.     add (4)   REG2(r, nTEMP4, 0)<4>:w      REG2(r, nTEMP4, 0)<0;4,4>:w      REG2(r, nTEMP4, 1)<0;4,4>:w      
  103.     add (4)   REG2(r, nTEMP5, 0)<4>:w      REG2(r, nTEMP5, 0)<0;4,4>:w      REG2(r, nTEMP5, 1)<0;4,4>:w      
  104.     add (4)   REG2(r, nTEMP6, 0)<4>:w      REG2(r, nTEMP6, 0)<0;4,4>:w      REG2(r, nTEMP6, 1)<0;4,4>:w      
  105.     add (4)   REG2(r, nTEMP7, 0)<4>:w      REG2(r, nTEMP7, 0)<0;4,4>:w      REG2(r, nTEMP7, 1)<0;4,4>:w      
  106.     add (4)   REG2(r, nTEMP4, 0)<4>:w      REG2(r, nTEMP4, 0)<0;4,4>:w      REG2(r, nTEMP4, 2)<0;4,4>:w      
  107.     add (4)   REG2(r, nTEMP5, 0)<4>:w      REG2(r, nTEMP5, 0)<0;4,4>:w      REG2(r, nTEMP5, 2)<0;4,4>:w      
  108.     add (4)   REG2(r, nTEMP6, 0)<4>:w      REG2(r, nTEMP6, 0)<0;4,4>:w      REG2(r, nTEMP6, 2)<0;4,4>:w      
  109.     add (4)   REG2(r, nTEMP7, 0)<4>:w      REG2(r, nTEMP7, 0)<0;4,4>:w      REG2(r, nTEMP7, 2)<0;4,4>:w      
  110.  
  111.     // #### write U to the 1 row
  112.     mov (4)  REG2(r, nTEMP8,  0)<1>:w    REG2(r, nTEMP4, 0)<0; 4, 4>:w
  113.     mov (4)  REG2(r, nTEMP8,  4)<1>:w    REG2(r, nTEMP5, 0)<0; 4, 4>:w
  114.     mov (4)  REG2(r, nTEMP8,  8)<1>:w    REG2(r, nTEMP6, 0)<0; 4, 4>:w
  115.     mov (4)  REG2(r, nTEMP8, 12)<1>:w    REG2(r, nTEMP7, 0)<0; 4, 4>:w
  116.     add (16) REG2(r, nTEMP8,  0)<1>:uw    REG2(r, nTEMP8, 0)<0; 16, 1>:w    0x8080:uw
  117.     mov (16) REG2(r, nTEMP8,  0)<1>:ub    REG2(r, nTEMP8, 1)<0; 16, 2>:ub
  118.     mov (16) uwDEST_U(%1)<1>  REG2(r,nTEMP8, 0)<0;16,1>:ub
  119.  
  120.     // ###### do one row for V
  121.     // #### mul and add
  122.     mul (16)  REG2(r, nTEMP4, 0)<1>:w      r[SRC_RGBA_OFFSET_1, %1*32 +  0]<0; 16,1>:ub        bRGB_to_V_Coef_Fix<0;4,1>:b
  123.     mul (16)  REG2(r, nTEMP5, 0)<1>:w      r[SRC_RGBA_OFFSET_1, %1*32 + 16]<0; 16,1>:ub        bRGB_to_V_Coef_Fix<0;4,1>:b
  124.     mul (16)  REG2(r, nTEMP6, 0)<1>:w      r[SRC_RGBA_OFFSET_2, %1*32 +  0]<0; 16,1>:ub        bRGB_to_V_Coef_Fix<0;4,1>:b
  125.     mul (16)  REG2(r, nTEMP7, 0)<1>:w      r[SRC_RGBA_OFFSET_2, %1*32 + 16]<0; 16,1>:ub        bRGB_to_V_Coef_Fix<0;4,1>:b
  126.    
  127.     add (4)   REG2(r, nTEMP4, 0)<4>:w      REG2(r, nTEMP4, 0)<0;4,4>:w      REG2(r, nTEMP4, 1)<0;4,4>:w      
  128.     add (4)   REG2(r, nTEMP5, 0)<4>:w      REG2(r, nTEMP5, 0)<0;4,4>:w      REG2(r, nTEMP5, 1)<0;4,4>:w      
  129.     add (4)   REG2(r, nTEMP6, 0)<4>:w      REG2(r, nTEMP6, 0)<0;4,4>:w      REG2(r, nTEMP6, 1)<0;4,4>:w      
  130.     add (4)   REG2(r, nTEMP7, 0)<4>:w      REG2(r, nTEMP7, 0)<0;4,4>:w      REG2(r, nTEMP7, 1)<0;4,4>:w      
  131.     add (4)   REG2(r, nTEMP4, 0)<4>:w      REG2(r, nTEMP4, 0)<0;4,4>:w      REG2(r, nTEMP4, 2)<0;4,4>:w      
  132.     add (4)   REG2(r, nTEMP5, 0)<4>:w      REG2(r, nTEMP5, 0)<0;4,4>:w      REG2(r, nTEMP5, 2)<0;4,4>:w      
  133.     add (4)   REG2(r, nTEMP6, 0)<4>:w      REG2(r, nTEMP6, 0)<0;4,4>:w      REG2(r, nTEMP6, 2)<0;4,4>:w      
  134.     add (4)   REG2(r, nTEMP7, 0)<4>:w      REG2(r, nTEMP7, 0)<0;4,4>:w      REG2(r, nTEMP7, 2)<0;4,4>:w      
  135.  
  136.     // #### write V to the 1 row
  137.     mov (4)  REG2(r, nTEMP8,  0)<1>:w    REG2(r, nTEMP4, 0)<0; 4, 4>:w
  138.     mov (4)  REG2(r, nTEMP8,  4)<1>:w    REG2(r, nTEMP5, 0)<0; 4, 4>:w
  139.     mov (4)  REG2(r, nTEMP8,  8)<1>:w    REG2(r, nTEMP6, 0)<0; 4, 4>:w
  140.     mov (4)  REG2(r, nTEMP8, 12)<1>:w    REG2(r, nTEMP7, 0)<0; 4, 4>:w
  141.     add (16) REG2(r, nTEMP8,  0)<1>:uw    REG2(r, nTEMP8, 0)<0; 16, 1>:w    0x8080:uw
  142.     mov (16) REG2(r, nTEMP8,  0)<1>:ub    REG2(r, nTEMP8, 1)<0; 16, 2>:ub
  143.     mov (16) uwDEST_V(%1)<1>  REG2(r,nTEMP8, 0)<0;16,1>:ub
  144. }
  145.  
  146.