Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © <2010>, Intel Corporation.
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the
  6.  * "Software"), to deal in the Software without restriction, including
  7.  * without limitation the rights to use, copy, modify, merge, publish,
  8.  * distribute, sub license, and/or sell copies of the Software, and to
  9.  * permit persons to whom the Software is furnished to do so, subject to
  10.  * the following conditions:
  11.  *
  12.  * The above copyright notice and this permission notice (including the
  13.  * next paragraph) shall be included in all copies or substantial portions
  14.  * of the Software.
  15.  *
  16.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  17.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  18.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  19.  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
  20.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  21.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  22.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23.  *
  24.  * This file was originally licensed under the following license
  25.  *
  26.  *  Licensed under the Apache License, Version 2.0 (the "License");
  27.  *  you may not use this file except in compliance with the License.
  28.  *  You may obtain a copy of the License at
  29.  *
  30.  *      http://www.apache.org/licenses/LICENSE-2.0
  31.  *
  32.  *  Unless required by applicable law or agreed to in writing, software
  33.  *  distributed under the License is distributed on an "AS IS" BASIS,
  34.  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  35.  *  See the License for the specific language governing permissions and
  36.  *  limitations under the License.
  37.  *
  38.  */
  39. //////////////////////////////////////////////////////////////////////////////////////////
  40. //      Module name: Transpose_Y_16x16.asm
  41. //     
  42. //      Transpose Y 16x16 block.
  43. //
  44. //----------------------------------------------------------------------------------------
  45. //  Symbols that must be defined before including this module
  46. //
  47. //      Source region is :ub
  48. //      SRC_YB:                 SRC_YB Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub    // 8 GRFs
  49. //
  50. //  Temp buffer:
  51. //      CUR_TEMP_B:             BUF_B Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub             // 8 GRFs
  52. //
  53. //////////////////////////////////////////////////////////////////////////////////////////
  54.  
  55. #if defined(_DEBUG)
  56.         mov             (1)             EntrySignatureC:w                       0xDDDA:w    // _DEBUG builds only: store the word constant 0xDDDA into EntrySignatureC (presumably an entry marker for this module - confirm)
  57. #endif
  58.  
  59.  
  60. // Transpose Y (16x16 bytes) in place: SRC_YB is overwritten with its transpose; CUR_TEMP_B is clobbered as scratch.
  61. // NOTE(review): comments below assume X(r,b) = GRF r, byte b of buffer X, with 32-byte GRFs holding two 16-byte rows - confirm against the macro definitions.
  62. // The first step: copy SRC_YB into CUR_TEMP_B as four 4-column strips. Region <16;4,1> reads a 4x4 tile (4 consecutive bytes from each of 4 rows).
  63. mov (16)        CUR_TEMP_B(0,0)<1>              SRC_YB(0,0)<16;4,1>             { NoDDClr }   // rows 0-3,   cols 0-3 -> temp GRF 0, bytes 0-15
  64. mov (16)        CUR_TEMP_B(0,16)<1>             SRC_YB(2,0)<16;4,1>             { NoDDChk }   // rows 4-7,   cols 0-3 -> temp GRF 0, bytes 16-31
  65. mov (16)        CUR_TEMP_B(1,0)<1>              SRC_YB(4,0)<16;4,1>             { NoDDClr }   // rows 8-11,  cols 0-3 -> temp GRF 1, bytes 0-15
  66. mov (16)        CUR_TEMP_B(1,16)<1>             SRC_YB(6,0)<16;4,1>             { NoDDChk }   // rows 12-15, cols 0-3 -> temp GRF 1, bytes 16-31
  67. // Cols 4-7 of all 16 rows -> temp GRFs 2-3. Each NoDDClr/NoDDChk pair (dependency-control hints) writes the two 16-byte halves of one destination GRF.
  68. mov (16)        CUR_TEMP_B(2,0)<1>              SRC_YB(0,4)<16;4,1>             { NoDDClr }
  69. mov (16)        CUR_TEMP_B(2,16)<1>             SRC_YB(2,4)<16;4,1>             { NoDDChk }
  70. mov (16)        CUR_TEMP_B(3,0)<1>              SRC_YB(4,4)<16;4,1>             { NoDDClr }
  71. mov (16)        CUR_TEMP_B(3,16)<1>             SRC_YB(6,4)<16;4,1>             { NoDDChk }
  72. // Cols 8-11 of all 16 rows -> temp GRFs 4-5.
  73. mov (16)        CUR_TEMP_B(4,0)<1>              SRC_YB(0,8)<16;4,1>             { NoDDClr }
  74. mov (16)        CUR_TEMP_B(4,16)<1>             SRC_YB(2,8)<16;4,1>             { NoDDChk }
  75. mov (16)        CUR_TEMP_B(5,0)<1>              SRC_YB(4,8)<16;4,1>             { NoDDClr }
  76. mov (16)        CUR_TEMP_B(5,16)<1>             SRC_YB(6,8)<16;4,1>             { NoDDChk }
  77. // Cols 12-15 of all 16 rows -> temp GRFs 6-7.
  78. mov (16)        CUR_TEMP_B(6,0)<1>              SRC_YB(0,12)<16;4,1>    { NoDDClr }
  79. mov (16)        CUR_TEMP_B(6,16)<1>             SRC_YB(2,12)<16;4,1>    { NoDDChk }
  80. mov (16)        CUR_TEMP_B(7,0)<1>              SRC_YB(4,12)<16;4,1>    { NoDDClr }
  81. mov (16)        CUR_TEMP_B(7,16)<1>             SRC_YB(6,12)<16;4,1>    { NoDDChk }
  82.  
  83. // The second step: region <32;8,4> at byte k picks byte k of every 4-byte group across a temp GRF pair, i.e. one source column for all 16 rows, and stores it as one contiguous 16-byte output row.
  84. mov (16)        SRC_YB(0,0)<1>          CUR_TEMP_B(0,0)<32;8,4>         { NoDDClr }   // source col 0 -> output row 0
  85. mov (16)        SRC_YB(0,16)<1>         CUR_TEMP_B(0,1)<32;8,4>         { NoDDChk }   // source col 1 -> output row 1
  86. mov (16)        SRC_YB(1,0)<1>          CUR_TEMP_B(0,2)<32;8,4>         { NoDDClr }   // source col 2 -> output row 2
  87. mov (16)        SRC_YB(1,16)<1>         CUR_TEMP_B(0,3)<32;8,4>         { NoDDChk }   // source col 3 -> output row 3
  88. // Source cols 4-7 (temp GRFs 2-3) -> output rows 4-7.
  89. mov (16)        SRC_YB(2,0)<1>          CUR_TEMP_B(2,0)<32;8,4>         { NoDDClr }
  90. mov (16)        SRC_YB(2,16)<1>         CUR_TEMP_B(2,1)<32;8,4>         { NoDDChk }
  91. mov (16)        SRC_YB(3,0)<1>          CUR_TEMP_B(2,2)<32;8,4>         { NoDDClr }
  92. mov (16)        SRC_YB(3,16)<1>         CUR_TEMP_B(2,3)<32;8,4>         { NoDDChk }
  93. // Source cols 8-11 (temp GRFs 4-5) -> output rows 8-11.
  94. mov (16)        SRC_YB(4,0)<1>          CUR_TEMP_B(4,0)<32;8,4>         { NoDDClr }
  95. mov (16)        SRC_YB(4,16)<1>         CUR_TEMP_B(4,1)<32;8,4>         { NoDDChk }
  96. mov (16)        SRC_YB(5,0)<1>          CUR_TEMP_B(4,2)<32;8,4>         { NoDDClr }
  97. mov (16)        SRC_YB(5,16)<1>         CUR_TEMP_B(4,3)<32;8,4>         { NoDDChk }
  98. // Source cols 12-15 (temp GRFs 6-7) -> output rows 12-15.
  99. mov (16)        SRC_YB(6,0)<1>          CUR_TEMP_B(6,0)<32;8,4>         { NoDDClr }
  100. mov (16)        SRC_YB(6,16)<1>         CUR_TEMP_B(6,1)<32;8,4>         { NoDDChk }
  101. mov (16)        SRC_YB(7,0)<1>          CUR_TEMP_B(6,2)<32;8,4>         { NoDDClr }
  102. mov (16)        SRC_YB(7,16)<1>         CUR_TEMP_B(6,3)<32;8,4>         { NoDDChk }
  103.  
  104. // Y is transposed.
  105.