Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * All Video Processing kernels
  3.  * Copyright © <2010>, Intel Corporation.
  4.  *
  5.  * Permission is hereby granted, free of charge, to any person obtaining a
  6.  * copy of this software and associated documentation files (the
  7.  * "Software"), to deal in the Software without restriction, including
  8.  * without limitation the rights to use, copy, modify, merge, publish,
  9.  * distribute, sub license, and/or sell copies of the Software, and to
  10.  * permit persons to whom the Software is furnished to do so, subject to
  11.  * the following conditions:
  12.  *
  13.  * The above copyright notice and this permission notice (including the
  14.  * next paragraph) shall be included in all copies or substantial portions
  15.  * of the Software.
  16.  *
  17.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  19.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  20.  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
  21.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  22.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  23.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  24.  *
  25.  * This file was originally licensed under the following license
  26.  *
  27.  *  Licensed under the Apache License, Version 2.0 (the "License");
  28.  *  you may not use this file except in compliance with the License.
  29.  *  You may obtain a copy of the License at
  30.  *
  31.  *      http://www.apache.org/licenses/LICENSE-2.0
  32.  *
  33.  *  Unless required by applicable law or agreed to in writing, software
  34.  *  distributed under the License is distributed on an "AS IS" BASIS,
  35.  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  36.  *  See the License for the specific language governing permissions and
  37.  *  limitations under the License.
  38.  *
  39.  */
  40.  
  41. #ifndef COMMON_INC
  42. #define COMMON_INC
  43.  
  44. // Module name: common.inc
  45. //
  46. // Common header file for all Video-Processing kernels
  47. //
  48.  
  49. .default_execution_size (16)    // Execution size used when an instruction does not give one explicitly
  50. .default_register_type  :ub     // Register type used when an operand has no type suffix
  51.  
  52. .reg_count_total        80      // 80 GRFs, matching the r0 - r79 layout in the GRF partition comment
  53. .reg_count_payload      4       // NOTE(review): the static parameter macros in this file span r1 - r4; confirm that is what this 4 counts
  54.  
  55.  
  56. //========== Common constants ==========
  57.  
  58. // Bit position constants: BITn is the mask with only bit n set (BITn == 1 << n)
  59. #define BIT0    0x01
  60. #define BIT1    0x02
  61. #define BIT2    0x04
  62. #define BIT3    0x08
  63. #define BIT4    0x10
  64. #define BIT5    0x20
  65. #define BIT6    0x40
  66. #define BIT7    0x80
  67. #define BIT8    0x0100
  68. #define BIT9    0x0200
  69. #define BIT10   0x0400
  70. #define BIT11   0x0800
  71. #define BIT12   0x1000
  72. #define BIT13   0x2000
  73. #define BIT14   0x4000
  74. #define BIT15   0x8000
  75. #define BIT16   0x00010000
  76. #define BIT17   0x00020000
  77. #define BIT18   0x00040000
  78. #define BIT19   0x00080000
  79. #define BIT20   0x00100000
  80. #define BIT21   0x00200000
  81. #define BIT22   0x00400000
  82. #define BIT23   0x00800000
  83. #define BIT24   0x01000000
  84. #define BIT25   0x02000000
  85. #define BIT26   0x04000000
  86. #define BIT27   0x08000000
  87. #define BIT28   0x10000000
  88. #define BIT29   0x20000000
  89. #define BIT30   0x40000000
  90. #define BIT31   0x80000000
  91.  
  92. #define nGRFWIB             32      // GRF register width in bytes
  93. #define nGRFWIW             16      // GRF register width in words  (nGRFWIB / 2)
  94. #define nGRFWID             8       // GRF register width in dwords (nGRFWIB / 4)
  95.  
  96. #define nTOP_FIELD          0       // Field selector value: top field
  97. #define nBOTTOM_FIELD       1       // Field selector value: bottom field
  98.  
  99. #define nPREVIOUS_FRAME     0       // Previous frame
  100. #define nCURRENT_FRAME      1       // Current frame
  101. #define nNEXT_FRAME         2       // Next frame
  102.  
  103. #ifdef GT
  104. // GT DI Kernel (placeholder: this branch currently defines nothing)
  105. #else // ILK
  106. // ILK DI Kernel (placeholder: this branch currently defines nothing)
  107. #endif
  108.  
  109. //===================================
  110.  
  111. //========== Macros ==========
  112. #define REGION(Width,HStride) <Width*HStride;Width,HStride> // Region definition when ExecSize = Width; e.g. REGION(16,1) expands to <16*1;16,1>
  113.  
  114. #define RegFile(a) a                        // Identity wrapper around the register-file argument
  115. #define REG(r,n) _REG(RegFile(r),n)         // Register n of file r at sub-register 0, e.g. REG(r,nTOP_Y) -> r10.0
  116. #define _REG(r,n) __REG(r,n)                // Extra level so macro arguments (nTOP_Y, ...) are expanded before pasting
  117. #define __REG(r,n) r##n.0                   // Pastes file and number together, followed by ".0"
  118. #define REG2(r,n,s) _REG2(RegFile(r),n,s)   // Same as REG but with an explicit sub-register s
  119. #define _REG2(r,n,s) __REG2(r,n,s)          // Extra level so macro arguments are expanded before pasting
  120. #define __REG2(r,n,s) r##n.##s              // Pastes file and number, and "." with the sub-register
  121.  
  122. #define dNULLREG     null<1>:d              // Null register as a destination, typed :d
  123. #define wNULLREG     null<1>:w              // Null register as a destination, typed :w
  124.    
  125. #define KERNEL_ID(kernel_ID)    mov dNULLREG kernel_ID:ud    // Embeds a kernel id: moves kernel_ID (as :ud) into the null register. Uses dNULLREG because this file defines only dNULLREG/wNULLREG, never a bare NULLREG
  126.  
  127.  
  128. #define NODDCLR                         // Expands to nothing (the { NoDDClr } variant is commented out below)
  129. #define NODDCLR_NODDCHK                 // Expands to nothing (the { NoDDClr, NoDDChk } variant is commented out below)
  130. #define NODDCHK                         // Expands to nothing (the { NoDDChk } variant is commented out below)
  131.  
  132. //#define NODDCLR                       { NoDDClr }
  133. //#define NODDCLR_NODDCHK       { NoDDClr, NoDDChk }
  134. //#define NODDCHK                               { NoDDChk }
  135.  
  136.  
  137. //========== Defines ====================
  138.  
  139.  
  140. //========== GRF partition ==========
  141. // r0 header            :   r0          (1 GRF)
  142. // Static parameters    :   r1 - r4     (4 GRFs)
  143. // Inline parameters    :   r5 - r6     (2 GRFs)
  144. // MSGSRC               :   r8          (1 GRF; r7 and r9 are not assigned in this file)
  145. // Top IO region        :   r10 - r33   (24 GRFS 8 for each component Y,U,V 16X8:w)
  146. // Free space           :   r34 - r55   (22 GRFS)
  147. // Bottom IO region     :   r56 - r79   (24 GRFS 8 for each component Y,U,V 16X8:w)
  148. //===================================
  149.  
  150.  
  151. //========== Static Parameters ==========
  152. // r1   (sub-register numbers count in units of the macro's type prefix: f/ud/d = dword, w/uw = word, ub = byte)
  153. #define fPROCAMP_C0             r1.0    // DWORD 0, Procamp constant C0 in :f
  154. #define wPROCAMP_C0             r1.0    // DWORD 0, Procamp constant C0 in :w
  155. #define NUMBER_0002                                                     r1.1            // DWORD 0, 0x0002 used in procamp for GT
  156. #define udCP_MessageFormat      r1.0    // DWORD 0, bits 2:3 of DWORD. (CE)
  157. #define udCP_StatePointer       r1.0    // DWORD 0, bits 31:5 of DWORD.(CE)
  158.  
  159. #define ubSRC_CF_OFFSET         r1.4    // DWORD 1, byte 0-2. SRC packed color format YUV offset in :ub
  160.  
  161. #define ubDEST_RGB_FORMAT        r1.8    // DWORD 2, byte 0. Dest RGB color format (0:ARGB FF:XRGB); same location as ubDEST_CF_OFFSET
  162. #define ubDEST_CF_OFFSET        r1.8    // DWORD 2, byte 0-2. Dest packed color format YUV offset in :ub
  163.  
  164. #define fPROCAMP_C1             r1.3    // DWORD 3, Procamp constant C1 in :f
  165. #define wPROCAMP_C1             r1.6    // DWORD 3, Procamp constant C1 in :w
  166. #define NUMBER_0100                                                     r1.7            // DWORD 3, 0x0100 used in procamp for GT
  167.  
  168. #define fPROCAMP_C2             r1.4    // DWORD 4, Procamp constant C2 in :f
  169. #define wPROCAMP_C2             r1.8    // DWORD 4, Procamp constant C2 in :w
  170.  
  171. #define uwSPITCH_DIV2           r1.10   // DWORD 5, byte 0-1. statistics surface pitch divided by 2
  172.  
  173. #define fVIDEO_STEP_Y           r1.6    // DWORD 6, :f, AVS normalized reciprocal of Y Scaling factor
  174. #define ubSTMM_SHIFT            r1.24   // DWORD 6, byte 0. Amount of right shift for the DI blending equation
  175. #define ubSTMM_MIN              r1.25   // DWORD 6, byte 1. Min STMM for DI blending equation
  176. #define ubSTMM_MAX              r1.26   // DWORD 6, byte 2. Max STMM for DI blending equation
  177. #define ubTFLD_FIRST            r1.27   // DWORD 6, byte 3. Field parity order
  178.  
  179. #define fPROCAMP_C5             r1.7    // DWORD 7, Procamp constant in :f. NOTE(review): macro is named C5 but the comment here said C3 -- confirm numbering
  180. #define wPROCAMP_C5             r1.14   // DWORD 7, Procamp constant in :w. NOTE(review): macro is named C5 but the comment here said C3 -- confirm numbering
  181.  
  182. // r2
  183. #define fPROCAMP_C3             r2.0    // DWORD 0, Procamp constant in :f. NOTE(review): macro is named C3 but the comment here said C4 -- confirm numbering
  184. #define wPROCAMP_C3             r2.0    // DWORD 0, Procamp constant in :w. NOTE(review): macro is named C3 but the comment here said C4 -- confirm numbering
  185.                    
  186. #define fCSC_C5                                 r2.2    // DWORD 2. WG+CSC constant C5
  187. #define wCSC_C5                                 r2.4    // DWORD 2. WG+CSC constant C5
  188.  
  189. #define fPROCAMP_C4             r2.3    // DWORD 3, Procamp constant in :f. NOTE(review): macro is named C4 but the comment here said C5 -- confirm numbering
  190. #define wPROCAMP_C4             r2.6    // DWORD 3, Procamp constant in :w. NOTE(review): macro is named C4 but the comment here said C5 -- confirm numbering
  191.  
  192. #define fCSC_C8                                 r2.4    // DWORD 4. WG+CSC constant C8
  193. #define wCSC_C8                                 r2.8    // DWORD 4. WG+CSC constant C8
  194. #define fCSC_C9                                 r2.7    // DWORD 7. WG+CSC constant C9
  195. #define wCSC_C9                                 r2.14   // DWORD 7. WG+CSC constant C9
  196.  
  197. // r3
  198. #define fCSC_C0                                 r3.0    // DWORD 0. WG+CSC constant C0
  199. #define wCSC_C0                                 r3.0    // DWORD 0. WG+CSC constant C0
  200.  
  201. #define fSCALING_STEP_RATIO     r3.1    // DWORD 1, = Alpha_X_Scaling_Step / Video_X_scaling_Step :f (blending); same location as fALPHA_STEP_X
  202. #define fALPHA_STEP_X           r3.1    // DWORD 1, = 1/Scale X, 0.5 = 2x, in :f (blending)
  203.  
  204. #define fALPHA_STEP_Y           r3.2    // DWORD 2, = 1/Scale Y, in :f
  205.  
  206. #define fCSC_C4                                 r3.3    // DWORD 3. WG+CSC constant C4
  207. #define wCSC_C4                                 r3.6    // DWORD 3. WG+CSC constant C4
  208. #define fCSC_C1                                 r3.4    // DWORD 4. WG+CSC constant C1
  209. #define wCSC_C1                                 r3.8    // DWORD 4. WG+CSC constant C1
  210.  
  211. #define wSRC_H_ORI_OFFSET       r3.10   // DWORD 5, bytes 0,1 :w
  212. #define wSRC_V_ORI_OFFSET       r3.11   // DWORD 5, bytes 2,3 :w
  213.  
  214. #define dCOLOR_PIXEL            r3.6    // DWORD 6. Color pixel for Colorfill; same location as fCSC_C2
  215.  
  216. #define fCSC_C2                                 r3.6    // DWORD 6. WG+CSC constant C2
  217. #define wCSC_C2                                 r3.12   // DWORD 6. WG+CSC constant C2
  218. #define fCSC_C3                                 r3.7    // DWORD 7. WG+CSC constant C3
  219. #define wCSC_C3                                 r3.14   // DWORD 7. WG+CSC constant C3
  220.  
  221. // r4
  222. #define fCSC_C6                                 r4.0    // DWORD 0. WG+CSC constant C6
  223. #define wCSC_C6                                 r4.0    // DWORD 0. WG+CSC constant C6
  224.  
  225. #define wFRAME_ENDX             r4.2    // DWORD 1, word 0. Horizontal end = Origin+Width (in pixels)(for multiple blocks)
  226. #define wNUM_BLKS               r4.3    // DWORD 1, word 1. Number of blocks to process (for multiple blocks)
  227.  
  228. #define wCOPY_ORIX              r4.5    // DWORD 2, word 1. A copy of X origin (for multiple blocks)
  229. #define uwNLAS_ENABLE           r4.4    // DWORD 2, bit 15, NLAS enable bit
  230.  
  231. #define fCSC_C7                                 r4.3    // DWORD 3. WG+CSC constant C7
  232. #define wCSC_C7                                 r4.6    // DWORD 3. WG+CSC constant C7
  233. #define fCSC_C10                                r4.4    // DWORD 4. WG+CSC constant C10
  234. #define wCSC_C10                                r4.8    // DWORD 4. WG+CSC constant C10
  235.  
  236. #define fFRAME_VID_ORIX         r4.5    // DWORD 5, Frame horizontal origin normalized for scale kernel
  237.  
  238. #define fFRAME_ALPHA_ORIX       r4.6    // DWORD 6. Normalized alpha horiz origin for the frame
  239.  
  240. #define fCSC_C11                                r4.7    // DWORD 7. WG+CSC constant C11
  241. #define wCSC_C11                                r4.14   // DWORD 7. WG+CSC constant C11
  242.  
  243. //========================================
  244.  
  245. //========== Inline parameters ===========
  246. // r5   (sub-register numbers count in units of the macro's type prefix: f/ud = dword, w/uw = word, ub = byte)
  247. #define wORIX                   r5.0    // DWORD 0, byte 0-1. :w, Destination Block Horizontal Origin in pel
  248. #define wORIY                   r5.1    // DWORD 0, byte 2-3. :w, Destination Block Vertical   Origin in pel
  249.  
  250. #define fSRC_VID_H_ORI          r5.1    // DWORD 1, :f, SRC Y horizontal origin normalized for scale kernel
  251.  
  252. #define fSRC_VID_V_ORI          r5.2    // DWORD 2, :f, SRC Y vertical origin normalized for scale kernel
  253.  
  254. #define fSRC_ALPHA_H_ORI        r5.3    // DWORD 3, :f, Normalized alpha horizontal origin
  255.  
  256. #define fSRC_ALPHA_V_ORI        r5.4    // DWORD 4, :f, Normalized alpha vertical origin
  257.  
  258. #define uwALPHA_MASK_X          r5.10   // DWORD 5, byte 0-1 :uw, H. alpha mask
  259. #define ubALPHA_MASK_Y          r5.22   // DWORD 5, byte 2.  :ub,V. alpha mask
  260. #define ubBLK_CNT_X             r5.23   // DWORD 5, byte 3,  :ub, Horizontal Block Count per thread
  261.  
  262. // The mask is used for each block. It is reloaded from r6 (see udBLOCK_MASK_2 below) for the last block.
  263. #define udBLOCK_MASK            r5.6    // DWORD 6
  264. #define uwBLOCK_MASK_H          r5.12   // DWORD 6, byte 0-1 :uw, Block horizontal mask used in non-DWord aligned kernels
  265. #define ubBLOCK_MASK_V          r5.26   // DWORD 6, byte 2   :ub, Block vertical mask used in non-DWord aligned kernels
  266. #define ubNUM_BLKS              r5.27   // DWORD 6, byte 3,  :ub, Total Block Count per thread
  267.  
  268. #define fVIDEO_STEP_X           r5.7    // DWORD 7. :f, AVS normalized reciprocal of X Scaling factor
  269.  
  270. // r6
  271. #define fVIDEO_STEP_DELTA       r6.0    // DWORD 0. :f, AVS normalized delta between 2 adjacent scaling steps (used for non-linear scaling)
  272.  
  273. // This mask is used for the last block (assumes only M*1 and 1*N block partitions are supported)
  274. #define udBLOCK_MASK_2            r6.1    // DWORD 1
  275. #define uwBLOCK_MASK_H_RIGHT      r6.2    // DWORD 1, byte 0-1 :uw, Block horizontal mask used in non-DWord aligned kernels (right)
  276. #define ubBLOCK_MASK_V_BOTTOM     r6.6    // DWORD 1, byte 2   :ub, Block vertical mask used in non-DWord aligned kernels
  277. #define uwBLOCK_MASK_H_MIDDLE     r6.4    // DWORD 2, byte 0-1 :uw, Block horizontal mask used in non-DWord aligned kernels. NOTE(review): macro says MIDDLE, old comment said (left) -- confirm
  278.  
  279.  
  280. //====================== Binding table =========================================
  281.  
  282. #if defined(DNDI)
  283.     // DNDI Surface Binding Table (every entry is commented out, so this branch defines nothing)
  284.     //#define nBI_SRC_CURR        0       // Current input frame surface
  285.     //#define nBI_SRC_PRIV        1       // Denoised previous input frame surface
  286.     //#define nBI_SRC_STAT        2       // Statistics input surface (STMM / Noise motion history)
  287.     //#define nBI_DEST_1ST        3       // 1st deinterlaced output frame surface
  288. //    #define nBI_DEST_YUV        3       // Dest frame YUV (for DN only)
  289.     //#define nBI_DEST_Y          3       // Dest frame Y (for DN only)
  290.     //#define nBI_DEST_2ND        4       // 2nd deinterlaced output frame surface
  291.     //#define nBI_DEST_DN_CURR    6       // Denoised current output frame surface
  292.     //#define nBI_DEST_STAT       7       // Statistics output surface (STMM / Noise motion history)
  293. //    #define nBI_DEST_U          8       // Dest frame U (for DN only)
  294. //    #define nBI_DEST_V          9       // Dest frame V (for DN only)
  295. //    #define nBI_SRC_U          10       // Src frame U (for DN only)
  296. //    #define nBI_SRC_V          11       // Src frame V (for DN only)
  297. //    #define nBI_SRC_UV         10       // Current src frame for UV
  298.    
  299. #endif
  300.  
  301. #if defined(INPUT_PL3)
  302.     // PL3 Surface Binding Table (every entry is commented out, so this branch defines nothing)
  303. //    #define nBI_SRC_ALPHA       0       // Alpha
  304. //    #define nBI_SRC_Y           1       // Current src frame
  305. //    #define nBI_SRC_U           2       // Current src frame
  306. //    #define nBI_SRC_V           3       // Current src frame
  307. //    #define nBI_DEST_Y         10       // Dest frame
  308. //    #define nBI_DEST_U         11       // Dest frame
  309. //    #define nBI_DEST_V         12       // Dest frame
  310. //    #define nBI_DEST_YUV        7       // Dest frame
  311. //    #define nBI_DEST_RGB        7       // same num as BI_DEST_YUV, never used at the same time
  312. #endif
  313.  
  314. #if defined(INPUT_PL2)
  315.     // PL2 Surface Binding Table (every entry is commented out, so this branch defines nothing)
  316. //    #define nBI_SRC_ALPHA       0       // Alpha
  317. //    #define nBI_SRC_Y           1       // Current src frame for Y + offseted UV
  318. //    #define nBI_SRC_YUV         1       // Current src frame for YUV in case of NV12_AVS
  319. //    #define nBI_SRC_UV          2       // Current src frame for UV
  320. //    #define nBI_DEST_YUV        7       // Current dest frame for Y + offseted UV
  321. //    #define nBI_DEST_RGB        7       // same num as BI_DEST_YUV, never used at the same time
  322. //    #define nBI_DEST_Y         10       // Dest frame
  323. //    #define nBI_DEST_U         11       // Dest frame
  324. //    #define nBI_DEST_V         12       // Dest frame
  325. #endif
  326.  
  327. #if defined(INPUT_PA) || defined(COLORFILL)
  328.     // Packed Surface Binding Table (every entry is commented out, so this branch defines nothing)
  329. //    #define nBI_SRC_ALPHA       0       // Alpha
  330. //    #define nBI_SRC_YUV         1       // Current src frame
  331. //    #define nBI_DEST_YUV        3       // Dest frame
  332. //    #define nBI_DEST_RGB        3       // same num as BI_DEST_YUV, never used at the same time
  333. #endif
  334.  
  335.  
  336. // Super binding table: unconditional surface binding-table indices (several names alias the same index)
  337. #define nBI_ALPHA_SRC                   0
  338. #define nBI_CURRENT_SRC_YUV             1
  339. #define nBI_FIELD_COPY_SRC_1_YUV        1
  340. #define nBI_CURRENT_SRC_Y               1
  341. #define nBI_FIELD_COPY_SRC_1_Y          1
  342. #define nBI_CURRENT_SRC_RGB             1
  343. #define nBI_CURRENT_SRC_UV              2
  344. #define nBI_FIELD_COPY_SRC_1_UV         2
  345. #define nBI_CURRENT_SRC_U               2
  346. #define nBI_FIELD_COPY_SRC_1_U          2
  347. #define nBI_CURRENT_SRC_V               3
  348. #define nBI_FIELD_COPY_SRC_1_V          3
  349. #define nBI_TEMPORAL_REFERENCE_YUV      4
  350. #define nBI_FIELD_COPY_SRC_2_YUV        4
  351. #define nBI_TEMPORAL_REFERENCE_Y        4
  352. #define nBI_FIELD_COPY_SRC_2_Y          4
  353. #define nBI_CURRENT_SRC_YUV_HW_DI       4
  354. #define nBI_TEMPORAL_REFERENCE_UV       5
  355. #define nBI_FIELD_COPY_SRC_2_UV         5
  356. #define nBI_TEMPORAL_REFERENCE_U        5
  357. #define nBI_FIELD_COPY_SRC_2_U          5
  358. #define nBI_DENOISED_PREV_HW_DI         5
  359. #define nBI_TEMPORAL_REFERENCE_V        6
  360. #define nBI_FIELD_COPY_SRC_2_V          6
  361. #define nBI_STMM_HISTORY                6
  362. #define nBI_DESTINATION_YUV             7
  363. #define nBI_DESTINATION_RGB             7
  364. #define nBI_DESTINATION_Y               7
  365. #define nBI_DESTINATION_UV              8
  366. #define nBI_DESTINATION_U               8
  367. #define nBI_DESTINATION_V               9
  368. #define nBI_DESTINATION_1_YUV           10
  369. #define nBI_DESTINATION_1_Y             10
  370. #define nBI_DESTINATION_1_UV            11
  371. #define nBI_DESTINATION_1_U             11
  372. #define nBI_DESTINATION_1_V             12
  373. #define nBI_DESTINATION_2_YUV           13
  374. #define nBI_DESTINATION_2_Y             13
  375. #define nBI_DESTINATION_2_UV            14
  376. #define nBI_DESTINATION_2_U             14
  377. #define nBI_DESTINATION_2_V             15
  378. #define nBI_STMM_HISTORY_OUTPUT         20
  379. #define nBI_TEMPORAL_REFERENCE_YUV_PDI  21
  380. #define nBI_TEMPORAL_REFERENCE_Y_PDI    21
  381. #define nBI_TEMPORAL_REFERENCE_UV_PDI   22
  382. #define nBI_TEMPORAL_REFERENCE_U_PDI    22
  383. #define nBI_TEMPORAL_REFERENCE_V_PDI    23
  384. #define nBI_SUBVIDEO_YUV                26
  385. #define nBI_SUBVIDEO_Y                  26
  386. #define nBI_SUBVIDEO_UV                 27
  387. #define nBI_SUBVIDEO_U                  27
  388. #define nBI_SUBVIDEO_V                  28
  389. #define nBI_SUBPICTURE_YUV              29
  390. #define nBI_SUBPICTURE_P8               29
  391. #define nBI_SUBPICTURE_A8               30
  392. #define nBI_GRAPHIC_YUV                 31
  393. #define nBI_GRAPHIC_P8                  31
  394. #define nBI_GRAPHIC_A8                  32
  395.  
  396.  
  397.  
  398. //========== Planar Sampler State Table Index ==========
  399. #define nSI_SRC_ALPHA           0x000   // Sampler State for Alpha
  400.  
  401. // Sampler Index for AVS/IEF messages (each value is 4x the matching SIMD16 value below)
  402. #define nSI_SRC_Y               0x400   // Sampler State for Y
  403. #define nSI_SRC_U               0x800   // Sampler State for U
  404. #define nSI_SRC_V               0xC00   // Sampler State for V
  405. #define nSI_SRC_UV              0x800   // For NV12 surfaces (same value as nSI_SRC_U)
  406. #define nSI_SRC_YUV             0x400   // For Packed surfaces (same value as nSI_SRC_Y)
  407. #define nSI_SRC_RGB             0x400   // For ARGB surfaces (same value as nSI_SRC_Y)
  408.  
  409. // Sampler Index for SIMD16 sampler messages
  410. #define nSI_SRC_SIMD16_Y        0x100   // Sampler State for Y
  411. #define nSI_SRC_SIMD16_U        0x200   // Sampler State for U
  412. #define nSI_SRC_SIMD16_V        0x300   // Sampler State for V
  413. #define nSI_SRC_SIMD16_UV       0x200   // For NV12 surfaces (same value as nSI_SRC_SIMD16_U)
  414. #define nSI_SRC_SIMD16_YUV      0x100   // For Packed surfaces (same value as nSI_SRC_SIMD16_Y)
  415. #define nSI_SRC_SIMD16_RGB      0x100   // For ARGB surfaces (same value as nSI_SRC_SIMD16_Y)
  416.  
  417.  
  418.  
  419. // Common Registers (sub-registers of address register a0)
  420. #define pCF_Y_OFFSET            a0.4    // Address register holding Y offset
  421. #define pCF_U_OFFSET            a0.5    // Address register holding U offset
  422. #define pCF_V_OFFSET            a0.6    // Address register holding V offset
  423.  
  424. // #define YUV_ORI             ORIX    // Used by writing packed data to dport
  425.  
  426.  
  427. //================= Message Payload Header fields ==============================
  428. #define IDP     r0.2:ud     // Interface Descriptor Pointer (DWORD 2 of the r0 header)
  429.  
  430. //================= Common Message Descriptor  TBD add common load and save =====
  431. // Message descriptors for dataport media block write (nDPMW_MSGDSC) and read (nDPMR_MSGDSC); the bit breakdowns below decode the write value
  432. #ifdef GT
  433.         // Message Descriptors
  434.                 //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
  435.                 //                  1 (header present 1) 0 0 1010 (media block write) 00000
  436.                 //                  00000000 (binding table index - set later)
  437.                 //                = 0x02094000
  438.         #define nDPMW_MSGDSC      0x02094000  // Data Port Media Block Write Message Descriptor
  439.         #define nDPMR_MSGDSC      0x02098000  // Data Port Media Block Read Message Descriptor
  440.         // TBD
  441. #else // ILK
  442.         // Message Descriptors
  443.                 //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
  444.                 //                  1 (header present 1) 000 0 010 (media block write) 0000
  445.                 //                  00000000 (binding table index - set later)
  446.                 //                = 0x02082000
  447.         #define nDPMW_MSGDSC      0x02082000  // Data Port Media Block Write Message Descriptor
  448.         #define nDPMR_MSGDSC      0x0208A000  // Data Port Media Block Read Message Descriptor
  449. #endif
  450.  
  451. // Message Length defines (value = length << 25)
  452. #define nMSGLEN_1      0x02000000 // Message Length of 1 GRF for Send
  453. #define nMSGLEN_2      0x04000000 // Message Length of 2 GRF for Send
  454. #define nMSGLEN_4      0x08000000 // Message Length of 4 GRF for Send
  455. #define nMSGLEN_8      0x10000000 // Message Length of 8 GRF for Send
  456.  
  457. // Response Length defines (value = length << 20)
  458. #define nRESLEN_1      0x00100000 // Message Response Length of 1  GRF from Send
  459. #define nRESLEN_2      0x00200000 // Message Response Length of 2  GRF from Send
  460. #define nRESLEN_3      0x00300000 // Message Response Length of 3  GRF from Send
  461. #define nRESLEN_4      0x00400000 // Message Response Length of 4  GRF from Send
  462. #define nRESLEN_5      0x00500000 // Message Response Length of 5  GRF from Send
  463. #define nRESLEN_8      0x00800000 // Message Response Length of 8  GRF from Send
  464. #define nRESLEN_9      0x00900000 // Message Response Length of 9  GRF from Send
  465. #define nRESLEN_11     0x00B00000 // Message Response Length of 11 GRF from Send
  466. #define nRESLEN_12     0x00C00000 // Message Response Length of 12 GRF from Send
  467. #define nRESLEN_16     0x01000000 // Message Response Length of 16 GRF from Send
  468.  
  469. // Block Width and Height Size defines (width value = width - 1; height value = (height - 1) << 16)
  470. #define nBLOCK_WIDTH_4   0x00000003      // Block Width  4
  471. #define nBLOCK_WIDTH_5   0x00000004      // Block Width  5
  472. #define nBLOCK_WIDTH_8   0x00000007      // Block Width  8
  473. #define nBLOCK_WIDTH_9   0x00000008      // Block Width  9
  474. #define nBLOCK_WIDTH_12  0x0000000B      // Block Width  12
  475. #define nBLOCK_WIDTH_16  0x0000000F      // Block Width  16
  476. #define nBLOCK_WIDTH_20  0x00000013      // Block Width  20
  477. #define nBLOCK_WIDTH_32  0x0000001F      // Block Width  32
  478. #define nBLOCK_HEIGHT_1  0x00000000      // Block Height 1
  479. #define nBLOCK_HEIGHT_2  0x00010000      // Block Height 2
  480. #define nBLOCK_HEIGHT_4  0x00030000      // Block Height 4
  481. #define nBLOCK_HEIGHT_5  0x00040000      // Block Height 5
  482. #define nBLOCK_HEIGHT_8  0x00070000      // Block Height 8
  483.  
  484. // Extended Message Descriptors (send target selectors)
  485. #define nEXTENDED_MATH      0x1
  486. #define nSMPL_ENGINE        0x2
  487. #define nMESSAGE_GATEWAY    0x3
  488. #define nDATAPORT_READ      0x4
  489. #define nDATAPORT_WRITE     0x5
  490. #define nURB                0x6
  491. #define nTS_EOT             0x27    // with End-Of-Thread bit ON (0x27 = 0x20 | 0x07; presumably 0x20 is the EOT bit -- TODO confirm)
  492.  
  493. // Common message descriptors:
  494. #ifdef GT
  495.         #define nEOT_MSGDSC       0x02000010  // End of Thread Message Descriptor
  496.         #define IF_NULL                                         null:uw null:uw null:uw         //for different if instructions on ILK and Gen6
  497. #else //ILK
  498.         #define nEOT_MSGDSC       0x02000000  // End of Thread Message Descriptor
  499.         #define IF_NULL                         // Expands to nothing on ILK
  500. #endif
  501.  
  502.  
  503. //===================== Math Function Control (0x9 is not assigned here) ========
  504. #define mfcINV                  0x1     // reciprocal
  505. #define mfcLOG                  0x2     // log
  506. #define mfcEXP                  0x3     // exponent
  507. #define mfcSQRT                 0x4     // square root
  508. #define mfcRSQ                  0x5     // reciprocal square root
  509. #define mfcSIN                  0x6     // sine (in radians)
  510. #define mfcCOS                  0x7     // cosine (in radians)
  511. #define mfcSINCOS               0x8     // dst0 = sin of src0, dst1 = cosine of src0 (in radians) - GT+ ONLY
  512. #define mfcPOW                  0xA     // abs(src0) raised to the src1 power
  513. #define mfcINT_DIV_QR           0xB     // return quotient and remainder
  514. #define mfcINT_DIV_Q            0xC     // return quotient
  515. #define mfcINT_DIV_R            0xD     // return remainder
  516.  
  517.  
  518. //=================== Message related registers =================================
  519.  
  520. #ifdef GT
  521.         #define udDUMMY_NULL                    // Expands to nothing on GT
  522. #else   // _ILK
  523.         #define udDUMMY_NULL    null:ud         // Used in send inst as src0
  524. #endif
  525.  
  526.  
  527. //----------- Message Registers (several names alias the same m register) ------------
  528. #define mMSGHDR      m1     // Message Payload Header
  529. #define mMSGHDRY     m1     // Message Payload Header register for Y data
  530. #define mMSGHDRU     m2     // Message Payload Header register for U data
  531. #define mMSGHDRV     m3     // Message Payload Header register for V data
  532. #define mMSGHDRYA    m4     // Second Message Payload Header register for Y data
  533. #define mMSGHDRH     m5     // Message Payload Header register for motion history
  534. #define mMSGHDRY1    m1     // Message Payload Header register for first  Y data
  535. #define mMSGHDRY2    m2     // Message Payload Header register for second Y data
  536. #define mMSGHDRY3    m3     // Message Payload Header register for third  Y data
  537. #define mMSGHDRY4    m4     // Message Payload Header register for fourth Y data
  538. #define mMSGHDRY5    m5     // Message Payload Header register for fifth Y data
  539. #define mMSGHDRY6    m6     // Message Payload Header register for sixth Y data
  540. #define mMSGHDR_EOT  m15    // Dummy Message Register for EOT
  541.  
  542. #define rMSGSRC     r8      // Message source register
  543. #define pMSGDSC     a0.0:ud // Message Descriptor register (type DWORD)
  544.  
  545. #define udMH_ORI    rMSGSRC.0   // Data Port Media Block R/W message header block offset (same location as udMH_ORIX)
  546. #define udMH_ORIX   rMSGSRC.0   // Data Port Media Block R/W message header X offset
  547. #define udMH_ORIY   rMSGSRC.1   // Data Port Media Block R/W message header Y offset
  548. #define udMH_SIZE   rMSGSRC.2   // Data Port Media Block R/W message header block width & height
  549.  
  550. //  M2 - M9 for message data payload: four typed views that all start at m2
  551. .declare    mubMSGPAYLOAD  Base=m2 ElementSize=1 SrcRegion=REGION(16,1) Type=ub
  552. .declare    muwMSGPAYLOAD  Base=m2 ElementSize=2 SrcRegion=REGION(16,1) Type=uw
  553. .declare    mudMSGPAYLOAD  Base=m2 ElementSize=4 SrcRegion=REGION(8,1) Type=ud
  554. .declare    mfMSGPAYLOAD   Base=m2 ElementSize=4 SrcRegion=REGION(8,1) Type=f
  555.  
  556. //=================== End of thread instruction: copy r0 into mMSGHDR_EOT, then send to nTS_EOT with nEOT_MSGDSC. The literal \n in the macro body is presumably turned into a line break by a later build step -- TODO confirm ===
  557. #ifdef GT
  558.         #define END_THREAD          mov  (8) mMSGHDR_EOT<1>:ud    r0.0<8;8,1>:ud \n\
  559.                                                                 send (1) null<1>:d mMSGHDR_EOT nTS_EOT nEOT_MSGDSC
  560. #else   // ILK                          This should be changed to 1 instruction; I have tested it and it works - vK
  561.         #define END_THREAD          mov  (8) mMSGHDR_EOT<1>:ud    r0.0<8;8,1>:ud \n\
  562.                                 send (1) dNULLREG mMSGHDR_EOT udDUMMY_NULL  nTS_EOT nEOT_MSGDSC:ud
  563. #endif
  564.  
  565.  
  566. //=======================================================================
  567. // Region declarations for SRC and DEST as TOP and BOT
  568.  
  569. // Common I/O regions
  570. #define nREGION_1       1
  571. #define nREGION_2       2
  572.  
  573. //*** These region base GRFs are fixed regardless planar/packed, and data alignment.
  574. //*** Each kernel is responsible to select the correct region declaration below.
  575. //*** YUV regions are not necessarily next to each other.
  576. #define nTOP_Y          10      // r10 - r17 (8 GRFs)
  577. #define nTOP_U          18      // r18 - r25 (8 GRFs)
  578. #define nTOP_V          26      // r26 - r33 (8 GRFs)
  579.  
  580. #define nBOT_Y          56      // r56 - r63 (8 GRFs)
  581. #define nBOT_U          64      // r64 - r71 (8 GRFs)
  582. #define nBOT_V          72      // r72 - r79 (8 GRFs)
  583.  
  584. // Define temp space for any usages: nTEMPn is GRF number 34 + n (not every n is defined)
  585. #define nTEMP0          34
  586. #define nTEMP1          35
  587. #define nTEMP2          36
  588. #define nTEMP3          37
  589. #define nTEMP4          38
  590. #define nTEMP5          39
  591. #define nTEMP6          40
  592. #define nTEMP7          41
  593. #define nTEMP8          42
  594. #define nTEMP10         44
  595. #define nTEMP12         46
  596. #define nTEMP14         48
  597. #define nTEMP16         50
  598. #define nTEMP17         51
  599. #define nTEMP18         52
  600.  
  601. #define nTEMP24                 58      // NOTE(review): r58 lies inside the nBOT_Y range r56 - r63 declared above -- confirm users never need both at once
  602.  
  603. // Common region 1 (TOP): typed views over the nTOP_Y / nTOP_U / nTOP_V base GRFs; the ub2* and ub4* views use a horizontal stride of 2 and 4
  604. .declare ubTOP_Y        Base=REG(r,nTOP_Y) ElementSize=1 SrcRegion=REGION(16,1) DstRegion=<1> Type=ub
  605. .declare ubTOP_U        Base=REG(r,nTOP_U) ElementSize=1 SrcRegion=REGION(8,1) DstRegion=<1> Type=ub
  606. .declare ubTOP_V        Base=REG(r,nTOP_V) ElementSize=1 SrcRegion=REGION(8,1) DstRegion=<1> Type=ub
  607.                        
  608. .declare uwTOP_Y        Base=REG(r,nTOP_Y) ElementSize=2 SrcRegion=REGION(16,1) DstRegion=<1> Type=uw
  609. .declare uwTOP_U        Base=REG(r,nTOP_U) ElementSize=2 SrcRegion=REGION(8,1) DstRegion=<1> Type=uw
  610. .declare uwTOP_V        Base=REG(r,nTOP_V) ElementSize=2 SrcRegion=REGION(8,1) DstRegion=<1> Type=uw
  611. .declare ub2TOP_Y       Base=REG(r,nTOP_Y) ElementSize=1 SrcRegion=REGION(16,2) DstRegion=<1> Type=ub
  612. .declare ub2TOP_U       Base=REG(r,nTOP_U) ElementSize=1 SrcRegion=REGION(8,2) DstRegion=<1> Type=ub
  613. .declare ub2TOP_V       Base=REG(r,nTOP_V) ElementSize=1 SrcRegion=REGION(8,2) DstRegion=<1> Type=ub
  614.  
  615. .declare ub4TOP_Y       Base=REG(r,nTOP_Y) ElementSize=1 SrcRegion=REGION(8,4) Type=ub
  616. .declare ub4TOP_U       Base=REG(r,nTOP_U) ElementSize=1 SrcRegion=REGION(8,4) Type=ub
  617. .declare ub4TOP_V       Base=REG(r,nTOP_V) ElementSize=1 SrcRegion=REGION(8,4) Type=ub
  618.  
  619. .declare ubTOP_ARGB     Base=REG(r,nTOP_Y) ElementSize=1 SrcRegion=REGION(8,4) Type=ub
  620.  
  621. // Used by "send" instruction (dword views of the same TOP bases)
  622. .declare udTOP_Y_IO     Base=REG(r,nTOP_Y) ElementSize=4 SrcRegion=REGION(8,1) Type=ud
  623. .declare udTOP_U_IO     Base=REG(r,nTOP_U) ElementSize=4 SrcRegion=REGION(8,1) Type=ud
  624. .declare udTOP_V_IO     Base=REG(r,nTOP_V) ElementSize=4 SrcRegion=REGION(8,1) Type=ud
  625.  
  626. // Common region 2 (BOT): the same views over the nBOT_Y / nBOT_U / nBOT_V base GRFs, except that no ub4BOT_* views are declared
  627. .declare ubBOT_Y        Base=REG(r,nBOT_Y) ElementSize=1 SrcRegion=REGION(16,1) DstRegion=<1> Type=ub
  628. .declare ubBOT_U        Base=REG(r,nBOT_U) ElementSize=1 SrcRegion=REGION(8,1) DstRegion=<1> Type=ub
  629. .declare ubBOT_V        Base=REG(r,nBOT_V) ElementSize=1 SrcRegion=REGION(8,1) DstRegion=<1> Type=ub
  630.                        
  631. .declare uwBOT_Y        Base=REG(r,nBOT_Y) ElementSize=2 SrcRegion=REGION(16,1) DstRegion=<1> Type=uw
  632. .declare uwBOT_U        Base=REG(r,nBOT_U) ElementSize=2 SrcRegion=REGION(8,1) DstRegion=<1> Type=uw
  633. .declare uwBOT_V        Base=REG(r,nBOT_V) ElementSize=2 SrcRegion=REGION(8,1) DstRegion=<1> Type=uw
  634. .declare ub2BOT_Y       Base=REG(r,nBOT_Y) ElementSize=1 SrcRegion=REGION(16,2) DstRegion=<1> Type=ub
  635. .declare ub2BOT_U       Base=REG(r,nBOT_U) ElementSize=1 SrcRegion=REGION(8,2) DstRegion=<1> Type=ub
  636. .declare ub2BOT_V       Base=REG(r,nBOT_V) ElementSize=1 SrcRegion=REGION(8,2) DstRegion=<1> Type=ub
  637.  
  638. .declare ubBOT_ARGB     Base=REG(r,nBOT_Y) ElementSize=1 SrcRegion=REGION(8,4) Type=ub
  639.  
  640. // Used by "send" instruction (dword views of the same BOT bases)
  641. .declare udBOT_Y_IO     Base=REG(r,nBOT_Y) ElementSize=4 SrcRegion=REGION(8,1) Type=ud
  642. .declare udBOT_U_IO     Base=REG(r,nBOT_U) ElementSize=4 SrcRegion=REGION(8,1) Type=ud
  643. .declare udBOT_V_IO     Base=REG(r,nBOT_V) ElementSize=4 SrcRegion=REGION(8,1) Type=ud
  644.  
  645. // End of common.inc
  646.  
  647. #endif    // COMMON_INC
  648.