Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * All Video Processing kernels
  3.  * Copyright © <2010>, Intel Corporation.
  4.  *
  5.  * This program is licensed under the terms and conditions of the
  6.  * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at
  7.  * http://www.opensource.org/licenses/eclipse-1.0.php.
  8.  *
  9.  */
  10.  
  11. // Module name: RGB16x8_Save_Y416.asm
  12. //
  13. // Save packed ARGB 444 frame data block of size 16x8
  14. //
  15. // To save 16x8 block (128x8 byte layout for ARGB 16bit per component) we need 4 send instructions
  16. //  -----------------
  17. //  | 1 | 2 | 3 | 4 |
  18. //  -----------------
  19.  
  20. #include "RGB16x8_Save_RGB.inc"
  21.  // Build the data-port message source in rMSGSRC (dwords .0, .1, .2) and copy it into the message header mMSGHDR.
  22.     shl (1) rMSGSRC.0<1>:d      wORIX<0;1,0>:w            3:w  { NoDDClr }             // X origin = wORIX << 3 (scaled by 8; a 16-pixel row is laid out as 128 bytes, see file header)
  23.     mov (1) rMSGSRC.1<1>:d      wORIY<0;1,0>:w                 { NoDDClr, NoDDChk }    // Y origin = wORIY, copied unscaled
  24.     mov (1) rMSGSRC.2<1>:ud     nDPW_BLOCK_SIZE_ARGB:ud        { NoDDChk }             // Block width and height for one write (32x8 per the original note; the constant is defined outside this file)
  25.  // NOTE(review): the three writes above hit different dwords of rMSGSRC; the NoDDClr/NoDDChk options presumably relax dependency tracking between them - confirm against the ISA reference before reordering.
  26.     mov (8) mMSGHDR<1>:ud       rMSGSRC<8;8,1>:ud                                      // Copy all 8 dwords of rMSGSRC into the message header
  27. /*      Not needed for validation kernels for now -vK
  28. //Use the mask to determine which pixels shouldn't be over-written
  29.    and (1)        acc0.0<1>:ud udBLOCK_MASK<0;1,0>:ud   0x00FFFFFF:ud
  30.    cmp.ge.f0.0(1) dNULLREG     acc0.0<0;1,0>:ud         0x00FFFFFF:ud   //Check if all pixels in the block need to be modified
  31.    (f0.0)  jmpi WriteARGBToDataPort
  32.  
  33.    //If mask is not all 1's, then load the entire 64x8 block
  34.     //so that only those bytes may be modified that need to be (using the mask)
  35.  
  36.     // Load first block 16x8 packed ARGB 444 ---------------------------------------
  37.     or (1)         acc0.0<1>:ud udBLOCK_MASK<0;1,0>:ud   0xFF00FF00:ud   //Check first block
  38.     cmp.e.f0.0 (1) dNULLREG     acc0.0<0;1,0>:ud         0xFFFFFFFF:ud  
  39.     (f0.0)  jmpi SkipFirstBlockMerge                                     //If full mask then skip this block
  40.  
  41.     send (8) udSRC_ARGB(0)<1>   mMSGHDR     udDUMMY_NULL    nDATAPORT_READ    nDPMR_MSGDSC+nDPR_MSG_SIZE_ARGB+nBI_DESTINATION_RGB:ud
  42.     mov  (8) mMSGHDR<1>:ud      rMSGSRC<8;8,1>:ud
  43.  
  44.     //Merge the data
  45.     mov (1)           f0.0:uw             ubBLOCK_MASK_V:ub    //Load the mask on flag reg
  46.     (f0.0)  mov (8)   rMASK_TEMP<1>:uw    uwBLOCK_MASK_H:uw    //use sel instruction - vK
  47.     (-f0.0) mov (8)   rMASK_TEMP<1>:uw    0:uw
  48.  
  49.     $for(0, 0; <nY_NUM_OF_ROWS; 1, 2) {               //take care of the lines in the block, they are different in the src and dest
  50.         mov (1)             f0.1:uw                   uwMASK_TEMP(0,%1)<0;1,0>
  51.         (-f0.1) mov (8)     udDEST_ARGB(%2)<1>        udSRC_ARGB(%1)
  52.     }
  53.  
  54. SkipFirstBlockMerge:
  55.     // Load second block 16x8 packed ARGB 444 ---------------------------------------
  56.     or (1)         acc0.0<1>:ud udBLOCK_MASK<0;1,0>:ud   0xFF0000FF:ud   //Check second block
  57.     cmp.e.f0.0 (1) dNULLREG     acc0.0<0;1,0>:ud         0xFFFFFFFF:ud  
  58.     (f0.0)  jmpi WriteARGBToDataPort                                     //If full mask then skip this block
  59.  
  60.     add  (1) mMSGHDR.0<1>:d     rMSGSRC.0<0;1,0>:d       32:d     // Point to 2nd part
  61.     send (8) udSRC_ARGB(0)<1>   mMSGHDR    udDUMMY_NULL  nDATAPORT_READ    nDPMR_MSGDSC+nDPR_MSG_SIZE_ARGB+nBI_DESTINATION_RGB:ud
  62.     mov  (8) mMSGHDR<1>:ud      rMSGSRC<8;8,1>:ud                 // Point to 1st part again
  63.  
  64.     //Merge the data
  65.     mov (1)           f0.0:uw             ubBLOCK_MASK_V:ub    //Load the mask on flag reg
  66.     (f0.0)  shr (8)   rMASK_TEMP<1>:uw    uwBLOCK_MASK_H:uw    8:uw    //load the mask for second block
  67.     (-f0.0) mov (8)   rMASK_TEMP<1>:uw    0:uw
  68.  
  69.     $for(0, 1; <nY_NUM_OF_ROWS; 1, 2) {               //take care of the lines in the block, they are different in the src and dest
  70.         mov (1)             f0.1:uw                   uwMASK_TEMP(0,%1)<0;1,0>
  71.         (-f0.1) mov (8)     udDEST_ARGB(%2)<1>        udSRC_ARGB(%1)
  72.     }
  73. */
  74. WriteARGBToDataPort:
  75.     // Move packed data to MRF and output. The block is written with four sends, one per side-by-side part (see diagram in the file header).
  76.     // For part k (0..3) each loop copies udDEST_ARGB(%1*4+k) into mudMSGPAYLOAD(%1), %1 counting from 0 while below nY_NUM_OF_ROWS (assuming $for(start; <limit; step) semantics).
  77.     //Write 1st 4X8 pixels (uses the message header prepared before this label, X origin unchanged)
  78.     $for(0; <nY_NUM_OF_ROWS; 1) {
  79.         mov (8) mudMSGPAYLOAD(%1)<1>       udDEST_ARGB(%1*4)
  80.     }
  81.     send (8)    dNULLREG    mMSGHDR   udDUMMY_NULL    nDATAPORT_WRITE    nDPMW_MSGDSC+nDPW_MSG_SIZE_ARGB+nBI_DESTINATION_RGB:ud    // Data-port write; destination is the null register
  82.  
  83.         //Write 2nd 4X8 pixels: rebuild the header from rMSGSRC, then offset the X origin
  84.     mov  (8)    mMSGHDR<1>:ud         rMSGSRC<8;8,1>:ud
  85.     add  (1)    mMSGHDR.0<1>:d        rMSGSRC.0<0;1,0>:d       32:d   // Point to 2nd part (X origin + 32)
  86.     $for(0; <nY_NUM_OF_ROWS; 1) {
  87.         mov (8) mudMSGPAYLOAD(%1)<1>       udDEST_ARGB(%1*4+1)
  88.     }
  89.     send (8)    dNULLREG    mMSGHDR   udDUMMY_NULL    nDATAPORT_WRITE    nDPMW_MSGDSC+nDPW_MSG_SIZE_ARGB+nBI_DESTINATION_RGB:ud    // Data-port write; destination is the null register
  90.  
  91.         //Write 3rd 4X8 pixels: rebuild the header from rMSGSRC, then offset the X origin
  92.     mov  (8)    mMSGHDR<1>:ud         rMSGSRC<8;8,1>:ud
  93.     add  (1)    mMSGHDR.0<1>:d        rMSGSRC.0<0;1,0>:d       64:d   // Point to 3rd part (X origin + 64)
  94.     $for(0; <nY_NUM_OF_ROWS; 1) {
  95.         mov (8) mudMSGPAYLOAD(%1)<1>       udDEST_ARGB(%1*4+2)
  96.     }
  97.     send (8)    dNULLREG    mMSGHDR   udDUMMY_NULL    nDATAPORT_WRITE    nDPMW_MSGDSC+nDPW_MSG_SIZE_ARGB+nBI_DESTINATION_RGB:ud    // Data-port write; destination is the null register
  98.  
  99.         //Write 4th 4X8 pixels: rebuild the header from rMSGSRC, then offset the X origin
  100.     mov  (8)    mMSGHDR<1>:ud         rMSGSRC<8;8,1>:ud
  101.     add  (1)    mMSGHDR.0<1>:d        rMSGSRC.0<0;1,0>:d       96:d   // Point to 4th part (X origin + 96)
  102.     $for(0; <nY_NUM_OF_ROWS; 1) {
  103.         mov (8) mudMSGPAYLOAD(%1)<1>       udDEST_ARGB(%1*4+3)
  104.     }
  105.     send (8)    dNULLREG    mMSGHDR   udDUMMY_NULL    nDATAPORT_WRITE    nDPMW_MSGDSC+nDPW_MSG_SIZE_ARGB+nBI_DESTINATION_RGB:ud    // Data-port write; destination is the null register
  106.  
  107. // End of RGB16x8_Save_Y416
  108.