;******************************************************************************
;* MMX optimized DSP utils
;* Copyright (c) 2008 Loren Merritt
;* Copyright (c) 2003-2013 Michael Niedermayer
;* Copyright (c) 2013 Daniel Kang
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

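; x86util.asm pulls in FFmpeg's x86inc abstraction layer, which provides the
; cglobal/INIT_MMX macros and the size-agnostic mova/movh mnemonics used below.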
%include "libavutil/x86/x86util.asm"

SECTION .text

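; Store helpers shared by the put/avg variants below:
;   op_puth/op_put - plain store of the result (movh writes the low half of an
;                    mmreg, mova the full register)
;   op_avgh/op_avg - average the result with the bytes already in dst (pavgb
;                    rounds up), used for the *_avg prediction functions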
%macro op_avgh 3
    movh   %3, %2
    pavgb  %1, %3
    movh   %2, %1
%endmacro

%macro op_avg 2
    pavgb  %1, %2
    mova   %2, %1
%endmacro

%macro op_puth 2-3
    movh   %2, %1
%endmacro

%macro op_put 2
    mova   %2, %1
%endmacro

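; The pixelsN_l2 functions blend two sources into dst: each output byte is the
; rounded-up average of src1 and src2 ("put"), or that average blended once
; more with the bytes already in dst ("avg").  dst and src1 use the strides
; passed as arguments; src2 is read as a packed buffer whose stride equals the
; block width (4, 8 or 16).  Roughly equivalent scalar C for the 4-wide put
; case, as an illustrative sketch only (the name is not part of FFmpeg's
; sources):
;
;     void put_pixels4_l2_c(uint8_t *dst, const uint8_t *src1,
;                           const uint8_t *src2, int dstStride,
;                           int src1Stride, int h)
;     {
;         for (int i = 0; i < h; i++) {
;             for (int j = 0; j < 4; j++)
;                 dst[j] = (src1[j] + src2[j] + 1) >> 1;
;             dst  += dstStride;
;             src1 += src1Stride;
;             src2 += 4;
;         }
;     }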
; void pixels4_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
%macro PIXELS4_L2 1
%define OP op_%1h
cglobal %1_pixels4_l2, 6,6
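    ; sign-extend the 32-bit stride arguments where needed; movsxdifnidn is a
    ; no-op when source and destination are the same register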
    movsxdifnidn r3, r3d
    movsxdifnidn r4, r4d
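    ; if h is odd, blend one row up front so the unrolled loop below can
    ; consume the remaining rows four at a time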
    test        r5d, 1
    je        .loop
    movd         m0, [r1]
    movd         m1, [r2]
    add          r1, r4
    add          r2, 4
    pavgb        m0, m1
    OP           m0, [r0], m3
    add          r0, r3
    dec         r5d
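    ; main loop: four rows per iteration, src2 read at its fixed stride of 4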
.loop:
    mova         m0, [r1]
    mova         m1, [r1+r4]
    lea          r1, [r1+2*r4]
    pavgb        m0, [r2]
    pavgb        m1, [r2+4]
    OP           m0, [r0], m3
    OP           m1, [r0+r3], m3
    lea          r0, [r0+2*r3]
    mova         m0, [r1]
    mova         m1, [r1+r4]
    lea          r1, [r1+2*r4]
    pavgb        m0, [r2+8]
    pavgb        m1, [r2+12]
    OP           m0, [r0], m3
    OP           m1, [r0+r3], m3
    lea          r0, [r0+2*r3]
    add          r2, 16
    sub         r5d, 4
    jne       .loop
    REP_RET
%endmacro

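; Build the put/avg variants for MMXEXT: INIT_MMX selects 8-byte mm registers
; and makes cglobal append the _mmxext suffix to the emitted symbol names.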
INIT_MMX mmxext
PIXELS4_L2 put
PIXELS4_L2 avg

; void pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
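; Same pattern as PIXELS4_L2, but each row fills a full 8-byte mm register and
; the packed src2 stride is 8.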
%macro PIXELS8_L2 1
%define OP op_%1
cglobal %1_pixels8_l2, 6,6
    movsxdifnidn r3, r3d
    movsxdifnidn r4, r4d
    test        r5d, 1
    je        .loop
    mova         m0, [r1]
    mova         m1, [r2]
    add          r1, r4
    add          r2, 8
    pavgb        m0, m1
    OP           m0, [r0]
    add          r0, r3
    dec         r5d
.loop:
    mova         m0, [r1]
    mova         m1, [r1+r4]
    lea          r1, [r1+2*r4]
    pavgb        m0, [r2]
    pavgb        m1, [r2+8]
    OP           m0, [r0]
    OP           m1, [r0+r3]
    lea          r0, [r0+2*r3]
    mova         m0, [r1]
    mova         m1, [r1+r4]
    lea          r1, [r1+2*r4]
    pavgb        m0, [r2+16]
    pavgb        m1, [r2+24]
    OP           m0, [r0]
    OP           m1, [r0+r3]
    lea          r0, [r0+2*r3]
    add          r2, 32
    sub         r5d, 4
    jne       .loop
    REP_RET
%endmacro

INIT_MMX mmxext
PIXELS8_L2 put
PIXELS8_L2 avg

; void pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
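; 16 pixels per row, handled as two 8-byte mm registers; src2 advances by 16
; per row and the loop processes two rows per iteration.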
%macro PIXELS16_L2 1
%define OP op_%1
cglobal %1_pixels16_l2, 6,6
    movsxdifnidn r3, r3d
    movsxdifnidn r4, r4d
    test        r5d, 1
    je        .loop
    mova         m0, [r1]
    mova         m1, [r1+8]
    pavgb        m0, [r2]
    pavgb        m1, [r2+8]
    add          r1, r4
    add          r2, 16
    OP           m0, [r0]
    OP           m1, [r0+8]
    add          r0, r3
    dec         r5d
.loop:
    mova         m0, [r1]
    mova         m1, [r1+8]
    add          r1, r4
    pavgb        m0, [r2]
    pavgb        m1, [r2+8]
    OP           m0, [r0]
    OP           m1, [r0+8]
    add          r0, r3
    mova         m0, [r1]
    mova         m1, [r1+8]
    add          r1, r4
    pavgb        m0, [r2+16]
    pavgb        m1, [r2+24]
    OP           m0, [r0]
    OP           m1, [r0+8]
    add          r0, r3
    add          r2, 32
    sub         r5d, 2
    jne       .loop
    REP_RET
%endmacro

INIT_MMX mmxext
PIXELS16_L2 put
PIXELS16_L2 avg