Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. ;******************************************************************************
  2. ;* SIMD-optimized fullpel functions
  3. ;* Copyright (c) 2008 Loren Merritt
  4. ;* Copyright (c) 2003-2013 Michael Niedermayer
  5. ;* Copyright (c) 2013 Daniel Kang
  6. ;*
  7. ;* This file is part of FFmpeg.
  8. ;*
  9. ;* FFmpeg is free software; you can redistribute it and/or
  10. ;* modify it under the terms of the GNU Lesser General Public
  11. ;* License as published by the Free Software Foundation; either
  12. ;* version 2.1 of the License, or (at your option) any later version.
  13. ;*
  14. ;* FFmpeg is distributed in the hope that it will be useful,
  15. ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17. ;* Lesser General Public License for more details.
  18. ;*
  19. ;* You should have received a copy of the GNU Lesser General Public
  20. ;* License along with FFmpeg; if not, write to the Free Software
  21. ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22. ;******************************************************************************
  23.  
  24. %include "libavutil/x86/x86util.asm"
  25.  
  26. SECTION .text
  27.  
  28. %macro PAVGB_MMX 4                ; per-byte rounded-up average without pavgb: %1 = operand fetched via LOAD, %2 = register (input, names the result afterwards), %3 = scratch register, %4 = mask register
  29.     LOAD   %3, %1                 ; %3 = a, where a = %1 (LOAD is %define'd by the invoking macro, see OP_PIXELS)
  30.     por    %3, %2                 ; %3 = a | b, where b = %2
  31.     pxor   %2, %1                 ; %2 = a ^ b; NOTE(review): with a memory %1 this reads a full register width even when LOAD is the half-width movh - confirm callers tolerate the over-read
  32.     pand   %2, %4                 ; apply mask; OP_PIXELS passes 0xFE in every byte, clearing each byte's low bit so the shift below cannot leak into the neighbouring byte
  33.     psrlq  %2, 1                  ; %2 = (a ^ b) >> 1 in each byte (64-bit shift, made byte-safe by the mask)
  34.     psubb  %3, %2                 ; %3 = (a | b) - ((a ^ b) >> 1) = (a + b + 1) >> 1 per byte
  35.     SWAP   %2, %3                 ; SWAP is defined outside this file (presumably x86inc's register renaming - confirm); callers here store %2 afterwards, so the result is expected under that name
  36. %endmacro
  37.  
  38. ; void ff_put/avg_pixels(uint8_t *block, const uint8_t *pixels,
  39. ;                        ptrdiff_t line_size, int h)
  40. %macro OP_PIXELS 2                 ; emits one function named %1_pixels%2: %1 = operation (put copies, avg averages into the destination), %2 = row width in bytes
  41. %if %2 == mmsize/2                 ; row is half a vector register wide: use half-register moves
  42. %define LOAD movh
  43. %define SAVE movh
  44. %define LEN  mmsize
  45. %else                              ; otherwise movu for loads and mova for stores (presumably unaligned/aligned moves from x86inc - confirm); the row is covered in %2/mmsize steps
  46. %define LOAD movu
  47. %define SAVE mova
  48. %define LEN  %2
  49. %endif
  50. cglobal %1_pixels%2, 4,5,4         ; x86inc function declaration; "4,5,4" is presumably 4 arguments, 5 GPRs, 4 vector registers - confirm against x86inc. Per the prototype comment above: r0 = block, r1 = pixels, r2 = line_size, r3 = h
  51.     movsxdifnidn r2, r2d           ; helper defined outside this file; by its name it sign-extends r2d into r2 where the two differ - confirm
  52.     lea          r4, [r2*3]        ; r4 = 3 * line_size, the offset of the fourth row in each group
  53. %ifidn %1, avg
  54. %if notcpuflag(mmxext)             ; only the avg variant built without mmxext needs the PAVGB_MMX mask
  55.     pcmpeqd      m6, m6            ; m6 = all bits set
  56.     paddb        m6, m6            ; each byte 0xFF + 0xFF wraps to 0xFE: the mask PAVGB_MMX takes as %4
  57. %endif
  58. %endif
  59. .loop:                             ; one pass handles four rows
  60. %assign %%i 0                      ; %%i = byte offset inside the row for the current register-wide step
  61. %rep LEN/mmsize                    ; one expansion per register-wide step of the row
  62.     LOAD         m0, [r1 + %%i]    ; rows 0..3 of the source go into m0..m3
  63.     LOAD         m1, [r1+r2 + %%i]
  64.     LOAD         m2, [r1+r2*2 + %%i]
  65.     LOAD         m3, [r1+r4 + %%i]
  66. %ifidn %1, avg                     ; avg: combine with what the destination rows already hold
  67. %if notcpuflag(mmxext)             ; no pavgb instruction selected: use the PAVGB_MMX sequence, m4/m5 as scratch, m6 as mask
  68.     PAVGB_MMX    [r0 + %%i], m0, m4, m6
  69.     PAVGB_MMX    [r0+r2 + %%i], m1, m5, m6
  70.     PAVGB_MMX    [r0+r2*2 + %%i], m2, m4, m6
  71.     PAVGB_MMX    [r0+r4 + %%i], m3, m5, m6
  72. %else                              ; pavgb path; NOTE(review): the memory operand is a full register wide even when LOAD/SAVE are the half-width movh - confirm the over-read is acceptable
  73.     pavgb        m0, [r0 + %%i]
  74.     pavgb        m1, [r0+r2 + %%i]
  75.     pavgb        m2, [r0+r2*2 + %%i]
  76.     pavgb        m3, [r0+r4 + %%i]
  77. %endif
  78. %endif
  79.     SAVE       [r0 + %%i], m0      ; write the four rows back to the destination
  80.     SAVE    [r0+r2 + %%i], m1
  81.     SAVE  [r0+r2*2 + %%i], m2
  82.     SAVE    [r0+r4 + %%i], m3
  83. %assign %%i %%i+mmsize             ; advance to the next register-wide step of the row
  84. %endrep
  85.     sub         r3d, 4             ; h -= 4; the flags set here feed the jne below
  86.     lea          r1, [r1+r2*4]     ; advance source by four rows (lea leaves the flags untouched)
  87.     lea          r0, [r0+r2*4]     ; advance destination by four rows
  88.     jne       .loop                ; repeat until h is exactly 0, so h has to be a positive multiple of 4 for the loop to stop there
  89.     RET
  90. %endmacro
  91.  
  92. INIT_MMX mmx                       ; build with only the mmx cpuflag (x86inc setup macro), so notcpuflag(mmxext) holds and the avg variants below use PAVGB_MMX
  93. OP_PIXELS put, 4                   ; widths 4, 8 and 16, each as a put and an avg function
  94. OP_PIXELS avg, 4
  95. OP_PIXELS put, 8
  96. OP_PIXELS avg, 8
  97. OP_PIXELS put, 16
  98. OP_PIXELS avg, 16
  99.  
  100. INIT_MMX mmxext                   ; same register set with the mmxext cpuflag: the avg variants take the pavgb branch
  101. OP_PIXELS avg, 4                  ; only avg is rebuilt; OP_PIXELS tests the cpuflag solely inside its avg branch, so put would not differ
  102. OP_PIXELS avg, 8
  103. OP_PIXELS avg, 16
  104.  
  105. INIT_XMM sse2                     ; XMM build (x86inc setup macro; presumably mmsize = 16 and mmxext implied, selecting pavgb - confirm)
  106. OP_PIXELS put, 16                 ; width 16 only; under the mmsize = 16 assumption this is one movu load / mova store per row
  107. OP_PIXELS avg, 16
  108.