Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. ;*****************************************************************************
  2. ;* SIMD-optimized pixel operations
  3. ;*****************************************************************************
  4. ;* Copyright (c) 2000, 2001 Fabrice Bellard
  5. ;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  6. ;*
  7. ;* This file is part of FFmpeg.
  8. ;*
  9. ;* FFmpeg is free software; you can redistribute it and/or
  10. ;* modify it under the terms of the GNU Lesser General Public
  11. ;* License as published by the Free Software Foundation; either
  12. ;* version 2.1 of the License, or (at your option) any later version.
  13. ;*
  14. ;* FFmpeg is distributed in the hope that it will be useful,
  15. ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17. ;* Lesser General Public License for more details.
  18. ;*
  19. ;* You should have received a copy of the GNU Lesser General Public
  20. ;* License along with FFmpeg; if not, write to the Free Software
  21. ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22. ;*****************************************************************************
  23.  
  24. %include "libavutil/x86/x86util.asm"
  25.  
  26. SECTION .text
  27.  
  INIT_MMX mmx
  ;-----------------------------------------------------------------------------
  ; void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, ptrdiff_t line_size)
  ;
  ; Widens an 8x8 block of unsigned 8-bit pixels into 64 consecutive 16-bit
  ; words (128 bytes) at block.
  ;
  ; Registers (x86inc numbers the arguments r0, r1, ... in prototype order):
  ;   r0  block      - biased to block+128 so that r0+r3 walks forward
  ;   r1  pixels     - current source row, advanced two rows per iteration
  ;   r2  line_size  - byte distance between source rows
  ;   r3  scratch    - negative byte offset into block, -128 up to 0 in steps of 32
  ;   m4  zero       - supplies the high byte of every widened word
  ;   m0-m3          - per-iteration temporaries
  ;
  ; Each iteration converts two 8-byte rows, so the loop body runs four times.
  ;-----------------------------------------------------------------------------
  cglobal get_pixels, 3,4
      pxor         m4, m4                 ; m4 = 0
      mov          r3, -128               ; 128 output bytes still to write
      add          r0, 128                ; r0 = one past the end of block
  .loop:
      ; Fetch both rows and duplicate them before anything is stored.
      mova         m0, [r1]               ; row n
      mova         m1, m0
      mova         m2, [r1+r2]            ; row n+1
      mova         m3, m2
      ; Interleave with zero: low half -> pixels 0-3, high half -> pixels 4-7.
      punpcklbw    m0, m4
      punpckhbw    m1, m4
      punpcklbw    m2, m4
      punpckhbw    m3, m4
      ; 4 x 8 bytes = 32 output bytes (two rows of eight words).
      mova [r0+r3+ 0], m0
      mova [r0+r3+ 8], m1
      mova [r0+r3+16], m2
      mova [r0+r3+24], m3
      lea          r1, [r1+r2*2]          ; step two source rows; lea leaves flags untouched
      add          r3, 32                 ; sign flag stays set while output remains
      js .loop
      REP_RET
  51.  
  INIT_XMM sse2
  ;-----------------------------------------------------------------------------
  ; SSE2 variant of get_pixels. It takes the same three arguments as the MMX
  ; entry point (block, pixels, line_size) and is presumably exported as
  ; ff_get_pixels_sse2 - confirm against the C-side declaration.
  ;
  ; Widens an 8x8 block of unsigned 8-bit pixels into 64 consecutive 16-bit
  ; words at block. The work is fully unrolled: two groups of four rows.
  ;
  ; Registers:
  ;   r0  block      - destination; written with mova (the aligned move in
  ;                    x86inc), so it is expected to be 16-byte aligned
  ;   r1  pixels     - source; moved forward four rows after the first group
  ;   r2  line_size  - byte distance between source rows
  ;   r3  scratch    - 3 * line_size, the offset of the fourth row of a group
  ;   m4  zero       - supplies the high byte of every widened word
  ;   m0-m3          - one source row each
  ;-----------------------------------------------------------------------------
  cglobal get_pixels, 3, 4, 5
      pxor         m4, m4                  ; m4 = 0
      lea          r3, [r2*3]              ; r3 = 3 * line_size
  ; GET_PIXELS_DST is the assembly-time byte offset of the current four-row
  ; group inside block: 0x00 for rows 0-3, then 0x40 for rows 4-7.
  %assign GET_PIXELS_DST 0
  %rep 2
      ; movh fetches the low 8 bytes, i.e. one row of eight pixels.
      movh         m0, [r1]
      movh         m1, [r1+r2]
      movh         m2, [r1+r2*2]
      movh         m3, [r1+r3]
  %if GET_PIXELS_DST == 0
      ; Only the first group advances the source; nothing reads r1 after the
      ; second group's loads.
      lea          r1, [r1+r2*4]
  %endif
      ; Interleave each row with zero: eight bytes become eight words.
      punpcklbw    m0, m4
      punpcklbw    m1, m4
      punpcklbw    m2, m4
      punpcklbw    m3, m4
      ; One 16-byte store per row.
      mova  [r0+GET_PIXELS_DST+0x00], m0
      mova  [r0+GET_PIXELS_DST+0x10], m1
      mova  [r0+GET_PIXELS_DST+0x20], m2
      mova  [r0+GET_PIXELS_DST+0x30], m3
  %assign GET_PIXELS_DST GET_PIXELS_DST+0x40
  %endrep
  %undef GET_PIXELS_DST
      RET
  82.  
  INIT_MMX mmx
  ;-----------------------------------------------------------------------------
  ; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
  ;                         int stride);
  ;
  ; Stores the element-wise difference s1 - s2 of two 8x8 blocks of unsigned
  ; 8-bit pixels as 64 consecutive 16-bit words (128 bytes) at block. Both
  ; inputs are widened to words first, so every result lies in [-255, 255].
  ;
  ; Registers (x86inc numbers the arguments r0, r1, ... in prototype order):
  ;   r0  block   - biased to block+128 so that r0+r4 walks forward
  ;   r1  s1      - minuend row pointer
  ;   r2  s2      - subtrahend row pointer
  ;   r3  stride  - byte distance between rows, applied to both sources
  ;   r4  scratch - negative byte offset into block, -128 up to 0 in steps of 16
  ;   m4  zero    - supplies the high byte of every widened word
  ;   m0/m1       - s1/s2 pixels 0-3;  m2/m3 - s1/s2 pixels 4-7
  ;
  ; One 8-pixel row is handled per iteration, so the loop body runs eight times.
  ;-----------------------------------------------------------------------------
  cglobal diff_pixels, 4,5
      ; x86inc helper: widens the 32-bit int stride to full register width when
      ; r3 and r3d are different registers; emits nothing otherwise.
      movsxdifnidn r3, r3d
      pxor         m4, m4                 ; m4 = 0
      mov          r4, -128               ; 128 output bytes still to write
      add          r0,  128               ; r0 = one past the end of block
  .loop:
      mova         m0, [r1]               ; s1 row
      mova         m1, [r2]               ; s2 row
      mova         m2, m0
      mova         m3, m1
      ; Interleave with zero to widen bytes to words.
      punpcklbw    m0, m4                 ; s1 pixels 0-3
      punpcklbw    m1, m4                 ; s2 pixels 0-3
      punpckhbw    m2, m4                 ; s1 pixels 4-7
      punpckhbw    m3, m4                 ; s2 pixels 4-7
      psubw        m0, m1                 ; s1 - s2, pixels 0-3
      psubw        m2, m3                 ; s1 - s2, pixels 4-7
      mova  [r0+r4+0], m0
      mova  [r0+r4+8], m2
      add          r1, r3                 ; next row of s1
      add          r2, r3                 ; next row of s2
      add          r4, 16                 ; must stay last: jne tests its zero flag
      jne .loop
      REP_RET
  109.  
  INIT_XMM sse2
  ;-----------------------------------------------------------------------------
  ; SSE2 variant of diff_pixels. It takes the same four arguments as the MMX
  ; entry point (block, s1, s2, stride) and is presumably exported as
  ; ff_diff_pixels_sse2 - confirm against the C-side declaration.
  ;
  ; Stores the element-wise difference s1 - s2 of two 8x8 blocks of unsigned
  ; 8-bit pixels as 64 consecutive 16-bit words (128 bytes) at block. Both
  ; inputs are widened to words first, so every result lies in [-255, 255].
  ;
  ; Registers:
  ;   r0  block   - biased to block+128 so that r0+r4 walks forward; written
  ;                 with mova (the aligned move in x86inc), so it is expected
  ;                 to be 16-byte aligned
  ;   r1  s1      - minuend row pointer
  ;   r2  s2      - subtrahend row pointer
  ;   r3  stride  - byte distance between rows, applied to both sources
  ;   r4  scratch - negative byte offset into block, -128 up to 0 in steps of 32
  ;   m4  zero    - supplies the high byte of every widened word
  ;   m0/m1       - s1/s2 row n;  m2/m3 - s1/s2 row n+1
  ;
  ; Two rows are handled per iteration, so the loop body runs four times.
  ;-----------------------------------------------------------------------------
  cglobal diff_pixels, 4, 5, 5
      ; x86inc helper: widens the 32-bit int stride to full register width when
      ; r3 and r3d are different registers; emits nothing otherwise.
      movsxdifnidn r3, r3d
      pxor         m4, m4                 ; m4 = 0
      mov          r4, -128               ; 128 output bytes still to write
      add          r0,  128               ; r0 = one past the end of block
  .loop:
      ; movh fetches the low 8 bytes, i.e. one row of eight pixels.
      movh         m0, [r1]               ; s1 row n
      movh         m1, [r2]               ; s2 row n
      movh         m2, [r1+r3]            ; s1 row n+1
      movh         m3, [r2+r3]            ; s2 row n+1
      ; Interleave with zero: eight bytes become eight words.
      punpcklbw    m0, m4
      punpcklbw    m1, m4
      punpcklbw    m2, m4
      punpcklbw    m3, m4
      psubw        m0, m1                 ; row n:   s1 - s2
      psubw        m2, m3                 ; row n+1: s1 - s2
      mova [r0+r4+0 ], m0
      mova [r0+r4+16], m2
      lea          r1, [r1+r3*2]          ; step both sources two rows
      lea          r2, [r2+r3*2]
      add          r4, 32                 ; must stay last: jne tests its zero flag
      jne .loop
      RET
  134.