Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. ;******************************************************************************
  2. ;* optimized bswap buffer functions
  3. ;* Copyright (c) 2008 Loren Merritt
  4. ;* Copyright (c) 2003-2013 Michael Niedermayer
  5. ;* Copyright (c) 2013 Daniel Kang
  6. ;*
  7. ;* This file is part of FFmpeg.
  8. ;*
  9. ;* FFmpeg is free software; you can redistribute it and/or
  10. ;* modify it under the terms of the GNU Lesser General Public
  11. ;* License as published by the Free Software Foundation; either
  12. ;* version 2.1 of the License, or (at your option) any later version.
  13. ;*
  14. ;* FFmpeg is distributed in the hope that it will be useful,
  15. ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17. ;* Lesser General Public License for more details.
  18. ;*
  19. ;* You should have received a copy of the GNU Lesser General Public
  20. ;* License along with FFmpeg; if not, write to the Free Software
  21. ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22. ;******************************************************************************
  23.  
%include "libavutil/x86/x86util.asm"

SECTION_RODATA
; pshufb control mask: within each of the four 32-bit lanes of an XMM
; register, select bytes in reverse order (3,2,1,0), i.e. a per-dword bswap.
pb_bswap32: db 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12

; NOTE(review): pb_80 is declared here but not referenced in this chunk;
; presumably used by code outside the visible region — confirm before removing.
cextern pb_80

SECTION .text
  32.  
;------------------------------------------------------------------------------
; BSWAP_LOOPS %1
; Byte-swaps 32-bit words in bulk: an 8-dword (two XMM registers) main loop,
; followed by a 4-dword step if bit 2 of the count is set.
;
; %1 = a/u — substituted into mov%1, selecting mova (aligned) or movu
;            (unaligned) for all loads/stores, and uniquifying local labels.
; In:   r0  = dst pointer, r1 = src pointer, r2d = count in dwords
;       SSSE3 builds: m2 must be preloaded with the pb_bswap32 shuffle mask
; Out:  r0/r1 advanced past all processed data; r2d restored to the original
;       count so the instantiating macro can process the final 0-3 dwords.
; Note: falls through to (or reaches) code after the macro; the .left label
;       jumped to here is defined by the instantiating BSWAP32_BUF macro.
;------------------------------------------------------------------------------
%macro BSWAP_LOOPS  1
    mov      r3d, r2d               ; r3d = saved dword count (for tails)
    sar      r2d, 3                 ; r2d = iterations of the 8-dword loop
    jz       .left4_%1
.loop8_%1:
    mov%1    m0, [r1 +  0]
    mov%1    m1, [r1 + 16]
%if cpuflag(ssse3)
    pshufb   m0, m2                 ; per-dword byte reverse in one shuffle
    pshufb   m1, m2
    mov%1    [r0 +  0], m0
    mov%1    [r0 + 16], m1
%else
    ; SSE2 fallback, two steps per register:
    ; 1) pshuflw+pshufhw with imm 10110001b reorders words to (w1,w0,w3,w2)
    ;    in each half, swapping the two 16-bit words inside every dword;
    ; 2) shift-by-8 left/right + por swaps the two bytes inside each word.
    ; Together: full 32-bit byte swap.
    pshuflw  m0, m0, 10110001b
    pshuflw  m1, m1, 10110001b
    pshufhw  m0, m0, 10110001b
    pshufhw  m1, m1, 10110001b
    mova     m2, m0
    mova     m3, m1
    psllw    m0, 8
    psllw    m1, 8
    psrlw    m2, 8
    psrlw    m3, 8
    por      m2, m0
    por      m3, m1
    mov%1    [r0 +  0], m2
    mov%1    [r0 + 16], m3
%endif
    add      r0, 32
    add      r1, 32
    dec      r2d
    jnz      .loop8_%1
.left4_%1:
    mov      r2d, r3d               ; restore full count for the tail tests
    test     r3d, 4                 ; 4-dword (one XMM) tail present?
    jz       .left
    mov%1    m0, [r1]
%if cpuflag(ssse3)
    pshufb   m0, m2
    mov%1    [r0], m0
%else
    ; same SSE2 word-swap + byte-swap sequence as above, single register
    pshuflw  m0, m0, 10110001b
    pshufhw  m0, m0, 10110001b
    mova     m2, m0
    psllw    m0, 8
    psrlw    m2, 8
    por      m2, m0
    mov%1    [r0], m2
%endif
    add      r1, 16
    add      r0, 16
%endmacro
  86.  
;------------------------------------------------------------------------------
; void ff_bswap32_buf(uint32_t *dst, const uint32_t *src, int w);
; (prototype comment fixed: the cglobal symbol below is bswap32_buf, which
;  x86inc mangles to ff_bswap32_buf — the old comment said ff_bswap_buf)
;
; Byte-swaps w 32-bit words from src to dst.  Uses the aligned SIMD path only
; when src and dst are BOTH 16-byte aligned, otherwise the unaligned path,
; then handles the remaining dwords after the BSWAP_LOOPS bulk code.
;------------------------------------------------------------------------------
%macro BSWAP32_BUF 0
%if cpuflag(ssse3)
cglobal bswap32_buf, 3,4,3
    mov      r3, r1
    mova     m2, [pb_bswap32]       ; shuffle mask, live across BSWAP_LOOPS
%else
cglobal bswap32_buf, 3,4,5
    mov      r3, r1
%endif
    or       r3, r0                 ; combine alignment bits of src and dst
    test     r3, 15                 ; both 16-byte aligned?
    jz       .start_align
    BSWAP_LOOPS  u
    jmp      .left
.start_align:
    BSWAP_LOOPS  a
.left:
%if cpuflag(ssse3)
    ; SSSE3 tail: a 2-dword step via the low qword, then a 1-dword scalar step
    test     r2d, 2
    jz       .left1
    movq     m0, [r1]
    pshufb   m0, m2
    movq     [r0], m0
    add      r1, 8
    add      r0, 8
.left1:
    test     r2d, 1
    jz       .end
    mov      r2d, [r1]              ; count no longer needed; reuse r2d
    bswap    r2d
    mov      [r0], r2d
%else
    ; SSE2 tail: scalar bswap loop over the last 0-3 dwords
    and      r2d, 3
    jz       .end
.loop2:
    mov      r3d, [r1]
    bswap    r3d
    mov      [r0], r3d
    add      r1, 4
    add      r0, 4
    dec      r2d
    jnz      .loop2
%endif
.end:
    RET
%endmacro
  134.  
; Instantiate ff_bswap32_buf twice — once per instruction-set level; the
; runtime CPU-feature dispatcher picks the appropriate version.
INIT_XMM sse2
BSWAP32_BUF

INIT_XMM ssse3
BSWAP32_BUF
  140.