Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. ;******************************************************************************
  2. ;* Vorbis x86 optimizations
  3. ;* Copyright (C) 2006 Loren Merritt <lorenm@u.washington.edu>
  4. ;*
  5. ;* This file is part of FFmpeg.
  6. ;*
  7. ;* FFmpeg is free software; you can redistribute it and/or
  8. ;* modify it under the terms of the GNU Lesser General Public
  9. ;* License as published by the Free Software Foundation; either
  10. ;* version 2.1 of the License, or (at your option) any later version.
  11. ;*
  12. ;* FFmpeg is distributed in the hope that it will be useful,
  13. ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15. ;* Lesser General Public License for more details.
  16. ;*
  17. ;* You should have received a copy of the GNU Lesser General Public
  18. ;* License along with FFmpeg; if not, write to the Free Software
  19. ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. ;******************************************************************************
  21.  
  22. %include "libavutil/x86/x86util.asm"
  23.  
  24. SECTION_RODATA
  25.  
  26. pdw_80000000: times 4 dd 0x80000000     ; four dwords with only bit 31 set (the single-precision sign bit); loaded into m5 by the SSE routine
  27.  
  28. SECTION .text
  29.  
  30. %if ARCH_X86_32
      ;--------------------------------------------------------------------------
      ; vorbis_inverse_coupling, 3DNow! variant (assembled only on x86-32).
      ; Args (via cglobal): mag, ang = float arrays updated in place,
      ;                     block_size = number of elements.
      ; Each iteration handles 2 elements (index advances by 2, scaled by 4).
      ; Per element, with m = mag[i], a = ang[i]:
      ;   a'     = a with its sign bit flipped where m >= 0.0
      ;   ang[i] = m + (a >= 0.0 ? a' : 0)
      ;   mag[i] = m - (a >= 0.0 ? 0 : a')
      ; NOTE(review): m7 is used although the cglobal line requests only 6
      ; vector registers -- presumably harmless for MMX registers on x86-32;
      ; confirm against x86inc.
      ;--------------------------------------------------------------------------
  31. INIT_MMX 3dnow
  32. cglobal vorbis_inverse_coupling, 3, 3, 6, mag, ang, block_size
  33.     pxor                     m7, m7     ; m7 = 0, reference value for the >= 0.0 tests
  34.     lea                    magq, [magq+block_sizeq*4]     ; mag -> one past the last element
  35.     lea                    angq, [angq+block_sizeq*4]     ; ang -> one past the last element
  36.     neg             block_sizeq     ; index now runs from -block_size up towards 0
  37. .loop:
  38.     mova                     m0, [magq+block_sizeq*4]     ; m0 = m
  39.     mova                     m1, [angq+block_sizeq*4]     ; m1 = a
  40.     mova                     m2, m0
  41.     mova                     m3, m1
  42.     pfcmpge                  m2, m7     ; m2 = all-ones where m >= 0.0
  43.     pfcmpge                  m3, m7     ; m3 = all-ones where a >= 0.0
  44.     pslld                    m2, 31     ; keep only the sign bit: 0x80000000 where m >= 0.0
  45.     pxor                     m1, m2     ; a' = a, sign flipped where m >= 0.0
  46.     mova                     m4, m3
  47.     pand                     m3, m1     ; m3 = a' where a >= 0.0, else 0
  48.     pandn                    m4, m1     ; m4 = a' where the a >= 0.0 test failed, else 0
  49.     pfadd                    m3, m0     ; new a = m + ((a >= 0) & a')
  50.     pfsub                    m0, m4     ; new m = m - (~(a >= 0) & a')
  51.     mova   [angq+block_sizeq*4], m3
  52.     mova   [magq+block_sizeq*4], m0
  53.     add             block_sizeq, 2     ; advance two elements; sets flags for the branch
  54.     jl .loop     ; continue while the index is still negative
  55.     femms     ; leave MMX/3DNow! state before returning
  56.     RET
  57. %endif
  58.  
  59. INIT_XMM sse
      ;--------------------------------------------------------------------------
      ; vorbis_inverse_coupling, SSE variant.
      ; Args (via cglobal): mag, ang = float arrays updated in place,
      ;                     block_size = number of elements; cntr = scratch index.
      ; Each iteration handles 4 elements (cntr advances by 4, scaled by 4).
      ; Per element, with m = mag[i], a = ang[i]:
      ;   a'     = a with its sign bit flipped where 0.0 <= m
      ;   ang[i] = m + (0.0 <= a ? a' : 0)
      ;   mag[i] = m - (0.0 <= a ? 0 : a')
      ; NOTE(review): the loop condition is tested at the bottom, so one vector
      ; is always processed; callers presumably pass a positive multiple of 4
      ; -- TODO confirm.
      ; NOTE(review): mova is presumably the aligned move here, which would
      ; require mag/ang to be 16-byte aligned -- confirm against x86inc/callers.
      ;--------------------------------------------------------------------------
  60. cglobal vorbis_inverse_coupling, 3, 4, 6, mag, ang, block_size, cntr
  61.     mova                     m5, [pdw_80000000]     ; m5 = sign-bit mask in every dword
  62.     xor                   cntrq, cntrq     ; cntr = 0, element index counting upwards
  63. align 16
  64. .loop:
  65.     mova                     m0, [magq+cntrq*4]     ; m0 = m
  66.     mova                     m1, [angq+cntrq*4]     ; m1 = a
  67.     xorps                    m2, m2     ; m2 = 0.0
  68.     xorps                    m3, m3     ; m3 = 0.0
  69.     cmpleps                  m2, m0     ; m2 = all-ones where 0.0 <= m
  70.     cmpleps                  m3, m1     ; m3 = all-ones where 0.0 <= a
  71.     andps                    m2, m5     ; keep only the sign bit: 0x80000000 where 0.0 <= m
  72.     xorps                    m1, m2     ; a' = a, sign flipped where 0.0 <= m
  73.     mova                     m4, m3
  74.     andps                    m3, m1     ; m3 = a' where 0.0 <= a, else 0
  75.     andnps                   m4, m1     ; m4 = a' where the 0.0 <= a test failed, else 0
  76.     addps                    m3, m0     ; new a = m + ((0 <= a) & a')
  77.     subps                    m0, m4     ; new m = m - (~(0 <= a) & a')
  78.     mova         [angq+cntrq*4], m3
  79.     mova         [magq+cntrq*4], m0
  80.     add                   cntrq, 4     ; advance four elements
  81.     cmp                   cntrq, block_sizeq
  82.     jl .loop     ; signed compare: continue while cntr < block_size
  83.     RET
  84.