Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. ;******************************************************************************
  2. ;* SIMD-optimized clear block functions
  3. ;* Copyright (c) 2002 Michael Niedermayer
  4. ;* Copyright (c) 2008 Loren Merritt
  5. ;* Copyright (c) 2009 Fiona Glaser
  6. ;*
  7. ;* This file is part of FFmpeg.
  8. ;*
  9. ;* FFmpeg is free software; you can redistribute it and/or
  10. ;* modify it under the terms of the GNU Lesser General Public
  11. ;* License as published by the Free Software Foundation; either
  12. ;* version 2.1 of the License, or (at your option) any later version.
  13. ;*
  14. ;* FFmpeg is distributed in the hope that it will be useful,
  15. ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17. ;* Lesser General Public License for more details.
  18. ;*
  19. ;* You should have received a copy of the GNU Lesser General Public
  20. ;* License along with FFmpeg; if not, write to the Free Software
  21. ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22. ;******************************************************************************
  23.  
  24. %include "libavutil/x86/x86util.asm"
  25.  
  26. SECTION .text
  27.  
  28. ;----------------------------------------
  29. ; void ff_clear_block(int16_t *blocks);
  30. ;----------------------------------------
  31. ; Emits 8 * %2 inline mova stores of a cleared m0 to consecutive
  32. ; mmsize-spaced offsets from blocks; there is no runtime loop.
  33. ;
  34. ; %1 = number of xmm registers used (forwarded to cglobal)
  35. ; %2 = number of groups of eight stores to emit
  36. ;
  37. ; ZERO must already be %defined to the instruction used to clear m0
  38. ; (the expansions in this file use pxor and xorps).
  39. %macro CLEAR_BLOCK 2
  40. cglobal clear_block, 1, 1, %1, blocks
  41.     ZERO  m0, m0                        ; clear m0; it is the value stored below
  42. %assign %%slot 0
  43. %rep 8*%2
  44.     mova  [blocksq+mmsize*%%slot], m0   ; store number slot, offset mmsize*slot
  45. %assign %%slot %%slot+1
  46. %endrep
  47.     RET
  48. %endmacro
  49. ; end of CLEAR_BLOCK
  50.  
  51. INIT_MMX mmx                      ; setup macro defined outside this file; presumably selects the MMX register set - confirm in x86inc
  52. %define ZERO pxor                 ; CLEAR_BLOCK's "ZERO m0, m0" now expands to "pxor m0, m0"
  53. CLEAR_BLOCK 0, 2                  ; 0 xmm registers, 2 groups of eight stores
  54. INIT_XMM sse                      ; setup macro defined outside this file; presumably selects the XMM register set - confirm in x86inc
  55. %define ZERO xorps                ; redefine ZERO: "ZERO m0, m0" now expands to "xorps m0, m0"
  56. CLEAR_BLOCK 1, 1                  ; 1 xmm register, 1 group of eight stores
  57.  
  58. ;-----------------------------------------
  59. ; void ff_clear_blocks(int16_t *blocks);
  60. ;-----------------------------------------
  61. ; Stores a cleared m0 from blocks towards blocks+768, eight mova stores
  62. ; (mmsize*8 bytes of offset) per pass, repeating while lenq is negative.
  63. ; %1 = number of xmm registers used (forwarded to cglobal)
  64. ; ZERO must already be %defined to the instruction used to clear m0.
  65. %macro CLEAR_BLOCKS 1
  66. cglobal clear_blocks, 1, 2, %1, blocks, len
  67.     add   blocksq, 768                  ; blocksq -> end of the 768-byte region
  68.     mov      lenq, -768                 ; negative offset back to the start
  69.     ZERO       m0, m0                   ; clear m0; it is the value stored below
  70. .loop:
  71. %assign %%slot 0
  72. %rep 8
  73.     mova  [blocksq+lenq+mmsize*%%slot], m0
  74. %assign %%slot %%slot+1
  75. %endrep
  76.     add   lenq, mmsize*8                ; advance; sign flag stays set while lenq < 0
  77.     js .loop
  78.     RET
  79. %endmacro
  80.  
  81. INIT_MMX mmx                      ; setup macro defined outside this file; presumably selects the MMX register set - confirm in x86inc
  82. %define ZERO pxor                 ; CLEAR_BLOCKS' "ZERO m0, m0" now expands to "pxor m0, m0"
  83. CLEAR_BLOCKS 0                    ; 0 xmm registers
  84. INIT_XMM sse                      ; setup macro defined outside this file; presumably selects the XMM register set - confirm in x86inc
  85. %define ZERO xorps                ; redefine ZERO: "ZERO m0, m0" now expands to "xorps m0, m0"
  86. CLEAR_BLOCKS 1                    ; 1 xmm register
  87.