Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4349 Serge 1
;******************************************************************************
2
;* Vorbis x86 optimizations
3
;* Copyright (C) 2006 Loren Merritt 
4
;*
5
;* This file is part of FFmpeg.
6
;*
7
;* FFmpeg is free software; you can redistribute it and/or
8
;* modify it under the terms of the GNU Lesser General Public
9
;* License as published by the Free Software Foundation; either
10
;* version 2.1 of the License, or (at your option) any later version.
11
;*
12
;* FFmpeg is distributed in the hope that it will be useful,
13
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
;* Lesser General Public License for more details.
16
;*
17
;* You should have received a copy of the GNU Lesser General Public
18
;* License along with FFmpeg; if not, write to the Free Software
19
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
;******************************************************************************
21
 
22
%include "libavutil/x86/x86util.asm"
23
 
24
SECTION_RODATA
25
 
26
; Four packed dwords, each holding only the IEEE-754 single-precision sign bit
; (0x80000000). The SSE vorbis_inverse_coupling routine loads this into m5 and
; ANDs a compare mask with it to turn "all ones" lanes into "sign bit only".
pdw_80000000: times 4 dd 0x80000000
27
 
28
SECTION .text
29
 
30
%if ARCH_X86_32
;------------------------------------------------------------------------------
; vorbis_inverse_coupling(mag, ang, block_size)            [3DNow!, x86-32 only]
;
; In-place inverse channel coupling on two arrays of 32-bit floats, two
; elements per iteration. Per element, with m = mag[i] and a = ang[i]:
;
;     t      = (m >= 0.0) ? -a : a          ; a with its sign bit flipped
;     ang[i] = m + ((a >= 0.0) ? t : 0.0)
;     mag[i] = m - ((a <  0.0) ? t : 0.0)
;
; In:    mag, ang    - base pointers of the two arrays
;        block_size  - number of floats in each array
; Notes: the loop is bottom-tested, so the body executes at least once.
;        NOTE(review): block_size is presumably a positive multiple of 2;
;        confirm against the caller.
; Regs:  m0 = mag, m1 = ang / t, m2 = sign mask, m3/m4 = ang masks, m5 = 0.0
;------------------------------------------------------------------------------
INIT_MMX 3dnow
cglobal vorbis_inverse_coupling, 3, 3, 6, mag, ang, block_size
    pxor                     m5, m5                     ; m5 = packed 0.0
    ; Point both bases at the end of their arrays and walk a negative element
    ; index up towards zero, so the add below doubles as the loop test.
    lea                    angq, [angq+block_sizeq*4]
    lea                    magq, [magq+block_sizeq*4]
    neg             block_sizeq
.loop:
    mova                     m1, [angq+block_sizeq*4]   ; m1 = a
    mova                     m0, [magq+block_sizeq*4]   ; m0 = m
    mova                     m3, m1
    mova                     m2, m0
    pfcmpge                  m3, m5                     ; m3 = all ones where a >= 0.0
    pfcmpge                  m2, m5                     ; m2 = all ones where m >= 0.0
    pslld                    m2, 31                     ; reduce mask to the sign bit
    pxor                     m1, m2                     ; t = a, negated where m >= 0.0
    mova                     m4, m3
    pandn                    m4, m1                     ; m4 = t where a <  0.0, else 0
    pand                     m3, m1                     ; m3 = t where a >= 0.0, else 0
    pfadd                    m3, m0                     ; new ang = m + m3
    pfsub                    m0, m4                     ; new mag = m - m4
    mova   [angq+block_sizeq*4], m3
    mova   [magq+block_sizeq*4], m0
    add             block_sizeq, mmsize/4               ; advance by 2 floats
    jl .loop                                            ; until the index reaches 0
    femms                                               ; clear MMX/3DNow! state before returning
    RET
%endif
58
 
59
;------------------------------------------------------------------------------
; vorbis_inverse_coupling(mag, ang, block_size)                           [SSE]
;
; In-place inverse channel coupling on two arrays of 32-bit floats, four
; elements per iteration. Per element, with m = mag[i] and a = ang[i]:
;
;     t      = (m >= 0.0) ? -a : a          ; a with its sign bit flipped
;     ang[i] = m + ((a >= 0.0) ? t : 0.0)
;     mag[i] = m - ((a <  0.0) ? t : 0.0)
;
; In:    mag, ang    - base pointers of the two arrays, accessed with mova
;                      (aligned moves)
;        block_size  - number of floats in each array
; Notes: the loop is bottom-tested, so the body executes at least once.
;        NOTE(review): both arrays are presumably 16-byte aligned and
;        block_size a positive multiple of 4; confirm against the caller.
; Regs:  m0 = mag, m1 = ang / t, m2 = sign mask, m3/m4 = ang masks,
;        m5 = packed 0x80000000
;
; Indexing matches the 3DNow! variant: both bases are moved to the end of
; their arrays and block_size becomes a negative element index counted up to
; zero. This needs no separate counter register and no per-iteration cmp.
;------------------------------------------------------------------------------
INIT_XMM sse
cglobal vorbis_inverse_coupling, 3, 3, 6, mag, ang, block_size
    mova                     m5, [pdw_80000000]         ; sign-bit mask in every lane
    lea                    magq, [magq+block_sizeq*4]
    lea                    angq, [angq+block_sizeq*4]
    neg             block_sizeq                         ; index runs from -block_size up to 0
align 16
.loop:
    mova                     m0, [magq+block_sizeq*4]   ; m0 = m
    mova                     m1, [angq+block_sizeq*4]   ; m1 = a
    xorps                    m2, m2
    xorps                    m3, m3
    cmpleps                  m2, m0                     ; m2 = all ones where 0.0 <= m
    cmpleps                  m3, m1                     ; m3 = all ones where 0.0 <= a
    andps                    m2, m5                     ; reduce mask to the sign bit
    xorps                    m1, m2                     ; t = a, negated where m >= 0.0
    mova                     m4, m3
    andps                    m3, m1                     ; m3 = t where a >= 0.0, else 0
    andnps                   m4, m1                     ; m4 = t where a <  0.0, else 0
    addps                    m3, m0                     ; new ang = m + m3
    subps                    m0, m4                     ; new mag = m - m4
    mova   [angq+block_sizeq*4], m3
    mova   [magq+block_sizeq*4], m0
    add             block_sizeq, mmsize/4               ; advance by 4 floats
    jl .loop                                            ; until the index reaches 0
    RET