Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
6148 serge 1
;******************************************************************************
2
;* MMX-optimized H.263 loop filter
3
;* Copyright (c) 2003-2013 Michael Niedermayer
4
;* Copyright (c) 2013 Daniel Kang
5
;*
6
;* This file is part of FFmpeg.
7
;*
8
;* FFmpeg is free software; you can redistribute it and/or
9
;* modify it under the terms of the GNU Lesser General Public
10
;* License as published by the Free Software Foundation; either
11
;* version 2.1 of the License, or (at your option) any later version.
12
;*
13
;* FFmpeg is distributed in the hope that it will be useful,
14
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
15
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16
;* Lesser General Public License for more details.
17
;*
18
;* You should have received a copy of the GNU Lesser General Public
19
;* License along with FFmpeg; if not, write to the Free Software
20
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
;******************************************************************************
22
 
23
%include "libavutil/x86/x86util.asm"
24
 
25
SECTION_RODATA
26
cextern pb_FC
27
cextern h263_loop_filter_strength
28
 
29
SECTION_TEXT
30
 
31
;------------------------------------------------------------------------------
; H263_LOOP_FILTER  %1, %2, %3, %4, %5
;
; Computes the filtered values for four pixel lines, one vector register of
; bytes per line. Nothing is written to memory here; the caller stores the
; result registers.
;
; In:   %1, %4  addresses of the two outer lines (receive the small correction)
;       %2, %3  addresses of the two inner lines (receive the main correction)
;       %5      32-bit operand; its low byte is broadcast to every lane and
;               used as the limit (both callers in this file pass 2*strength)
; Out:  m3 = filtered line %2
;       m4 = filtered line %3
;       m5 = filtered line %1
;       m6 = filtered line %4
; Clobbers: m0-m7 (all of them; m0-m2 and m7 hold leftovers on exit)
; Reads:    pb_FC (external constant)
;------------------------------------------------------------------------------
%macro H263_LOOP_FILTER 5
    ; --- m0:m1 = [%1] - [%4] as 16-bit words (low half : high half) ---------
    pxor         m7, m7
    mova         m0, [%1]
    mova         m1, [%1]
    mova         m2, [%4]
    mova         m3, [%4]
    punpcklbw    m0, m7
    punpckhbw    m1, m7
    punpcklbw    m2, m7
    punpckhbw    m3, m7
    psubw        m0, m2
    psubw        m1, m3

    ; --- m4:m5 = 4*([%3] - [%2]) + ([%1] - [%4]) as 16-bit words ------------
    mova         m2, [%2]
    mova         m3, [%2]
    mova         m4, [%3]
    mova         m5, [%3]
    punpcklbw    m2, m7
    punpckhbw    m3, m7
    punpcklbw    m4, m7
    punpckhbw    m5, m7
    psubw        m4, m2
    psubw        m5, m3
    psllw        m4, 2
    psllw        m5, 2
    paddw        m4, m0
    paddw        m5, m1

    ; --- split into sign mask (m6:m7) and magnitude, then magnitude >> 3 ----
    pxor         m6, m6
    pcmpgtw      m6, m4                 ; m6 = -1 in lanes where value < 0 (m7 is still 0 here)
    pcmpgtw      m7, m5
    pxor         m4, m6                 ; two's-complement abs: (x ^ mask) - mask
    pxor         m5, m7
    psubw        m4, m6
    psubw        m5, m7
    psrlw        m4, 3
    psrlw        m5, 3
    packuswb     m4, m5                 ; m4 = |d| per byte lane
    packsswb     m6, m7                 ; m6 = sign of d per byte lane (0x00 / 0xFF)

    ; --- m2 = min(|d|, max(limit - |d|, 0)) = magnitude of main correction --
    pxor         m7, m7
    movd         m2, %5
    punpcklbw    m2, m2                 ; three self-interleaves replicate the
    punpcklbw    m2, m2                 ; low byte of %5 into all eight bytes
    punpcklbw    m2, m2
    psubusb      m2, m4                 ; t = max(limit - |d|, 0)
    mova         m3, m2
    psubusb      m3, m4                 ; max(t - |d|, 0)
    psubb        m2, m3                 ; t - max(t - |d|, 0) = min(t, |d|)

    ; --- apply to the inner lines with unsigned saturation -------------------
    ; Complementing the pixels in lanes where d is negative turns the
    ; saturating add into a saturating subtract (and vice versa), so a single
    ; paddusb/psubusb pair handles both signs.
    mova         m3, [%2]
    mova         m4, [%3]
    pxor         m3, m6
    pxor         m4, m6
    paddusb      m3, m2
    psubusb      m4, m2
    pxor         m3, m6                 ; m3 = filtered %2
    pxor         m4, m6                 ; m4 = filtered %3

    ; --- secondary correction for the outer lines ----------------------------
    paddusb      m2, m2                 ; 2 * main-correction magnitude
    packsswb     m0, m1                 ; [%1] - [%4] as signed-saturated bytes
    pcmpgtb      m7, m0                 ; m7 = sign mask of that difference
    pxor         m0, m7                 ; abs, same (x ^ mask) - mask trick
    psubb        m0, m7
    mova         m1, m0
    psubusb      m0, m2
    psubb        m1, m0                 ; m1 = min(|[%1]-[%4]|, 2*magnitude)
    ; Per-byte >> 2 emulated with a word shift: the mask clears the two low
    ; bits of each byte first so nothing leaks into the neighbouring byte
    ; (assumes pb_FC is 0xFC in every byte -- defined outside this file).
    pand         m1, [pb_FC]
    psrlw        m1, 2
    pxor         m1, m7                 ; restore the sign of the difference
    psubb        m1, m7
    mova         m5, [%1]
    mova         m6, [%4]
    psubb        m5, m1                 ; m5 = filtered %1
    paddb        m6, m1                 ; m6 = filtered %4
%endmacro
102
 
103
INIT_MMX mmx
;------------------------------------------------------------------------------
; void h263_v_loop_filter(uint8_t *src, int stride, int qscale)
;
; Filters the four rows src-2*stride, src-stride, src and src+stride, one MMX
; register (8 bytes) per row, and writes all four rows back in place.
;
; In:   r0 = src, r1 = stride, r2 = qscale
;       (x86inc argument registers; cglobal declares 3 args, 5 GPRs)
; Uses: r3, r4 as scratch, m0-m7 via H263_LOOP_FILTER
; Reads: h263_loop_filter_strength[qscale] (external byte table)
;------------------------------------------------------------------------------
cglobal h263_v_loop_filter, 3,5
    ; int arguments are widened to pointer width before being used in
    ; address arithmetic (x86inc helper; only emits code where needed)
    movsxdifnidn r1, r1d
    movsxdifnidn r2, r2d

    ; r2 = 2 * h263_loop_filter_strength[qscale]
    lea          r4, [h263_loop_filter_strength]
    movzx       r3d, BYTE [r4+r2]
    movsx        r2, r3b
    shl          r2, 1

    ; r3 = src - stride, r4 = src - 2*stride
    mov          r3, r0
    sub          r3, r1
    mov          r4, r3
    sub          r4, r1
    H263_LOOP_FILTER r4, r3, r0, r0+r1, r2d

    ; store the macro's result registers back to their source rows
    mova       [r3], m3                 ; row src - stride
    mova       [r0], m4                 ; row src
    mova       [r4], m5                 ; row src - 2*stride
    mova    [r0+r1], m6                 ; row src + stride
    RET
125
 
126
;------------------------------------------------------------------------------
; TRANSPOSE4X4  %1, %2
;
; Reads a 4x4 block of bytes (4 bytes from each of 4 consecutive rows starting
; at %1) and stores it transposed: source column k is written as 4 bytes at
; %2 + 8*k.
;
; In:   %1 = address of the first source row
;       %2 = destination base address
;       r1 = row stride      (implicit; must be set up by the caller)
;       r3 = 3 * row stride  (implicit; must be set up by the caller)
; Clobbers: m0-m3
;------------------------------------------------------------------------------
%macro TRANSPOSE4X4 2
    movd      m0, [%1]                  ; row 0: a0 a1 a2 a3
    movd      m1, [%1+r1]               ; row 1: b0 b1 b2 b3
    movd      m2, [%1+r1*2]             ; row 2: c0 c1 c2 c3
    movd      m3, [%1+r3]               ; row 3: d0 d1 d2 d3
    punpcklbw m0, m1                    ; a0 b0 a1 b1 a2 b2 a3 b3
    punpcklbw m2, m3                    ; c0 d0 c1 d1 c2 d2 c3 d3
    mova      m1, m0
    punpcklwd m0, m2                    ; a0 b0 c0 d0 | a1 b1 c1 d1
    punpckhwd m1, m2                    ; a2 b2 c2 d2 | a3 b3 c3 d3
    movd [%2+ 0], m0                    ; column 0
    punpckhdq m0, m0                    ; move the high dword down for movd
    movd [%2+ 8], m0                    ; column 1
    movd [%2+16], m1                    ; column 2
    punpckhdq m1, m1
    movd [%2+24], m1                    ; column 3
%endmacro
143
 
144
 
145
;------------------------------------------------------------------------------
; void h263_h_loop_filter(uint8_t *src, int stride, int qscale)
;
; Filters the four byte columns src-2 .. src+1 over 8 rows, in place.
; The 8x4 block is transposed into a 32-byte stack buffer so that each column
; becomes one 8-byte line, H263_LOOP_FILTER is run on those lines, and the
; result is transposed back while being stored.
;
; In:   r0 = src, r1 = stride, r2 = qscale
;       (x86inc argument registers; cglobal declares 3 args, 5 GPRs,
;        32 bytes of stack scratch)
; Uses: r3, r4 as scratch, m0-m7
; Reads: h263_loop_filter_strength[qscale] (external byte table)
;------------------------------------------------------------------------------
INIT_MMX mmx
cglobal h263_h_loop_filter, 3,5,0,32
    ; int arguments are widened to pointer width before being used in
    ; address arithmetic (x86inc helper; only emits code where needed)
    movsxdifnidn r1, r1d
    movsxdifnidn r2, r2d

    ; r2 = 2 * h263_loop_filter_strength[qscale]
    lea          r4, [h263_loop_filter_strength]
    movzx       r3d, BYTE [r4+r2]
    movsx        r2, r3b
    shl          r2, 1

    sub          r0, 2                  ; r0 -> leftmost of the four columns
    lea          r3, [r1*3]             ; r3 = 3*stride, required by TRANSPOSE4X4

    ; Rows 0-3 fill bytes 0-3 of each 8-byte buffer line, rows 4-7 fill
    ; bytes 4-7, so buffer line k (at rsp + 8*k) holds column k for all 8 rows.
    TRANSPOSE4X4 r0, rsp
    lea          r4, [r0+r1*4]          ; r4 -> row 4
    TRANSPOSE4X4 r4, rsp+4

    H263_LOOP_FILTER rsp, rsp+8, rsp+16, rsp+24, r2d

    ; Macro results: m5 = column 0, m3 = column 1, m4 = column 2, m6 = column 3.
    ; Interleave them back into rows of four bytes.
    mova         m1, m5
    mova         m0, m4
    punpcklbw    m5, m3                 ; columns 0,1 interleaved, rows 0-3
    punpcklbw    m4, m6                 ; columns 2,3 interleaved, rows 0-3
    punpckhbw    m1, m3                 ; columns 0,1 interleaved, rows 4-7
    punpckhbw    m0, m6                 ; columns 2,3 interleaved, rows 4-7
    mova         m3, m5
    mova         m6, m1
    punpcklwd    m5, m4                 ; row 0 | row 1
    punpcklwd    m1, m0                 ; row 4 | row 5
    punpckhwd    m3, m4                 ; row 2 | row 3
    punpckhwd    m6, m0                 ; row 6 | row 7

    ; movd stores the low dword; punpckhdq brings the next row down
    movd       [r0], m5                 ; row 0
    punpckhdq    m5, m5
    movd  [r0+r1*1], m5                 ; row 1
    movd  [r0+r1*2], m3                 ; row 2
    punpckhdq    m3, m3
    movd    [r0+r3], m3                 ; row 3
    movd       [r4], m1                 ; row 4
    punpckhdq    m1, m1
    movd  [r4+r1*1], m1                 ; row 5
    movd  [r4+r1*2], m6                 ; row 6
    punpckhdq    m6, m6
    movd    [r4+r3], m6                 ; row 7
    RET