Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4349 Serge 1
;******************************************************************************
2
;* MMX optimized DSP utils
3
;* Copyright (c) 2008 Loren Merritt
4
;* Copyright (c) 2003-2013 Michael Niedermayer
5
;* Copyright (c) 2013 Daniel Kang
6
;*
7
;* This file is part of FFmpeg.
8
;*
9
;* FFmpeg is free software; you can redistribute it and/or
10
;* modify it under the terms of the GNU Lesser General Public
11
;* License as published by the Free Software Foundation; either
12
;* version 2.1 of the License, or (at your option) any later version.
13
;*
14
;* FFmpeg is distributed in the hope that it will be useful,
15
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
16
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
;* Lesser General Public License for more details.
18
;*
19
;* You should have received a copy of the GNU Lesser General Public
20
;* License along with FFmpeg; if not, write to the Free Software
21
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22
;******************************************************************************
23
 
24
%include "libavutil/x86/x86util.asm"
25
 
26
SECTION .text
27
 
28
;------------------------------------------------------------------------------
; op_avgh  reg, mem, tmp
;   Averaging store for a partial register.
;     %1 = register holding the freshly computed pixels (overwritten)
;     %2 = destination memory operand
;     %3 = scratch register (overwritten)
;   The current destination contents are fetched into %3, blended into %1 with
;   pavgb (per-byte unsigned average, rounding up) and written back to %2.
;   movh is provided by the included x86util/x86inc layer; it is presumably the
;   half-register-width move -- confirm against the x86inc version in use.
;------------------------------------------------------------------------------
%macro op_avgh 3
    movh        %3, %2              ; tmp = pixels already at the destination
    pavgb       %1, %3              ; reg = avg(reg, tmp), byte by byte
    movh        %2, %1              ; destination = blended pixels
%endmacro
33
 
34
;------------------------------------------------------------------------------
; op_avg  reg, mem
;   Averaging store for a whole register.
;     %1 = register holding the freshly computed pixels (overwritten)
;     %2 = destination memory operand
;   pavgb reads the destination directly as its memory operand, so no scratch
;   register is needed; the blended result is then written back with mova.
;------------------------------------------------------------------------------
%macro op_avg 2
    pavgb       %1, %2              ; reg = avg(reg, destination), byte by byte
    mova        %2, %1              ; destination = blended pixels
%endmacro
38
 
39
;------------------------------------------------------------------------------
; op_puth  reg, mem [, tmp]
;   Plain store for a partial register.
;     %1 = register holding the pixels to write
;     %2 = destination memory operand
;     %3 = optional and never referenced; accepting it lets callers invoke
;          op_puth with the same three-operand form they use for op_avgh.
;------------------------------------------------------------------------------
%macro op_puth 2-3
    movh        %2, %1              ; destination = reg (movh-sized store)
%endmacro
42
 
43
;------------------------------------------------------------------------------
; op_put  reg, mem
;   Plain store for a whole register.
;     %1 = register holding the pixels to write
;     %2 = destination memory operand
;------------------------------------------------------------------------------
%macro op_put 2
    mova        %2, %1              ; destination = reg
%endmacro
46
 
47
; void pixels4_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
;
; PIXELS4_L2 <put|avg>
;   Emits %1_pixels4_l2.  For every row the function averages 4 bytes of src1
;   with 4 bytes of src2 (pavgb: unsigned, rounding up) and hands the result to
;   OP, which is op_puth (store) or op_avgh (average with dst, then store).
;
;   Registers, in prototype order: r0 = dst, r1 = src1, r2 = src2,
;   r3 = dstStride, r4 = src1Stride, r5d = h.
;   src1 advances by src1Stride and dst by dstStride per row; src2 rows are
;   read back to back, 4 bytes apart.
;   m0/m1 hold the two rows in flight, m2 is a load temporary for src2 and m3
;   is the scratch register passed to OP.
;
;   Row count: if h is odd a single row is peeled first.  The main loop then
;   handles four rows per pass and only terminates when the counter reaches
;   exactly zero, so callers must pass h such that the remaining count is a
;   non-zero multiple of 4.
;
;   All pixel loads use movd so that exactly 4 bytes are read per row.  The
;   former 8-byte loads (mova / pavgb with a memory operand) read 4 bytes past
;   every src1 row and past the end of the packed src2 buffer on its last row.
;   The stored result is unchanged because pavgb works per byte and OP only
;   writes a movh-sized quantity.
;   NOTE(review): this relies on movh being a 4-byte move under INIT_MMX, as
;   defined by the included x86inc layer -- confirm for the version in use.
%macro PIXELS4_L2 1
%define OP op_%1h
cglobal %1_pixels4_l2, 6,6
    movsxdifnidn r3, r3d            ; strides arrive as int; widen if needed
    movsxdifnidn r4, r4d
    test        r5d, 1
    je        .loop                 ; even h: go straight to the 4-row loop

    ; odd h: process one row so the remaining count is even
    movd         m0, [r1]
    movd         m1, [r2]
    add          r1, r4
    add          r2, 4
    pavgb        m0, m1
    OP           m0, [r0], m3
    add          r0, r3
    dec         r5d

.loop:
    ; rows 0 and 1 of this pass
    movd         m0, [r1]
    movd         m1, [r1+r4]
    lea          r1, [r1+2*r4]
    movd         m2, [r2]           ; 4-byte load instead of an 8-byte operand
    pavgb        m0, m2
    movd         m2, [r2+4]
    pavgb        m1, m2
    OP           m0, [r0], m3
    OP           m1, [r0+r3], m3
    lea          r0, [r0+2*r3]

    ; rows 2 and 3 of this pass
    movd         m0, [r1]
    movd         m1, [r1+r4]
    lea          r1, [r1+2*r4]
    movd         m2, [r2+8]
    pavgb        m0, m2
    movd         m2, [r2+12]
    pavgb        m1, m2
    OP           m0, [r0], m3
    OP           m1, [r0+r3], m3
    lea          r0, [r0+2*r3]

    add          r2, 16             ; four packed src2 rows consumed
    sub         r5d, 4              ; must stay last: jne tests its flags
    jne       .loop
    REP_RET
%endmacro

; Instantiate the store and the averaging variant for MMXEXT.
INIT_MMX mmxext
PIXELS4_L2 put
PIXELS4_L2 avg
89
 
90
; void pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
;
; PIXELS8_L2 <put|avg>
;   Emits %1_pixels8_l2.  Each row is one 8-byte register: src1 is averaged
;   with src2 via pavgb (unsigned bytes, rounding up) and the result is passed
;   to OP, i.e. op_put (store) or op_avg (average with dst, then store).
;
;   Registers, in prototype order: r0 = dst, r1 = src1, r2 = src2,
;   r3 = dstStride, r4 = src1Stride, r5d = h.
;   src1 and dst step by their strides; src2 rows are read back to back,
;   8 bytes apart.
;
;   Row count: an odd h has one row peeled up front.  The loop body covers
;   four rows (two identical two-row groups generated by %rep) and exits only
;   when the counter hits exactly zero, so the count left after peeling must
;   be a non-zero multiple of 4.
%macro PIXELS8_L2 1
%define OP op_%1
cglobal %1_pixels8_l2, 6,6
    movsxdifnidn r3, r3d            ; strides arrive as int; widen if needed
    movsxdifnidn r4, r4d
    test        r5d, 1
    jz          .loop               ; even h: nothing to peel

    ; odd h: handle a single row first
    mova        m0, [r1]
    pavgb       m0, [r2]
    OP          m0, [r0]
    add         r1, r4
    add         r2, 8
    add         r0, r3
    dec         r5d

.loop:
%assign src2_off 0
%rep 2
    ; two rows per group; both rows are loaded before either is stored
    mova        m0, [r1]
    pavgb       m0, [r2+src2_off]
    mova        m1, [r1+r4]
    pavgb       m1, [r2+src2_off+8]
    OP          m0, [r0]
    OP          m1, [r0+r3]
    lea         r1, [r1+2*r4]
    lea         r0, [r0+2*r3]
%assign src2_off src2_off+16
%endrep
%undef src2_off
    add         r2, 32              ; four packed src2 rows consumed
    sub         r5d, 4              ; must stay last: jnz tests its flags
    jnz         .loop
    REP_RET
%endmacro

; Instantiate the store and the averaging variant for MMXEXT.
INIT_MMX mmxext
PIXELS8_L2 put
PIXELS8_L2 avg
132
 
133
; void pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
;
; PIXELS16_L2 <put|avg>
;   Emits %1_pixels16_l2.  Each 16-byte row is handled as two 8-byte halves
;   (offsets 0 and 8): src1 is averaged with src2 via pavgb (unsigned bytes,
;   rounding up) and each half goes through OP, i.e. op_put (store) or op_avg
;   (average with dst, then store).
;
;   Registers, in prototype order: r0 = dst, r1 = src1, r2 = src2,
;   r3 = dstStride, r4 = src1Stride, r5d = h.
;   src1 and dst step by their strides; src2 rows are read back to back,
;   16 bytes apart.
;
;   Row count: an odd h has one row peeled up front.  The loop body covers two
;   rows (generated by %rep) and exits only when the counter hits exactly
;   zero, so the count left after peeling must be a non-zero multiple of 2.
%macro PIXELS16_L2 1
%define OP op_%1
cglobal %1_pixels16_l2, 6,6
    movsxdifnidn r3, r3d            ; strides arrive as int; widen if needed
    movsxdifnidn r4, r4d
    test        r5d, 1
    jz          .loop               ; even h: nothing to peel

    ; odd h: handle a single row first
    mova        m0, [r1]
    mova        m1, [r1+8]
    pavgb       m0, [r2]
    pavgb       m1, [r2+8]
    OP          m0, [r0]
    OP          m1, [r0+8]
    add         r1, r4
    add         r2, 16
    add         r0, r3
    dec         r5d

.loop:
%assign src2_off 0
%rep 2
    ; one full row: both halves are loaded before either is stored
    mova        m0, [r1]
    mova        m1, [r1+8]
    pavgb       m0, [r2+src2_off]
    pavgb       m1, [r2+src2_off+8]
    OP          m0, [r0]
    OP          m1, [r0+8]
    add         r1, r4
    add         r0, r3
%assign src2_off src2_off+16
%endrep
%undef src2_off
    add         r2, 32              ; two packed src2 rows consumed
    sub         r5d, 2              ; must stay last: jnz tests its flags
    jnz         .loop
    REP_RET
%endmacro

; Instantiate the store and the averaging variant for MMXEXT.
INIT_MMX mmxext
PIXELS16_L2 put
PIXELS16_L2 avg