Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
/*
 * MMX optimized motion estimation
 * Copyright (c) 2001 Fabrice Bellard
 * Copyright (c) 2002-2004 Michael Niedermayer
 *
 * mostly by Michael Niedermayer
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
||
24 | |||
25 | #include "libavutil/attributes.h" |
||
26 | #include "libavutil/avassert.h" |
||
27 | #include "libavutil/mem.h" |
||
28 | #include "libavutil/x86/asm.h" |
||
29 | #include "libavutil/x86/cpu.h" |
||
30 | #include "dsputil_x86.h" |
||
31 | |||
32 | #if HAVE_INLINE_ASM |
||
33 | |||
34 | DECLARE_ASM_CONST(8, uint64_t, round_tab)[3]={ |
||
35 | 0x0000000000000000ULL, |
||
36 | 0x0001000100010001ULL, |
||
37 | 0x0002000200020002ULL, |
||
38 | }; |
||
39 | |||
40 | DECLARE_ASM_CONST(8, uint64_t, bone)= 0x0101010101010101LL; |
||
41 | |||
42 | static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) |
||
43 | { |
||
44 | x86_reg len= -(x86_reg)stride*h; |
||
45 | __asm__ volatile( |
||
46 | ".p2align 4 \n\t" |
||
47 | "1: \n\t" |
||
48 | "movq (%1, %%"REG_a"), %%mm0 \n\t" |
||
49 | "movq (%2, %%"REG_a"), %%mm2 \n\t" |
||
50 | "movq (%2, %%"REG_a"), %%mm4 \n\t" |
||
51 | "add %3, %%"REG_a" \n\t" |
||
52 | "psubusb %%mm0, %%mm2 \n\t" |
||
53 | "psubusb %%mm4, %%mm0 \n\t" |
||
54 | "movq (%1, %%"REG_a"), %%mm1 \n\t" |
||
55 | "movq (%2, %%"REG_a"), %%mm3 \n\t" |
||
56 | "movq (%2, %%"REG_a"), %%mm5 \n\t" |
||
57 | "psubusb %%mm1, %%mm3 \n\t" |
||
58 | "psubusb %%mm5, %%mm1 \n\t" |
||
59 | "por %%mm2, %%mm0 \n\t" |
||
60 | "por %%mm1, %%mm3 \n\t" |
||
61 | "movq %%mm0, %%mm1 \n\t" |
||
62 | "movq %%mm3, %%mm2 \n\t" |
||
63 | "punpcklbw %%mm7, %%mm0 \n\t" |
||
64 | "punpckhbw %%mm7, %%mm1 \n\t" |
||
65 | "punpcklbw %%mm7, %%mm3 \n\t" |
||
66 | "punpckhbw %%mm7, %%mm2 \n\t" |
||
67 | "paddw %%mm1, %%mm0 \n\t" |
||
68 | "paddw %%mm3, %%mm2 \n\t" |
||
69 | "paddw %%mm2, %%mm0 \n\t" |
||
70 | "paddw %%mm0, %%mm6 \n\t" |
||
71 | "add %3, %%"REG_a" \n\t" |
||
72 | " js 1b \n\t" |
||
73 | : "+a" (len) |
||
74 | : "r" (blk1 - len), "r" (blk2 - len), "r" ((x86_reg)stride) |
||
75 | ); |
||
76 | } |
||
77 | |||
78 | static inline void sad8_1_mmxext(uint8_t *blk1, uint8_t *blk2, |
||
79 | int stride, int h) |
||
80 | { |
||
81 | __asm__ volatile( |
||
82 | ".p2align 4 \n\t" |
||
83 | "1: \n\t" |
||
84 | "movq (%1), %%mm0 \n\t" |
||
85 | "movq (%1, %3), %%mm1 \n\t" |
||
86 | "psadbw (%2), %%mm0 \n\t" |
||
87 | "psadbw (%2, %3), %%mm1 \n\t" |
||
88 | "paddw %%mm0, %%mm6 \n\t" |
||
89 | "paddw %%mm1, %%mm6 \n\t" |
||
90 | "lea (%1,%3,2), %1 \n\t" |
||
91 | "lea (%2,%3,2), %2 \n\t" |
||
92 | "sub $2, %0 \n\t" |
||
93 | " jg 1b \n\t" |
||
94 | : "+r" (h), "+r" (blk1), "+r" (blk2) |
||
95 | : "r" ((x86_reg)stride) |
||
96 | ); |
||
97 | } |
||
98 | |||
99 | static int sad16_sse2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h) |
||
100 | { |
||
101 | int ret; |
||
102 | __asm__ volatile( |
||
103 | "pxor %%xmm2, %%xmm2 \n\t" |
||
104 | ".p2align 4 \n\t" |
||
105 | "1: \n\t" |
||
106 | "movdqu (%1), %%xmm0 \n\t" |
||
107 | "movdqu (%1, %4), %%xmm1 \n\t" |
||
108 | "psadbw (%2), %%xmm0 \n\t" |
||
109 | "psadbw (%2, %4), %%xmm1 \n\t" |
||
110 | "paddw %%xmm0, %%xmm2 \n\t" |
||
111 | "paddw %%xmm1, %%xmm2 \n\t" |
||
112 | "lea (%1,%4,2), %1 \n\t" |
||
113 | "lea (%2,%4,2), %2 \n\t" |
||
114 | "sub $2, %0 \n\t" |
||
115 | " jg 1b \n\t" |
||
116 | "movhlps %%xmm2, %%xmm0 \n\t" |
||
117 | "paddw %%xmm0, %%xmm2 \n\t" |
||
118 | "movd %%xmm2, %3 \n\t" |
||
119 | : "+r" (h), "+r" (blk1), "+r" (blk2), "=r"(ret) |
||
120 | : "r" ((x86_reg)stride) |
||
121 | ); |
||
122 | return ret; |
||
123 | } |
||
124 | |||
125 | static inline void sad8_x2a_mmxext(uint8_t *blk1, uint8_t *blk2, |
||
126 | int stride, int h) |
||
127 | { |
||
128 | __asm__ volatile( |
||
129 | ".p2align 4 \n\t" |
||
130 | "1: \n\t" |
||
131 | "movq (%1), %%mm0 \n\t" |
||
132 | "movq (%1, %3), %%mm1 \n\t" |
||
133 | "pavgb 1(%1), %%mm0 \n\t" |
||
134 | "pavgb 1(%1, %3), %%mm1 \n\t" |
||
135 | "psadbw (%2), %%mm0 \n\t" |
||
136 | "psadbw (%2, %3), %%mm1 \n\t" |
||
137 | "paddw %%mm0, %%mm6 \n\t" |
||
138 | "paddw %%mm1, %%mm6 \n\t" |
||
139 | "lea (%1,%3,2), %1 \n\t" |
||
140 | "lea (%2,%3,2), %2 \n\t" |
||
141 | "sub $2, %0 \n\t" |
||
142 | " jg 1b \n\t" |
||
143 | : "+r" (h), "+r" (blk1), "+r" (blk2) |
||
144 | : "r" ((x86_reg)stride) |
||
145 | ); |
||
146 | } |
||
147 | |||
148 | static inline void sad8_y2a_mmxext(uint8_t *blk1, uint8_t *blk2, |
||
149 | int stride, int h) |
||
150 | { |
||
151 | __asm__ volatile( |
||
152 | "movq (%1), %%mm0 \n\t" |
||
153 | "add %3, %1 \n\t" |
||
154 | ".p2align 4 \n\t" |
||
155 | "1: \n\t" |
||
156 | "movq (%1), %%mm1 \n\t" |
||
157 | "movq (%1, %3), %%mm2 \n\t" |
||
158 | "pavgb %%mm1, %%mm0 \n\t" |
||
159 | "pavgb %%mm2, %%mm1 \n\t" |
||
160 | "psadbw (%2), %%mm0 \n\t" |
||
161 | "psadbw (%2, %3), %%mm1 \n\t" |
||
162 | "paddw %%mm0, %%mm6 \n\t" |
||
163 | "paddw %%mm1, %%mm6 \n\t" |
||
164 | "movq %%mm2, %%mm0 \n\t" |
||
165 | "lea (%1,%3,2), %1 \n\t" |
||
166 | "lea (%2,%3,2), %2 \n\t" |
||
167 | "sub $2, %0 \n\t" |
||
168 | " jg 1b \n\t" |
||
169 | : "+r" (h), "+r" (blk1), "+r" (blk2) |
||
170 | : "r" ((x86_reg)stride) |
||
171 | ); |
||
172 | } |
||
173 | |||
174 | static inline void sad8_4_mmxext(uint8_t *blk1, uint8_t *blk2, |
||
175 | int stride, int h) |
||
176 | { |
||
177 | __asm__ volatile( |
||
178 | "movq "MANGLE(bone)", %%mm5 \n\t" |
||
179 | "movq (%1), %%mm0 \n\t" |
||
180 | "pavgb 1(%1), %%mm0 \n\t" |
||
181 | "add %3, %1 \n\t" |
||
182 | ".p2align 4 \n\t" |
||
183 | "1: \n\t" |
||
184 | "movq (%1), %%mm1 \n\t" |
||
185 | "movq (%1,%3), %%mm2 \n\t" |
||
186 | "pavgb 1(%1), %%mm1 \n\t" |
||
187 | "pavgb 1(%1,%3), %%mm2 \n\t" |
||
188 | "psubusb %%mm5, %%mm1 \n\t" |
||
189 | "pavgb %%mm1, %%mm0 \n\t" |
||
190 | "pavgb %%mm2, %%mm1 \n\t" |
||
191 | "psadbw (%2), %%mm0 \n\t" |
||
192 | "psadbw (%2,%3), %%mm1 \n\t" |
||
193 | "paddw %%mm0, %%mm6 \n\t" |
||
194 | "paddw %%mm1, %%mm6 \n\t" |
||
195 | "movq %%mm2, %%mm0 \n\t" |
||
196 | "lea (%1,%3,2), %1 \n\t" |
||
197 | "lea (%2,%3,2), %2 \n\t" |
||
198 | "sub $2, %0 \n\t" |
||
199 | " jg 1b \n\t" |
||
200 | : "+r" (h), "+r" (blk1), "+r" (blk2) |
||
201 | : "r" ((x86_reg)stride) |
||
202 | ); |
||
203 | } |
||
204 | |||
205 | static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int stride, int h) |
||
206 | { |
||
207 | x86_reg len= -(x86_reg)stride*h; |
||
208 | __asm__ volatile( |
||
209 | ".p2align 4 \n\t" |
||
210 | "1: \n\t" |
||
211 | "movq (%1, %%"REG_a"), %%mm0 \n\t" |
||
212 | "movq (%2, %%"REG_a"), %%mm1 \n\t" |
||
213 | "movq (%1, %%"REG_a"), %%mm2 \n\t" |
||
214 | "movq (%2, %%"REG_a"), %%mm3 \n\t" |
||
215 | "punpcklbw %%mm7, %%mm0 \n\t" |
||
216 | "punpcklbw %%mm7, %%mm1 \n\t" |
||
217 | "punpckhbw %%mm7, %%mm2 \n\t" |
||
218 | "punpckhbw %%mm7, %%mm3 \n\t" |
||
219 | "paddw %%mm0, %%mm1 \n\t" |
||
220 | "paddw %%mm2, %%mm3 \n\t" |
||
221 | "movq (%3, %%"REG_a"), %%mm4 \n\t" |
||
222 | "movq (%3, %%"REG_a"), %%mm2 \n\t" |
||
223 | "paddw %%mm5, %%mm1 \n\t" |
||
224 | "paddw %%mm5, %%mm3 \n\t" |
||
225 | "psrlw $1, %%mm1 \n\t" |
||
226 | "psrlw $1, %%mm3 \n\t" |
||
227 | "packuswb %%mm3, %%mm1 \n\t" |
||
228 | "psubusb %%mm1, %%mm4 \n\t" |
||
229 | "psubusb %%mm2, %%mm1 \n\t" |
||
230 | "por %%mm4, %%mm1 \n\t" |
||
231 | "movq %%mm1, %%mm0 \n\t" |
||
232 | "punpcklbw %%mm7, %%mm0 \n\t" |
||
233 | "punpckhbw %%mm7, %%mm1 \n\t" |
||
234 | "paddw %%mm1, %%mm0 \n\t" |
||
235 | "paddw %%mm0, %%mm6 \n\t" |
||
236 | "add %4, %%"REG_a" \n\t" |
||
237 | " js 1b \n\t" |
||
238 | : "+a" (len) |
||
239 | : "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" ((x86_reg)stride) |
||
240 | ); |
||
241 | } |
||
242 | |||
243 | static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) |
||
244 | { |
||
245 | x86_reg len= -(x86_reg)stride*h; |
||
246 | __asm__ volatile( |
||
247 | "movq (%1, %%"REG_a"), %%mm0 \n\t" |
||
248 | "movq 1(%1, %%"REG_a"), %%mm2 \n\t" |
||
249 | "movq %%mm0, %%mm1 \n\t" |
||
250 | "movq %%mm2, %%mm3 \n\t" |
||
251 | "punpcklbw %%mm7, %%mm0 \n\t" |
||
252 | "punpckhbw %%mm7, %%mm1 \n\t" |
||
253 | "punpcklbw %%mm7, %%mm2 \n\t" |
||
254 | "punpckhbw %%mm7, %%mm3 \n\t" |
||
255 | "paddw %%mm2, %%mm0 \n\t" |
||
256 | "paddw %%mm3, %%mm1 \n\t" |
||
257 | ".p2align 4 \n\t" |
||
258 | "1: \n\t" |
||
259 | "movq (%2, %%"REG_a"), %%mm2 \n\t" |
||
260 | "movq 1(%2, %%"REG_a"), %%mm4 \n\t" |
||
261 | "movq %%mm2, %%mm3 \n\t" |
||
262 | "movq %%mm4, %%mm5 \n\t" |
||
263 | "punpcklbw %%mm7, %%mm2 \n\t" |
||
264 | "punpckhbw %%mm7, %%mm3 \n\t" |
||
265 | "punpcklbw %%mm7, %%mm4 \n\t" |
||
266 | "punpckhbw %%mm7, %%mm5 \n\t" |
||
267 | "paddw %%mm4, %%mm2 \n\t" |
||
268 | "paddw %%mm5, %%mm3 \n\t" |
||
269 | "movq 16+"MANGLE(round_tab)", %%mm5 \n\t" |
||
270 | "paddw %%mm2, %%mm0 \n\t" |
||
271 | "paddw %%mm3, %%mm1 \n\t" |
||
272 | "paddw %%mm5, %%mm0 \n\t" |
||
273 | "paddw %%mm5, %%mm1 \n\t" |
||
274 | "movq (%3, %%"REG_a"), %%mm4 \n\t" |
||
275 | "movq (%3, %%"REG_a"), %%mm5 \n\t" |
||
276 | "psrlw $2, %%mm0 \n\t" |
||
277 | "psrlw $2, %%mm1 \n\t" |
||
278 | "packuswb %%mm1, %%mm0 \n\t" |
||
279 | "psubusb %%mm0, %%mm4 \n\t" |
||
280 | "psubusb %%mm5, %%mm0 \n\t" |
||
281 | "por %%mm4, %%mm0 \n\t" |
||
282 | "movq %%mm0, %%mm4 \n\t" |
||
283 | "punpcklbw %%mm7, %%mm0 \n\t" |
||
284 | "punpckhbw %%mm7, %%mm4 \n\t" |
||
285 | "paddw %%mm0, %%mm6 \n\t" |
||
286 | "paddw %%mm4, %%mm6 \n\t" |
||
287 | "movq %%mm2, %%mm0 \n\t" |
||
288 | "movq %%mm3, %%mm1 \n\t" |
||
289 | "add %4, %%"REG_a" \n\t" |
||
290 | " js 1b \n\t" |
||
291 | : "+a" (len) |
||
292 | : "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((x86_reg)stride) |
||
293 | ); |
||
294 | } |
||
295 | |||
/**
 * Horizontally add the four 16-bit lanes of the mm6 accumulator.
 *
 * The lanes are folded with two shift-and-add steps (by 32 and by 16 bits);
 * only the low 16 bits of the extracted value hold the total, so the result
 * is masked to that range.  mm0 and mm6 are modified.
 *
 * @return sum of the four 16-bit lanes of mm6, modulo 65536
 */
static inline int sum_mmx(void)
{
    int ret;
    __asm__ volatile(
        "movq %%mm6, %%mm0 \n\t"
        "psrlq $32, %%mm6 \n\t"
        "paddw %%mm0, %%mm6 \n\t"
        "movq %%mm6, %%mm0 \n\t"
        "psrlq $16, %%mm6 \n\t"
        "paddw %%mm0, %%mm6 \n\t"
        "movd %%mm6, %0 \n\t"
        : "=r" (ret)
    );
    /* the upper 16 bits of ret contain an unrelated partial sum */
    return ret & 0xFFFF;
}
||
311 | |||
/**
 * Read the accumulated SAD out of mm6 for the MMXEXT kernels.
 *
 * The MMXEXT helpers in this file accumulate psadbw results, so no
 * horizontal folding is performed here: the low 32 bits of mm6 are returned
 * as they are.  mm6 is left unchanged.
 *
 * @return the low 32 bits of mm6
 */
static inline int sum_mmxext(void)
{
    int ret;
    __asm__ volatile(
        "movd %%mm6, %0 \n\t"
        : "=r" (ret)
    );
    return ret;
}
||
321 | |||
/**
 * Plain-MMX horizontal half-pel SAD helper: forwards to sad8_2_mmx with
 * blk1 and blk1 + 1 as the two blocks to average.  The register contract of
 * sad8_2_mmx (mm5, mm6, mm7) applies unchanged.
 */
static inline void sad8_x2a_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{
    sad8_2_mmx(blk1, blk1 + 1, blk2, stride, h);
}
||
/**
 * Plain-MMX vertical half-pel SAD helper: forwards to sad8_2_mmx with blk1
 * and blk1 + stride (the row below) as the two blocks to average.  The
 * register contract of sad8_2_mmx (mm5, mm6, mm7) applies unchanged.
 */
static inline void sad8_y2a_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{
    sad8_2_mmx(blk1, blk1 + stride, blk2, stride, h);
}
||
330 | |||
331 | |||
332 | #define PIX_SAD(suf)\ |
||
333 | static int sad8_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ |
||
334 | {\ |
||
335 | av_assert2(h==8);\ |
||
336 | __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\ |
||
337 | "pxor %%mm6, %%mm6 \n\t":);\ |
||
338 | \ |
||
339 | sad8_1_ ## suf(blk1, blk2, stride, 8);\ |
||
340 | \ |
||
341 | return sum_ ## suf();\ |
||
342 | }\ |
||
343 | static int sad8_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ |
||
344 | {\ |
||
345 | av_assert2(h==8);\ |
||
346 | __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\ |
||
347 | "pxor %%mm6, %%mm6 \n\t"\ |
||
348 | "movq %0, %%mm5 \n\t"\ |
||
349 | :: "m"(round_tab[1]) \ |
||
350 | );\ |
||
351 | \ |
||
352 | sad8_x2a_ ## suf(blk1, blk2, stride, 8);\ |
||
353 | \ |
||
354 | return sum_ ## suf();\ |
||
355 | }\ |
||
356 | \ |
||
357 | static int sad8_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ |
||
358 | {\ |
||
359 | av_assert2(h==8);\ |
||
360 | __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\ |
||
361 | "pxor %%mm6, %%mm6 \n\t"\ |
||
362 | "movq %0, %%mm5 \n\t"\ |
||
363 | :: "m"(round_tab[1]) \ |
||
364 | );\ |
||
365 | \ |
||
366 | sad8_y2a_ ## suf(blk1, blk2, stride, 8);\ |
||
367 | \ |
||
368 | return sum_ ## suf();\ |
||
369 | }\ |
||
370 | \ |
||
371 | static int sad8_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ |
||
372 | {\ |
||
373 | av_assert2(h==8);\ |
||
374 | __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\ |
||
375 | "pxor %%mm6, %%mm6 \n\t"\ |
||
376 | ::);\ |
||
377 | \ |
||
378 | sad8_4_ ## suf(blk1, blk2, stride, 8);\ |
||
379 | \ |
||
380 | return sum_ ## suf();\ |
||
381 | }\ |
||
382 | \ |
||
383 | static int sad16_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ |
||
384 | {\ |
||
385 | __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\ |
||
386 | "pxor %%mm6, %%mm6 \n\t":);\ |
||
387 | \ |
||
388 | sad8_1_ ## suf(blk1 , blk2 , stride, h);\ |
||
389 | sad8_1_ ## suf(blk1+8, blk2+8, stride, h);\ |
||
390 | \ |
||
391 | return sum_ ## suf();\ |
||
392 | }\ |
||
393 | static int sad16_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ |
||
394 | {\ |
||
395 | __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\ |
||
396 | "pxor %%mm6, %%mm6 \n\t"\ |
||
397 | "movq %0, %%mm5 \n\t"\ |
||
398 | :: "m"(round_tab[1]) \ |
||
399 | );\ |
||
400 | \ |
||
401 | sad8_x2a_ ## suf(blk1 , blk2 , stride, h);\ |
||
402 | sad8_x2a_ ## suf(blk1+8, blk2+8, stride, h);\ |
||
403 | \ |
||
404 | return sum_ ## suf();\ |
||
405 | }\ |
||
406 | static int sad16_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ |
||
407 | {\ |
||
408 | __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\ |
||
409 | "pxor %%mm6, %%mm6 \n\t"\ |
||
410 | "movq %0, %%mm5 \n\t"\ |
||
411 | :: "m"(round_tab[1]) \ |
||
412 | );\ |
||
413 | \ |
||
414 | sad8_y2a_ ## suf(blk1 , blk2 , stride, h);\ |
||
415 | sad8_y2a_ ## suf(blk1+8, blk2+8, stride, h);\ |
||
416 | \ |
||
417 | return sum_ ## suf();\ |
||
418 | }\ |
||
419 | static int sad16_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ |
||
420 | {\ |
||
421 | __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\ |
||
422 | "pxor %%mm6, %%mm6 \n\t"\ |
||
423 | ::);\ |
||
424 | \ |
||
425 | sad8_4_ ## suf(blk1 , blk2 , stride, h);\ |
||
426 | sad8_4_ ## suf(blk1+8, blk2+8, stride, h);\ |
||
427 | \ |
||
428 | return sum_ ## suf();\ |
||
429 | }\ |
||
430 | |||
431 | PIX_SAD(mmx) |
||
432 | PIX_SAD(mmxext) |
||
433 | |||
434 | #endif /* HAVE_INLINE_ASM */ |
||
435 | |||
436 | av_cold void ff_dsputil_init_pix_mmx(DSPContext *c, AVCodecContext *avctx) |
||
437 | { |
||
438 | #if HAVE_INLINE_ASM |
||
439 | int cpu_flags = av_get_cpu_flags(); |
||
440 | |||
441 | if (INLINE_MMX(cpu_flags)) { |
||
442 | c->pix_abs[0][0] = sad16_mmx; |
||
443 | c->pix_abs[0][1] = sad16_x2_mmx; |
||
444 | c->pix_abs[0][2] = sad16_y2_mmx; |
||
445 | c->pix_abs[0][3] = sad16_xy2_mmx; |
||
446 | c->pix_abs[1][0] = sad8_mmx; |
||
447 | c->pix_abs[1][1] = sad8_x2_mmx; |
||
448 | c->pix_abs[1][2] = sad8_y2_mmx; |
||
449 | c->pix_abs[1][3] = sad8_xy2_mmx; |
||
450 | |||
451 | c->sad[0]= sad16_mmx; |
||
452 | c->sad[1]= sad8_mmx; |
||
453 | } |
||
454 | if (INLINE_MMXEXT(cpu_flags)) { |
||
455 | c->pix_abs[0][0] = sad16_mmxext; |
||
456 | c->pix_abs[1][0] = sad8_mmxext; |
||
457 | |||
458 | c->sad[0] = sad16_mmxext; |
||
459 | c->sad[1] = sad8_mmxext; |
||
460 | |||
461 | if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ |
||
462 | c->pix_abs[0][1] = sad16_x2_mmxext; |
||
463 | c->pix_abs[0][2] = sad16_y2_mmxext; |
||
464 | c->pix_abs[0][3] = sad16_xy2_mmxext; |
||
465 | c->pix_abs[1][1] = sad8_x2_mmxext; |
||
466 | c->pix_abs[1][2] = sad8_y2_mmxext; |
||
467 | c->pix_abs[1][3] = sad8_xy2_mmxext; |
||
468 | } |
||
469 | } |
||
470 | if (INLINE_SSE2(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_3DNOW) && avctx->codec_id != AV_CODEC_ID_SNOW) { |
||
471 | c->sad[0]= sad16_sse2; |
||
472 | } |
||
473 | #endif /* HAVE_INLINE_ASM */ |
||
474 | } |