Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4349 | Serge | 1 | /* |
2 | * MMX-optimized avg/put pixel routines |
||
3 | * |
||
4 | * Copyright (c) 2000, 2001 Fabrice Bellard |
||
5 | * Copyright (c) 2002-2004 Michael Niedermayer |
||
6 | * |
||
7 | * This file is part of FFmpeg. |
||
8 | * |
||
9 | * FFmpeg is free software; you can redistribute it and/or |
||
10 | * modify it under the terms of the GNU Lesser General Public |
||
11 | * License as published by the Free Software Foundation; either |
||
12 | * version 2.1 of the License, or (at your option) any later version. |
||
13 | * |
||
14 | * FFmpeg is distributed in the hope that it will be useful, |
||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||
17 | * Lesser General Public License for more details. |
||
18 | * |
||
19 | * You should have received a copy of the GNU Lesser General Public |
||
20 | * License along with FFmpeg; if not, write to the Free Software |
||
21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||
22 | */ |
||
23 | |||
#include <stddef.h>
#include <stdint.h>
26 | |||
27 | #include "config.h" |
||
28 | #include "dsputil_x86.h" |
||
29 | |||
30 | #if HAVE_MMX_INLINE |
||
31 | |||
32 | // in case more speed is needed - unrolling would certainly help |
||
33 | void ff_avg_pixels8_mmx(uint8_t *block, const uint8_t *pixels, |
||
34 | ptrdiff_t line_size, int h) |
||
35 | { |
||
36 | MOVQ_BFE(mm6); |
||
37 | JUMPALIGN(); |
||
38 | do { |
||
39 | __asm__ volatile( |
||
40 | "movq %0, %%mm0 \n\t" |
||
41 | "movq %1, %%mm1 \n\t" |
||
42 | PAVGB_MMX(%%mm0, %%mm1, %%mm2, %%mm6) |
||
43 | "movq %%mm2, %0 \n\t" |
||
44 | :"+m"(*block) |
||
45 | :"m"(*pixels) |
||
46 | :"memory"); |
||
47 | pixels += line_size; |
||
48 | block += line_size; |
||
49 | } |
||
50 | while (--h); |
||
51 | } |
||
52 | |||
53 | void ff_avg_pixels16_mmx(uint8_t *block, const uint8_t *pixels, |
||
54 | ptrdiff_t line_size, int h) |
||
55 | { |
||
56 | MOVQ_BFE(mm6); |
||
57 | JUMPALIGN(); |
||
58 | do { |
||
59 | __asm__ volatile( |
||
60 | "movq %0, %%mm0 \n\t" |
||
61 | "movq %1, %%mm1 \n\t" |
||
62 | PAVGB_MMX(%%mm0, %%mm1, %%mm2, %%mm6) |
||
63 | "movq %%mm2, %0 \n\t" |
||
64 | "movq 8%0, %%mm0 \n\t" |
||
65 | "movq 8%1, %%mm1 \n\t" |
||
66 | PAVGB_MMX(%%mm0, %%mm1, %%mm2, %%mm6) |
||
67 | "movq %%mm2, 8%0 \n\t" |
||
68 | :"+m"(*block) |
||
69 | :"m"(*pixels) |
||
70 | :"memory"); |
||
71 | pixels += line_size; |
||
72 | block += line_size; |
||
73 | } |
||
74 | while (--h); |
||
75 | } |
||
76 | |||
77 | void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, |
||
78 | ptrdiff_t line_size, int h) |
||
79 | { |
||
80 | __asm__ volatile ( |
||
81 | "lea (%3, %3), %%"REG_a" \n\t" |
||
82 | ".p2align 3 \n\t" |
||
83 | "1: \n\t" |
||
84 | "movq (%1 ), %%mm0 \n\t" |
||
85 | "movq (%1, %3), %%mm1 \n\t" |
||
86 | "movq %%mm0, (%2) \n\t" |
||
87 | "movq %%mm1, (%2, %3) \n\t" |
||
88 | "add %%"REG_a", %1 \n\t" |
||
89 | "add %%"REG_a", %2 \n\t" |
||
90 | "movq (%1 ), %%mm0 \n\t" |
||
91 | "movq (%1, %3), %%mm1 \n\t" |
||
92 | "movq %%mm0, (%2) \n\t" |
||
93 | "movq %%mm1, (%2, %3) \n\t" |
||
94 | "add %%"REG_a", %1 \n\t" |
||
95 | "add %%"REG_a", %2 \n\t" |
||
96 | "subl $4, %0 \n\t" |
||
97 | "jnz 1b \n\t" |
||
98 | : "+g"(h), "+r"(pixels), "+r"(block) |
||
99 | : "r"((x86_reg)line_size) |
||
100 | : "%"REG_a, "memory" |
||
101 | ); |
||
102 | } |
||
103 | |||
104 | void ff_put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, |
||
105 | ptrdiff_t line_size, int h) |
||
106 | { |
||
107 | __asm__ volatile ( |
||
108 | "lea (%3, %3), %%"REG_a" \n\t" |
||
109 | ".p2align 3 \n\t" |
||
110 | "1: \n\t" |
||
111 | "movq (%1 ), %%mm0 \n\t" |
||
112 | "movq 8(%1 ), %%mm4 \n\t" |
||
113 | "movq (%1, %3), %%mm1 \n\t" |
||
114 | "movq 8(%1, %3), %%mm5 \n\t" |
||
115 | "movq %%mm0, (%2) \n\t" |
||
116 | "movq %%mm4, 8(%2) \n\t" |
||
117 | "movq %%mm1, (%2, %3) \n\t" |
||
118 | "movq %%mm5, 8(%2, %3) \n\t" |
||
119 | "add %%"REG_a", %1 \n\t" |
||
120 | "add %%"REG_a", %2 \n\t" |
||
121 | "movq (%1 ), %%mm0 \n\t" |
||
122 | "movq 8(%1 ), %%mm4 \n\t" |
||
123 | "movq (%1, %3), %%mm1 \n\t" |
||
124 | "movq 8(%1, %3), %%mm5 \n\t" |
||
125 | "movq %%mm0, (%2) \n\t" |
||
126 | "movq %%mm4, 8(%2) \n\t" |
||
127 | "movq %%mm1, (%2, %3) \n\t" |
||
128 | "movq %%mm5, 8(%2, %3) \n\t" |
||
129 | "add %%"REG_a", %1 \n\t" |
||
130 | "add %%"REG_a", %2 \n\t" |
||
131 | "subl $4, %0 \n\t" |
||
132 | "jnz 1b \n\t" |
||
133 | : "+g"(h), "+r"(pixels), "+r"(block) |
||
134 | : "r"((x86_reg)line_size) |
||
135 | : "%"REG_a, "memory" |
||
136 | ); |
||
137 | } |
||
138 | |||
139 | #endif /* HAVE_MMX_INLINE */ |