Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4349 Serge 1
/*
 * DSP utils mmx functions are compiled twice for rnd/no_rnd
 * Copyright (c) 2000, 2001 Fabrice Bellard
 * Copyright (c) 2003-2004 Michael Niedermayer
 *
 * MMX optimization by Nick Kurshev
 * mostly rewritten by Michael Niedermayer
 * and improved by Zdenek Kabelac
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
26
 
27
#include <stddef.h>
#include <stdint.h>
29
 
30
// put_pixels
31
STATIC void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels,
32
                                  ptrdiff_t line_size, int h)
33
{
34
    MOVQ_ZERO(mm7);
35
    SET_RND(mm6); // =2 for rnd  and  =1 for no_rnd version
36
    __asm__ volatile(
37
        "movq   (%1), %%mm0             \n\t"
38
        "movq   1(%1), %%mm4            \n\t"
39
        "movq   %%mm0, %%mm1            \n\t"
40
        "movq   %%mm4, %%mm5            \n\t"
41
        "punpcklbw %%mm7, %%mm0         \n\t"
42
        "punpcklbw %%mm7, %%mm4         \n\t"
43
        "punpckhbw %%mm7, %%mm1         \n\t"
44
        "punpckhbw %%mm7, %%mm5         \n\t"
45
        "paddusw %%mm0, %%mm4           \n\t"
46
        "paddusw %%mm1, %%mm5           \n\t"
47
        "xor    %%"REG_a", %%"REG_a"    \n\t"
48
        "add    %3, %1                  \n\t"
49
        ".p2align 3                     \n\t"
50
        "1:                             \n\t"
51
        "movq   (%1, %%"REG_a"), %%mm0  \n\t"
52
        "movq   1(%1, %%"REG_a"), %%mm2 \n\t"
53
        "movq   %%mm0, %%mm1            \n\t"
54
        "movq   %%mm2, %%mm3            \n\t"
55
        "punpcklbw %%mm7, %%mm0         \n\t"
56
        "punpcklbw %%mm7, %%mm2         \n\t"
57
        "punpckhbw %%mm7, %%mm1         \n\t"
58
        "punpckhbw %%mm7, %%mm3         \n\t"
59
        "paddusw %%mm2, %%mm0           \n\t"
60
        "paddusw %%mm3, %%mm1           \n\t"
61
        "paddusw %%mm6, %%mm4           \n\t"
62
        "paddusw %%mm6, %%mm5           \n\t"
63
        "paddusw %%mm0, %%mm4           \n\t"
64
        "paddusw %%mm1, %%mm5           \n\t"
65
        "psrlw  $2, %%mm4               \n\t"
66
        "psrlw  $2, %%mm5               \n\t"
67
        "packuswb  %%mm5, %%mm4         \n\t"
68
        "movq   %%mm4, (%2, %%"REG_a")  \n\t"
69
        "add    %3, %%"REG_a"           \n\t"
70
 
71
        "movq   (%1, %%"REG_a"), %%mm2  \n\t" // 0 <-> 2   1 <-> 3
72
        "movq   1(%1, %%"REG_a"), %%mm4 \n\t"
73
        "movq   %%mm2, %%mm3            \n\t"
74
        "movq   %%mm4, %%mm5            \n\t"
75
        "punpcklbw %%mm7, %%mm2         \n\t"
76
        "punpcklbw %%mm7, %%mm4         \n\t"
77
        "punpckhbw %%mm7, %%mm3         \n\t"
78
        "punpckhbw %%mm7, %%mm5         \n\t"
79
        "paddusw %%mm2, %%mm4           \n\t"
80
        "paddusw %%mm3, %%mm5           \n\t"
81
        "paddusw %%mm6, %%mm0           \n\t"
82
        "paddusw %%mm6, %%mm1           \n\t"
83
        "paddusw %%mm4, %%mm0           \n\t"
84
        "paddusw %%mm5, %%mm1           \n\t"
85
        "psrlw  $2, %%mm0               \n\t"
86
        "psrlw  $2, %%mm1               \n\t"
87
        "packuswb  %%mm1, %%mm0         \n\t"
88
        "movq   %%mm0, (%2, %%"REG_a")  \n\t"
89
        "add    %3, %%"REG_a"           \n\t"
90
 
91
        "subl   $2, %0                  \n\t"
92
        "jnz    1b                      \n\t"
93
        :"+g"(h), "+S"(pixels)
94
        :"D"(block), "r"((x86_reg)line_size)
95
        :REG_a, "memory");
96
}
97
 
98
// avg_pixels
99
// this routine is 'slightly' suboptimal but mostly unused
100
STATIC void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels,
101
                                  ptrdiff_t line_size, int h)
102
{
103
    MOVQ_ZERO(mm7);
104
    SET_RND(mm6); // =2 for rnd  and  =1 for no_rnd version
105
    __asm__ volatile(
106
        "movq   (%1), %%mm0             \n\t"
107
        "movq   1(%1), %%mm4            \n\t"
108
        "movq   %%mm0, %%mm1            \n\t"
109
        "movq   %%mm4, %%mm5            \n\t"
110
        "punpcklbw %%mm7, %%mm0         \n\t"
111
        "punpcklbw %%mm7, %%mm4         \n\t"
112
        "punpckhbw %%mm7, %%mm1         \n\t"
113
        "punpckhbw %%mm7, %%mm5         \n\t"
114
        "paddusw %%mm0, %%mm4           \n\t"
115
        "paddusw %%mm1, %%mm5           \n\t"
116
        "xor    %%"REG_a", %%"REG_a"    \n\t"
117
        "add    %3, %1                  \n\t"
118
        ".p2align 3                     \n\t"
119
        "1:                             \n\t"
120
        "movq   (%1, %%"REG_a"), %%mm0  \n\t"
121
        "movq   1(%1, %%"REG_a"), %%mm2 \n\t"
122
        "movq   %%mm0, %%mm1            \n\t"
123
        "movq   %%mm2, %%mm3            \n\t"
124
        "punpcklbw %%mm7, %%mm0         \n\t"
125
        "punpcklbw %%mm7, %%mm2         \n\t"
126
        "punpckhbw %%mm7, %%mm1         \n\t"
127
        "punpckhbw %%mm7, %%mm3         \n\t"
128
        "paddusw %%mm2, %%mm0           \n\t"
129
        "paddusw %%mm3, %%mm1           \n\t"
130
        "paddusw %%mm6, %%mm4           \n\t"
131
        "paddusw %%mm6, %%mm5           \n\t"
132
        "paddusw %%mm0, %%mm4           \n\t"
133
        "paddusw %%mm1, %%mm5           \n\t"
134
        "psrlw  $2, %%mm4               \n\t"
135
        "psrlw  $2, %%mm5               \n\t"
136
                "movq   (%2, %%"REG_a"), %%mm3  \n\t"
137
        "packuswb  %%mm5, %%mm4         \n\t"
138
                "pcmpeqd %%mm2, %%mm2   \n\t"
139
                "paddb %%mm2, %%mm2     \n\t"
140
                PAVGB_MMX(%%mm3, %%mm4, %%mm5, %%mm2)
141
                "movq   %%mm5, (%2, %%"REG_a")  \n\t"
142
        "add    %3, %%"REG_a"                \n\t"
143
 
144
        "movq   (%1, %%"REG_a"), %%mm2  \n\t" // 0 <-> 2   1 <-> 3
145
        "movq   1(%1, %%"REG_a"), %%mm4 \n\t"
146
        "movq   %%mm2, %%mm3            \n\t"
147
        "movq   %%mm4, %%mm5            \n\t"
148
        "punpcklbw %%mm7, %%mm2         \n\t"
149
        "punpcklbw %%mm7, %%mm4         \n\t"
150
        "punpckhbw %%mm7, %%mm3         \n\t"
151
        "punpckhbw %%mm7, %%mm5         \n\t"
152
        "paddusw %%mm2, %%mm4           \n\t"
153
        "paddusw %%mm3, %%mm5           \n\t"
154
        "paddusw %%mm6, %%mm0           \n\t"
155
        "paddusw %%mm6, %%mm1           \n\t"
156
        "paddusw %%mm4, %%mm0           \n\t"
157
        "paddusw %%mm5, %%mm1           \n\t"
158
        "psrlw  $2, %%mm0               \n\t"
159
        "psrlw  $2, %%mm1               \n\t"
160
                "movq   (%2, %%"REG_a"), %%mm3  \n\t"
161
        "packuswb  %%mm1, %%mm0         \n\t"
162
                "pcmpeqd %%mm2, %%mm2   \n\t"
163
                "paddb %%mm2, %%mm2     \n\t"
164
                PAVGB_MMX(%%mm3, %%mm0, %%mm1, %%mm2)
165
                "movq   %%mm1, (%2, %%"REG_a")  \n\t"
166
        "add    %3, %%"REG_a"           \n\t"
167
 
168
        "subl   $2, %0                  \n\t"
169
        "jnz    1b                      \n\t"
170
        :"+g"(h), "+S"(pixels)
171
        :"D"(block), "r"((x86_reg)line_size)
172
        :REG_a, "memory");
173
}