/*
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/mem.h"
#include "libavutil/x86/asm.h"
#include "libavfilter/vf_spp.h"

#if HAVE_MMX_INLINE
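/* Hard-thresholding requantizer for one 8x8 block of DCT coefficients:
 * AC coefficients with a magnitude of at most threshold1 are zeroed, the
 * rest are rounded and descaled by >> 3.  The DC coefficient is handled
 * in scalar code at the end of the function. */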
static void hardthresh_mmx(int16_t dst[64], const int16_t src[64],
                           int qp, const uint8_t *permutation)
{
    int bias = 0; //FIXME
    unsigned int threshold1;

    threshold1 = qp * ((1<<4) - bias) - 1;

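/* REQUANT_CORE processes four quadwords (16 coefficients) at a time: the
 * psubw/paddusw/paddw/psubusw sequence uses unsigned saturation so that
 * values with |v| <= threshold1 effectively collapse to zero while the
 * survivors come out as v + 4, which psraw $3 then rounds and descales.
 * The punpcklwd/punpckhwd ladder (markers A-D) interleaves the four
 * registers, transposing the 4x4 group of words on the way to dst. */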
#define REQUANT_CORE(dst0, dst1, dst2, dst3, src0, src1, src2, src3)    \
    "movq " #src0 ", %%mm0      \n"                                     \
    "movq " #src1 ", %%mm1      \n"                                     \
    "movq " #src2 ", %%mm2      \n"                                     \
    "movq " #src3 ", %%mm3      \n"                                     \
    "psubw %%mm4, %%mm0         \n"                                     \
    "psubw %%mm4, %%mm1         \n"                                     \
    "psubw %%mm4, %%mm2         \n"                                     \
    "psubw %%mm4, %%mm3         \n"                                     \
    "paddusw %%mm5, %%mm0       \n"                                     \
    "paddusw %%mm5, %%mm1       \n"                                     \
    "paddusw %%mm5, %%mm2       \n"                                     \
    "paddusw %%mm5, %%mm3       \n"                                     \
    "paddw %%mm6, %%mm0         \n"                                     \
    "paddw %%mm6, %%mm1         \n"                                     \
    "paddw %%mm6, %%mm2         \n"                                     \
    "paddw %%mm6, %%mm3         \n"                                     \
    "psubusw %%mm6, %%mm0       \n"                                     \
    "psubusw %%mm6, %%mm1       \n"                                     \
    "psubusw %%mm6, %%mm2       \n"                                     \
    "psubusw %%mm6, %%mm3       \n"                                     \
    "psraw $3, %%mm0            \n"                                     \
    "psraw $3, %%mm1            \n"                                     \
    "psraw $3, %%mm2            \n"                                     \
    "psraw $3, %%mm3            \n"                                     \
                                                                        \
    "movq %%mm0, %%mm7          \n"                                     \
    "punpcklwd %%mm2, %%mm0     \n" /*A*/                               \
    "punpckhwd %%mm2, %%mm7     \n" /*C*/                               \
    "movq %%mm1, %%mm2          \n"                                     \
    "punpcklwd %%mm3, %%mm1     \n" /*B*/                               \
    "punpckhwd %%mm3, %%mm2     \n" /*D*/                               \
    "movq %%mm0, %%mm3          \n"                                     \
    "punpcklwd %%mm1, %%mm0     \n" /*A*/                               \
    "punpckhwd %%mm7, %%mm3     \n" /*C*/                               \
    "punpcklwd %%mm2, %%mm7     \n" /*B*/                               \
    "punpckhwd %%mm2, %%mm1     \n" /*D*/                               \
                                                                        \
    "movq %%mm0, " #dst0 "      \n"                                     \
    "movq %%mm7, " #dst1 "      \n"                                     \
    "movq %%mm3, " #dst2 "      \n"                                     \
    "movq %%mm1, " #dst3 "      \n"

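    /* movd loads each 32-bit constant into the low dword of an MMX
     * register; the repeated packssdw broadcasts it into all four 16-bit
     * lanes (the MMX-era equivalent of a splat). */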
    __asm__ volatile(
        "movd %2, %%mm4             \n"
        "movd %3, %%mm5             \n"
        "movd %4, %%mm6             \n"
        "packssdw %%mm4, %%mm4      \n"
        "packssdw %%mm5, %%mm5      \n"
        "packssdw %%mm6, %%mm6      \n"
        "packssdw %%mm4, %%mm4      \n"
        "packssdw %%mm5, %%mm5      \n"
        "packssdw %%mm6, %%mm6      \n"
        REQUANT_CORE(  (%1),  8(%1), 16(%1), 24(%1),  (%0), 8(%0), 64(%0), 72(%0))
        REQUANT_CORE(32(%1), 40(%1), 48(%1), 56(%1),16(%0),24(%0), 48(%0), 56(%0))
        REQUANT_CORE(64(%1), 72(%1), 80(%1), 88(%1),32(%0),40(%0), 96(%0),104(%0))
        REQUANT_CORE(96(%1),104(%1),112(%1),120(%1),80(%0),88(%0),112(%0),120(%0))
        : : "r" (src), "r" (dst), "g" (threshold1+1), "g" (threshold1+5), "g" (threshold1-4) //FIXME maybe more accurate then needed?
    );
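    /* The DC coefficient bypasses the threshold entirely; it is just
     * rounded and descaled in scalar code. */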
    dst[0] = (src[0] + 4) >> 3;
}

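/* Soft-thresholding requantizer: every AC coefficient is shrunk toward
 * zero by threshold1 (so anything at or below the threshold in magnitude
 * vanishes), then rounded and descaled by >> 3.  As above, the DC
 * coefficient is handled in scalar code. */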
static void softthresh_mmx(int16_t dst[64], const int16_t src[64],
                           int qp, const uint8_t *permutation)
{
    int bias = 0; //FIXME
    unsigned int threshold1;

    threshold1 = qp * ((1<<4) - bias) - 1;

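/* Same skeleton as the hard-threshold core, but the threshold is applied
 * as a saturating subtraction from the magnitude: pcmpgtw builds an
 * all-ones mask for negative words, pxor with the mask gives the one's
 * complement (an approximate absolute value), psubusw clamps the
 * subtraction at zero, and a second pxor restores the sign. */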
#undef REQUANT_CORE
#define REQUANT_CORE(dst0, dst1, dst2, dst3, src0, src1, src2, src3)    \
    "movq " #src0 ", %%mm0      \n"                                     \
    "movq " #src1 ", %%mm1      \n"                                     \
    "pxor %%mm6, %%mm6          \n"                                     \
    "pxor %%mm7, %%mm7          \n"                                     \
    "pcmpgtw %%mm0, %%mm6       \n"                                     \
    "pcmpgtw %%mm1, %%mm7       \n"                                     \
    "pxor %%mm6, %%mm0          \n"                                     \
    "pxor %%mm7, %%mm1          \n"                                     \
    "psubusw %%mm4, %%mm0       \n"                                     \
    "psubusw %%mm4, %%mm1       \n"                                     \
    "pxor %%mm6, %%mm0          \n"                                     \
    "pxor %%mm7, %%mm1          \n"                                     \
    "movq " #src2 ", %%mm2      \n"                                     \
    "movq " #src3 ", %%mm3      \n"                                     \
    "pxor %%mm6, %%mm6          \n"                                     \
    "pxor %%mm7, %%mm7          \n"                                     \
    "pcmpgtw %%mm2, %%mm6       \n"                                     \
    "pcmpgtw %%mm3, %%mm7       \n"                                     \
    "pxor %%mm6, %%mm2          \n"                                     \
    "pxor %%mm7, %%mm3          \n"                                     \
    "psubusw %%mm4, %%mm2       \n"                                     \
    "psubusw %%mm4, %%mm3       \n"                                     \
    "pxor %%mm6, %%mm2          \n"                                     \
    "pxor %%mm7, %%mm3          \n"                                     \
                                                                        \
    "paddsw %%mm5, %%mm0        \n"                                     \
    "paddsw %%mm5, %%mm1        \n"                                     \
    "paddsw %%mm5, %%mm2        \n"                                     \
    "paddsw %%mm5, %%mm3        \n"                                     \
    "psraw $3, %%mm0            \n"                                     \
    "psraw $3, %%mm1            \n"                                     \
    "psraw $3, %%mm2            \n"                                     \
    "psraw $3, %%mm3            \n"                                     \
                                                                        \
    "movq %%mm0, %%mm7          \n"                                     \
    "punpcklwd %%mm2, %%mm0     \n" /*A*/                               \
    "punpckhwd %%mm2, %%mm7     \n" /*C*/                               \
    "movq %%mm1, %%mm2          \n"                                     \
    "punpcklwd %%mm3, %%mm1     \n" /*B*/                               \
    "punpckhwd %%mm3, %%mm2     \n" /*D*/                               \
    "movq %%mm0, %%mm3          \n"                                     \
    "punpcklwd %%mm1, %%mm0     \n" /*A*/                               \
    "punpckhwd %%mm7, %%mm3     \n" /*C*/                               \
    "punpcklwd %%mm2, %%mm7     \n" /*B*/                               \
    "punpckhwd %%mm2, %%mm1     \n" /*D*/                               \
                                                                        \
    "movq %%mm0, " #dst0 "      \n"                                     \
    "movq %%mm7, " #dst1 "      \n"                                     \
    "movq %%mm3, " #dst2 "      \n"                                     \
    "movq %%mm1, " #dst3 "      \n"

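    /* Only two constants here: the threshold (%2 -> mm4) and the rounding
     * bias 4 (%3 -> mm5), broadcast with packssdw as before. */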
    __asm__ volatile(
        "movd %2, %%mm4             \n"
        "movd %3, %%mm5             \n"
        "packssdw %%mm4, %%mm4      \n"
        "packssdw %%mm5, %%mm5      \n"
        "packssdw %%mm4, %%mm4      \n"
        "packssdw %%mm5, %%mm5      \n"
        REQUANT_CORE(  (%1),  8(%1), 16(%1), 24(%1),  (%0), 8(%0), 64(%0), 72(%0))
        REQUANT_CORE(32(%1), 40(%1), 48(%1), 56(%1),16(%0),24(%0), 48(%0), 56(%0))
        REQUANT_CORE(64(%1), 72(%1), 80(%1), 88(%1),32(%0),40(%0), 96(%0),104(%0))
        REQUANT_CORE(96(%1),104(%1),112(%1),120(%1),80(%0),88(%0),112(%0),120(%0))
        : : "r" (src), "r" (dst), "g" (threshold1), "rm" (4) //FIXME maybe more accurate then needed?
    );

    dst[0] = (src[0] + 4) >> 3;
}

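/* Write a requantized slice back as 8-bit pixels.  The dither row for the
 * current line is pre-shifted right by log2_scale, added to each 16-bit
 * sample, and the sum is shifted right by MAX_LEVEL - log2_scale, with
 * packuswb saturating the result to [0, 255]. */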
static void store_slice_mmx(uint8_t *dst, const int16_t *src,
                            int dst_stride, int src_stride,
                            int width, int height, int log2_scale,
                            const uint8_t dither[8][8])
{
    int y;

    for (y = 0; y < height; y++) {
        uint8_t *dst1 = dst;
        const int16_t *src1 = src;
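        /* mm3/mm4 hold the dither row expanded to words and pre-shifted;
         * mm2 holds the final shift count.  The loop then converts
         * 8 pixels per iteration until dst1 reaches dst + width. */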
        __asm__ volatile(
            "movq (%3), %%mm3           \n"
            "movq (%3), %%mm4           \n"
            "movd %4, %%mm2             \n"
            "pxor %%mm0, %%mm0          \n"
            "punpcklbw %%mm0, %%mm3     \n"
            "punpckhbw %%mm0, %%mm4     \n"
            "psraw %%mm2, %%mm3         \n"
            "psraw %%mm2, %%mm4         \n"
            "movd %5, %%mm2             \n"
            "1:                         \n"
            "movq (%0), %%mm0           \n"
            "movq 8(%0), %%mm1          \n"
            "paddw %%mm3, %%mm0         \n"
            "paddw %%mm4, %%mm1         \n"
            "psraw %%mm2, %%mm0         \n"
            "psraw %%mm2, %%mm1         \n"
            "packuswb %%mm1, %%mm0      \n"
            "movq %%mm0, (%1)           \n"
            "add $16, %0                \n"
            "add $8, %1                 \n"
            "cmp %2, %1                 \n"
            " jb 1b                     \n"
            : "+r" (src1), "+r"(dst1)
            : "r"(dst + width), "r"(dither[y]), "g"(log2_scale), "g"(MAX_LEVEL - log2_scale)
        );
        src += src_stride;
        dst += dst_stride;
    }
}

#endif /* HAVE_MMX_INLINE */
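/* Runtime dispatch: install the MMX implementations when inline asm was
 * compiled in and the CPU reports MMX support. */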
av_cold void ff_spp_init_x86(SPPContext *s)
{
#if HAVE_MMX_INLINE
    int cpu_flags = av_get_cpu_flags();

    if (cpu_flags & AV_CPU_FLAG_MMX) {
        s->store_slice = store_slice_mmx;
        switch (s->mode) {
        case 0: s->requantize = hardthresh_mmx; break;
        case 1: s->requantize = softthresh_mmx; break;
        }
    }
#endif
}