Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4349 Serge 1
/*
2
 * Alpha optimized DSP utils
3
 * Copyright (c) 2002 Falk Hueffner 
4
 *
5
 * This file is part of FFmpeg.
6
 *
7
 * FFmpeg is free software; you can redistribute it and/or
8
 * modify it under the terms of the GNU Lesser General Public
9
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
11
 *
12
 * FFmpeg is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
 * Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with FFmpeg; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
 */
21
 
22
#include "libavutil/attributes.h"
23
#include "libavcodec/hpeldsp.h"
24
#include "hpeldsp_alpha.h"
25
#include "asm.h"
26
 
27
/*
 * Byte-wise average of two packed 64-bit words, rounding down.
 *
 * Uses the identity a + b == 2 * (a & b) + (a ^ b): the shared bits are
 * kept as-is and half of the differing bits is added on top.  The low bit
 * of every byte of the difference is masked off before the shift so that
 * nothing leaks into the neighbouring byte.
 * NOTE(review): assumes BYTE_VEC(x) (asm.h) replicates x into every byte
 * of a 64-bit word - confirm against asm.h.
 */
static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b)
{
    uint64_t common    = a & b;
    uint64_t half_diff = ((a ^ b) & BYTE_VEC(0xfe)) >> 1;

    return common + half_diff;
}
31
 
32
/*
 * Byte-wise average of two packed 64-bit words, rounding up.
 *
 * Counterpart of the truncating variant: starting from the union of the
 * bits (a | b) and subtracting half of the differing bits yields the
 * average rounded towards the larger value.  The low bit of every byte of
 * the difference is cleared before shifting so bytes stay independent.
 * NOTE(review): assumes BYTE_VEC(x) (asm.h) replicates x into every byte
 * of a 64-bit word - confirm against asm.h.
 */
static inline uint64_t avg2(uint64_t a, uint64_t b)
{
    uint64_t either    = a | b;
    uint64_t half_diff = ((a ^ b) & BYTE_VEC(0xfe)) >> 1;

    return either - half_diff;
}
36
 
37
#if 0
/*
 * Reference form of the four-way byte-wise average (disabled).
 * The XY2 routines basically utilize this scheme, but reuse parts in
 * each iteration.
 *
 * Each input is split into its upper six bits per byte (pre-divided by
 * four) and its lower two bits per byte; the low parts are summed together
 * with a rounding constant, divided by four and added back.
 */
static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
{
    const uint64_t low_mask = BYTE_VEC(0x03);

    /* Upper six bits of every byte, already divided by four. */
    uint64_t high_sum = ((l1 & ~low_mask) >> 2)
                      + ((l2 & ~low_mask) >> 2)
                      + ((l3 & ~low_mask) >> 2)
                      + ((l4 & ~low_mask) >> 2);

    /* Lower two bits of every byte plus the rounding term. */
    uint64_t low_sum = (l1 & low_mask)
                     + (l2 & low_mask)
                     + (l3 & low_mask)
                     + (l4 & low_mask)
                     + BYTE_VEC(0x02);

    /* The mask discards bits shifted in from the neighbouring byte. */
    return high_sum + ((low_sum >> 2) & low_mask);
}
#endif
54
 
55
/*
 * Plain row loop: load one 64-bit word from pixels with LOAD, hand it to
 * STORE together with block, then advance both pointers by line_size.
 * Runs h times; the body executes before h is tested, so h must be
 * non-zero on entry.  Expects pixels, block, line_size and h to be in
 * scope at the expansion site.
 */
#define OP(LOAD, STORE)                         \
    do {                                        \
        STORE(LOAD(pixels), block);             \
        block  += line_size;                    \
        pixels += line_size;                    \
    } while (--h)
61
 
62
/*
 * Horizontal row loop: for each of h rows, average the loaded word with a
 * copy of itself that has its lowest byte dropped and pixels[8] inserted
 * as the highest byte, then pass the result to STORE.
 * NOTE(review): on a little-endian load this pairs every pixel with its
 * right-hand neighbour - confirm the byte order assumption.
 * Expects pixels, block, line_size, h and AVG2 to be in scope; h must be
 * non-zero on entry.
 */
#define OP_X2(LOAD, STORE)                                              \
    do {                                                                \
        uint64_t cur  = LOAD(pixels);                                   \
        uint64_t next = (cur >> 8) | ((uint64_t) pixels[8] << 56);      \
                                                                        \
        STORE(AVG2(cur, next), block);                                  \
        block  += line_size;                                            \
        pixels += line_size;                                            \
    } while (--h)
72
 
73
/*
 * Vertical row loop: average each source row with the row line_size bytes
 * after it.  The first row is loaded once up front; every iteration then
 * loads only the following row and keeps it for the next pass, so h
 * output rows read h + 1 source rows.
 * Expects pixels, block, line_size, h and AVG2 to be in scope; h must be
 * non-zero on entry.
 */
#define OP_Y2(LOAD, STORE)                          \
    do {                                            \
        uint64_t upper = LOAD(pixels);              \
        do {                                        \
            uint64_t lower;                         \
                                                    \
            pixels += line_size;                    \
            lower = LOAD(pixels);                   \
            STORE(AVG2(upper, lower), block);       \
            block += line_size;                     \
            upper = lower;                          \
        } while (--h);                              \
    } while (0)
86
 
87
/*
 * Two-dimensional row loop: four-way average of each word, its
 * one-byte-shifted copy (lowest byte dropped, pixels[8] inserted on top)
 * and the same pair taken from the following row.
 *
 * Every row is reduced to two partial sums: "lo" holds the sum of the low
 * two bits of each byte, "hi" the sum of the upper six bits already
 * divided by four.  The partial sums of the current row are carried over
 * to the next iteration, so each source row is loaded and split only once.
 * AVG4_ROUNDER supplies the rounding term added to the low parts.
 * Expects pixels, block, line_size and h to be in scope; h must be
 * non-zero on entry.
 */
#define OP_XY2(LOAD, STORE)                                                 \
    do {                                                                    \
        uint64_t cur_a = LOAD(pixels);                                      \
        uint64_t cur_b = (cur_a >> 8) | ((uint64_t) pixels[8] << 56);       \
        uint64_t lo    = (cur_a & BYTE_VEC(0x03))                           \
                       + (cur_b & BYTE_VEC(0x03));                          \
        uint64_t hi    = ((cur_a & ~BYTE_VEC(0x03)) >> 2)                   \
                       + ((cur_b & ~BYTE_VEC(0x03)) >> 2);                  \
                                                                            \
        do {                                                                \
            uint64_t nxt_a, nxt_b;                                          \
            uint64_t nxt_lo, nxt_hi;                                        \
            uint64_t out;                                                   \
                                                                            \
            pixels += line_size;                                            \
            nxt_a  = LOAD(pixels);                                          \
            nxt_b  = (nxt_a >> 8) | ((uint64_t) pixels[8] << 56);           \
            nxt_lo = (nxt_a & BYTE_VEC(0x03))                               \
                   + (nxt_b & BYTE_VEC(0x03));                              \
            nxt_hi = ((nxt_a & ~BYTE_VEC(0x03)) >> 2)                       \
                   + ((nxt_b & ~BYTE_VEC(0x03)) >> 2);                      \
            out = (((lo + nxt_lo + AVG4_ROUNDER) >> 2) & BYTE_VEC(0x03))    \
                + hi + nxt_hi;                                              \
            STORE(out, block);                                              \
                                                                            \
            block += line_size;                                             \
            lo = nxt_lo;                                                    \
            hi = nxt_hi;                                                    \
        } while (--h);                                                      \
    } while (0)
117
 
118
/*
 * Generate two functions for one operation/interpolation pair:
 *
 *   OPNAME_pixels<SUFF>_axp    runs the OPKIND row loop once, choosing the
 *                              aligned load (ldq) when pixels is a
 *                              multiple of 8 and the unaligned load (uldq)
 *                              otherwise;
 *   OPNAME_pixels16<SUFF>_axp  calls the function above twice, the second
 *                              time with block and pixels advanced by 8.
 *
 * STORE is forwarded unchanged to OPKIND.
 */
#define MAKE_OP(OPNAME, SUFF, OPKIND, STORE)                                \
static void OPNAME ## _pixels ## SUFF ## _axp                               \
        (uint8_t *restrict block, const uint8_t *restrict pixels,           \
         ptrdiff_t line_size, int h)                                        \
{                                                                           \
    if (((size_t) pixels & 0x7) == 0) {                                     \
        OPKIND(ldq, STORE);                                                 \
    } else {                                                                \
        OPKIND(uldq, STORE);                                                \
    }                                                                       \
}                                                                           \
                                                                            \
static void OPNAME ## _pixels16 ## SUFF ## _axp                             \
        (uint8_t *restrict block, const uint8_t *restrict pixels,           \
         ptrdiff_t line_size, int h)                                        \
{                                                                           \
    OPNAME ## _pixels ## SUFF ## _axp(block,     pixels,     line_size, h); \
    OPNAME ## _pixels ## SUFF ## _axp(block + 8, pixels + 8, line_size, h); \
}
137
 
138
/*
 * Instantiate MAKE_OP for all four interpolation kinds of one operation:
 * no suffix (OP), _x2 (OP_X2), _y2 (OP_Y2) and _xy2 (OP_XY2).
 * The expansion consists of complete function definitions.
 */
#define PIXOP(OPNAME, STORE)                \
    MAKE_OP(OPNAME,     , OP,     STORE)    \
    MAKE_OP(OPNAME, _x2,  OP_X2,  STORE)    \
    MAKE_OP(OPNAME, _y2,  OP_Y2,  STORE)    \
    MAKE_OP(OPNAME, _xy2, OP_XY2, STORE)
143
 
144
/* Rounding primitives.  */
145
#define AVG2 avg2
146
#define AVG4 avg4
147
#define AVG4_ROUNDER BYTE_VEC(0x02)
148
#define STORE(l, b) stq(l, b)
149
PIXOP(put, STORE);
150
 
151
#undef STORE
152
#define STORE(l, b) stq(AVG2(l, ldq(b)), b);
153
PIXOP(avg, STORE);
154
 
155
/* Not rounding primitives.  */
156
#undef AVG2
157
#undef AVG4
158
#undef AVG4_ROUNDER
159
#undef STORE
160
#define AVG2 avg2_no_rnd
161
#define AVG4 avg4_no_rnd
162
#define AVG4_ROUNDER BYTE_VEC(0x01)
163
#define STORE(l, b) stq(l, b)
164
PIXOP(put_no_rnd, STORE);
165
 
166
#undef STORE
167
#define STORE(l, b) stq(AVG2(l, ldq(b)), b);
168
PIXOP(avg_no_rnd, STORE);
169
 
170
/*
 * 16-byte-wide copy built from two calls to put_pixels_axp_asm: first at
 * block/pixels, then with both pointers advanced by 8.  line_size and h
 * are passed through unchanged.
 * NOTE(review): presumes put_pixels_axp_asm processes an 8-byte-wide
 * column - confirm against its definition.
 */
static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels,
                                 ptrdiff_t line_size, int h)
{
    for (int half = 0; half < 2; half++) {
        put_pixels_axp_asm(block + 8 * half, pixels + 8 * half, line_size, h);
    }
}
176
 
177
/*
 * Fill the half-pel function tables of c with the Alpha implementations.
 *
 * For the two-dimensional tables, row 0 receives the pixels16 variants and
 * row 1 the 8-byte-wide ones; the column selects the interpolation
 * (0 = none, 1 = x2, 2 = y2, 3 = xy2).  avg_no_rnd_pixels_tab is indexed
 * by interpolation only and receives the pixels16 variants.
 *
 * The flags argument is not used here.
 */
av_cold void ff_hpeldsp_init_alpha(HpelDSPContext *c, int flags)
{
    /* put, rounding */
    c->put_pixels_tab[0][0] = put_pixels16_axp_asm;
    c->put_pixels_tab[0][1] = put_pixels16_x2_axp;
    c->put_pixels_tab[0][2] = put_pixels16_y2_axp;
    c->put_pixels_tab[0][3] = put_pixels16_xy2_axp;
    c->put_pixels_tab[1][0] = put_pixels_axp_asm;
    c->put_pixels_tab[1][1] = put_pixels_x2_axp;
    c->put_pixels_tab[1][2] = put_pixels_y2_axp;
    c->put_pixels_tab[1][3] = put_pixels_xy2_axp;

    /*
     * put, no rounding; the non-interpolating slots reuse the same copy
     * routines as the rounding table (presumably because a straight copy
     * involves no averaging).
     */
    c->put_no_rnd_pixels_tab[0][0] = put_pixels16_axp_asm;
    c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_axp;
    c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_axp;
    c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_axp;
    c->put_no_rnd_pixels_tab[1][0] = put_pixels_axp_asm;
    c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels_x2_axp;
    c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels_y2_axp;
    c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels_xy2_axp;

    /* avg, rounding */
    c->avg_pixels_tab[0][0] = avg_pixels16_axp;
    c->avg_pixels_tab[0][1] = avg_pixels16_x2_axp;
    c->avg_pixels_tab[0][2] = avg_pixels16_y2_axp;
    c->avg_pixels_tab[0][3] = avg_pixels16_xy2_axp;
    c->avg_pixels_tab[1][0] = avg_pixels_axp;
    c->avg_pixels_tab[1][1] = avg_pixels_x2_axp;
    c->avg_pixels_tab[1][2] = avg_pixels_y2_axp;
    c->avg_pixels_tab[1][3] = avg_pixels_xy2_axp;

    /* avg, no rounding */
    c->avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels16_axp;
    c->avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels16_x2_axp;
    c->avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels16_y2_axp;
    c->avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels16_xy2_axp;
}