Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4349 Serge 1
/*
2
 * Copyright (C) 2001-2011 Michael Niedermayer 
3
 *
4
 * This file is part of FFmpeg.
5
 *
6
 * FFmpeg is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2.1 of the License, or (at your option) any later version.
10
 *
11
 * FFmpeg is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with FFmpeg; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
 */
20
 
21
#ifndef SWSCALE_SWSCALE_INTERNAL_H
22
#define SWSCALE_SWSCALE_INTERNAL_H
23
 
24
#include "config.h"
25
 
26
#if HAVE_ALTIVEC_H
27
#include <altivec.h>
28
#endif
29
 
30
#include "libavutil/avassert.h"
31
#include "libavutil/avutil.h"
32
#include "libavutil/common.h"
33
#include "libavutil/intreadwrite.h"
34
#include "libavutil/log.h"
35
#include "libavutil/pixfmt.h"
36
#include "libavutil/pixdesc.h"
37
 
38
#define STR(s) AV_TOSTRING(s) // AV_STRINGIFY is too long
39
 
40
#define YUVRGB_TABLE_HEADROOM 128
41
 
42
#define MAX_FILTER_SIZE 256
43
 
44
#define DITHER1XBPP
45
 
46
#if HAVE_BIGENDIAN
47
#define ALT32_CORR (-1)
48
#else
49
#define ALT32_CORR   1
50
#endif
51
 
52
#if ARCH_X86_64
53
#   define APCK_PTR2  8
54
#   define APCK_COEF 16
55
#   define APCK_SIZE 24
56
#else
57
#   define APCK_PTR2  4
58
#   define APCK_COEF  8
59
#   define APCK_SIZE 16
60
#endif
61
 
62
struct SwsContext;
63
 
64
/**
 * Dither mode selector; SwsContext carries one value of this type in its
 * "dither" field. Enumerators are numbered sequentially starting at 0.
 */
typedef enum SwsDither {
65
    SWS_DITHER_NONE = 0, ///< value 0; presumably "no dithering" -- confirm against the users of this enum
66
    SWS_DITHER_AUTO,     ///< presumably "let the scaler pick a mode" -- confirm
67
    SWS_DITHER_BAYER,    ///< presumably ordered (Bayer matrix) dithering -- confirm
68
    SWS_DITHER_ED,       ///< presumably error-diffusion dithering -- confirm
69
    NB_SWS_DITHER,       ///< last enumerator, so its value equals the number of modes listed above
70
} SwsDither;
71
 
72
typedef int (*SwsFunc)(struct SwsContext *context, const uint8_t *src[],
73
                       int srcStride[], int srcSliceY, int srcSliceH,
74
                       uint8_t *dst[], int dstStride[]);
75
 
76
/**
77
 * Write one line of horizontally scaled data to planar output
78
 * without any additional vertical scaling (or point-scaling).
79
 *
80
 * @param src     scaled source data, 15bit for 8-10bit output,
81
 *                19-bit for 16bit output (in int32_t)
82
 * @param dest    pointer to the output plane. For >8bit
83
 *                output, this is in uint16_t
84
 * @param dstW    width of destination in pixels
85
 * @param dither  ordered dither array of type uint8_t and size 8
86
 * @param offset  Dither offset
87
 */
88
typedef void (*yuv2planar1_fn)(const int16_t *src, uint8_t *dest, int dstW,
89
                               const uint8_t *dither, int offset);
90
 
91
/**
92
 * Write one line of horizontally scaled data to planar output
93
 * with multi-point vertical scaling between input pixels.
94
 *
95
 * @param filter        vertical luma/alpha scaling coefficients, 12bit [0,4096]
96
 * @param src           scaled luma (Y) or alpha (A) source data, 15bit for 8-10bit output,
97
 *                      19-bit for 16bit output (in int32_t)
98
 * @param filterSize    number of vertical input lines to scale
99
 * @param dest          pointer to output plane. For >8bit
100
 *                      output, this is in uint16_t
101
 * @param dstW          width of destination in pixels
102
 * @param dither        ordered dither array of type uint8_t
 * @param offset        Dither offset
103
 */
104
typedef void (*yuv2planarX_fn)(const int16_t *filter, int filterSize,
105
                               const int16_t **src, uint8_t *dest, int dstW,
106
                               const uint8_t *dither, int offset);
107
 
108
/**
109
 * Write one line of horizontally scaled chroma to interleaved output
110
 * with multi-point vertical scaling between input pixels.
111
 *
112
 * @param c             SWS scaling context
113
 * @param chrFilter     vertical chroma scaling coefficients, 12bit [0,4096]
114
 * @param chrUSrc       scaled chroma (U) source data, 15bit for 8-10bit output,
115
 *                      19-bit for 16bit output (in int32_t)
116
 * @param chrVSrc       scaled chroma (V) source data, 15bit for 8-10bit output,
117
 *                      19-bit for 16bit output (in int32_t)
118
 * @param chrFilterSize number of vertical chroma input lines to scale
119
 * @param dest          pointer to the output plane. For >8bit
120
 *                      output, this is in uint16_t
121
 * @param dstW          width of chroma planes
122
 */
123
typedef void (*yuv2interleavedX_fn)(struct SwsContext *c,
124
                                    const int16_t *chrFilter,
125
                                    int chrFilterSize,
126
                                    const int16_t **chrUSrc,
127
                                    const int16_t **chrVSrc,
128
                                    uint8_t *dest, int dstW);
129
 
130
/**
131
 * Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB
132
 * output without any additional vertical scaling (or point-scaling). Note
133
 * that this function may do chroma scaling, see the "uvalpha" argument.
134
 *
135
 * @param c       SWS scaling context
136
 * @param lumSrc  scaled luma (Y) source data, 15bit for 8-10bit output,
137
 *                19-bit for 16bit output (in int32_t)
138
 * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output,
139
 *                19-bit for 16bit output (in int32_t)
140
 * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output,
141
 *                19-bit for 16bit output (in int32_t)
142
 * @param alpSrc  scaled alpha (A) source data, 15bit for 8-10bit output,
143
 *                19-bit for 16bit output (in int32_t)
144
 * @param dest    pointer to the output plane. For 16bit output, this is
145
 *                uint16_t
146
 * @param dstW    width of lumSrc and alpSrc in pixels, number of pixels
147
 *                to write into dest[]
148
 * @param uvalpha chroma scaling coefficient for the second line of chroma
149
 *                pixels, either 2048 or 0. If 0, one chroma input is used
150
 *                for 2 output pixels (or if the SWS_FLAG_FULL_CHR_INT flag
151
 *                is set, it generates 1 output pixel). If 2048, two chroma
152
 *                input pixels should be averaged for 2 output pixels (this
153
 *                only happens if SWS_FLAG_FULL_CHR_INT is not set)
154
 * @param y       vertical line number for this output. This does not need
155
 *                to be used to calculate the offset in the destination,
156
 *                but can be used to generate comfort noise using dithering
157
 *                for some output formats.
158
 */
159
typedef void (*yuv2packed1_fn)(struct SwsContext *c, const int16_t *lumSrc,
160
                               const int16_t *chrUSrc[2],
161
                               const int16_t *chrVSrc[2],
162
                               const int16_t *alpSrc, uint8_t *dest,
163
                               int dstW, int uvalpha, int y);
164
/**
165
 * Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB
166
 * output by doing bilinear scaling between two input lines.
167
 *
168
 * @param c       SWS scaling context
169
 * @param lumSrc  scaled luma (Y) source data, 15bit for 8-10bit output,
170
 *                19-bit for 16bit output (in int32_t)
171
 * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output,
172
 *                19-bit for 16bit output (in int32_t)
173
 * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output,
174
 *                19-bit for 16bit output (in int32_t)
175
 * @param alpSrc  scaled alpha (A) source data, 15bit for 8-10bit output,
176
 *                19-bit for 16bit output (in int32_t)
177
 * @param dest    pointer to the output plane. For 16bit output, this is
178
 *                uint16_t
179
 * @param dstW    width of lumSrc and alpSrc in pixels, number of pixels
180
 *                to write into dest[]
181
 * @param yalpha  luma/alpha scaling coefficients for the second input line.
182
 *                The first line's coefficients can be calculated by using
183
 *                4096 - yalpha
184
 * @param uvalpha chroma scaling coefficient for the second input line. The
185
 *                first line's coefficients can be calculated by using
186
 *                4096 - uvalpha
187
 * @param y       vertical line number for this output. This does not need
188
 *                to be used to calculate the offset in the destination,
189
 *                but can be used to generate comfort noise using dithering
190
 *                for some output formats.
191
 */
192
typedef void (*yuv2packed2_fn)(struct SwsContext *c, const int16_t *lumSrc[2],
193
                               const int16_t *chrUSrc[2],
194
                               const int16_t *chrVSrc[2],
195
                               const int16_t *alpSrc[2],
196
                               uint8_t *dest,
197
                               int dstW, int yalpha, int uvalpha, int y);
198
/**
199
 * Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB
200
 * output by doing multi-point vertical scaling between input pixels.
201
 *
202
 * @param c             SWS scaling context
203
 * @param lumFilter     vertical luma/alpha scaling coefficients, 12bit [0,4096]
204
 * @param lumSrc        scaled luma (Y) source data, 15bit for 8-10bit output,
205
 *                      19-bit for 16bit output (in int32_t)
206
 * @param lumFilterSize number of vertical luma/alpha input lines to scale
207
 * @param chrFilter     vertical chroma scaling coefficients, 12bit [0,4096]
208
 * @param chrUSrc       scaled chroma (U) source data, 15bit for 8-10bit output,
209
 *                      19-bit for 16bit output (in int32_t)
210
 * @param chrVSrc       scaled chroma (V) source data, 15bit for 8-10bit output,
211
 *                      19-bit for 16bit output (in int32_t)
212
 * @param chrFilterSize number of vertical chroma input lines to scale
213
 * @param alpSrc        scaled alpha (A) source data, 15bit for 8-10bit output,
214
 *                      19-bit for 16bit output (in int32_t)
215
 * @param dest          pointer to the output plane. For 16bit output, this is
216
 *                      uint16_t
217
 * @param dstW          width of lumSrc and alpSrc in pixels, number of pixels
218
 *                      to write into dest[]
219
 * @param y             vertical line number for this output. This does not need
220
 *                      to be used to calculate the offset in the destination,
221
 *                      but can be used to generate comfort noise using dithering
222
 *                      for some output formats.
223
 */
224
typedef void (*yuv2packedX_fn)(struct SwsContext *c, const int16_t *lumFilter,
225
                               const int16_t **lumSrc, int lumFilterSize,
226
                               const int16_t *chrFilter,
227
                               const int16_t **chrUSrc,
228
                               const int16_t **chrVSrc, int chrFilterSize,
229
                               const int16_t **alpSrc, uint8_t *dest,
230
                               int dstW, int y);
231
 
232
/**
233
 * Write one line of horizontally scaled Y/U/V/A to YUV/RGB
234
 * output by doing multi-point vertical scaling between input pixels.
235
 *
236
 * @param c             SWS scaling context
237
 * @param lumFilter     vertical luma/alpha scaling coefficients, 12bit [0,4096]
238
 * @param lumSrc        scaled luma (Y) source data, 15bit for 8-10bit output,
239
 *                      19-bit for 16bit output (in int32_t)
240
 * @param lumFilterSize number of vertical luma/alpha input lines to scale
241
 * @param chrFilter     vertical chroma scaling coefficients, 12bit [0,4096]
242
 * @param chrUSrc       scaled chroma (U) source data, 15bit for 8-10bit output,
243
 *                      19-bit for 16bit output (in int32_t)
244
 * @param chrVSrc       scaled chroma (V) source data, 15bit for 8-10bit output,
245
 *                      19-bit for 16bit output (in int32_t)
246
 * @param chrFilterSize number of vertical chroma input lines to scale
247
 * @param alpSrc        scaled alpha (A) source data, 15bit for 8-10bit output,
248
 *                      19-bit for 16bit output (in int32_t)
249
 * @param dest          pointer to the output planes. For 16bit output, this is
250
 *                      uint16_t
251
 * @param dstW          width of lumSrc and alpSrc in pixels, number of pixels
252
 *                      to write into dest[]
253
 * @param y             vertical line number for this output. This does not need
254
 *                      to be used to calculate the offset in the destination,
255
 *                      but can be used to generate comfort noise using dithering
256
 *                      for some output formats.
257
 */
258
typedef void (*yuv2anyX_fn)(struct SwsContext *c, const int16_t *lumFilter,
259
                            const int16_t **lumSrc, int lumFilterSize,
260
                            const int16_t *chrFilter,
261
                            const int16_t **chrUSrc,
262
                            const int16_t **chrVSrc, int chrFilterSize,
263
                            const int16_t **alpSrc, uint8_t **dest,
264
                            int dstW, int y);
265
 
266
/* This struct should be aligned on at least a 32-byte boundary. */
267
typedef struct SwsContext {
268
    /**
269
     * info on struct for av_log
270
     */
271
    const AVClass *av_class;
272
 
273
    /**
274
     * Note that src, dst, srcStride, dstStride will be copied in the
275
     * sws_scale() wrapper so they can be freely modified here.
276
     */
277
    SwsFunc swscale;
278
    int srcW;                     ///< Width  of source      luma/alpha planes.
279
    int srcH;                     ///< Height of source      luma/alpha planes.
280
    int dstH;                     ///< Height of destination luma/alpha planes.
281
    int chrSrcW;                  ///< Width  of source      chroma     planes.
282
    int chrSrcH;                  ///< Height of source      chroma     planes.
283
    int chrDstW;                  ///< Width  of destination chroma     planes.
284
    int chrDstH;                  ///< Height of destination chroma     planes.
285
    int lumXInc, chrXInc;
286
    int lumYInc, chrYInc;
287
    enum AVPixelFormat dstFormat; ///< Destination pixel format.
288
    enum AVPixelFormat srcFormat; ///< Source      pixel format.
289
    int dstFormatBpp;             ///< Number of bits per pixel of the destination pixel format.
290
    int srcFormatBpp;             ///< Number of bits per pixel of the source      pixel format.
291
    int dstBpc, srcBpc;
292
    int chrSrcHSubSample;         ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in source      image.
293
    int chrSrcVSubSample;         ///< Binary logarithm of vertical   subsampling factor between luma/alpha and chroma planes in source      image.
294
    int chrDstHSubSample;         ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in destination image.
295
    int chrDstVSubSample;         ///< Binary logarithm of vertical   subsampling factor between luma/alpha and chroma planes in destination image.
296
    int vChrDrop;                 ///< Binary logarithm of extra vertical subsampling factor in source image chroma planes specified by user.
297
    int sliceDir;                 ///< Direction that slices are fed to the scaler (1 = top-to-bottom, -1 = bottom-to-top).
298
    double param[2];              ///< Input parameters for scaling algorithms that need them.
299
 
300
    uint32_t pal_yuv[256];
301
    uint32_t pal_rgb[256];
302
 
303
    /**
304
     * @name Scaled horizontal lines ring buffer.
305
     * The horizontal scaler keeps just enough scaled lines in a ring buffer
306
     * so they may be passed to the vertical scaler. The pointers to the
307
     * allocated buffers for each line are duplicated in sequence in the ring
308
     * buffer to simplify indexing and avoid wrapping around between lines
309
     * inside the vertical scaler code. The wrapping is done before the
310
     * vertical scaler is called.
311
     */
312
    //@{
313
    int16_t **lumPixBuf;          ///< Ring buffer for scaled horizontal luma   plane lines to be fed to the vertical scaler.
314
    int16_t **chrUPixBuf;         ///< Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler.
315
    int16_t **chrVPixBuf;         ///< Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler.
316
    int16_t **alpPixBuf;          ///< Ring buffer for scaled horizontal alpha  plane lines to be fed to the vertical scaler.
317
    int vLumBufSize;              ///< Number of vertical luma/alpha lines allocated in the ring buffer.
318
    int vChrBufSize;              ///< Number of vertical chroma     lines allocated in the ring buffer.
319
    int lastInLumBuf;             ///< Last scaled horizontal luma/alpha line from source in the ring buffer.
320
    int lastInChrBuf;             ///< Last scaled horizontal chroma     line from source in the ring buffer.
321
    int lumBufIndex;              ///< Index in ring buffer of the last scaled horizontal luma/alpha line from source.
322
    int chrBufIndex;              ///< Index in ring buffer of the last scaled horizontal chroma     line from source.
323
    //@}
324
 
325
    uint8_t *formatConvBuffer;
326
 
327
    /**
328
     * @name Horizontal and vertical filters.
329
     * To better understand the following fields, here is a pseudo-code of
330
     * their usage in filtering a horizontal line:
331
     * @code
332
     * for (i = 0; i < width; i++) {
333
     *     dst[i] = 0;
334
     *     for (j = 0; j < filterSize; j++)
335
     *         dst[i] += src[ filterPos[i] + j ] * filter[ filterSize * i + j ];
336
     *     dst[i] >>= FRAC_BITS; // The actual implementation is fixed-point.
337
     * }
338
     * @endcode
339
     */
340
    //@{
341
    int16_t *hLumFilter;          ///< Array of horizontal filter coefficients for luma/alpha planes.
342
    int16_t *hChrFilter;          ///< Array of horizontal filter coefficients for chroma     planes.
343
    int16_t *vLumFilter;          ///< Array of vertical   filter coefficients for luma/alpha planes.
344
    int16_t *vChrFilter;          ///< Array of vertical   filter coefficients for chroma     planes.
345
    int32_t *hLumFilterPos;       ///< Array of horizontal filter starting positions for each dst[i] for luma/alpha planes.
346
    int32_t *hChrFilterPos;       ///< Array of horizontal filter starting positions for each dst[i] for chroma     planes.
347
    int32_t *vLumFilterPos;       ///< Array of vertical   filter starting positions for each dst[i] for luma/alpha planes.
348
    int32_t *vChrFilterPos;       ///< Array of vertical   filter starting positions for each dst[i] for chroma     planes.
349
    int hLumFilterSize;           ///< Horizontal filter size for luma/alpha pixels.
350
    int hChrFilterSize;           ///< Horizontal filter size for chroma     pixels.
351
    int vLumFilterSize;           ///< Vertical   filter size for luma/alpha pixels.
352
    int vChrFilterSize;           ///< Vertical   filter size for chroma     pixels.
353
    //@}
354
 
355
    int lumMmxextFilterCodeSize;  ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code size for luma/alpha planes.
356
    int chrMmxextFilterCodeSize;  ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code size for chroma planes.
357
    uint8_t *lumMmxextFilterCode; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code for luma/alpha planes.
358
    uint8_t *chrMmxextFilterCode; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code for chroma planes.
359
 
360
    int canMMXEXTBeUsed;
361
 
362
    int dstY;                     ///< Last destination vertical line output from last slice.
363
    int flags;                    ///< Flags passed by the user to select scaler algorithm, optimizations, subsampling, etc...
364
    void *yuvTable;             // pointer to the yuv->rgb table start so it can be freed()
365
    uint8_t *table_rV[256 + 2*YUVRGB_TABLE_HEADROOM];
366
    uint8_t *table_gU[256 + 2*YUVRGB_TABLE_HEADROOM];
367
    int table_gV[256 + 2*YUVRGB_TABLE_HEADROOM];
368
    uint8_t *table_bU[256 + 2*YUVRGB_TABLE_HEADROOM];
369
    DECLARE_ALIGNED(16, int32_t, input_rgb2yuv_table)[16+40*4]; // This table can contain both C and SIMD formatted values, the C values are always at the XY_IDX points
370
#define RY_IDX 0
371
#define GY_IDX 1
372
#define BY_IDX 2
373
#define RU_IDX 3
374
#define GU_IDX 4
375
#define BU_IDX 5
376
#define RV_IDX 6
377
#define GV_IDX 7
378
#define BV_IDX 8
379
#define RGB2YUV_SHIFT 15
380
 
381
    int *dither_error[4];
382
 
383
    //Colorspace stuff
384
    int contrast, brightness, saturation;    // for sws_getColorspaceDetails
385
    int srcColorspaceTable[4];
386
    int dstColorspaceTable[4];
387
    int srcRange;                 ///< 0 = MPG YUV range, 1 = JPG YUV range (source      image).
388
    int dstRange;                 ///< 0 = MPG YUV range, 1 = JPG YUV range (destination image).
389
    int src0Alpha;
390
    int dst0Alpha;
391
    int srcXYZ;
392
    int dstXYZ;
393
    int src_h_chr_pos;
394
    int dst_h_chr_pos;
395
    int src_v_chr_pos;
396
    int dst_v_chr_pos;
397
    int yuv2rgb_y_offset;
398
    int yuv2rgb_y_coeff;
399
    int yuv2rgb_v2r_coeff;
400
    int yuv2rgb_v2g_coeff;
401
    int yuv2rgb_u2g_coeff;
402
    int yuv2rgb_u2b_coeff;
403
 
404
#define RED_DITHER            "0*8"
405
#define GREEN_DITHER          "1*8"
406
#define BLUE_DITHER           "2*8"
407
#define Y_COEFF               "3*8"
408
#define VR_COEFF              "4*8"
409
#define UB_COEFF              "5*8"
410
#define VG_COEFF              "6*8"
411
#define UG_COEFF              "7*8"
412
#define Y_OFFSET              "8*8"
413
#define U_OFFSET              "9*8"
414
#define V_OFFSET              "10*8"
415
#define LUM_MMX_FILTER_OFFSET "11*8"
416
#define CHR_MMX_FILTER_OFFSET "11*8+4*4*256"
417
#define DSTW_OFFSET           "11*8+4*4*256*2" //do not change, it is hardcoded in the ASM
418
#define ESP_OFFSET            "11*8+4*4*256*2+8"
419
#define VROUNDER_OFFSET       "11*8+4*4*256*2+16"
420
#define U_TEMP                "11*8+4*4*256*2+24"
421
#define V_TEMP                "11*8+4*4*256*2+32"
422
#define Y_TEMP                "11*8+4*4*256*2+40"
423
#define ALP_MMX_FILTER_OFFSET "11*8+4*4*256*2+48"
424
#define UV_OFF_PX             "11*8+4*4*256*3+48"
425
#define UV_OFF_BYTE           "11*8+4*4*256*3+56"
426
#define DITHER16              "11*8+4*4*256*3+64"
427
#define DITHER32              "11*8+4*4*256*3+80"
428
 
429
    DECLARE_ALIGNED(8, uint64_t, redDither);
430
    DECLARE_ALIGNED(8, uint64_t, greenDither);
431
    DECLARE_ALIGNED(8, uint64_t, blueDither);
432
 
433
    DECLARE_ALIGNED(8, uint64_t, yCoeff);
434
    DECLARE_ALIGNED(8, uint64_t, vrCoeff);
435
    DECLARE_ALIGNED(8, uint64_t, ubCoeff);
436
    DECLARE_ALIGNED(8, uint64_t, vgCoeff);
437
    DECLARE_ALIGNED(8, uint64_t, ugCoeff);
438
    DECLARE_ALIGNED(8, uint64_t, yOffset);
439
    DECLARE_ALIGNED(8, uint64_t, uOffset);
440
    DECLARE_ALIGNED(8, uint64_t, vOffset);
441
    int32_t lumMmxFilter[4 * MAX_FILTER_SIZE];
442
    int32_t chrMmxFilter[4 * MAX_FILTER_SIZE];
443
    int dstW;                     ///< Width  of destination luma/alpha planes.
444
    DECLARE_ALIGNED(8, uint64_t, esp);
445
    DECLARE_ALIGNED(8, uint64_t, vRounder);
446
    DECLARE_ALIGNED(8, uint64_t, u_temp);
447
    DECLARE_ALIGNED(8, uint64_t, v_temp);
448
    DECLARE_ALIGNED(8, uint64_t, y_temp);
449
    int32_t alpMmxFilter[4 * MAX_FILTER_SIZE];
450
    // alignment of these values is not necessary, but merely here
451
    // to maintain the same offset across x8632 and x86-64. Once we
452
    // use proper offset macros in the asm, they can be removed.
453
    DECLARE_ALIGNED(8, ptrdiff_t, uv_off); ///< offset (in pixels) between u and v planes
454
    DECLARE_ALIGNED(8, ptrdiff_t, uv_offx2); ///< offset (in bytes) between u and v planes
455
    DECLARE_ALIGNED(8, uint16_t, dither16)[8];
456
    DECLARE_ALIGNED(8, uint32_t, dither32)[8];
457
 
458
    const uint8_t *chrDither8, *lumDither8;
459
 
460
#if HAVE_ALTIVEC
461
    vector signed short   CY;
462
    vector signed short   CRV;
463
    vector signed short   CBU;
464
    vector signed short   CGU;
465
    vector signed short   CGV;
466
    vector signed short   OY;
467
    vector unsigned short CSHIFT;
468
    vector signed short  *vYCoeffsBank, *vCCoeffsBank;
469
#endif
470
 
471
#if ARCH_BFIN
472
    DECLARE_ALIGNED(4, uint32_t, oy);
473
    DECLARE_ALIGNED(4, uint32_t, oc);
474
    DECLARE_ALIGNED(4, uint32_t, zero);
475
    DECLARE_ALIGNED(4, uint32_t, cy);
476
    DECLARE_ALIGNED(4, uint32_t, crv);
477
    DECLARE_ALIGNED(4, uint32_t, rmask);
478
    DECLARE_ALIGNED(4, uint32_t, cbu);
479
    DECLARE_ALIGNED(4, uint32_t, bmask);
480
    DECLARE_ALIGNED(4, uint32_t, cgu);
481
    DECLARE_ALIGNED(4, uint32_t, cgv);
482
    DECLARE_ALIGNED(4, uint32_t, gmask);
483
#endif
484
 
485
#if HAVE_VIS
486
    DECLARE_ALIGNED(8, uint64_t, sparc_coeffs)[10];
487
#endif
488
    int use_mmx_vfilter;
489
 
490
/* pre defined color-spaces gamma */
491
#define XYZ_GAMMA (2.6f)
492
#define RGB_GAMMA (2.2f)
493
    int16_t *xyzgamma;
494
    int16_t *rgbgamma;
495
    int16_t *xyzgammainv;
496
    int16_t *rgbgammainv;
497
    int16_t xyz2rgb_matrix[3][4];
498
    int16_t rgb2xyz_matrix[3][4];
499
 
500
    /* function pointers for swscale() */
501
    yuv2planar1_fn yuv2plane1;
502
    yuv2planarX_fn yuv2planeX;
503
    yuv2interleavedX_fn yuv2nv12cX;
504
    yuv2packed1_fn yuv2packed1;
505
    yuv2packed2_fn yuv2packed2;
506
    yuv2packedX_fn yuv2packedX;
507
    yuv2anyX_fn yuv2anyX;
508
 
509
    /// Unscaled conversion of luma plane to YV12 for horizontal scaler.
510
    void (*lumToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3,
511
                      int width, uint32_t *pal);
512
    /// Unscaled conversion of alpha plane to YV12 for horizontal scaler.
513
    void (*alpToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3,
514
                      int width, uint32_t *pal);
515
    /// Unscaled conversion of chroma planes to YV12 for horizontal scaler.
516
    void (*chrToYV12)(uint8_t *dstU, uint8_t *dstV,
517
                      const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
518
                      int width, uint32_t *pal);
519
 
520
    /**
521
     * Functions to read planar input, such as planar RGB, and convert
522
     * internally to Y/UV/A.
523
     */
524
    /** @{ */
525
    void (*readLumPlanar)(uint8_t *dst, const uint8_t *src[4], int width, int32_t *rgb2yuv);
526
    void (*readChrPlanar)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4],
527
                          int width, int32_t *rgb2yuv);
528
    void (*readAlpPlanar)(uint8_t *dst, const uint8_t *src[4], int width, int32_t *rgb2yuv);
529
    /** @} */
530
 
531
    /**
532
     * Scale one horizontal line of input data using a bilinear filter
533
     * to produce one line of output data. Compared to SwsContext->hScale(),
534
     * please take note of the following caveats when using these:
535
     * - Scaling is done using only 7bit instead of 14bit coefficients.
536
     * - You can use no more than 5 input pixels to produce 4 output
537
     *   pixels. Therefore, this filter should not be used for downscaling
538
     *   by more than ~20% in width (because that equals more than 5/4th
539
     *   downscaling and thus more than 5 pixels input per 4 pixels output).
540
     * - In general, bilinear filters create artifacts during downscaling
541
     *   (even when <20%), because one output pixel will span more than one
542
     *   input pixel, and thus some pixels will need edges of both neighbor
543
     *   pixels to interpolate the output pixel. Since you can use at most
544
     *   two input pixels per output pixel in bilinear scaling, this is
545
     *   impossible and thus downscaling by any size will create artifacts.
546
     * To enable this type of scaling, set SWS_FLAG_FAST_BILINEAR
547
     * in SwsContext->flags.
548
     */
549
    /** @{ */
550
    void (*hyscale_fast)(struct SwsContext *c,
551
                         int16_t *dst, int dstWidth,
552
                         const uint8_t *src, int srcW, int xInc);
553
    void (*hcscale_fast)(struct SwsContext *c,
554
                         int16_t *dst1, int16_t *dst2, int dstWidth,
555
                         const uint8_t *src1, const uint8_t *src2,
556
                         int srcW, int xInc);
557
    /** @} */
558
 
559
    /**
560
     * Scale one horizontal line of input data using a filter over the input
561
     * lines, to produce one (differently sized) line of output data.
562
     *
563
     * @param dst        pointer to destination buffer for horizontally scaled
564
     *                   data. If the number of bits per component of one
565
     *                   destination pixel (SwsContext->dstBpc) is <= 10, data
566
     *                   will be 15bpc in 16bits (int16_t) width. Else (i.e.
567
     *                   SwsContext->dstBpc == 16), data will be 19bpc in
568
     *                   32bits (int32_t) width.
569
     * @param dstW       width of destination image
570
     * @param src        pointer to source data to be scaled. If the number of
571
     *                   bits per component of a source pixel (SwsContext->srcBpc)
572
     *                   is 8, this is 8bpc in 8bits (uint8_t) width. Else
573
     *                   (i.e. SwsContext->srcBpc > 8), this is native depth
574
     *                   in 16bits (uint16_t) width. In other words, for 9-bit
575
     *                   YUV input, this is 9bpc, for 10-bit YUV input, this is
576
     *                   10bpc, and for 16-bit RGB or YUV, this is 16bpc.
577
     * @param filter     filter coefficients to be used per output pixel for
578
     *                   scaling. This contains 14bpp filtering coefficients.
579
     *                   Guaranteed to contain dstW * filterSize entries.
580
     * @param filterPos  position of the first input pixel to be used for
581
     *                   each output pixel during scaling. Guaranteed to
582
     *                   contain dstW entries.
583
     * @param filterSize the number of input coefficients to be used (and
584
     *                   thus the number of input pixels to be used) for
585
     *                   creating a single output pixel. Is aligned to 4
586
     *                   (and input coefficients thus padded with zeroes)
587
     *                   to simplify creating SIMD code.
588
     */
589
    /** @{ */
590
    void (*hyScale)(struct SwsContext *c, int16_t *dst, int dstW,
591
                    const uint8_t *src, const int16_t *filter,
592
                    const int32_t *filterPos, int filterSize);
593
    void (*hcScale)(struct SwsContext *c, int16_t *dst, int dstW,
594
                    const uint8_t *src, const int16_t *filter,
595
                    const int32_t *filterPos, int filterSize);
596
    /** @} */
597
 
598
    /// Color range conversion function for luma plane if needed.
599
    void (*lumConvertRange)(int16_t *dst, int width);
600
    /// Color range conversion function for chroma planes if needed.
601
    void (*chrConvertRange)(int16_t *dst1, int16_t *dst2, int width);
602
 
603
    int needs_hcscale; ///< Set if there are chroma planes to be converted.
604
 
605
    SwsDither dither;
606
} SwsContext;
607
//FIXME check init (where 0)
608
 
609
/* YUV -> RGB converter lookup and table initialisation entry points. */
SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c);
int ff_yuv2rgb_c_init_tables(SwsContext *c,
                             const int inv_table[4],
                             int fullRange,
                             int brightness,
                             int contrast,
                             int saturation);
void ff_yuv2rgb_init_tables_ppc(SwsContext *c,
                                const int inv_table[4],
                                int brightness,
                                int contrast,
                                int saturation);

void updateMMXDitherTables(SwsContext *c,
                           int dstY,
                           int lumBufIndex,
                           int chrBufIndex,
                           int lastInLumBuf,
                           int lastInChrBuf);

/* NOTE(review): judging by the suffixes these are per-architecture
 * initialisers; each returns a SwsFunc -- confirm semantics in the
 * corresponding implementation files. */
SwsFunc ff_yuv2rgb_init_x86(SwsContext *c);
SwsFunc ff_yuv2rgb_init_vis(SwsContext *c);
SwsFunc ff_yuv2rgb_init_ppc(SwsContext *c);
SwsFunc ff_yuv2rgb_init_bfin(SwsContext *c);
623
 
624
#if FF_API_SWS_FORMAT_NAME
/**
 * Only declared while FF_API_SWS_FORMAT_NAME is enabled.
 *
 * @deprecated Use av_get_pix_fmt_name() instead.
 */
attribute_deprecated const char *sws_format_name(enum AVPixelFormat format);
#endif /* FF_API_SWS_FORMAT_NAME */
631
 
632
static av_always_inline int is16BPS(enum AVPixelFormat pix_fmt)
633
{
634
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
635
    av_assert0(desc);
636
    return desc->comp[0].depth_minus1 == 15;
637
}
638
 
639
static av_always_inline int is9_OR_10BPS(enum AVPixelFormat pix_fmt)
640
{
641
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
642
    av_assert0(desc);
643
    return desc->comp[0].depth_minus1 >= 8 && desc->comp[0].depth_minus1 <= 13;
644
}
645
 
646
#define isNBPS(x) is9_OR_10BPS(x)
647
 
648
static av_always_inline int isBE(enum AVPixelFormat pix_fmt)
649
{
650
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
651
    av_assert0(desc);
652
    return desc->flags & AV_PIX_FMT_FLAG_BE;
653
}
654
 
655
static av_always_inline int isYUV(enum AVPixelFormat pix_fmt)
656
{
657
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
658
    av_assert0(desc);
659
    return !(desc->flags & AV_PIX_FMT_FLAG_RGB) && desc->nb_components >= 2;
660
}
661
 
662
static av_always_inline int isPlanarYUV(enum AVPixelFormat pix_fmt)
663
{
664
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
665
    av_assert0(desc);
666
    return ((desc->flags & AV_PIX_FMT_FLAG_PLANAR) && isYUV(pix_fmt));
667
}
668
 
669
static av_always_inline int isRGB(enum AVPixelFormat pix_fmt)
670
{
671
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
672
    av_assert0(desc);
673
    return (desc->flags & AV_PIX_FMT_FLAG_RGB);
674
}
675
 
676
/*
 * isGray(x): non-zero when x is one of the gray formats listed in the active
 * branch. The descriptor-based variant is kept disabled (see FIXME).
 * x is expanded several times, so it must not have side effects.
 */
#if 0 // FIXME
#define isGray(x) (                                                 \
        !(av_pix_fmt_desc_get(x)->flags & AV_PIX_FMT_FLAG_PAL)      \
        && av_pix_fmt_desc_get(x)->nb_components <= 2               \
    )
#else
#define isGray(x) (                     \
           (x) == AV_PIX_FMT_GRAY8      \
        || (x) == AV_PIX_FMT_Y400A      \
        || (x) == AV_PIX_FMT_GRAY16BE   \
        || (x) == AV_PIX_FMT_GRAY16LE   \
    )
#endif
687
 
688
/*
 * isRGBinInt(x): non-zero when x equals one of the RGB-named formats listed
 * below, or AV_PIX_FMT_MONOBLACK / AV_PIX_FMT_MONOWHITE.
 * x is expanded once per comparison, so it must not have side effects.
 */
#define isRGBinInt(x) (                 \
           (x) == AV_PIX_FMT_RGB48BE    \
        || (x) == AV_PIX_FMT_RGB48LE    \
        || (x) == AV_PIX_FMT_RGBA64BE   \
        || (x) == AV_PIX_FMT_RGBA64LE   \
        || (x) == AV_PIX_FMT_RGB32      \
        || (x) == AV_PIX_FMT_RGB32_1    \
        || (x) == AV_PIX_FMT_RGB24      \
        || (x) == AV_PIX_FMT_RGB565BE   \
        || (x) == AV_PIX_FMT_RGB565LE   \
        || (x) == AV_PIX_FMT_RGB555BE   \
        || (x) == AV_PIX_FMT_RGB555LE   \
        || (x) == AV_PIX_FMT_RGB444BE   \
        || (x) == AV_PIX_FMT_RGB444LE   \
        || (x) == AV_PIX_FMT_RGB8       \
        || (x) == AV_PIX_FMT_RGB4       \
        || (x) == AV_PIX_FMT_RGB4_BYTE  \
        || (x) == AV_PIX_FMT_MONOBLACK  \
        || (x) == AV_PIX_FMT_MONOWHITE  \
    )
709
/*
 * isBGRinInt(x): non-zero when x equals one of the BGR-named formats listed
 * below, or AV_PIX_FMT_MONOBLACK / AV_PIX_FMT_MONOWHITE.
 * x is expanded once per comparison, so it must not have side effects.
 */
#define isBGRinInt(x) (                 \
           (x) == AV_PIX_FMT_BGR48BE    \
        || (x) == AV_PIX_FMT_BGR48LE    \
        || (x) == AV_PIX_FMT_BGRA64BE   \
        || (x) == AV_PIX_FMT_BGRA64LE   \
        || (x) == AV_PIX_FMT_BGR32      \
        || (x) == AV_PIX_FMT_BGR32_1    \
        || (x) == AV_PIX_FMT_BGR24      \
        || (x) == AV_PIX_FMT_BGR565BE   \
        || (x) == AV_PIX_FMT_BGR565LE   \
        || (x) == AV_PIX_FMT_BGR555BE   \
        || (x) == AV_PIX_FMT_BGR555LE   \
        || (x) == AV_PIX_FMT_BGR444BE   \
        || (x) == AV_PIX_FMT_BGR444LE   \
        || (x) == AV_PIX_FMT_BGR8       \
        || (x) == AV_PIX_FMT_BGR4       \
        || (x) == AV_PIX_FMT_BGR4_BYTE  \
        || (x) == AV_PIX_FMT_MONOBLACK  \
        || (x) == AV_PIX_FMT_MONOWHITE  \
    )
730
 
731
/*
 * isRGBinBytes(x): non-zero when x equals one of the seven formats listed
 * below. x is expanded once per comparison, so it must not have side
 * effects.
 */
#define isRGBinBytes(x)                 \
    (                                   \
     (x) == AV_PIX_FMT_RGB48BE     ||   \
     (x) == AV_PIX_FMT_RGB48LE     ||   \
     (x) == AV_PIX_FMT_RGBA64BE    ||   \
     (x) == AV_PIX_FMT_RGBA64LE    ||   \
     (x) == AV_PIX_FMT_RGBA        ||   \
     (x) == AV_PIX_FMT_ARGB        ||   \
     (x) == AV_PIX_FMT_RGB24            \
    )
740
/*
 * isBGRinBytes(x): non-zero when x equals one of the seven formats listed
 * below. x is expanded once per comparison, so it must not have side
 * effects.
 */
#define isBGRinBytes(x)                 \
    (                                   \
     (x) == AV_PIX_FMT_BGR48BE     ||   \
     (x) == AV_PIX_FMT_BGR48LE     ||   \
     (x) == AV_PIX_FMT_BGRA64BE    ||   \
     (x) == AV_PIX_FMT_BGRA64LE    ||   \
     (x) == AV_PIX_FMT_BGRA        ||   \
     (x) == AV_PIX_FMT_ABGR        ||   \
     (x) == AV_PIX_FMT_BGR24            \
    )
749
 
750
/*
 * isAnyRGB(x): non-zero when x is accepted by isRGBinInt(), isBGRinInt()
 * or isRGB(), tested in that order. x is expanded multiple times.
 */
#define isAnyRGB(x) (       \
           isRGBinInt(x)    \
        || isBGRinInt(x)    \
        || isRGB(x)         \
    )
756
 
757
static av_always_inline int isALPHA(enum AVPixelFormat pix_fmt)
758
{
759
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
760
    av_assert0(desc);
761
    if (pix_fmt == AV_PIX_FMT_PAL8)
762
        return 1;
763
    return desc->flags & AV_PIX_FMT_FLAG_ALPHA;
764
}
765
 
766
/*
 * isPacked(x): the active variant is a macro matching PAL8, YUYV422,
 * UYVY422, Y400A and everything accepted by isRGBinInt() / isBGRinInt().
 * x is expanded multiple times. A descriptor-based function variant is
 * kept in the disabled branch.
 */
#if 1
#define isPacked(x)                     \
    (                                   \
     (x) == AV_PIX_FMT_PAL8        ||   \
     (x) == AV_PIX_FMT_YUYV422     ||   \
     (x) == AV_PIX_FMT_UYVY422     ||   \
     (x) == AV_PIX_FMT_Y400A       ||   \
     isRGBinInt(x)                 ||   \
     isBGRinInt(x)                      \
    )
#else
static av_always_inline int isPacked(enum AVPixelFormat pix_fmt)
{
    const AVPixFmtDescriptor *fmt_desc = av_pix_fmt_desc_get(pix_fmt);

    av_assert0(fmt_desc);

    if (fmt_desc->nb_components >= 2 &&
        !(fmt_desc->flags & AV_PIX_FMT_FLAG_PLANAR))
        return 1;
    return pix_fmt == AV_PIX_FMT_PAL8;
}

#endif
785
static av_always_inline int isPlanar(enum AVPixelFormat pix_fmt)
786
{
787
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
788
    av_assert0(desc);
789
    return (desc->nb_components >= 2 && (desc->flags & AV_PIX_FMT_FLAG_PLANAR));
790
}
791
 
792
static av_always_inline int isPackedRGB(enum AVPixelFormat pix_fmt)
793
{
794
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
795
    av_assert0(desc);
796
    return ((desc->flags & (AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB)) == AV_PIX_FMT_FLAG_RGB);
797
}
798
 
799
static av_always_inline int isPlanarRGB(enum AVPixelFormat pix_fmt)
800
{
801
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
802
    av_assert0(desc);
803
    return ((desc->flags & (AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB)) ==
804
            (AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB));
805
}
806
 
807
static av_always_inline int usePal(enum AVPixelFormat pix_fmt)
808
{
809
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
810
    av_assert0(desc);
811
    return (desc->flags & AV_PIX_FMT_FLAG_PAL) || (desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL);
812
}
813
 
814
extern const uint64_t ff_dither4[2];
815
extern const uint64_t ff_dither8[2];
816
 
817
extern const uint8_t ff_dither_2x2_4[3][8];
818
extern const uint8_t ff_dither_2x2_8[3][8];
819
extern const uint8_t ff_dither_4x4_16[5][8];
820
extern const uint8_t ff_dither_8x8_32[9][8];
821
extern const uint8_t ff_dither_8x8_73[9][8];
822
extern const uint8_t ff_dither_8x8_128[9][8];
823
extern const uint8_t ff_dither_8x8_220[9][8];
824
 
825
extern const int32_t ff_yuv2rgb_coeffs[8][4];
826
 
827
extern const AVClass sws_context_class;
828
 
829
/**
830
 * Set c->swscale to an unscaled converter if one exists for the specific
831
 * source and destination formats, bit depths, flags, etc.
832
 */
833
void ff_get_unscaled_swscale(SwsContext *c);
834
void ff_get_unscaled_swscale_bfin(SwsContext *c);
835
void ff_get_unscaled_swscale_ppc(SwsContext *c);
836
 
837
/**
838
 * Return function pointer to fastest main scaler path function depending
839
 * on architecture and available optimizations.
840
 */
841
SwsFunc ff_getSwsFunc(SwsContext *c);
842
 
843
void ff_sws_init_input_funcs(SwsContext *c);
844
void ff_sws_init_output_funcs(SwsContext *c,
845
                              yuv2planar1_fn *yuv2plane1,
846
                              yuv2planarX_fn *yuv2planeX,
847
                              yuv2interleavedX_fn *yuv2nv12cX,
848
                              yuv2packed1_fn *yuv2packed1,
849
                              yuv2packed2_fn *yuv2packed2,
850
                              yuv2packedX_fn *yuv2packedX,
851
                              yuv2anyX_fn *yuv2anyX);
852
void ff_sws_init_swscale_ppc(SwsContext *c);
853
void ff_sws_init_swscale_x86(SwsContext *c);
854
 
855
/**
 * Fill a rectangle of a plane made of 16-bit samples with a constant value.
 *
 * The value written is 0xFFFF >> (15 - bits) when alpha is non-zero and
 * 1 << bits otherwise. Each sample is stored with AV_WB16() when big_endian
 * is non-zero and with AV_WL16() otherwise.
 *
 * @param plane      start of the plane
 * @param stride     byte offset between consecutive rows
 * @param width      number of 16-bit samples written per row
 * @param height     number of rows written
 * @param y          index of the first row written
 * @param alpha      selects which of the two fill values is used
 * @param bits       shift amount used to build the fill value; must not
 *                   exceed 15, otherwise the alpha shift count is negative
 * @param big_endian non-zero to store samples big-endian
 */
static inline void fillPlane16(uint8_t *plane, int stride, int width, int height, int y,
                               int alpha, int bits, const int big_endian)
{
    int i, j;
    uint8_t *ptr = plane + stride * y;
    /* The "1 << bits" branch was truncated in this copy of the file and is
     * restored here. */
    int v = alpha ? 0xFFFF >> (15 - bits) : (1 << bits);

    for (i = 0; i < height; i++) {
/* Writes one row of samples using the given 16-bit store macro. */
#define FILL(wfunc) \
        for (j = 0; j < width; j++) {\
            wfunc(ptr+2*j, v);\
        }
        if (big_endian) {
            FILL(AV_WB16);
        } else {
            FILL(AV_WL16);
        }
        ptr += stride;
    }
}
874
 
875
#endif /* SWSCALE_SWSCALE_INTERNAL_H */