Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
6148 | serge | 1 | /* |
2 | * Copyright (C) 2001-2003 Michael Niedermayer |
||
3 | * |
||
4 | * This file is part of FFmpeg. |
||
5 | * |
||
6 | * FFmpeg is free software; you can redistribute it and/or |
||
7 | * modify it under the terms of the GNU Lesser General Public |
||
8 | * License as published by the Free Software Foundation; either |
||
9 | * version 2.1 of the License, or (at your option) any later version. |
||
10 | * |
||
11 | * FFmpeg is distributed in the hope that it will be useful, |
||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||
14 | * Lesser General Public License for more details. |
||
15 | * |
||
16 | * You should have received a copy of the GNU Lesser General Public |
||
17 | * License along with FFmpeg; if not, write to the Free Software |
||
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||
19 | */ |
||
20 | |||
21 | #include "config.h" |
||
22 | |||
23 | #define _SVID_SOURCE // needed for MAP_ANONYMOUS |
||
24 | #define _DARWIN_C_SOURCE // needed for MAP_ANON |
||
25 | #include |
||
26 | #include |
||
27 | #include |
||
28 | #include |
||
29 | #if HAVE_SYS_MMAN_H |
||
30 | #include |
||
31 | #if defined(MAP_ANON) && !defined(MAP_ANONYMOUS) |
||
32 | #define MAP_ANONYMOUS MAP_ANON |
||
33 | #endif |
||
34 | #endif |
||
35 | #if HAVE_VIRTUALALLOC |
||
36 | #define WIN32_LEAN_AND_MEAN |
||
37 | #include |
||
38 | #endif |
||
39 | |||
40 | #include "libavutil/attributes.h" |
||
41 | #include "libavutil/avassert.h" |
||
42 | #include "libavutil/avutil.h" |
||
43 | #include "libavutil/bswap.h" |
||
44 | #include "libavutil/cpu.h" |
||
45 | #include "libavutil/intreadwrite.h" |
||
46 | #include "libavutil/mathematics.h" |
||
47 | #include "libavutil/opt.h" |
||
48 | #include "libavutil/pixdesc.h" |
||
49 | #include "libavutil/ppc/cpu.h" |
||
50 | #include "libavutil/x86/asm.h" |
||
51 | #include "libavutil/x86/cpu.h" |
||
52 | #include "rgb2rgb.h" |
||
53 | #include "swscale.h" |
||
54 | #include "swscale_internal.h" |
||
55 | |||
/* Forward declaration of a file-local helper; its definition is presumably further down in this file. */
56 | static void handle_formats(SwsContext *c); |
||
57 | |||
58 | unsigned swscale_version(void) |
||
59 | { |
||
60 | av_assert0(LIBSWSCALE_VERSION_MICRO >= 100); |
||
61 | return LIBSWSCALE_VERSION_INT; |
||
62 | } |
||
63 | |||
64 | const char *swscale_configuration(void) |
||
65 | { |
||
66 | return FFMPEG_CONFIGURATION; |
||
67 | } |
||
68 | |||
69 | const char *swscale_license(void) |
||
70 | { |
||
71 | #define LICENSE_PREFIX "libswscale license: " |
||
72 | return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1; |
||
73 | } |
||
74 | |||
/* Byte that init_hscaler_mmxext() writes after the code fragments it copies into filterCode. */
75 | #define RET 0xC3 // near return opcode for x86 |
||
76 | |||
/**
 * Per-pixel-format capability flags, one bit each.
 * Entries of format_entries[] are indexed by enum AVPixelFormat.
 */
typedef struct FormatEntry {
    uint8_t is_supported_in         : 1; /* reported by sws_isSupportedInput() */
    uint8_t is_supported_out        : 1; /* reported by sws_isSupportedOutput() */
    uint8_t is_supported_endianness : 1; /* reported by sws_isSupportedEndiannessConversion() */
} FormatEntry;
||
82 | |||
83 | static const FormatEntry format_entries[AV_PIX_FMT_NB] = { |
||
84 | [AV_PIX_FMT_YUV420P] = { 1, 1 }, |
||
85 | [AV_PIX_FMT_YUYV422] = { 1, 1 }, |
||
86 | [AV_PIX_FMT_RGB24] = { 1, 1 }, |
||
87 | [AV_PIX_FMT_BGR24] = { 1, 1 }, |
||
88 | [AV_PIX_FMT_YUV422P] = { 1, 1 }, |
||
89 | [AV_PIX_FMT_YUV444P] = { 1, 1 }, |
||
90 | [AV_PIX_FMT_YUV410P] = { 1, 1 }, |
||
91 | [AV_PIX_FMT_YUV411P] = { 1, 1 }, |
||
92 | [AV_PIX_FMT_GRAY8] = { 1, 1 }, |
||
93 | [AV_PIX_FMT_MONOWHITE] = { 1, 1 }, |
||
94 | [AV_PIX_FMT_MONOBLACK] = { 1, 1 }, |
||
95 | [AV_PIX_FMT_PAL8] = { 1, 0 }, |
||
96 | [AV_PIX_FMT_YUVJ420P] = { 1, 1 }, |
||
97 | [AV_PIX_FMT_YUVJ411P] = { 1, 1 }, |
||
98 | [AV_PIX_FMT_YUVJ422P] = { 1, 1 }, |
||
99 | [AV_PIX_FMT_YUVJ444P] = { 1, 1 }, |
||
100 | [AV_PIX_FMT_UYVY422] = { 1, 1 }, |
||
101 | [AV_PIX_FMT_UYYVYY411] = { 0, 0 }, |
||
102 | [AV_PIX_FMT_BGR8] = { 1, 1 }, |
||
103 | [AV_PIX_FMT_BGR4] = { 0, 1 }, |
||
104 | [AV_PIX_FMT_BGR4_BYTE] = { 1, 1 }, |
||
105 | [AV_PIX_FMT_RGB8] = { 1, 1 }, |
||
106 | [AV_PIX_FMT_RGB4] = { 0, 1 }, |
||
107 | [AV_PIX_FMT_RGB4_BYTE] = { 1, 1 }, |
||
108 | [AV_PIX_FMT_NV12] = { 1, 1 }, |
||
109 | [AV_PIX_FMT_NV21] = { 1, 1 }, |
||
110 | [AV_PIX_FMT_ARGB] = { 1, 1 }, |
||
111 | [AV_PIX_FMT_RGBA] = { 1, 1 }, |
||
112 | [AV_PIX_FMT_ABGR] = { 1, 1 }, |
||
113 | [AV_PIX_FMT_BGRA] = { 1, 1 }, |
||
114 | [AV_PIX_FMT_0RGB] = { 1, 1 }, |
||
115 | [AV_PIX_FMT_RGB0] = { 1, 1 }, |
||
116 | [AV_PIX_FMT_0BGR] = { 1, 1 }, |
||
117 | [AV_PIX_FMT_BGR0] = { 1, 1 }, |
||
118 | [AV_PIX_FMT_GRAY16BE] = { 1, 1 }, |
||
119 | [AV_PIX_FMT_GRAY16LE] = { 1, 1 }, |
||
120 | [AV_PIX_FMT_YUV440P] = { 1, 1 }, |
||
121 | [AV_PIX_FMT_YUVJ440P] = { 1, 1 }, |
||
122 | [AV_PIX_FMT_YUVA420P] = { 1, 1 }, |
||
123 | [AV_PIX_FMT_YUVA422P] = { 1, 1 }, |
||
124 | [AV_PIX_FMT_YUVA444P] = { 1, 1 }, |
||
125 | [AV_PIX_FMT_YUVA420P9BE] = { 1, 1 }, |
||
126 | [AV_PIX_FMT_YUVA420P9LE] = { 1, 1 }, |
||
127 | [AV_PIX_FMT_YUVA422P9BE] = { 1, 1 }, |
||
128 | [AV_PIX_FMT_YUVA422P9LE] = { 1, 1 }, |
||
129 | [AV_PIX_FMT_YUVA444P9BE] = { 1, 1 }, |
||
130 | [AV_PIX_FMT_YUVA444P9LE] = { 1, 1 }, |
||
131 | [AV_PIX_FMT_YUVA420P10BE]= { 1, 1 }, |
||
132 | [AV_PIX_FMT_YUVA420P10LE]= { 1, 1 }, |
||
133 | [AV_PIX_FMT_YUVA422P10BE]= { 1, 1 }, |
||
134 | [AV_PIX_FMT_YUVA422P10LE]= { 1, 1 }, |
||
135 | [AV_PIX_FMT_YUVA444P10BE]= { 1, 1 }, |
||
136 | [AV_PIX_FMT_YUVA444P10LE]= { 1, 1 }, |
||
137 | [AV_PIX_FMT_YUVA420P16BE]= { 1, 1 }, |
||
138 | [AV_PIX_FMT_YUVA420P16LE]= { 1, 1 }, |
||
139 | [AV_PIX_FMT_YUVA422P16BE]= { 1, 1 }, |
||
140 | [AV_PIX_FMT_YUVA422P16LE]= { 1, 1 }, |
||
141 | [AV_PIX_FMT_YUVA444P16BE]= { 1, 1 }, |
||
142 | [AV_PIX_FMT_YUVA444P16LE]= { 1, 1 }, |
||
143 | [AV_PIX_FMT_RGB48BE] = { 1, 1 }, |
||
144 | [AV_PIX_FMT_RGB48LE] = { 1, 1 }, |
||
145 | [AV_PIX_FMT_RGBA64BE] = { 1, 1 }, |
||
146 | [AV_PIX_FMT_RGBA64LE] = { 1, 1 }, |
||
147 | [AV_PIX_FMT_RGB565BE] = { 1, 1 }, |
||
148 | [AV_PIX_FMT_RGB565LE] = { 1, 1 }, |
||
149 | [AV_PIX_FMT_RGB555BE] = { 1, 1 }, |
||
150 | [AV_PIX_FMT_RGB555LE] = { 1, 1 }, |
||
151 | [AV_PIX_FMT_BGR565BE] = { 1, 1 }, |
||
152 | [AV_PIX_FMT_BGR565LE] = { 1, 1 }, |
||
153 | [AV_PIX_FMT_BGR555BE] = { 1, 1 }, |
||
154 | [AV_PIX_FMT_BGR555LE] = { 1, 1 }, |
||
155 | [AV_PIX_FMT_YUV420P16LE] = { 1, 1 }, |
||
156 | [AV_PIX_FMT_YUV420P16BE] = { 1, 1 }, |
||
157 | [AV_PIX_FMT_YUV422P16LE] = { 1, 1 }, |
||
158 | [AV_PIX_FMT_YUV422P16BE] = { 1, 1 }, |
||
159 | [AV_PIX_FMT_YUV444P16LE] = { 1, 1 }, |
||
160 | [AV_PIX_FMT_YUV444P16BE] = { 1, 1 }, |
||
161 | [AV_PIX_FMT_RGB444LE] = { 1, 1 }, |
||
162 | [AV_PIX_FMT_RGB444BE] = { 1, 1 }, |
||
163 | [AV_PIX_FMT_BGR444LE] = { 1, 1 }, |
||
164 | [AV_PIX_FMT_BGR444BE] = { 1, 1 }, |
||
165 | [AV_PIX_FMT_Y400A] = { 1, 0 }, |
||
166 | [AV_PIX_FMT_BGR48BE] = { 1, 1 }, |
||
167 | [AV_PIX_FMT_BGR48LE] = { 1, 1 }, |
||
168 | [AV_PIX_FMT_BGRA64BE] = { 0, 0 }, |
||
169 | [AV_PIX_FMT_BGRA64LE] = { 0, 0 }, |
||
170 | [AV_PIX_FMT_YUV420P9BE] = { 1, 1 }, |
||
171 | [AV_PIX_FMT_YUV420P9LE] = { 1, 1 }, |
||
172 | [AV_PIX_FMT_YUV420P10BE] = { 1, 1 }, |
||
173 | [AV_PIX_FMT_YUV420P10LE] = { 1, 1 }, |
||
174 | [AV_PIX_FMT_YUV420P12BE] = { 1, 1 }, |
||
175 | [AV_PIX_FMT_YUV420P12LE] = { 1, 1 }, |
||
176 | [AV_PIX_FMT_YUV420P14BE] = { 1, 1 }, |
||
177 | [AV_PIX_FMT_YUV420P14LE] = { 1, 1 }, |
||
178 | [AV_PIX_FMT_YUV422P9BE] = { 1, 1 }, |
||
179 | [AV_PIX_FMT_YUV422P9LE] = { 1, 1 }, |
||
180 | [AV_PIX_FMT_YUV422P10BE] = { 1, 1 }, |
||
181 | [AV_PIX_FMT_YUV422P10LE] = { 1, 1 }, |
||
182 | [AV_PIX_FMT_YUV422P12BE] = { 1, 1 }, |
||
183 | [AV_PIX_FMT_YUV422P12LE] = { 1, 1 }, |
||
184 | [AV_PIX_FMT_YUV422P14BE] = { 1, 1 }, |
||
185 | [AV_PIX_FMT_YUV422P14LE] = { 1, 1 }, |
||
186 | [AV_PIX_FMT_YUV444P9BE] = { 1, 1 }, |
||
187 | [AV_PIX_FMT_YUV444P9LE] = { 1, 1 }, |
||
188 | [AV_PIX_FMT_YUV444P10BE] = { 1, 1 }, |
||
189 | [AV_PIX_FMT_YUV444P10LE] = { 1, 1 }, |
||
190 | [AV_PIX_FMT_YUV444P12BE] = { 1, 1 }, |
||
191 | [AV_PIX_FMT_YUV444P12LE] = { 1, 1 }, |
||
192 | [AV_PIX_FMT_YUV444P14BE] = { 1, 1 }, |
||
193 | [AV_PIX_FMT_YUV444P14LE] = { 1, 1 }, |
||
194 | [AV_PIX_FMT_GBRP] = { 1, 1 }, |
||
195 | [AV_PIX_FMT_GBRP9LE] = { 1, 1 }, |
||
196 | [AV_PIX_FMT_GBRP9BE] = { 1, 1 }, |
||
197 | [AV_PIX_FMT_GBRP10LE] = { 1, 1 }, |
||
198 | [AV_PIX_FMT_GBRP10BE] = { 1, 1 }, |
||
199 | [AV_PIX_FMT_GBRP12LE] = { 1, 1 }, |
||
200 | [AV_PIX_FMT_GBRP12BE] = { 1, 1 }, |
||
201 | [AV_PIX_FMT_GBRP14LE] = { 1, 1 }, |
||
202 | [AV_PIX_FMT_GBRP14BE] = { 1, 1 }, |
||
203 | [AV_PIX_FMT_GBRP16LE] = { 1, 0 }, |
||
204 | [AV_PIX_FMT_GBRP16BE] = { 1, 0 }, |
||
205 | [AV_PIX_FMT_XYZ12BE] = { 1, 1, 1 }, |
||
206 | [AV_PIX_FMT_XYZ12LE] = { 1, 1, 1 }, |
||
207 | [AV_PIX_FMT_GBRAP] = { 1, 1 }, |
||
208 | [AV_PIX_FMT_GBRAP16LE] = { 1, 0 }, |
||
209 | [AV_PIX_FMT_GBRAP16BE] = { 1, 0 }, |
||
210 | }; |
||
211 | |||
212 | int sws_isSupportedInput(enum AVPixelFormat pix_fmt) |
||
213 | { |
||
214 | return (unsigned)pix_fmt < AV_PIX_FMT_NB ? |
||
215 | format_entries[pix_fmt].is_supported_in : 0; |
||
216 | } |
||
217 | |||
218 | int sws_isSupportedOutput(enum AVPixelFormat pix_fmt) |
||
219 | { |
||
220 | return (unsigned)pix_fmt < AV_PIX_FMT_NB ? |
||
221 | format_entries[pix_fmt].is_supported_out : 0; |
||
222 | } |
||
223 | |||
224 | int sws_isSupportedEndiannessConversion(enum AVPixelFormat pix_fmt) |
||
225 | { |
||
226 | return (unsigned)pix_fmt < AV_PIX_FMT_NB ? |
||
227 | format_entries[pix_fmt].is_supported_endianness : 0; |
||
228 | } |
||
229 | |||
#if FF_API_SWS_FORMAT_NAME
/**
 * Look up the name of a pixel format.
 *
 * @param format pixel format to look up
 * @return the name field of the descriptor returned by av_pix_fmt_desc_get(),
 *         or "Unknown format" when no descriptor is returned
 */
const char *sws_format_name(enum AVPixelFormat format)
{
    const AVPixFmtDescriptor *descriptor = av_pix_fmt_desc_get(format);

    return descriptor ? descriptor->name : "Unknown format";
}
#endif
||
240 | |||
/**
 * Evaluate a piecewise cubic at distance dist.
 *
 * For dist <= 1.0 the result is the cubic a + b*dist + c*dist^2 + d*dist^3
 * (Horner form). For larger dist the coefficients are transformed once per
 * unit step and dist is reduced by 1.0 until it falls into that range.
 *
 * @param a    constant coefficient of the first segment
 * @param b    linear coefficient of the first segment
 * @param c    quadratic coefficient of the first segment
 * @param d    cubic coefficient of the first segment
 * @param dist distance at which to evaluate
 * @return value of the segment that contains dist
 */
static double getSplineCoeff(double a, double b, double c, double d,
                             double dist)
{
    /* Step to the next segment until dist lies within the current one. */
    while (!(dist <= 1.0)) {
        const double next_b =  b + 2.0 * c + 3.0 * d;
        const double next_c =  c + 3.0 * d;
        const double next_d = -b - 3.0 * c - 6.0 * d;

        a     = 0.0;
        b     = next_b;
        c     = next_c;
        d     = next_d;
        dist -= 1.0;
    }

    return ((d * dist + c) * dist + b) * dist + a;
}
||
253 | |||
254 | static av_cold int get_local_pos(SwsContext *s, int chr_subsample, int pos, int dir) |
||
255 | { |
||
256 | if (pos < 0) { |
||
257 | pos = (128 << chr_subsample) - 128; |
||
258 | } |
||
259 | pos += 128; // relative to ideal left edge |
||
260 | return pos >> chr_subsample; |
||
261 | } |
||
262 | |||
/*
 * Build the scaling filter for one direction: for each of the dstW output
 * samples, a start position in the source and a row of 16-bit coefficients.
 *
 * outFilter     - receives a newly allocated array of
 *                 *outFilterSize * (dstW + 3) int16_t coefficients
 * filterPos     - receives a newly allocated array of dstW + 3 source positions
 * outFilterSize - receives the final number of taps per output sample
 * xInc          - source step per output sample; 0x10000 (+-10) with equal
 *                 srcPos/dstPos is treated as unscaled, and values
 *                 <= 1 << 16 are treated as upscaling
 * srcW, dstW    - number of source / destination samples
 * filterAlign   - the tap count is rounded up to a multiple of this value
 *                 (the rounding uses a mask, so presumably a power of two)
 * one           - normalization target: each row is scaled so that its
 *                 coefficients add up to approximately this value
 * flags         - SWS_* flags; select the kernel and also SWS_ACCURATE_RND,
 *                 SWS_BITEXACT and SWS_PRINT_INFO behaviour
 * cpu_flags     - CPU feature flags, used only to adjust filterAlign
 * srcFilter     - optional vector convolved into every row (may be NULL)
 * dstFilter     - optional vector; only its length is used here (see FIXME)
 * param         - kernel parameters; SWS_PARAM_DEFAULT selects the built-in default
 * srcPos,dstPos - position offsets used for the initial source coordinate
 *
 * Returns 0 on success and -1 on failure. The temporary buffers filter and
 * filter2 are always released; *filterPos and *outFilter are not freed here
 * on failure.
 */
263 | static av_cold int initFilter(int16_t **outFilter, int32_t **filterPos, |
||
264 | int *outFilterSize, int xInc, int srcW, |
||
265 | int dstW, int filterAlign, int one, |
||
266 | int flags, int cpu_flags, |
||
267 | SwsVector *srcFilter, SwsVector *dstFilter, |
||
268 | double param[2], int srcPos, int dstPos) |
||
269 | { |
||
270 | int i; |
||
271 | int filterSize; |
||
272 | int filter2Size; |
||
273 | int minFilterSize; |
||
274 | int64_t *filter = NULL; |
||
275 | int64_t *filter2 = NULL; |
||
/* fone is the fixed-point "1.0" of the intermediate 64-bit coefficients:
 * 1 << 54, lowered by up to 8 bits according to log2(srcW / dstW). */
276 | const int64_t fone = 1LL << (54 - FFMIN(av_log2(srcW/dstW), 8)); |
||
277 | int ret = -1; |
||
278 | |||
279 | emms_c(); // FIXME should not be required but IS (even for non-MMX versions) |
||
280 | |||
281 | // NOTE: the +3 is for the MMX(+1) / SSE(+3) scaler which reads over the end |
||
282 | FF_ALLOC_OR_GOTO(NULL, *filterPos, (dstW + 3) * sizeof(**filterPos), fail); |
||
283 | |||
284 | if (FFABS(xInc - 0x10000) < 10 && srcPos == dstPos) { // unscaled |
||
285 | int i; |
||
286 | filterSize = 1; |
||
287 | FF_ALLOCZ_OR_GOTO(NULL, filter, |
||
288 | dstW * sizeof(*filter) * filterSize, fail); |
||
289 | |||
290 | for (i = 0; i < dstW; i++) { |
||
291 | filter[i * filterSize] = fone; |
||
292 | (*filterPos)[i] = i; |
||
293 | } |
||
294 | } else if (flags & SWS_POINT) { // lame looking point sampling mode |
||
295 | int i; |
||
296 | int64_t xDstInSrc; |
||
297 | filterSize = 1; |
||
298 | FF_ALLOC_OR_GOTO(NULL, filter, |
||
299 | dstW * sizeof(*filter) * filterSize, fail); |
||
300 | |||
301 | xDstInSrc = ((dstPos*(int64_t)xInc)>>8) - ((srcPos*0x8000LL)>>7); |
||
302 | for (i = 0; i < dstW; i++) { |
||
303 | int xx = (xDstInSrc - ((filterSize - 1) << 15) + (1 << 15)) >> 16; |
||
304 | |||
305 | (*filterPos)[i] = xx; |
||
306 | filter[i] = fone; |
||
307 | xDstInSrc += xInc; |
||
308 | } |
||
309 | } else if ((xInc <= (1 << 16) && (flags & SWS_AREA)) || |
||
310 | (flags & SWS_FAST_BILINEAR)) { // bilinear upscale |
||
311 | int i; |
||
312 | int64_t xDstInSrc; |
||
313 | filterSize = 2; |
||
314 | FF_ALLOC_OR_GOTO(NULL, filter, |
||
315 | dstW * sizeof(*filter) * filterSize, fail); |
||
316 | |||
317 | xDstInSrc = ((dstPos*(int64_t)xInc)>>8) - ((srcPos*0x8000LL)>>7); |
||
318 | for (i = 0; i < dstW; i++) { |
||
319 | int xx = (xDstInSrc - ((filterSize - 1) << 15) + (1 << 15)) >> 16; |
||
320 | int j; |
||
321 | |||
322 | (*filterPos)[i] = xx; |
||
323 | // bilinear upscale / linear interpolate / area averaging |
||
324 | for (j = 0; j < filterSize; j++) { |
||
325 | int64_t coeff= fone - FFABS(((int64_t)xx<<16) - xDstInSrc)*(fone>>16); |
||
326 | if (coeff < 0) |
||
327 | coeff = 0; |
||
328 | filter[i * filterSize + j] = coeff; |
||
329 | xx++; |
||
330 | } |
||
331 | xDstInSrc += xInc; |
||
332 | } |
||
333 | } else { |
||
/* General case: pick a support width (sizeFactor) for the kernel selected
 * by flags, then evaluate that kernel at every tap of every output sample. */
334 | int64_t xDstInSrc; |
||
335 | int sizeFactor; |
||
336 | |||
337 | if (flags & SWS_BICUBIC) |
||
338 | sizeFactor = 4; |
||
339 | else if (flags & SWS_X) |
||
340 | sizeFactor = 8; |
||
341 | else if (flags & SWS_AREA) |
||
342 | sizeFactor = 1; // downscale only, for upscale it is bilinear |
||
343 | else if (flags & SWS_GAUSS) |
||
344 | sizeFactor = 8; // infinite ;) |
||
345 | else if (flags & SWS_LANCZOS) |
||
346 | sizeFactor = param[0] != SWS_PARAM_DEFAULT ? ceil(2 * param[0]) : 6; |
||
347 | else if (flags & SWS_SINC) |
||
348 | sizeFactor = 20; // infinite ;) |
||
349 | else if (flags & SWS_SPLINE) |
||
350 | sizeFactor = 20; // infinite ;) |
||
351 | else if (flags & SWS_BILINEAR) |
||
352 | sizeFactor = 2; |
||
353 | else { |
||
354 | av_assert0(0); |
||
355 | } |
||
356 | |||
357 | if (xInc <= 1 << 16) |
||
358 | filterSize = 1 + sizeFactor; // upscale |
||
359 | else |
||
360 | filterSize = 1 + (sizeFactor * srcW + dstW - 1) / dstW; |
||
361 | |||
/* keep the tap count within [1, srcW - 2] (the lower bound wins) */
362 | filterSize = FFMIN(filterSize, srcW - 2); |
||
363 | filterSize = FFMAX(filterSize, 1); |
||
364 | |||
365 | FF_ALLOC_OR_GOTO(NULL, filter, |
||
366 | dstW * sizeof(*filter) * filterSize, fail); |
||
367 | |||
368 | xDstInSrc = ((dstPos*(int64_t)xInc)>>7) - ((srcPos*0x10000LL)>>7); |
||
369 | for (i = 0; i < dstW; i++) { |
||
370 | int xx = (xDstInSrc - ((filterSize - 2) << 16)) / (1 << 17); |
||
371 | int j; |
||
372 | (*filterPos)[i] = xx; |
||
373 | for (j = 0; j < filterSize; j++) { |
||
/* d: distance between tap xx and the output sample position, fixed point
 * with 1 << 30 as one unit; floatd below is the same value as a double */
374 | int64_t d = (FFABS(((int64_t)xx << 17) - xDstInSrc)) << 13; |
||
375 | double floatd; |
||
376 | int64_t coeff; |
||
377 | |||
/* when downscaling, rescale the distance by dstW / srcW */
378 | if (xInc > 1 << 16) |
||
379 | d = d * dstW / srcW; |
||
380 | floatd = d * (1.0 / (1 << 30)); |
||
381 | |||
382 | if (flags & SWS_BICUBIC) { |
||
383 | int64_t B = (param[0] != SWS_PARAM_DEFAULT ? param[0] : 0) * (1 << 24); |
||
384 | int64_t C = (param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6) * (1 << 24); |
||
385 | |||
386 | if (d >= 1LL << 31) { |
||
387 | coeff = 0.0; |
||
388 | } else { |
||
389 | int64_t dd = (d * d) >> 30; |
||
390 | int64_t ddd = (dd * d) >> 30; |
||
391 | |||
392 | if (d < 1LL << 30) |
||
393 | coeff = (12 * (1 << 24) - 9 * B - 6 * C) * ddd + |
||
394 | (-18 * (1 << 24) + 12 * B + 6 * C) * dd + |
||
395 | (6 * (1 << 24) - 2 * B) * (1 << 30); |
||
396 | else |
||
397 | coeff = (-B - 6 * C) * ddd + |
||
398 | (6 * B + 30 * C) * dd + |
||
399 | (-12 * B - 48 * C) * d + |
||
400 | (8 * B + 24 * C) * (1 << 30); |
||
401 | } |
||
402 | coeff /= (1LL<<54)/fone; |
||
403 | } |
||
404 | #if 0 |
||
405 | else if (flags & SWS_X) { |
||
406 | double p = param ? param * 0.01 : 0.3; |
||
407 | coeff = d ? sin(d * M_PI) / (d * M_PI) : 1.0; |
||
408 | coeff *= pow(2.0, -p * d * d); |
||
409 | } |
||
410 | #endif |
||
411 | else if (flags & SWS_X) { |
||
412 | double A = param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0; |
||
413 | double c; |
||
414 | |||
415 | if (floatd < 1.0) |
||
416 | c = cos(floatd * M_PI); |
||
417 | else |
||
418 | c = -1.0; |
||
419 | if (c < 0.0) |
||
420 | c = -pow(-c, A); |
||
421 | else |
||
422 | c = pow(c, A); |
||
423 | coeff = (c * 0.5 + 0.5) * fone; |
||
424 | } else if (flags & SWS_AREA) { |
||
425 | int64_t d2 = d - (1 << 29); |
||
426 | if (d2 * xInc < -(1LL << (29 + 16))) |
||
427 | coeff = 1.0 * (1LL << (30 + 16)); |
||
428 | else if (d2 * xInc < (1LL << (29 + 16))) |
||
429 | coeff = -d2 * xInc + (1LL << (29 + 16)); |
||
430 | else |
||
431 | coeff = 0.0; |
||
432 | coeff *= fone >> (30 + 16); |
||
433 | } else if (flags & SWS_GAUSS) { |
||
434 | double p = param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0; |
||
435 | coeff = (pow(2.0, -p * floatd * floatd)) * fone; |
||
436 | } else if (flags & SWS_SINC) { |
||
437 | coeff = (d ? sin(floatd * M_PI) / (floatd * M_PI) : 1.0) * fone; |
||
438 | } else if (flags & SWS_LANCZOS) { |
||
439 | double p = param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0; |
||
440 | coeff = (d ? sin(floatd * M_PI) * sin(floatd * M_PI / p) / |
||
441 | (floatd * floatd * M_PI * M_PI / p) : 1.0) * fone; |
||
442 | if (floatd > p) |
||
443 | coeff = 0; |
||
444 | } else if (flags & SWS_BILINEAR) { |
||
445 | coeff = (1 << 30) - d; |
||
446 | if (coeff < 0) |
||
447 | coeff = 0; |
||
448 | coeff *= fone >> 30; |
||
449 | } else if (flags & SWS_SPLINE) { |
||
450 | double p = -2.196152422706632; |
||
451 | coeff = getSplineCoeff(1.0, 0.0, p, -p - 1.0, floatd) * fone; |
||
452 | } else { |
||
453 | av_assert0(0); |
||
454 | } |
||
455 | |||
456 | filter[i * filterSize + j] = coeff; |
||
457 | xx++; |
||
458 | } |
||
459 | xDstInSrc += 2 * xInc; |
||
460 | } |
||
461 | } |
||
462 | |||
463 | /* apply src & dst Filter to filter -> filter2 |
||
464 | * av_free(filter); |
||
465 | */ |
||
466 | av_assert0(filterSize > 0); |
||
/* filter2 rows are widened by (length - 1) for each optional vector given */
467 | filter2Size = filterSize; |
||
468 | if (srcFilter) |
||
469 | filter2Size += srcFilter->length - 1; |
||
470 | if (dstFilter) |
||
471 | filter2Size += dstFilter->length - 1; |
||
472 | av_assert0(filter2Size > 0); |
||
473 | FF_ALLOCZ_OR_GOTO(NULL, filter2, filter2Size * dstW * sizeof(*filter2), fail); |
||
474 | |||
475 | for (i = 0; i < dstW; i++) { |
||
476 | int j, k; |
||
477 | |||
478 | if (srcFilter) { |
||
479 | for (k = 0; k < srcFilter->length; k++) { |
||
480 | for (j = 0; j < filterSize; j++) |
||
481 | filter2[i * filter2Size + k + j] += |
||
482 | srcFilter->coeff[k] * filter[i * filterSize + j]; |
||
483 | } |
||
484 | } else { |
||
485 | for (j = 0; j < filterSize; j++) |
||
486 | filter2[i * filter2Size + j] = filter[i * filterSize + j]; |
||
487 | } |
||
488 | // FIXME dstFilter |
||
489 | |||
/* shift the start position to account for the wider row */
490 | (*filterPos)[i] += (filterSize - 1) / 2 - (filter2Size - 1) / 2; |
||
491 | } |
||
492 | av_freep(&filter); |
||
493 | |||
494 | /* try to reduce the filter-size (step1 find size and shift left) */ |
||
495 | // Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not). |
||
496 | minFilterSize = 0; |
||
497 | for (i = dstW - 1; i >= 0; i--) { |
||
498 | int min = filter2Size; |
||
499 | int j; |
||
500 | int64_t cutOff = 0.0; |
||
501 | |||
502 | /* get rid of near zero elements on the left by shifting left */ |
||
503 | for (j = 0; j < filter2Size; j++) { |
||
504 | int k; |
||
505 | cutOff += FFABS(filter2[i * filter2Size]); |
||
506 | |||
507 | if (cutOff > SWS_MAX_REDUCE_CUTOFF * fone) |
||
508 | break; |
||
509 | |||
510 | /* preserve monotonicity because the core can't handle the |
||
511 | * filter otherwise */ |
||
512 | if (i < dstW - 1 && (*filterPos)[i] >= (*filterPos)[i + 1]) |
||
513 | break; |
||
514 | |||
515 | // move filter coefficients left |
||
516 | for (k = 1; k < filter2Size; k++) |
||
517 | filter2[i * filter2Size + k - 1] = filter2[i * filter2Size + k]; |
||
518 | filter2[i * filter2Size + k - 1] = 0; |
||
519 | (*filterPos)[i]++; |
||
520 | } |
||
521 | |||
522 | cutOff = 0; |
||
523 | /* count near zeros on the right */ |
||
524 | for (j = filter2Size - 1; j > 0; j--) { |
||
525 | cutOff += FFABS(filter2[i * filter2Size + j]); |
||
526 | |||
527 | if (cutOff > SWS_MAX_REDUCE_CUTOFF * fone) |
||
528 | break; |
||
529 | min--; |
||
530 | } |
||
531 | |||
/* minFilterSize ends up as the largest remaining row width over all rows */
532 | if (min > minFilterSize) |
||
533 | minFilterSize = min; |
||
534 | } |
||
535 | |||
536 | if (PPC_ALTIVEC(cpu_flags)) { |
||
537 | // we can handle the special case 4, so we don't want to go the full 8 |
||
538 | if (minFilterSize < 5) |
||
539 | filterAlign = 4; |
||
540 | |||
541 | /* We really don't want to waste our time doing useless computation, so |
||
542 | * fall back on the scalar C code for very small filters. |
||
543 | * Vectorizing is worth it only if you have a decent-sized vector. */ |
||
544 | if (minFilterSize < 3) |
||
545 | filterAlign = 1; |
||
546 | } |
||
547 | |||
548 | if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) { |
||
549 | // special case for unscaled vertical filtering |
||
550 | if (minFilterSize == 1 && filterAlign == 2) |
||
551 | filterAlign = 1; |
||
552 | } |
||
553 | |||
554 | av_assert0(minFilterSize > 0); |
||
/* round the tap count up to the next multiple of filterAlign */
555 | filterSize = (minFilterSize + (filterAlign - 1)) & (~(filterAlign - 1)); |
||
556 | av_assert0(filterSize > 0); |
||
557 | filter = av_malloc(filterSize * dstW * sizeof(*filter)); |
||
/* fail both on an over-large tap count and on allocation failure */
558 | if (filterSize >= MAX_FILTER_SIZE * 16 / |
||
559 | ((flags & SWS_ACCURATE_RND) ? APCK_SIZE : 16) || !filter) { |
||
560 | av_log(NULL, AV_LOG_ERROR, "sws: filterSize %d is too large, try less extreem scaling or increase MAX_FILTER_SIZE and recompile\n", filterSize); |
||
561 | goto fail; |
||
562 | } |
||
563 | *outFilterSize = filterSize; |
||
564 | |||
565 | if (flags & SWS_PRINT_INFO) |
||
566 | av_log(NULL, AV_LOG_VERBOSE, |
||
567 | "SwScaler: reducing / aligning filtersize %d -> %d\n", |
||
568 | filter2Size, filterSize); |
||
569 | /* try to reduce the filter-size (step2 reduce it) */ |
||
570 | for (i = 0; i < dstW; i++) { |
||
571 | int j; |
||
572 | |||
573 | for (j = 0; j < filterSize; j++) { |
||
574 | if (j >= filter2Size) |
||
575 | filter[i * filterSize + j] = 0; |
||
576 | else |
||
577 | filter[i * filterSize + j] = filter2[i * filter2Size + j]; |
||
578 | if ((flags & SWS_BITEXACT) && j >= minFilterSize) |
||
579 | filter[i * filterSize + j] = 0; |
||
580 | } |
||
581 | } |
||
582 | |||
583 | // FIXME try to align filterPos if possible |
||
584 | |||
585 | // fix borders |
||
586 | for (i = 0; i < dstW; i++) { |
||
587 | int j; |
||
588 | if ((*filterPos)[i] < 0) { |
||
589 | // move filter coefficients left to compensate for filterPos |
||
590 | for (j = 1; j < filterSize; j++) { |
||
591 | int left = FFMAX(j + (*filterPos)[i], 0); |
||
592 | filter[i * filterSize + left] += filter[i * filterSize + j]; |
||
593 | filter[i * filterSize + j] = 0; |
||
594 | } |
||
595 | (*filterPos)[i]= 0; |
||
596 | } |
||
597 | |||
598 | if ((*filterPos)[i] + filterSize > srcW) { |
||
599 | int shift = (*filterPos)[i] + filterSize - srcW; |
||
600 | // move filter coefficients right to compensate for filterPos |
||
601 | for (j = filterSize - 2; j >= 0; j--) { |
||
602 | int right = FFMIN(j + shift, filterSize - 1); |
||
603 | filter[i * filterSize + right] += filter[i * filterSize + j]; |
||
604 | filter[i * filterSize + j] = 0; |
||
605 | } |
||
606 | (*filterPos)[i]= srcW - filterSize; |
||
607 | } |
||
608 | } |
||
609 | |||
610 | // Note the +1 is for the MMX scaler which reads over the end |
||
611 | /* align at 16 for AltiVec (needed by hScale_altivec_real) */ |
||
612 | FF_ALLOCZ_OR_GOTO(NULL, *outFilter, |
||
613 | *outFilterSize * (dstW + 3) * sizeof(int16_t), fail); |
||
614 | |||
615 | /* normalize & store in outFilter */ |
||
616 | for (i = 0; i < dstW; i++) { |
||
617 | int j; |
||
618 | int64_t error = 0; |
||
619 | int64_t sum = 0; |
||
620 | |||
621 | for (j = 0; j < filterSize; j++) { |
||
622 | sum += filter[i * filterSize + j]; |
||
623 | } |
||
/* sum becomes the divisor that scales this row to add up to about `one` */
624 | sum = (sum + one / 2) / one; |
||
625 | if (!sum) { |
||
626 | av_log(NULL, AV_LOG_WARNING, "SwScaler: zero vector in scaling\n"); |
||
627 | sum = 1; |
||
628 | } |
||
/* the rounding remainder of each tap is carried into the next tap */
629 | for (j = 0; j < *outFilterSize; j++) { |
||
630 | int64_t v = filter[i * filterSize + j] + error; |
||
631 | int intV = ROUNDED_DIV(v, sum); |
||
632 | (*outFilter)[i * (*outFilterSize) + j] = intV; |
||
633 | error = v - intV * sum; |
||
634 | } |
||
635 | } |
||
636 | |||
637 | (*filterPos)[dstW + 0] = |
||
638 | (*filterPos)[dstW + 1] = |
||
639 | (*filterPos)[dstW + 2] = (*filterPos)[dstW - 1]; /* the MMX/SSE scaler will |
||
640 | * read over the end */ |
||
/* likewise replicate the last coefficient row into the three padding rows */
641 | for (i = 0; i < *outFilterSize; i++) { |
||
642 | int k = (dstW - 1) * (*outFilterSize) + i; |
||
643 | (*outFilter)[k + 1 * (*outFilterSize)] = |
||
644 | (*outFilter)[k + 2 * (*outFilterSize)] = |
||
645 | (*outFilter)[k + 3 * (*outFilterSize)] = (*outFilter)[k]; |
||
646 | } |
||
647 | |||
648 | ret = 0; |
||
649 | |||
/* shared exit path: reached on success (ret == 0) and on every failure */
650 | fail: |
||
651 | if(ret < 0) |
||
652 | av_log(NULL, AV_LOG_ERROR, "sws: initFilter failed\n"); |
||
653 | av_free(filter); |
||
654 | av_free(filter2); |
||
655 | return ret; |
||
656 | } |
||
657 | |||
/*
 * Build (or only measure) the runtime-generated MMXEXT horizontal scaler.
 *
 * The two inline-asm statements below each embed a code fragment that is
 * jumped over rather than executed in place. They only report the fragment's
 * address, its length, and the offsets (relative to the fragment start) of
 * the byte immediately before labels 1 and 2, which are the bytes patched
 * below as the pshufw immediates.
 *
 * For every group of four output pixels one fragment is copied to filterCode
 * and its two immediates are patched; filter[] and filterPos[] are filled at
 * the same time.
 *
 * dstW       - number of output pixels
 * xInc       - source step per output pixel, 16.16 fixed point
 *              (positions are obtained with >> 16)
 * filterCode - destination for the generated code; when NULL nothing is
 *              written and only the size is accumulated
 * filter     - receives one 7-bit weight per output pixel
 * filterPos  - receives one source position per group at index i / 2
 * numSplits  - only dstW / numSplits pixels are processed
 *
 * Returns fragmentPos + 1: the summed length of the copied fragments plus one
 * byte (presumably for the trailing RET - confirm against the callers).
 */
658 | #if HAVE_MMXEXT_INLINE |
||
659 | static av_cold int init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode, |
||
660 | int16_t *filter, int32_t *filterPos, |
||
661 | int numSplits) |
||
662 | { |
||
663 | uint8_t *fragmentA; |
||
664 | x86_reg imm8OfPShufW1A; |
||
665 | x86_reg imm8OfPShufW2A; |
||
666 | x86_reg fragmentLengthA; |
||
667 | uint8_t *fragmentB; |
||
668 | x86_reg imm8OfPShufW1B; |
||
669 | x86_reg imm8OfPShufW2B; |
||
670 | x86_reg fragmentLengthB; |
||
671 | int fragmentPos; |
||
672 | |||
673 | int xpos, i; |
||
674 | |||
675 | // create an optimized horizontal scaling routine |
||
676 | /* This scaler is made of runtime-generated MMXEXT code using specially tuned |
||
677 | * pshufw instructions. For every four output pixels, if four input pixels |
||
678 | * are enough for the fast bilinear scaling, then a chunk of fragmentB is |
||
679 | * used. If five input pixels are needed, then a chunk of fragmentA is used. |
||
680 | */ |
||
681 | |||
682 | // code fragment |
||
683 | |||
/* fragment A: loads a second group of pixels from offset 1 */
684 | __asm__ volatile ( |
||
685 | "jmp 9f \n\t" |
||
686 | // Begin |
||
687 | "0: \n\t" |
||
688 | "movq (%%"REG_d", %%"REG_a"), %%mm3 \n\t" |
||
689 | "movd (%%"REG_c", %%"REG_S"), %%mm0 \n\t" |
||
690 | "movd 1(%%"REG_c", %%"REG_S"), %%mm1 \n\t" |
||
691 | "punpcklbw %%mm7, %%mm1 \n\t" |
||
692 | "punpcklbw %%mm7, %%mm0 \n\t" |
||
693 | "pshufw $0xFF, %%mm1, %%mm1 \n\t" |
||
694 | "1: \n\t" |
||
695 | "pshufw $0xFF, %%mm0, %%mm0 \n\t" |
||
696 | "2: \n\t" |
||
697 | "psubw %%mm1, %%mm0 \n\t" |
||
698 | "movl 8(%%"REG_b", %%"REG_a"), %%esi \n\t" |
||
699 | "pmullw %%mm3, %%mm0 \n\t" |
||
700 | "psllw $7, %%mm1 \n\t" |
||
701 | "paddw %%mm1, %%mm0 \n\t" |
||
702 | |||
703 | "movq %%mm0, (%%"REG_D", %%"REG_a") \n\t" |
||
704 | |||
705 | "add $8, %%"REG_a" \n\t" |
||
706 | // End |
||
707 | "9: \n\t" |
||
708 | // "int $3 \n\t" |
||
709 | "lea " LOCAL_MANGLE(0b) ", %0 \n\t" |
||
710 | "lea " LOCAL_MANGLE(1b) ", %1 \n\t" |
||
711 | "lea " LOCAL_MANGLE(2b) ", %2 \n\t" |
||
712 | "dec %1 \n\t" |
||
713 | "dec %2 \n\t" |
||
714 | "sub %0, %1 \n\t" |
||
715 | "sub %0, %2 \n\t" |
||
716 | "lea " LOCAL_MANGLE(9b) ", %3 \n\t" |
||
717 | "sub %0, %3 \n\t" |
||
718 | |||
719 | |||
720 | : "=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A), |
||
721 | "=r" (fragmentLengthA) |
||
722 | ); |
||
723 | |||
/* fragment B: same as A but derives both operands from a single load */
724 | __asm__ volatile ( |
||
725 | "jmp 9f \n\t" |
||
726 | // Begin |
||
727 | "0: \n\t" |
||
728 | "movq (%%"REG_d", %%"REG_a"), %%mm3 \n\t" |
||
729 | "movd (%%"REG_c", %%"REG_S"), %%mm0 \n\t" |
||
730 | "punpcklbw %%mm7, %%mm0 \n\t" |
||
731 | "pshufw $0xFF, %%mm0, %%mm1 \n\t" |
||
732 | "1: \n\t" |
||
733 | "pshufw $0xFF, %%mm0, %%mm0 \n\t" |
||
734 | "2: \n\t" |
||
735 | "psubw %%mm1, %%mm0 \n\t" |
||
736 | "movl 8(%%"REG_b", %%"REG_a"), %%esi \n\t" |
||
737 | "pmullw %%mm3, %%mm0 \n\t" |
||
738 | "psllw $7, %%mm1 \n\t" |
||
739 | "paddw %%mm1, %%mm0 \n\t" |
||
740 | |||
741 | "movq %%mm0, (%%"REG_D", %%"REG_a") \n\t" |
||
742 | |||
743 | "add $8, %%"REG_a" \n\t" |
||
744 | // End |
||
745 | "9: \n\t" |
||
746 | // "int $3 \n\t" |
||
747 | "lea " LOCAL_MANGLE(0b) ", %0 \n\t" |
||
748 | "lea " LOCAL_MANGLE(1b) ", %1 \n\t" |
||
749 | "lea " LOCAL_MANGLE(2b) ", %2 \n\t" |
||
750 | "dec %1 \n\t" |
||
751 | "dec %2 \n\t" |
||
752 | "sub %0, %1 \n\t" |
||
753 | "sub %0, %2 \n\t" |
||
754 | "lea " LOCAL_MANGLE(9b) ", %3 \n\t" |
||
755 | "sub %0, %3 \n\t" |
||
756 | |||
757 | |||
758 | : "=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B), |
||
759 | "=r" (fragmentLengthB) |
||
760 | ); |
||
761 | |||
762 | xpos = 0; // lumXInc/2 - 0x8000; // difference between pixel centers |
||
763 | fragmentPos = 0; |
||
764 | |||
765 | for (i = 0; i < dstW / numSplits; i++) { |
||
766 | int xx = xpos >> 16; |
||
767 | |||
/* one fragment is emitted per group of four output pixels */
768 | if ((i & 3) == 0) { |
||
/* a..d: source offsets of the four pixels relative to xx */
769 | int a = 0; |
||
770 | int b = ((xpos + xInc) >> 16) - xx; |
||
771 | int c = ((xpos + xInc * 2) >> 16) - xx; |
||
772 | int d = ((xpos + xInc * 3) >> 16) - xx; |
||
/* d + 1 < 4 selects fragment B and sets inc to 1, otherwise fragment A */
773 | int inc = (d + 1 < 4); |
||
774 | uint8_t *fragment = (d + 1 < 4) ? fragmentB : fragmentA; |
||
775 | x86_reg imm8OfPShufW1 = (d + 1 < 4) ? imm8OfPShufW1B : imm8OfPShufW1A; |
||
776 | x86_reg imm8OfPShufW2 = (d + 1 < 4) ? imm8OfPShufW2B : imm8OfPShufW2A; |
||
777 | x86_reg fragmentLength = (d + 1 < 4) ? fragmentLengthB : fragmentLengthA; |
||
778 | int maxShift = 3 - (d + inc); |
||
779 | int shift = 0; |
||
780 | |||
781 | if (filterCode) { |
||
/* weights: inverted 16-bit fraction of each position, reduced to 7 bits */
782 | filter[i] = ((xpos & 0xFFFF) ^ 0xFFFF) >> 9; |
||
783 | filter[i + 1] = (((xpos + xInc) & 0xFFFF) ^ 0xFFFF) >> 9; |
||
784 | filter[i + 2] = (((xpos + xInc * 2) & 0xFFFF) ^ 0xFFFF) >> 9; |
||
785 | filter[i + 3] = (((xpos + xInc * 3) & 0xFFFF) ^ 0xFFFF) >> 9; |
||
786 | filterPos[i / 2] = xx; |
||
787 | |||
788 | memcpy(filterCode + fragmentPos, fragment, fragmentLength); |
||
789 | |||
/* patch the two immediates: four 2-bit offsets packed into one byte each */
790 | filterCode[fragmentPos + imm8OfPShufW1] = (a + inc) | |
||
791 | ((b + inc) << 2) | |
||
792 | ((c + inc) << 4) | |
||
793 | ((d + inc) << 6); |
||
794 | filterCode[fragmentPos + imm8OfPShufW2] = a | (b << 2) | |
||
795 | (c << 4) | |
||
796 | (d << 6); |
||
797 | |||
798 | if (i + 4 - inc >= dstW) |
||
799 | shift = maxShift; // avoid overread |
||
800 | else if ((filterPos[i / 2] & 3) <= maxShift) |
||
801 | shift = filterPos[i / 2] & 3; // align |
||
802 | |||
/* 0x55 * shift adds shift to each 2-bit offset; filterPos moves back to match */
803 | if (shift && i >= shift) { |
||
804 | filterCode[fragmentPos + imm8OfPShufW1] += 0x55 * shift; |
||
805 | filterCode[fragmentPos + imm8OfPShufW2] += 0x55 * shift; |
||
806 | filterPos[i / 2] -= shift; |
||
807 | } |
||
808 | } |
||
809 | |||
810 | fragmentPos += fragmentLength; |
||
811 | |||
/* RET is written after the newest fragment; the next fragment overwrites it */
812 | if (filterCode) |
||
813 | filterCode[fragmentPos] = RET; |
||
814 | } |
||
815 | xpos += xInc; |
||
816 | } |
||
817 | if (filterCode) |
||
818 | filterPos[((i / 2) + 1) & (~1)] = xpos >> 16; // needed to jump to the next part |
||
819 | |||
820 | return fragmentPos + 1; |
||
821 | } |
||
822 | #endif /* HAVE_MMXEXT_INLINE */ |
||
823 | |||
824 | static void fill_rgb2yuv_table(SwsContext *c, const int table[4], int dstRange) |
||
825 | { |
||
826 | int64_t W, V, Z, Cy, Cu, Cv; |
||
827 | int64_t vr = table[0]; |
||
828 | int64_t ub = table[1]; |
||
829 | int64_t ug = -table[2]; |
||
830 | int64_t vg = -table[3]; |
||
831 | int64_t ONE = 65536; |
||
832 | int64_t cy = ONE; |
||
833 | uint8_t *p = (uint8_t*)c->input_rgb2yuv_table; |
||
834 | int i; |
||
835 | static const int8_t map[] = { |
||
836 | BY_IDX, GY_IDX, -1 , BY_IDX, BY_IDX, GY_IDX, -1 , BY_IDX, |
||
837 | RY_IDX, -1 , GY_IDX, RY_IDX, RY_IDX, -1 , GY_IDX, RY_IDX, |
||
838 | RY_IDX, GY_IDX, -1 , RY_IDX, RY_IDX, GY_IDX, -1 , RY_IDX, |
||
839 | BY_IDX, -1 , GY_IDX, BY_IDX, BY_IDX, -1 , GY_IDX, BY_IDX, |
||
840 | BU_IDX, GU_IDX, -1 , BU_IDX, BU_IDX, GU_IDX, -1 , BU_IDX, |
||
841 | RU_IDX, -1 , GU_IDX, RU_IDX, RU_IDX, -1 , GU_IDX, RU_IDX, |
||
842 | RU_IDX, GU_IDX, -1 , RU_IDX, RU_IDX, GU_IDX, -1 , RU_IDX, |
||
843 | BU_IDX, -1 , GU_IDX, BU_IDX, BU_IDX, -1 , GU_IDX, BU_IDX, |
||
844 | BV_IDX, GV_IDX, -1 , BV_IDX, BV_IDX, GV_IDX, -1 , BV_IDX, |
||
845 | RV_IDX, -1 , GV_IDX, RV_IDX, RV_IDX, -1 , GV_IDX, RV_IDX, |
||
846 | RV_IDX, GV_IDX, -1 , RV_IDX, RV_IDX, GV_IDX, -1 , RV_IDX, |
||
847 | BV_IDX, -1 , GV_IDX, BV_IDX, BV_IDX, -1 , GV_IDX, BV_IDX, |
||
848 | RY_IDX, BY_IDX, RY_IDX, BY_IDX, RY_IDX, BY_IDX, RY_IDX, BY_IDX, |
||
849 | BY_IDX, RY_IDX, BY_IDX, RY_IDX, BY_IDX, RY_IDX, BY_IDX, RY_IDX, |
||
850 | GY_IDX, -1 , GY_IDX, -1 , GY_IDX, -1 , GY_IDX, -1 , |
||
851 | -1 , GY_IDX, -1 , GY_IDX, -1 , GY_IDX, -1 , GY_IDX, |
||
852 | RU_IDX, BU_IDX, RU_IDX, BU_IDX, RU_IDX, BU_IDX, RU_IDX, BU_IDX, |
||
853 | BU_IDX, RU_IDX, BU_IDX, RU_IDX, BU_IDX, RU_IDX, BU_IDX, RU_IDX, |
||
854 | GU_IDX, -1 , GU_IDX, -1 , GU_IDX, -1 , GU_IDX, -1 , |
||
855 | -1 , GU_IDX, -1 , GU_IDX, -1 , GU_IDX, -1 , GU_IDX, |
||
856 | RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX, |
||
857 | BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX, |
||
858 | GV_IDX, -1 , GV_IDX, -1 , GV_IDX, -1 , GV_IDX, -1 , |
||
859 | -1 , GV_IDX, -1 , GV_IDX, -1 , GV_IDX, -1 , GV_IDX, //23 |
||
860 | -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //24 |
||
861 | -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //25 |
||
862 | -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //26 |
||
863 | -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //27 |
||
864 | -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //28 |
||
865 | -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //29 |
||
866 | -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //30 |
||
867 | -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //31 |
||
868 | BY_IDX, GY_IDX, RY_IDX, -1 , -1 , -1 , -1 , -1 , //32 |
||
869 | BU_IDX, GU_IDX, RU_IDX, -1 , -1 , -1 , -1 , -1 , //33 |
||
870 | BV_IDX, GV_IDX, RV_IDX, -1 , -1 , -1 , -1 , -1 , //34 |
||
871 | }; |
||
872 | |||
873 | dstRange = 0; //FIXME range = 1 is handled elsewhere |
||
874 | |||
875 | if (!dstRange) { |
||
876 | cy = cy * 255 / 219; |
||
877 | } else { |
||
878 | vr = vr * 224 / 255; |
||
879 | ub = ub * 224 / 255; |
||
880 | ug = ug * 224 / 255; |
||
881 | vg = vg * 224 / 255; |
||
882 | } |
||
883 | W = ROUNDED_DIV(ONE*ONE*ug, ub); |
||
884 | V = ROUNDED_DIV(ONE*ONE*vg, vr); |
||
885 | Z = ONE*ONE-W-V; |
||
886 | |||
887 | Cy = ROUNDED_DIV(cy*Z, ONE); |
||
888 | Cu = ROUNDED_DIV(ub*Z, ONE); |
||
889 | Cv = ROUNDED_DIV(vr*Z, ONE); |
||
890 | |||
891 | c->input_rgb2yuv_table[RY_IDX] = -ROUNDED_DIV((1 << RGB2YUV_SHIFT)*V , Cy); |
||
892 | c->input_rgb2yuv_table[GY_IDX] = ROUNDED_DIV((1 << RGB2YUV_SHIFT)*ONE*ONE , Cy); |
||
893 | c->input_rgb2yuv_table[BY_IDX] = -ROUNDED_DIV((1 << RGB2YUV_SHIFT)*W , Cy); |
||
894 | |||
895 | c->input_rgb2yuv_table[RU_IDX] = ROUNDED_DIV((1 << RGB2YUV_SHIFT)*V , Cu); |
||
896 | c->input_rgb2yuv_table[GU_IDX] = -ROUNDED_DIV((1 << RGB2YUV_SHIFT)*ONE*ONE , Cu); |
||
897 | c->input_rgb2yuv_table[BU_IDX] = ROUNDED_DIV((1 << RGB2YUV_SHIFT)*(Z+W) , Cu); |
||
898 | |||
899 | c->input_rgb2yuv_table[RV_IDX] = ROUNDED_DIV((1 << RGB2YUV_SHIFT)*(V+Z) , Cv); |
||
900 | c->input_rgb2yuv_table[GV_IDX] = -ROUNDED_DIV((1 << RGB2YUV_SHIFT)*ONE*ONE , Cv); |
||
901 | c->input_rgb2yuv_table[BV_IDX] = ROUNDED_DIV((1 << RGB2YUV_SHIFT)*W , Cv); |
||
902 | |||
903 | if(/*!dstRange && */!memcmp(table, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], sizeof(ff_yuv2rgb_coeffs[SWS_CS_DEFAULT]))) { |
||
904 | c->input_rgb2yuv_table[BY_IDX] = ((int)(0.114 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); |
||
905 | c->input_rgb2yuv_table[BV_IDX] = (-(int)(0.081 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); |
||
906 | c->input_rgb2yuv_table[BU_IDX] = ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); |
||
907 | c->input_rgb2yuv_table[GY_IDX] = ((int)(0.587 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); |
||
908 | c->input_rgb2yuv_table[GV_IDX] = (-(int)(0.419 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); |
||
909 | c->input_rgb2yuv_table[GU_IDX] = (-(int)(0.331 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); |
||
910 | c->input_rgb2yuv_table[RY_IDX] = ((int)(0.299 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); |
||
911 | c->input_rgb2yuv_table[RV_IDX] = ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); |
||
912 | c->input_rgb2yuv_table[RU_IDX] = (-(int)(0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); |
||
913 | } |
||
914 | for(i=0; i |
||
915 | AV_WL16(p + 16*4 + 2*i, map[i] >= 0 ? c->input_rgb2yuv_table[map[i]] : 0); |
||
916 | } |
||
917 | |||
918 | static void fill_xyztables(struct SwsContext *c) |
||
919 | { |
||
920 | int i; |
||
921 | double xyzgamma = XYZ_GAMMA; |
||
922 | double rgbgamma = 1.0 / RGB_GAMMA; |
||
923 | double xyzgammainv = 1.0 / XYZ_GAMMA; |
||
924 | double rgbgammainv = RGB_GAMMA; |
||
925 | static const int16_t xyz2rgb_matrix[3][4] = { |
||
926 | {13270, -6295, -2041}, |
||
927 | {-3969, 7682, 170}, |
||
928 | { 228, -835, 4329} }; |
||
929 | static const int16_t rgb2xyz_matrix[3][4] = { |
||
930 | {1689, 1464, 739}, |
||
931 | { 871, 2929, 296}, |
||
932 | { 79, 488, 3891} }; |
||
933 | static int16_t xyzgamma_tab[4096], rgbgamma_tab[4096], xyzgammainv_tab[4096], rgbgammainv_tab[4096]; |
||
934 | |||
935 | memcpy(c->xyz2rgb_matrix, xyz2rgb_matrix, sizeof(c->xyz2rgb_matrix)); |
||
936 | memcpy(c->rgb2xyz_matrix, rgb2xyz_matrix, sizeof(c->rgb2xyz_matrix)); |
||
937 | c->xyzgamma = xyzgamma_tab; |
||
938 | c->rgbgamma = rgbgamma_tab; |
||
939 | c->xyzgammainv = xyzgammainv_tab; |
||
940 | c->rgbgammainv = rgbgammainv_tab; |
||
941 | |||
942 | if (rgbgamma_tab[4095]) |
||
943 | return; |
||
944 | |||
945 | /* set gamma vectors */ |
||
946 | for (i = 0; i < 4096; i++) { |
||
947 | xyzgamma_tab[i] = lrint(pow(i / 4095.0, xyzgamma) * 4095.0); |
||
948 | rgbgamma_tab[i] = lrint(pow(i / 4095.0, rgbgamma) * 4095.0); |
||
949 | xyzgammainv_tab[i] = lrint(pow(i / 4095.0, xyzgammainv) * 4095.0); |
||
950 | rgbgammainv_tab[i] = lrint(pow(i / 4095.0, rgbgammainv) * 4095.0); |
||
951 | } |
||
952 | } |
||
953 | |||
954 | int sws_setColorspaceDetails(struct SwsContext *c, const int inv_table[4], |
||
955 | int srcRange, const int table[4], int dstRange, |
||
956 | int brightness, int contrast, int saturation) |
||
957 | { |
||
958 | const AVPixFmtDescriptor *desc_dst; |
||
959 | const AVPixFmtDescriptor *desc_src; |
||
960 | memmove(c->srcColorspaceTable, inv_table, sizeof(int) * 4); |
||
961 | memmove(c->dstColorspaceTable, table, sizeof(int) * 4); |
||
962 | |||
963 | handle_formats(c); |
||
964 | desc_dst = av_pix_fmt_desc_get(c->dstFormat); |
||
965 | desc_src = av_pix_fmt_desc_get(c->srcFormat); |
||
966 | |||
967 | if(!isYUV(c->dstFormat) && !isGray(c->dstFormat)) |
||
968 | dstRange = 0; |
||
969 | if(!isYUV(c->srcFormat) && !isGray(c->srcFormat)) |
||
970 | srcRange = 0; |
||
971 | |||
972 | c->brightness = brightness; |
||
973 | c->contrast = contrast; |
||
974 | c->saturation = saturation; |
||
975 | c->srcRange = srcRange; |
||
976 | c->dstRange = dstRange; |
||
977 | |||
978 | fill_xyztables(c); |
||
979 | |||
980 | if ((isYUV(c->dstFormat) || isGray(c->dstFormat)) && (isYUV(c->srcFormat) || isGray(c->srcFormat))) |
||
981 | return -1; |
||
982 | |||
983 | c->dstFormatBpp = av_get_bits_per_pixel(desc_dst); |
||
984 | c->srcFormatBpp = av_get_bits_per_pixel(desc_src); |
||
985 | |||
986 | if (!isYUV(c->dstFormat) && !isGray(c->dstFormat)) { |
||
987 | ff_yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness, |
||
988 | contrast, saturation); |
||
989 | // FIXME factorize |
||
990 | |||
991 | if (ARCH_PPC) |
||
992 | ff_yuv2rgb_init_tables_ppc(c, inv_table, brightness, |
||
993 | contrast, saturation); |
||
994 | } |
||
995 | |||
996 | fill_rgb2yuv_table(c, table, dstRange); |
||
997 | |||
998 | return 0; |
||
999 | } |
||
1000 | |||
1001 | int sws_getColorspaceDetails(struct SwsContext *c, int **inv_table, |
||
1002 | int *srcRange, int **table, int *dstRange, |
||
1003 | int *brightness, int *contrast, int *saturation) |
||
1004 | { |
||
1005 | if (!c ) |
||
1006 | return -1; |
||
1007 | |||
1008 | *inv_table = c->srcColorspaceTable; |
||
1009 | *table = c->dstColorspaceTable; |
||
1010 | *srcRange = c->srcRange; |
||
1011 | *dstRange = c->dstRange; |
||
1012 | *brightness = c->brightness; |
||
1013 | *contrast = c->contrast; |
||
1014 | *saturation = c->saturation; |
||
1015 | |||
1016 | return 0; |
||
1017 | } |
||
1018 | |||
1019 | static int handle_jpeg(enum AVPixelFormat *format) |
||
1020 | { |
||
1021 | switch (*format) { |
||
1022 | case AV_PIX_FMT_YUVJ420P: |
||
1023 | *format = AV_PIX_FMT_YUV420P; |
||
1024 | return 1; |
||
1025 | case AV_PIX_FMT_YUVJ411P: |
||
1026 | *format = AV_PIX_FMT_YUV411P; |
||
1027 | return 1; |
||
1028 | case AV_PIX_FMT_YUVJ422P: |
||
1029 | *format = AV_PIX_FMT_YUV422P; |
||
1030 | return 1; |
||
1031 | case AV_PIX_FMT_YUVJ444P: |
||
1032 | *format = AV_PIX_FMT_YUV444P; |
||
1033 | return 1; |
||
1034 | case AV_PIX_FMT_YUVJ440P: |
||
1035 | *format = AV_PIX_FMT_YUV440P; |
||
1036 | return 1; |
||
1037 | case AV_PIX_FMT_GRAY8: |
||
1038 | return 1; |
||
1039 | default: |
||
1040 | return 0; |
||
1041 | } |
||
1042 | } |
||
1043 | |||
1044 | static int handle_0alpha(enum AVPixelFormat *format) |
||
1045 | { |
||
1046 | switch (*format) { |
||
1047 | case AV_PIX_FMT_0BGR : *format = AV_PIX_FMT_ABGR ; return 1; |
||
1048 | case AV_PIX_FMT_BGR0 : *format = AV_PIX_FMT_BGRA ; return 4; |
||
1049 | case AV_PIX_FMT_0RGB : *format = AV_PIX_FMT_ARGB ; return 1; |
||
1050 | case AV_PIX_FMT_RGB0 : *format = AV_PIX_FMT_RGBA ; return 4; |
||
1051 | default: return 0; |
||
1052 | } |
||
1053 | } |
||
1054 | |||
1055 | static int handle_xyz(enum AVPixelFormat *format) |
||
1056 | { |
||
1057 | switch (*format) { |
||
1058 | case AV_PIX_FMT_XYZ12BE : *format = AV_PIX_FMT_RGB48BE; return 1; |
||
1059 | case AV_PIX_FMT_XYZ12LE : *format = AV_PIX_FMT_RGB48LE; return 1; |
||
1060 | default: return 0; |
||
1061 | } |
||
1062 | } |
||
1063 | |||
1064 | static void handle_formats(SwsContext *c) |
||
1065 | { |
||
1066 | c->src0Alpha |= handle_0alpha(&c->srcFormat); |
||
1067 | c->dst0Alpha |= handle_0alpha(&c->dstFormat); |
||
1068 | c->srcXYZ |= handle_xyz(&c->srcFormat); |
||
1069 | c->dstXYZ |= handle_xyz(&c->dstFormat); |
||
1070 | } |
||
1071 | |||
1072 | SwsContext *sws_alloc_context(void) |
||
1073 | { |
||
1074 | SwsContext *c = av_mallocz(sizeof(SwsContext)); |
||
1075 | |||
1076 | if (c) { |
||
1077 | c->av_class = &sws_context_class; |
||
1078 | av_opt_set_defaults(c); |
||
1079 | } |
||
1080 | |||
1081 | return c; |
||
1082 | } |
||
1083 | |||
1084 | av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, |
||
1085 | SwsFilter *dstFilter) |
||
1086 | { |
||
1087 | int i, j; |
||
1088 | int usesVFilter, usesHFilter; |
||
1089 | int unscaled; |
||
1090 | SwsFilter dummyFilter = { NULL, NULL, NULL, NULL }; |
||
1091 | int srcW = c->srcW; |
||
1092 | int srcH = c->srcH; |
||
1093 | int dstW = c->dstW; |
||
1094 | int dstH = c->dstH; |
||
1095 | int dst_stride = FFALIGN(dstW * sizeof(int16_t) + 66, 16); |
||
1096 | int flags, cpu_flags; |
||
1097 | enum AVPixelFormat srcFormat = c->srcFormat; |
||
1098 | enum AVPixelFormat dstFormat = c->dstFormat; |
||
1099 | const AVPixFmtDescriptor *desc_src; |
||
1100 | const AVPixFmtDescriptor *desc_dst; |
||
1101 | |||
1102 | cpu_flags = av_get_cpu_flags(); |
||
1103 | flags = c->flags; |
||
1104 | emms_c(); |
||
1105 | if (!rgb15to16) |
||
1106 | sws_rgb2rgb_init(); |
||
1107 | |||
1108 | unscaled = (srcW == dstW && srcH == dstH); |
||
1109 | |||
1110 | c->srcRange |= handle_jpeg(&c->srcFormat); |
||
1111 | c->dstRange |= handle_jpeg(&c->dstFormat); |
||
1112 | |||
1113 | if (!c->contrast && !c->saturation && !c->dstFormatBpp) |
||
1114 | sws_setColorspaceDetails(c, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], c->srcRange, |
||
1115 | ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], |
||
1116 | c->dstRange, 0, 1 << 16, 1 << 16); |
||
1117 | |||
1118 | if(srcFormat!=c->srcFormat || dstFormat!=c->dstFormat) |
||
1119 | av_log(c, AV_LOG_WARNING, "deprecated pixel format used, make sure you did set range correctly\n"); |
||
1120 | handle_formats(c); |
||
1121 | srcFormat = c->srcFormat; |
||
1122 | dstFormat = c->dstFormat; |
||
1123 | desc_src = av_pix_fmt_desc_get(srcFormat); |
||
1124 | desc_dst = av_pix_fmt_desc_get(dstFormat); |
||
1125 | |||
1126 | if (!(unscaled && sws_isSupportedEndiannessConversion(srcFormat) && |
||
1127 | av_pix_fmt_swap_endianness(srcFormat) == dstFormat)) { |
||
1128 | if (!sws_isSupportedInput(srcFormat)) { |
||
1129 | av_log(c, AV_LOG_ERROR, "%s is not supported as input pixel format\n", |
||
1130 | av_get_pix_fmt_name(srcFormat)); |
||
1131 | return AVERROR(EINVAL); |
||
1132 | } |
||
1133 | if (!sws_isSupportedOutput(dstFormat)) { |
||
1134 | av_log(c, AV_LOG_ERROR, "%s is not supported as output pixel format\n", |
||
1135 | av_get_pix_fmt_name(dstFormat)); |
||
1136 | return AVERROR(EINVAL); |
||
1137 | } |
||
1138 | } |
||
1139 | |||
1140 | i = flags & (SWS_POINT | |
||
1141 | SWS_AREA | |
||
1142 | SWS_BILINEAR | |
||
1143 | SWS_FAST_BILINEAR | |
||
1144 | SWS_BICUBIC | |
||
1145 | SWS_X | |
||
1146 | SWS_GAUSS | |
||
1147 | SWS_LANCZOS | |
||
1148 | SWS_SINC | |
||
1149 | SWS_SPLINE | |
||
1150 | SWS_BICUBLIN); |
||
1151 | |||
1152 | /* provide a default scaler if not set by caller */ |
||
1153 | if (!i) { |
||
1154 | if (dstW < srcW && dstH < srcH) |
||
1155 | flags |= SWS_BICUBIC; |
||
1156 | else if (dstW > srcW && dstH > srcH) |
||
1157 | flags |= SWS_BICUBIC; |
||
1158 | else |
||
1159 | flags |= SWS_BICUBIC; |
||
1160 | c->flags = flags; |
||
1161 | } else if (i & (i - 1)) { |
||
1162 | av_log(c, AV_LOG_ERROR, |
||
1163 | "Exactly one scaler algorithm must be chosen, got %X\n", i); |
||
1164 | return AVERROR(EINVAL); |
||
1165 | } |
||
1166 | /* sanity check */ |
||
1167 | if (srcW < 1 || srcH < 1 || dstW < 1 || dstH < 1) { |
||
1168 | /* FIXME check if these are enough and try to lower them after |
||
1169 | * fixing the relevant parts of the code */ |
||
1170 | av_log(c, AV_LOG_ERROR, "%dx%d -> %dx%d is invalid scaling dimension\n", |
||
1171 | srcW, srcH, dstW, dstH); |
||
1172 | return AVERROR(EINVAL); |
||
1173 | } |
||
1174 | |||
1175 | if (!dstFilter) |
||
1176 | dstFilter = &dummyFilter; |
||
1177 | if (!srcFilter) |
||
1178 | srcFilter = &dummyFilter; |
||
1179 | |||
1180 | c->lumXInc = (((int64_t)srcW << 16) + (dstW >> 1)) / dstW; |
||
1181 | c->lumYInc = (((int64_t)srcH << 16) + (dstH >> 1)) / dstH; |
||
1182 | c->dstFormatBpp = av_get_bits_per_pixel(desc_dst); |
||
1183 | c->srcFormatBpp = av_get_bits_per_pixel(desc_src); |
||
1184 | c->vRounder = 4 * 0x0001000100010001ULL; |
||
1185 | |||
1186 | usesVFilter = (srcFilter->lumV && srcFilter->lumV->length > 1) || |
||
1187 | (srcFilter->chrV && srcFilter->chrV->length > 1) || |
||
1188 | (dstFilter->lumV && dstFilter->lumV->length > 1) || |
||
1189 | (dstFilter->chrV && dstFilter->chrV->length > 1); |
||
1190 | usesHFilter = (srcFilter->lumH && srcFilter->lumH->length > 1) || |
||
1191 | (srcFilter->chrH && srcFilter->chrH->length > 1) || |
||
1192 | (dstFilter->lumH && dstFilter->lumH->length > 1) || |
||
1193 | (dstFilter->chrH && dstFilter->chrH->length > 1); |
||
1194 | |||
1195 | av_pix_fmt_get_chroma_sub_sample(srcFormat, &c->chrSrcHSubSample, &c->chrSrcVSubSample); |
||
1196 | av_pix_fmt_get_chroma_sub_sample(dstFormat, &c->chrDstHSubSample, &c->chrDstVSubSample); |
||
1197 | |||
1198 | if (isAnyRGB(dstFormat) && !(flags&SWS_FULL_CHR_H_INT)) { |
||
1199 | if (dstW&1) { |
||
1200 | av_log(c, AV_LOG_DEBUG, "Forcing full internal H chroma due to odd output size\n"); |
||
1201 | flags |= SWS_FULL_CHR_H_INT; |
||
1202 | c->flags = flags; |
||
1203 | } |
||
1204 | |||
1205 | if ( c->chrSrcHSubSample == 0 |
||
1206 | && c->chrSrcVSubSample == 0 |
||
1207 | && c->dither != SWS_DITHER_BAYER //SWS_FULL_CHR_H_INT is currently not supported with SWS_DITHER_BAYER |
||
1208 | && !(c->flags & SWS_FAST_BILINEAR) |
||
1209 | ) { |
||
1210 | av_log(c, AV_LOG_DEBUG, "Forcing full internal H chroma due to input having non subsampled chroma\n"); |
||
1211 | flags |= SWS_FULL_CHR_H_INT; |
||
1212 | c->flags = flags; |
||
1213 | } |
||
1214 | } |
||
1215 | |||
1216 | if (c->dither == SWS_DITHER_AUTO) { |
||
1217 | if (flags & SWS_ERROR_DIFFUSION) |
||
1218 | c->dither = SWS_DITHER_ED; |
||
1219 | } |
||
1220 | |||
1221 | if(dstFormat == AV_PIX_FMT_BGR4_BYTE || |
||
1222 | dstFormat == AV_PIX_FMT_RGB4_BYTE || |
||
1223 | dstFormat == AV_PIX_FMT_BGR8 || |
||
1224 | dstFormat == AV_PIX_FMT_RGB8) { |
||
1225 | if (c->dither == SWS_DITHER_AUTO) |
||
1226 | c->dither = (flags & SWS_FULL_CHR_H_INT) ? SWS_DITHER_ED : SWS_DITHER_BAYER; |
||
1227 | if (!(flags & SWS_FULL_CHR_H_INT)) { |
||
1228 | if (c->dither == SWS_DITHER_ED) { |
||
1229 | av_log(c, AV_LOG_DEBUG, |
||
1230 | "Desired dithering only supported in full chroma interpolation for destination format '%s'\n", |
||
1231 | av_get_pix_fmt_name(dstFormat)); |
||
1232 | flags |= SWS_FULL_CHR_H_INT; |
||
1233 | c->flags = flags; |
||
1234 | } |
||
1235 | } |
||
1236 | if (flags & SWS_FULL_CHR_H_INT) { |
||
1237 | if (c->dither == SWS_DITHER_BAYER) { |
||
1238 | av_log(c, AV_LOG_DEBUG, |
||
1239 | "Ordered dither is not supported in full chroma interpolation for destination format '%s'\n", |
||
1240 | av_get_pix_fmt_name(dstFormat)); |
||
1241 | c->dither = SWS_DITHER_ED; |
||
1242 | } |
||
1243 | } |
||
1244 | } |
||
1245 | if (isPlanarRGB(dstFormat)) { |
||
1246 | if (!(flags & SWS_FULL_CHR_H_INT)) { |
||
1247 | av_log(c, AV_LOG_DEBUG, |
||
1248 | "%s output is not supported with half chroma resolution, switching to full\n", |
||
1249 | av_get_pix_fmt_name(dstFormat)); |
||
1250 | flags |= SWS_FULL_CHR_H_INT; |
||
1251 | c->flags = flags; |
||
1252 | } |
||
1253 | } |
||
1254 | |||
1255 | /* reuse chroma for 2 pixels RGB/BGR unless user wants full |
||
1256 | * chroma interpolation */ |
||
1257 | if (flags & SWS_FULL_CHR_H_INT && |
||
1258 | isAnyRGB(dstFormat) && |
||
1259 | !isPlanarRGB(dstFormat) && |
||
1260 | dstFormat != AV_PIX_FMT_RGBA && |
||
1261 | dstFormat != AV_PIX_FMT_ARGB && |
||
1262 | dstFormat != AV_PIX_FMT_BGRA && |
||
1263 | dstFormat != AV_PIX_FMT_ABGR && |
||
1264 | dstFormat != AV_PIX_FMT_RGB24 && |
||
1265 | dstFormat != AV_PIX_FMT_BGR24 && |
||
1266 | dstFormat != AV_PIX_FMT_BGR4_BYTE && |
||
1267 | dstFormat != AV_PIX_FMT_RGB4_BYTE && |
||
1268 | dstFormat != AV_PIX_FMT_BGR8 && |
||
1269 | dstFormat != AV_PIX_FMT_RGB8 |
||
1270 | ) { |
||
1271 | av_log(c, AV_LOG_WARNING, |
||
1272 | "full chroma interpolation for destination format '%s' not yet implemented\n", |
||
1273 | av_get_pix_fmt_name(dstFormat)); |
||
1274 | flags &= ~SWS_FULL_CHR_H_INT; |
||
1275 | c->flags = flags; |
||
1276 | } |
||
1277 | if (isAnyRGB(dstFormat) && !(flags & SWS_FULL_CHR_H_INT)) |
||
1278 | c->chrDstHSubSample = 1; |
||
1279 | |||
1280 | // drop some chroma lines if the user wants it |
||
1281 | c->vChrDrop = (flags & SWS_SRC_V_CHR_DROP_MASK) >> |
||
1282 | SWS_SRC_V_CHR_DROP_SHIFT; |
||
1283 | c->chrSrcVSubSample += c->vChrDrop; |
||
1284 | |||
1285 | /* drop every other pixel for chroma calculation unless user |
||
1286 | * wants full chroma */ |
||
1287 | if (isAnyRGB(srcFormat) && !(flags & SWS_FULL_CHR_H_INP) && |
||
1288 | srcFormat != AV_PIX_FMT_RGB8 && srcFormat != AV_PIX_FMT_BGR8 && |
||
1289 | srcFormat != AV_PIX_FMT_RGB4 && srcFormat != AV_PIX_FMT_BGR4 && |
||
1290 | srcFormat != AV_PIX_FMT_RGB4_BYTE && srcFormat != AV_PIX_FMT_BGR4_BYTE && |
||
1291 | srcFormat != AV_PIX_FMT_GBRP9BE && srcFormat != AV_PIX_FMT_GBRP9LE && |
||
1292 | srcFormat != AV_PIX_FMT_GBRP10BE && srcFormat != AV_PIX_FMT_GBRP10LE && |
||
1293 | srcFormat != AV_PIX_FMT_GBRP12BE && srcFormat != AV_PIX_FMT_GBRP12LE && |
||
1294 | srcFormat != AV_PIX_FMT_GBRP14BE && srcFormat != AV_PIX_FMT_GBRP14LE && |
||
1295 | srcFormat != AV_PIX_FMT_GBRP16BE && srcFormat != AV_PIX_FMT_GBRP16LE && |
||
1296 | ((dstW >> c->chrDstHSubSample) <= (srcW >> 1) || |
||
1297 | (flags & SWS_FAST_BILINEAR))) |
||
1298 | c->chrSrcHSubSample = 1; |
||
1299 | |||
1300 | // Note the FF_CEIL_RSHIFT is so that we always round toward +inf. |
||
1301 | c->chrSrcW = FF_CEIL_RSHIFT(srcW, c->chrSrcHSubSample); |
||
1302 | c->chrSrcH = FF_CEIL_RSHIFT(srcH, c->chrSrcVSubSample); |
||
1303 | c->chrDstW = FF_CEIL_RSHIFT(dstW, c->chrDstHSubSample); |
||
1304 | c->chrDstH = FF_CEIL_RSHIFT(dstH, c->chrDstVSubSample); |
||
1305 | |||
1306 | FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, FFALIGN(srcW*2+78, 16) * 2, fail); |
||
1307 | |||
1308 | /* unscaled special cases */ |
||
1309 | if (unscaled && !usesHFilter && !usesVFilter && |
||
1310 | (c->srcRange == c->dstRange || isAnyRGB(dstFormat))) { |
||
1311 | ff_get_unscaled_swscale(c); |
||
1312 | |||
1313 | if (c->swscale) { |
||
1314 | if (flags & SWS_PRINT_INFO) |
||
1315 | av_log(c, AV_LOG_INFO, |
||
1316 | "using unscaled %s -> %s special converter\n", |
||
1317 | av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat)); |
||
1318 | return 0; |
||
1319 | } |
||
1320 | } |
||
1321 | |||
1322 | c->srcBpc = 1 + desc_src->comp[0].depth_minus1; |
||
1323 | if (c->srcBpc < 8) |
||
1324 | c->srcBpc = 8; |
||
1325 | c->dstBpc = 1 + desc_dst->comp[0].depth_minus1; |
||
1326 | if (c->dstBpc < 8) |
||
1327 | c->dstBpc = 8; |
||
1328 | if (isAnyRGB(srcFormat) || srcFormat == AV_PIX_FMT_PAL8) |
||
1329 | c->srcBpc = 16; |
||
1330 | if (c->dstBpc == 16) |
||
1331 | dst_stride <<= 1; |
||
1332 | |||
1333 | if (INLINE_MMXEXT(cpu_flags) && c->srcBpc == 8 && c->dstBpc <= 14) { |
||
1334 | c->canMMXEXTBeUsed = (dstW >= srcW && (dstW & 31) == 0 && |
||
1335 | (srcW & 15) == 0) ? 1 : 0; |
||
1336 | if (!c->canMMXEXTBeUsed && dstW >= srcW && (srcW & 15) == 0 |
||
1337 | |||
1338 | && (flags & SWS_FAST_BILINEAR)) { |
||
1339 | if (flags & SWS_PRINT_INFO) |
||
1340 | av_log(c, AV_LOG_INFO, |
||
1341 | "output width is not a multiple of 32 -> no MMXEXT scaler\n"); |
||
1342 | } |
||
1343 | if (usesHFilter || isNBPS(c->srcFormat) || is16BPS(c->srcFormat) || isAnyRGB(c->srcFormat)) |
||
1344 | c->canMMXEXTBeUsed = 0; |
||
1345 | } else |
||
1346 | c->canMMXEXTBeUsed = 0; |
||
1347 | |||
1348 | c->chrXInc = (((int64_t)c->chrSrcW << 16) + (c->chrDstW >> 1)) / c->chrDstW; |
||
1349 | c->chrYInc = (((int64_t)c->chrSrcH << 16) + (c->chrDstH >> 1)) / c->chrDstH; |
||
1350 | |||
1351 | /* Match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src |
||
1352 | * to pixel n-2 of dst, but only for the FAST_BILINEAR mode otherwise do |
||
1353 | * correct scaling. |
||
1354 | * n-2 is the last chrominance sample available. |
||
1355 | * This is not perfect, but no one should notice the difference, the more |
||
1356 | * correct variant would be like the vertical one, but that would require |
||
1357 | * some special code for the first and last pixel */ |
||
1358 | if (flags & SWS_FAST_BILINEAR) { |
||
1359 | if (c->canMMXEXTBeUsed) { |
||
1360 | c->lumXInc += 20; |
||
1361 | c->chrXInc += 20; |
||
1362 | } |
||
1363 | // we don't use the x86 asm scaler if MMX is available |
||
1364 | else if (INLINE_MMX(cpu_flags) && c->dstBpc <= 14) { |
||
1365 | c->lumXInc = ((int64_t)(srcW - 2) << 16) / (dstW - 2) - 20; |
||
1366 | c->chrXInc = ((int64_t)(c->chrSrcW - 2) << 16) / (c->chrDstW - 2) - 20; |
||
1367 | } |
||
1368 | } |
||
1369 | |||
1370 | #define USE_MMAP (HAVE_MMAP && HAVE_MPROTECT && defined MAP_ANONYMOUS) |
||
1371 | |||
1372 | /* precalculate horizontal scaler filter coefficients */ |
||
1373 | { |
||
1374 | #if HAVE_MMXEXT_INLINE |
||
1375 | // can't downscale !!! |
||
1376 | if (c->canMMXEXTBeUsed && (flags & SWS_FAST_BILINEAR)) { |
||
1377 | c->lumMmxextFilterCodeSize = init_hscaler_mmxext(dstW, c->lumXInc, NULL, |
||
1378 | NULL, NULL, 8); |
||
1379 | c->chrMmxextFilterCodeSize = init_hscaler_mmxext(c->chrDstW, c->chrXInc, |
||
1380 | NULL, NULL, NULL, 4); |
||
1381 | |||
1382 | #if USE_MMAP |
||
1383 | c->lumMmxextFilterCode = mmap(NULL, c->lumMmxextFilterCodeSize, |
||
1384 | PROT_READ | PROT_WRITE, |
||
1385 | MAP_PRIVATE | MAP_ANONYMOUS, |
||
1386 | -1, 0); |
||
1387 | c->chrMmxextFilterCode = mmap(NULL, c->chrMmxextFilterCodeSize, |
||
1388 | PROT_READ | PROT_WRITE, |
||
1389 | MAP_PRIVATE | MAP_ANONYMOUS, |
||
1390 | -1, 0); |
||
1391 | #elif HAVE_VIRTUALALLOC |
||
1392 | c->lumMmxextFilterCode = VirtualAlloc(NULL, |
||
1393 | c->lumMmxextFilterCodeSize, |
||
1394 | MEM_COMMIT, |
||
1395 | PAGE_EXECUTE_READWRITE); |
||
1396 | c->chrMmxextFilterCode = VirtualAlloc(NULL, |
||
1397 | c->chrMmxextFilterCodeSize, |
||
1398 | MEM_COMMIT, |
||
1399 | PAGE_EXECUTE_READWRITE); |
||
1400 | #else |
||
1401 | c->lumMmxextFilterCode = av_malloc(c->lumMmxextFilterCodeSize); |
||
1402 | c->chrMmxextFilterCode = av_malloc(c->chrMmxextFilterCodeSize); |
||
1403 | #endif |
||
1404 | |||
1405 | #ifdef MAP_ANONYMOUS |
||
1406 | if (c->lumMmxextFilterCode == MAP_FAILED || c->chrMmxextFilterCode == MAP_FAILED) |
||
1407 | #else |
||
1408 | if (!c->lumMmxextFilterCode || !c->chrMmxextFilterCode) |
||
1409 | #endif |
||
1410 | { |
||
1411 | av_log(c, AV_LOG_ERROR, "Failed to allocate MMX2FilterCode\n"); |
||
1412 | return AVERROR(ENOMEM); |
||
1413 | } |
||
1414 | |||
1415 | FF_ALLOCZ_OR_GOTO(c, c->hLumFilter, (dstW / 8 + 8) * sizeof(int16_t), fail); |
||
1416 | FF_ALLOCZ_OR_GOTO(c, c->hChrFilter, (c->chrDstW / 4 + 8) * sizeof(int16_t), fail); |
||
1417 | FF_ALLOCZ_OR_GOTO(c, c->hLumFilterPos, (dstW / 2 / 8 + 8) * sizeof(int32_t), fail); |
||
1418 | FF_ALLOCZ_OR_GOTO(c, c->hChrFilterPos, (c->chrDstW / 2 / 4 + 8) * sizeof(int32_t), fail); |
||
1419 | |||
1420 | init_hscaler_mmxext( dstW, c->lumXInc, c->lumMmxextFilterCode, |
||
1421 | c->hLumFilter, (uint32_t*)c->hLumFilterPos, 8); |
||
1422 | init_hscaler_mmxext(c->chrDstW, c->chrXInc, c->chrMmxextFilterCode, |
||
1423 | c->hChrFilter, (uint32_t*)c->hChrFilterPos, 4); |
||
1424 | |||
1425 | #if USE_MMAP |
||
1426 | if ( mprotect(c->lumMmxextFilterCode, c->lumMmxextFilterCodeSize, PROT_EXEC | PROT_READ) == -1 |
||
1427 | || mprotect(c->chrMmxextFilterCode, c->chrMmxextFilterCodeSize, PROT_EXEC | PROT_READ) == -1) { |
||
1428 | av_log(c, AV_LOG_ERROR, "mprotect failed, cannot use fast bilinear scaler\n"); |
||
1429 | goto fail; |
||
1430 | } |
||
1431 | #endif |
||
1432 | } else |
||
1433 | #endif /* HAVE_MMXEXT_INLINE */ |
||
1434 | { |
||
1435 | const int filterAlign = X86_MMX(cpu_flags) ? 4 : |
||
1436 | PPC_ALTIVEC(cpu_flags) ? 8 : 1; |
||
1437 | |||
1438 | if (initFilter(&c->hLumFilter, &c->hLumFilterPos, |
||
1439 | &c->hLumFilterSize, c->lumXInc, |
||
1440 | srcW, dstW, filterAlign, 1 << 14, |
||
1441 | (flags & SWS_BICUBLIN) ? (flags | SWS_BICUBIC) : flags, |
||
1442 | cpu_flags, srcFilter->lumH, dstFilter->lumH, |
||
1443 | c->param, |
||
1444 | get_local_pos(c, 0, 0, 0), |
||
1445 | get_local_pos(c, 0, 0, 0)) < 0) |
||
1446 | goto fail; |
||
1447 | if (initFilter(&c->hChrFilter, &c->hChrFilterPos, |
||
1448 | &c->hChrFilterSize, c->chrXInc, |
||
1449 | c->chrSrcW, c->chrDstW, filterAlign, 1 << 14, |
||
1450 | (flags & SWS_BICUBLIN) ? (flags | SWS_BILINEAR) : flags, |
||
1451 | cpu_flags, srcFilter->chrH, dstFilter->chrH, |
||
1452 | c->param, |
||
1453 | get_local_pos(c, c->chrSrcHSubSample, c->src_h_chr_pos, 0), |
||
1454 | get_local_pos(c, c->chrDstHSubSample, c->dst_h_chr_pos, 0)) < 0) |
||
1455 | goto fail; |
||
1456 | } |
||
1457 | } // initialize horizontal stuff |
||
1458 | |||
1459 | /* precalculate vertical scaler filter coefficients */ |
||
1460 | { |
||
1461 | const int filterAlign = X86_MMX(cpu_flags) ? 2 : |
||
1462 | PPC_ALTIVEC(cpu_flags) ? 8 : 1; |
||
1463 | |||
1464 | if (initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, |
||
1465 | c->lumYInc, srcH, dstH, filterAlign, (1 << 12), |
||
1466 | (flags & SWS_BICUBLIN) ? (flags | SWS_BICUBIC) : flags, |
||
1467 | cpu_flags, srcFilter->lumV, dstFilter->lumV, |
||
1468 | c->param, |
||
1469 | get_local_pos(c, 0, 0, 1), |
||
1470 | get_local_pos(c, 0, 0, 1)) < 0) |
||
1471 | goto fail; |
||
1472 | if (initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, |
||
1473 | c->chrYInc, c->chrSrcH, c->chrDstH, |
||
1474 | filterAlign, (1 << 12), |
||
1475 | (flags & SWS_BICUBLIN) ? (flags | SWS_BILINEAR) : flags, |
||
1476 | cpu_flags, srcFilter->chrV, dstFilter->chrV, |
||
1477 | c->param, |
||
1478 | get_local_pos(c, c->chrSrcVSubSample, c->src_v_chr_pos, 1), |
||
1479 | get_local_pos(c, c->chrDstVSubSample, c->dst_v_chr_pos, 1)) < 0) |
||
1480 | |||
1481 | goto fail; |
||
1482 | |||
1483 | #if HAVE_ALTIVEC |
||
1484 | FF_ALLOC_OR_GOTO(c, c->vYCoeffsBank, sizeof(vector signed short) * c->vLumFilterSize * c->dstH, fail); |
||
1485 | FF_ALLOC_OR_GOTO(c, c->vCCoeffsBank, sizeof(vector signed short) * c->vChrFilterSize * c->chrDstH, fail); |
||
1486 | |||
1487 | for (i = 0; i < c->vLumFilterSize * c->dstH; i++) { |
||
1488 | int j; |
||
1489 | short *p = (short *)&c->vYCoeffsBank[i]; |
||
1490 | for (j = 0; j < 8; j++) |
||
1491 | p[j] = c->vLumFilter[i]; |
||
1492 | } |
||
1493 | |||
1494 | for (i = 0; i < c->vChrFilterSize * c->chrDstH; i++) { |
||
1495 | int j; |
||
1496 | short *p = (short *)&c->vCCoeffsBank[i]; |
||
1497 | for (j = 0; j < 8; j++) |
||
1498 | p[j] = c->vChrFilter[i]; |
||
1499 | } |
||
1500 | #endif |
||
1501 | } |
||
1502 | |||
1503 | // calculate buffer sizes so that they won't run out while handling these damn slices |
||
1504 | c->vLumBufSize = c->vLumFilterSize; |
||
1505 | c->vChrBufSize = c->vChrFilterSize; |
||
1506 | for (i = 0; i < dstH; i++) { |
||
1507 | int chrI = (int64_t)i * c->chrDstH / dstH; |
||
1508 | int nextSlice = FFMAX(c->vLumFilterPos[i] + c->vLumFilterSize - 1, |
||
1509 | ((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1) |
||
1510 | << c->chrSrcVSubSample)); |
||
1511 | |||
1512 | nextSlice >>= c->chrSrcVSubSample; |
||
1513 | nextSlice <<= c->chrSrcVSubSample; |
||
1514 | if (c->vLumFilterPos[i] + c->vLumBufSize < nextSlice) |
||
1515 | c->vLumBufSize = nextSlice - c->vLumFilterPos[i]; |
||
1516 | if (c->vChrFilterPos[chrI] + c->vChrBufSize < |
||
1517 | (nextSlice >> c->chrSrcVSubSample)) |
||
1518 | c->vChrBufSize = (nextSlice >> c->chrSrcVSubSample) - |
||
1519 | c->vChrFilterPos[chrI]; |
||
1520 | } |
||
1521 | |||
1522 | for (i = 0; i < 4; i++) |
||
1523 | FF_ALLOCZ_OR_GOTO(c, c->dither_error[i], (c->dstW+2) * sizeof(int), fail); |
||
1524 | |||
1525 | /* Allocate pixbufs (we use dynamic allocation because otherwise we would |
||
1526 | * need to allocate several megabytes to handle all possible cases) */ |
||
1527 | FF_ALLOC_OR_GOTO(c, c->lumPixBuf, c->vLumBufSize * 3 * sizeof(int16_t *), fail); |
||
1528 | FF_ALLOC_OR_GOTO(c, c->chrUPixBuf, c->vChrBufSize * 3 * sizeof(int16_t *), fail); |
||
1529 | FF_ALLOC_OR_GOTO(c, c->chrVPixBuf, c->vChrBufSize * 3 * sizeof(int16_t *), fail); |
||
1530 | if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat) && isALPHA(c->dstFormat)) |
||
1531 | FF_ALLOCZ_OR_GOTO(c, c->alpPixBuf, c->vLumBufSize * 3 * sizeof(int16_t *), fail); |
||
1532 | /* Note we need at least one pixel more at the end because of the MMX code |
||
1533 | * (just in case someone wants to replace the 4000/8000). */ |
||
1534 | /* align at 16 bytes for AltiVec */ |
||
1535 | for (i = 0; i < c->vLumBufSize; i++) { |
||
1536 | FF_ALLOCZ_OR_GOTO(c, c->lumPixBuf[i + c->vLumBufSize], |
||
1537 | dst_stride + 16, fail); |
||
1538 | c->lumPixBuf[i] = c->lumPixBuf[i + c->vLumBufSize]; |
||
1539 | } |
||
1540 | // 64 / c->scalingBpp is the same as 16 / sizeof(scaling_intermediate) |
||
1541 | c->uv_off = (dst_stride>>1) + 64 / (c->dstBpc &~ 7); |
||
1542 | c->uv_offx2 = dst_stride + 16; |
||
1543 | for (i = 0; i < c->vChrBufSize; i++) { |
||
1544 | FF_ALLOC_OR_GOTO(c, c->chrUPixBuf[i + c->vChrBufSize], |
||
1545 | dst_stride * 2 + 32, fail); |
||
1546 | c->chrUPixBuf[i] = c->chrUPixBuf[i + c->vChrBufSize]; |
||
1547 | c->chrVPixBuf[i] = c->chrVPixBuf[i + c->vChrBufSize] |
||
1548 | = c->chrUPixBuf[i] + (dst_stride >> 1) + 8; |
||
1549 | } |
||
1550 | if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) |
||
1551 | for (i = 0; i < c->vLumBufSize; i++) { |
||
1552 | FF_ALLOCZ_OR_GOTO(c, c->alpPixBuf[i + c->vLumBufSize], |
||
1553 | dst_stride + 16, fail); |
||
1554 | c->alpPixBuf[i] = c->alpPixBuf[i + c->vLumBufSize]; |
||
1555 | } |
||
1556 | |||
1557 | // try to avoid drawing green stuff between the right end and the stride end |
||
1558 | for (i = 0; i < c->vChrBufSize; i++) |
||
1559 | if(desc_dst->comp[0].depth_minus1 == 15){ |
||
1560 | av_assert0(c->dstBpc > 14); |
||
1561 | for(j=0; j |
||
1562 | ((int32_t*)(c->chrUPixBuf[i]))[j] = 1<<18; |
||
1563 | } else |
||
1564 | for(j=0; j |
||
1565 | ((int16_t*)(c->chrUPixBuf[i]))[j] = 1<<14; |
||
1566 | |||
1567 | av_assert0(c->chrDstH <= dstH); |
||
1568 | |||
1569 | if (flags & SWS_PRINT_INFO) { |
||
1570 | const char *scaler, *cpucaps; |
||
1571 | if (flags & SWS_FAST_BILINEAR) |
||
1572 | scaler = "FAST_BILINEAR scaler"; |
||
1573 | else if (flags & SWS_BILINEAR) |
||
1574 | scaler = "BILINEAR scaler"; |
||
1575 | else if (flags & SWS_BICUBIC) |
||
1576 | scaler = "BICUBIC scaler"; |
||
1577 | else if (flags & SWS_X) |
||
1578 | scaler = "Experimental scaler"; |
||
1579 | else if (flags & SWS_POINT) |
||
1580 | scaler = "Nearest Neighbor / POINT scaler"; |
||
1581 | else if (flags & SWS_AREA) |
||
1582 | scaler = "Area Averaging scaler"; |
||
1583 | else if (flags & SWS_BICUBLIN) |
||
1584 | scaler = "luma BICUBIC / chroma BILINEAR scaler"; |
||
1585 | else if (flags & SWS_GAUSS) |
||
1586 | scaler = "Gaussian scaler"; |
||
1587 | else if (flags & SWS_SINC) |
||
1588 | scaler = "Sinc scaler"; |
||
1589 | else if (flags & SWS_LANCZOS) |
||
1590 | scaler = "Lanczos scaler"; |
||
1591 | else if (flags & SWS_SPLINE) |
||
1592 | scaler = "Bicubic spline scaler"; |
||
1593 | else |
||
1594 | scaler = "ehh flags invalid?!"; |
||
1595 | |||
1596 | av_log(c, AV_LOG_INFO, "%s, from %s to %s%s ", |
||
1597 | scaler, |
||
1598 | av_get_pix_fmt_name(srcFormat), |
||
1599 | #ifdef DITHER1XBPP |
||
1600 | dstFormat == AV_PIX_FMT_BGR555 || dstFormat == AV_PIX_FMT_BGR565 || |
||
1601 | dstFormat == AV_PIX_FMT_RGB444BE || dstFormat == AV_PIX_FMT_RGB444LE || |
||
1602 | dstFormat == AV_PIX_FMT_BGR444BE || dstFormat == AV_PIX_FMT_BGR444LE ? |
||
1603 | "dithered " : "", |
||
1604 | #else |
||
1605 | "", |
||
1606 | #endif |
||
1607 | av_get_pix_fmt_name(dstFormat)); |
||
1608 | |||
1609 | if (INLINE_MMXEXT(cpu_flags)) |
||
1610 | cpucaps = "MMXEXT"; |
||
1611 | else if (INLINE_AMD3DNOW(cpu_flags)) |
||
1612 | cpucaps = "3DNOW"; |
||
1613 | else if (INLINE_MMX(cpu_flags)) |
||
1614 | cpucaps = "MMX"; |
||
1615 | else if (PPC_ALTIVEC(cpu_flags)) |
||
1616 | cpucaps = "AltiVec"; |
||
1617 | else |
||
1618 | cpucaps = "C"; |
||
1619 | |||
1620 | av_log(c, AV_LOG_INFO, "using %s\n", cpucaps); |
||
1621 | |||
1622 | av_log(c, AV_LOG_VERBOSE, "%dx%d -> %dx%d\n", srcW, srcH, dstW, dstH); |
||
1623 | av_log(c, AV_LOG_DEBUG, |
||
1624 | "lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n", |
||
1625 | c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc); |
||
1626 | av_log(c, AV_LOG_DEBUG, |
||
1627 | "chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n", |
||
1628 | c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH, |
||
1629 | c->chrXInc, c->chrYInc); |
||
1630 | } |
||
1631 | |||
1632 | c->swscale = ff_getSwsFunc(c); |
||
1633 | return 0; |
||
1634 | fail: // FIXME replace things by appropriate error codes |
||
1635 | return -1; |
||
1636 | } |
||
1637 | |||
1638 | #if FF_API_SWS_GETCONTEXT |
||
1639 | SwsContext *sws_getContext(int srcW, int srcH, enum AVPixelFormat srcFormat, |
||
1640 | int dstW, int dstH, enum AVPixelFormat dstFormat, |
||
1641 | int flags, SwsFilter *srcFilter, |
||
1642 | SwsFilter *dstFilter, const double *param) |
||
1643 | { |
||
1644 | SwsContext *c; |
||
1645 | |||
1646 | if (!(c = sws_alloc_context())) |
||
1647 | return NULL; |
||
1648 | |||
1649 | c->flags = flags; |
||
1650 | c->srcW = srcW; |
||
1651 | c->srcH = srcH; |
||
1652 | c->dstW = dstW; |
||
1653 | c->dstH = dstH; |
||
1654 | c->srcFormat = srcFormat; |
||
1655 | c->dstFormat = dstFormat; |
||
1656 | |||
1657 | if (param) { |
||
1658 | c->param[0] = param[0]; |
||
1659 | c->param[1] = param[1]; |
||
1660 | } |
||
1661 | |||
1662 | if (sws_init_context(c, srcFilter, dstFilter) < 0) { |
||
1663 | sws_freeContext(c); |
||
1664 | return NULL; |
||
1665 | } |
||
1666 | |||
1667 | return c; |
||
1668 | } |
||
1669 | #endif |
||
1670 | |||
1671 | SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur, |
||
1672 | float lumaSharpen, float chromaSharpen, |
||
1673 | float chromaHShift, float chromaVShift, |
||
1674 | int verbose) |
||
1675 | { |
||
1676 | SwsFilter *filter = av_malloc(sizeof(SwsFilter)); |
||
1677 | if (!filter) |
||
1678 | return NULL; |
||
1679 | |||
1680 | if (lumaGBlur != 0.0) { |
||
1681 | filter->lumH = sws_getGaussianVec(lumaGBlur, 3.0); |
||
1682 | filter->lumV = sws_getGaussianVec(lumaGBlur, 3.0); |
||
1683 | } else { |
||
1684 | filter->lumH = sws_getIdentityVec(); |
||
1685 | filter->lumV = sws_getIdentityVec(); |
||
1686 | } |
||
1687 | |||
1688 | if (chromaGBlur != 0.0) { |
||
1689 | filter->chrH = sws_getGaussianVec(chromaGBlur, 3.0); |
||
1690 | filter->chrV = sws_getGaussianVec(chromaGBlur, 3.0); |
||
1691 | } else { |
||
1692 | filter->chrH = sws_getIdentityVec(); |
||
1693 | filter->chrV = sws_getIdentityVec(); |
||
1694 | } |
||
1695 | |||
1696 | if (chromaSharpen != 0.0) { |
||
1697 | SwsVector *id = sws_getIdentityVec(); |
||
1698 | sws_scaleVec(filter->chrH, -chromaSharpen); |
||
1699 | sws_scaleVec(filter->chrV, -chromaSharpen); |
||
1700 | sws_addVec(filter->chrH, id); |
||
1701 | sws_addVec(filter->chrV, id); |
||
1702 | sws_freeVec(id); |
||
1703 | } |
||
1704 | |||
1705 | if (lumaSharpen != 0.0) { |
||
1706 | SwsVector *id = sws_getIdentityVec(); |
||
1707 | sws_scaleVec(filter->lumH, -lumaSharpen); |
||
1708 | sws_scaleVec(filter->lumV, -lumaSharpen); |
||
1709 | sws_addVec(filter->lumH, id); |
||
1710 | sws_addVec(filter->lumV, id); |
||
1711 | sws_freeVec(id); |
||
1712 | } |
||
1713 | |||
1714 | if (chromaHShift != 0.0) |
||
1715 | sws_shiftVec(filter->chrH, (int)(chromaHShift + 0.5)); |
||
1716 | |||
1717 | if (chromaVShift != 0.0) |
||
1718 | sws_shiftVec(filter->chrV, (int)(chromaVShift + 0.5)); |
||
1719 | |||
1720 | sws_normalizeVec(filter->chrH, 1.0); |
||
1721 | sws_normalizeVec(filter->chrV, 1.0); |
||
1722 | sws_normalizeVec(filter->lumH, 1.0); |
||
1723 | sws_normalizeVec(filter->lumV, 1.0); |
||
1724 | |||
1725 | if (verbose) |
||
1726 | sws_printVec2(filter->chrH, NULL, AV_LOG_DEBUG); |
||
1727 | if (verbose) |
||
1728 | sws_printVec2(filter->lumH, NULL, AV_LOG_DEBUG); |
||
1729 | |||
1730 | return filter; |
||
1731 | } |
||
1732 | |||
1733 | SwsVector *sws_allocVec(int length) |
||
1734 | { |
||
1735 | SwsVector *vec; |
||
1736 | |||
1737 | if(length <= 0 || length > INT_MAX/ sizeof(double)) |
||
1738 | return NULL; |
||
1739 | |||
1740 | vec = av_malloc(sizeof(SwsVector)); |
||
1741 | if (!vec) |
||
1742 | return NULL; |
||
1743 | vec->length = length; |
||
1744 | vec->coeff = av_malloc(sizeof(double) * length); |
||
1745 | if (!vec->coeff) |
||
1746 | av_freep(&vec); |
||
1747 | return vec; |
||
1748 | } |
||
1749 | |||
1750 | SwsVector *sws_getGaussianVec(double variance, double quality) |
||
1751 | { |
||
1752 | const int length = (int)(variance * quality + 0.5) | 1; |
||
1753 | int i; |
||
1754 | double middle = (length - 1) * 0.5; |
||
1755 | SwsVector *vec; |
||
1756 | |||
1757 | if(variance < 0 || quality < 0) |
||
1758 | return NULL; |
||
1759 | |||
1760 | vec = sws_allocVec(length); |
||
1761 | |||
1762 | if (!vec) |
||
1763 | return NULL; |
||
1764 | |||
1765 | for (i = 0; i < length; i++) { |
||
1766 | double dist = i - middle; |
||
1767 | vec->coeff[i] = exp(-dist * dist / (2 * variance * variance)) / |
||
1768 | sqrt(2 * variance * M_PI); |
||
1769 | } |
||
1770 | |||
1771 | sws_normalizeVec(vec, 1.0); |
||
1772 | |||
1773 | return vec; |
||
1774 | } |
||
1775 | |||
1776 | SwsVector *sws_getConstVec(double c, int length) |
||
1777 | { |
||
1778 | int i; |
||
1779 | SwsVector *vec = sws_allocVec(length); |
||
1780 | |||
1781 | if (!vec) |
||
1782 | return NULL; |
||
1783 | |||
1784 | for (i = 0; i < length; i++) |
||
1785 | vec->coeff[i] = c; |
||
1786 | |||
1787 | return vec; |
||
1788 | } |
||
1789 | |||
1790 | SwsVector *sws_getIdentityVec(void) |
||
1791 | { |
||
1792 | return sws_getConstVec(1.0, 1); |
||
1793 | } |
||
1794 | |||
1795 | static double sws_dcVec(SwsVector *a) |
||
1796 | { |
||
1797 | int i; |
||
1798 | double sum = 0; |
||
1799 | |||
1800 | for (i = 0; i < a->length; i++) |
||
1801 | sum += a->coeff[i]; |
||
1802 | |||
1803 | return sum; |
||
1804 | } |
||
1805 | |||
1806 | void sws_scaleVec(SwsVector *a, double scalar) |
||
1807 | { |
||
1808 | int i; |
||
1809 | |||
1810 | for (i = 0; i < a->length; i++) |
||
1811 | a->coeff[i] *= scalar; |
||
1812 | } |
||
1813 | |||
1814 | void sws_normalizeVec(SwsVector *a, double height) |
||
1815 | { |
||
1816 | sws_scaleVec(a, height / sws_dcVec(a)); |
||
1817 | } |
||
1818 | |||
1819 | static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b) |
||
1820 | { |
||
1821 | int length = a->length + b->length - 1; |
||
1822 | int i, j; |
||
1823 | SwsVector *vec = sws_getConstVec(0.0, length); |
||
1824 | |||
1825 | if (!vec) |
||
1826 | return NULL; |
||
1827 | |||
1828 | for (i = 0; i < a->length; i++) { |
||
1829 | for (j = 0; j < b->length; j++) { |
||
1830 | vec->coeff[i + j] += a->coeff[i] * b->coeff[j]; |
||
1831 | } |
||
1832 | } |
||
1833 | |||
1834 | return vec; |
||
1835 | } |
||
1836 | |||
1837 | static SwsVector *sws_sumVec(SwsVector *a, SwsVector *b) |
||
1838 | { |
||
1839 | int length = FFMAX(a->length, b->length); |
||
1840 | int i; |
||
1841 | SwsVector *vec = sws_getConstVec(0.0, length); |
||
1842 | |||
1843 | if (!vec) |
||
1844 | return NULL; |
||
1845 | |||
1846 | for (i = 0; i < a->length; i++) |
||
1847 | vec->coeff[i + (length - 1) / 2 - (a->length - 1) / 2] += a->coeff[i]; |
||
1848 | for (i = 0; i < b->length; i++) |
||
1849 | vec->coeff[i + (length - 1) / 2 - (b->length - 1) / 2] += b->coeff[i]; |
||
1850 | |||
1851 | return vec; |
||
1852 | } |
||
1853 | |||
1854 | static SwsVector *sws_diffVec(SwsVector *a, SwsVector *b) |
||
1855 | { |
||
1856 | int length = FFMAX(a->length, b->length); |
||
1857 | int i; |
||
1858 | SwsVector *vec = sws_getConstVec(0.0, length); |
||
1859 | |||
1860 | if (!vec) |
||
1861 | return NULL; |
||
1862 | |||
1863 | for (i = 0; i < a->length; i++) |
||
1864 | vec->coeff[i + (length - 1) / 2 - (a->length - 1) / 2] += a->coeff[i]; |
||
1865 | for (i = 0; i < b->length; i++) |
||
1866 | vec->coeff[i + (length - 1) / 2 - (b->length - 1) / 2] -= b->coeff[i]; |
||
1867 | |||
1868 | return vec; |
||
1869 | } |
||
1870 | |||
1871 | /* shift left / or right if "shift" is negative */ |
||
1872 | static SwsVector *sws_getShiftedVec(SwsVector *a, int shift) |
||
1873 | { |
||
1874 | int length = a->length + FFABS(shift) * 2; |
||
1875 | int i; |
||
1876 | SwsVector *vec = sws_getConstVec(0.0, length); |
||
1877 | |||
1878 | if (!vec) |
||
1879 | return NULL; |
||
1880 | |||
1881 | for (i = 0; i < a->length; i++) { |
||
1882 | vec->coeff[i + (length - 1) / 2 - |
||
1883 | (a->length - 1) / 2 - shift] = a->coeff[i]; |
||
1884 | } |
||
1885 | |||
1886 | return vec; |
||
1887 | } |
||
1888 | |||
1889 | void sws_shiftVec(SwsVector *a, int shift) |
||
1890 | { |
||
1891 | SwsVector *shifted = sws_getShiftedVec(a, shift); |
||
1892 | av_free(a->coeff); |
||
1893 | a->coeff = shifted->coeff; |
||
1894 | a->length = shifted->length; |
||
1895 | av_free(shifted); |
||
1896 | } |
||
1897 | |||
1898 | void sws_addVec(SwsVector *a, SwsVector *b) |
||
1899 | { |
||
1900 | SwsVector *sum = sws_sumVec(a, b); |
||
1901 | av_free(a->coeff); |
||
1902 | a->coeff = sum->coeff; |
||
1903 | a->length = sum->length; |
||
1904 | av_free(sum); |
||
1905 | } |
||
1906 | |||
1907 | void sws_subVec(SwsVector *a, SwsVector *b) |
||
1908 | { |
||
1909 | SwsVector *diff = sws_diffVec(a, b); |
||
1910 | av_free(a->coeff); |
||
1911 | a->coeff = diff->coeff; |
||
1912 | a->length = diff->length; |
||
1913 | av_free(diff); |
||
1914 | } |
||
1915 | |||
1916 | void sws_convVec(SwsVector *a, SwsVector *b) |
||
1917 | { |
||
1918 | SwsVector *conv = sws_getConvVec(a, b); |
||
1919 | av_free(a->coeff); |
||
1920 | a->coeff = conv->coeff; |
||
1921 | a->length = conv->length; |
||
1922 | av_free(conv); |
||
1923 | } |
||
1924 | |||
1925 | SwsVector *sws_cloneVec(SwsVector *a) |
||
1926 | { |
||
1927 | SwsVector *vec = sws_allocVec(a->length); |
||
1928 | |||
1929 | if (!vec) |
||
1930 | return NULL; |
||
1931 | |||
1932 | memcpy(vec->coeff, a->coeff, a->length * sizeof(*a->coeff)); |
||
1933 | |||
1934 | return vec; |
||
1935 | } |
||
1936 | |||
1937 | void sws_printVec2(SwsVector *a, AVClass *log_ctx, int log_level) |
||
1938 | { |
||
1939 | int i; |
||
1940 | double max = 0; |
||
1941 | double min = 0; |
||
1942 | double range; |
||
1943 | |||
1944 | for (i = 0; i < a->length; i++) |
||
1945 | if (a->coeff[i] > max) |
||
1946 | max = a->coeff[i]; |
||
1947 | |||
1948 | for (i = 0; i < a->length; i++) |
||
1949 | if (a->coeff[i] < min) |
||
1950 | min = a->coeff[i]; |
||
1951 | |||
1952 | range = max - min; |
||
1953 | |||
1954 | for (i = 0; i < a->length; i++) { |
||
1955 | int x = (int)((a->coeff[i] - min) * 60.0 / range + 0.5); |
||
1956 | av_log(log_ctx, log_level, "%1.3f ", a->coeff[i]); |
||
1957 | for (; x > 0; x--) |
||
1958 | av_log(log_ctx, log_level, " "); |
||
1959 | av_log(log_ctx, log_level, "|\n"); |
||
1960 | } |
||
1961 | } |
||
1962 | |||
1963 | void sws_freeVec(SwsVector *a) |
||
1964 | { |
||
1965 | if (!a) |
||
1966 | return; |
||
1967 | av_freep(&a->coeff); |
||
1968 | a->length = 0; |
||
1969 | av_free(a); |
||
1970 | } |
||
1971 | |||
1972 | void sws_freeFilter(SwsFilter *filter) |
||
1973 | { |
||
1974 | if (!filter) |
||
1975 | return; |
||
1976 | |||
1977 | sws_freeVec(filter->lumH); |
||
1978 | sws_freeVec(filter->lumV); |
||
1979 | sws_freeVec(filter->chrH); |
||
1980 | sws_freeVec(filter->chrV); |
||
1981 | av_free(filter); |
||
1982 | } |
||
1983 | |||
1984 | void sws_freeContext(SwsContext *c) |
||
1985 | { |
||
1986 | int i; |
||
1987 | if (!c) |
||
1988 | return; |
||
1989 | |||
1990 | if (c->lumPixBuf) { |
||
1991 | for (i = 0; i < c->vLumBufSize; i++) |
||
1992 | av_freep(&c->lumPixBuf[i]); |
||
1993 | av_freep(&c->lumPixBuf); |
||
1994 | } |
||
1995 | |||
1996 | if (c->chrUPixBuf) { |
||
1997 | for (i = 0; i < c->vChrBufSize; i++) |
||
1998 | av_freep(&c->chrUPixBuf[i]); |
||
1999 | av_freep(&c->chrUPixBuf); |
||
2000 | av_freep(&c->chrVPixBuf); |
||
2001 | } |
||
2002 | |||
2003 | if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { |
||
2004 | for (i = 0; i < c->vLumBufSize; i++) |
||
2005 | av_freep(&c->alpPixBuf[i]); |
||
2006 | av_freep(&c->alpPixBuf); |
||
2007 | } |
||
2008 | |||
2009 | for (i = 0; i < 4; i++) |
||
2010 | av_freep(&c->dither_error[i]); |
||
2011 | |||
2012 | av_freep(&c->vLumFilter); |
||
2013 | av_freep(&c->vChrFilter); |
||
2014 | av_freep(&c->hLumFilter); |
||
2015 | av_freep(&c->hChrFilter); |
||
2016 | #if HAVE_ALTIVEC |
||
2017 | av_freep(&c->vYCoeffsBank); |
||
2018 | av_freep(&c->vCCoeffsBank); |
||
2019 | #endif |
||
2020 | |||
2021 | av_freep(&c->vLumFilterPos); |
||
2022 | av_freep(&c->vChrFilterPos); |
||
2023 | av_freep(&c->hLumFilterPos); |
||
2024 | av_freep(&c->hChrFilterPos); |
||
2025 | |||
2026 | #if HAVE_MMX_INLINE |
||
2027 | #if USE_MMAP |
||
2028 | if (c->lumMmxextFilterCode) |
||
2029 | munmap(c->lumMmxextFilterCode, c->lumMmxextFilterCodeSize); |
||
2030 | if (c->chrMmxextFilterCode) |
||
2031 | munmap(c->chrMmxextFilterCode, c->chrMmxextFilterCodeSize); |
||
2032 | #elif HAVE_VIRTUALALLOC |
||
2033 | if (c->lumMmxextFilterCode) |
||
2034 | VirtualFree(c->lumMmxextFilterCode, 0, MEM_RELEASE); |
||
2035 | if (c->chrMmxextFilterCode) |
||
2036 | VirtualFree(c->chrMmxextFilterCode, 0, MEM_RELEASE); |
||
2037 | #else |
||
2038 | av_free(c->lumMmxextFilterCode); |
||
2039 | av_free(c->chrMmxextFilterCode); |
||
2040 | #endif |
||
2041 | c->lumMmxextFilterCode = NULL; |
||
2042 | c->chrMmxextFilterCode = NULL; |
||
2043 | #endif /* HAVE_MMX_INLINE */ |
||
2044 | |||
2045 | av_freep(&c->yuvTable); |
||
2046 | av_freep(&c->formatConvBuffer); |
||
2047 | |||
2048 | av_free(c); |
||
2049 | } |
||
2050 | |||
2051 | struct SwsContext *sws_getCachedContext(struct SwsContext *context, int srcW, |
||
2052 | int srcH, enum AVPixelFormat srcFormat, |
||
2053 | int dstW, int dstH, |
||
2054 | enum AVPixelFormat dstFormat, int flags, |
||
2055 | SwsFilter *srcFilter, |
||
2056 | SwsFilter *dstFilter, |
||
2057 | const double *param) |
||
2058 | { |
||
2059 | static const double default_param[2] = { SWS_PARAM_DEFAULT, |
||
2060 | SWS_PARAM_DEFAULT }; |
||
2061 | |||
2062 | if (!param) |
||
2063 | param = default_param; |
||
2064 | |||
2065 | if (context && |
||
2066 | (context->srcW != srcW || |
||
2067 | context->srcH != srcH || |
||
2068 | context->srcFormat != srcFormat || |
||
2069 | context->dstW != dstW || |
||
2070 | context->dstH != dstH || |
||
2071 | context->dstFormat != dstFormat || |
||
2072 | context->flags != flags || |
||
2073 | context->param[0] != param[0] || |
||
2074 | context->param[1] != param[1])) { |
||
2075 | sws_freeContext(context); |
||
2076 | context = NULL; |
||
2077 | } |
||
2078 | |||
2079 | if (!context) { |
||
2080 | if (!(context = sws_alloc_context())) |
||
2081 | return NULL; |
||
2082 | context->srcW = srcW; |
||
2083 | context->srcH = srcH; |
||
2084 | context->srcFormat = srcFormat; |
||
2085 | context->dstW = dstW; |
||
2086 | context->dstH = dstH; |
||
2087 | context->dstFormat = dstFormat; |
||
2088 | context->flags = flags; |
||
2089 | context->param[0] = param[0]; |
||
2090 | context->param[1] = param[1]; |
||
2091 | if (sws_init_context(context, srcFilter, dstFilter) < 0) { |
||
2092 | sws_freeContext(context); |
||
2093 | return NULL; |
||
2094 | } |
||
2095 | } |
||
2096 | return context; |
||
2097 | }>>>>>>>>>>>>>>>>>>>>>>=>>=>14; |