/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/Makefile |
---|
0,0 → 1,30 |
include $(SUBDIR)../config.mak |
NAME = swscale |
FFLIBS = avutil |
HEADERS = swscale.h \ |
version.h
OBJS = alphablend.o \ |
hscale_fast_bilinear.o \ |
input.o \ |
options.o \ |
output.o \ |
rgb2rgb.o \ |
swscale.o \ |
swscale_unscaled.o \ |
utils.o \ |
yuv2rgb.o \ |
slice.o \ |
hscale.o \ |
vscale.o \ |
gamma.o
OBJS-$(CONFIG_SHARED) += log2_tab.o |
# Windows resource file |
SLIBOBJS-$(HAVE_GNU_WINDRES) += swscaleres.o |
TESTPROGS = colorspace \ |
swscale
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/alphablend.c |
---|
0,0 → 1,169 |
/* |
* Copyright (C) 2015 Michael Niedermayer <michaelni@gmx.at> |
* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
#include "swscale_internal.h" |
int ff_sws_alphablendaway(SwsContext *c, const uint8_t *src[], |
int srcStride[], int srcSliceY, int srcSliceH, |
uint8_t *dst[], int dstStride[]) |
{ |
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat); |
int nb_components = desc->nb_components; |
int plane, x, y; |
int plane_count = isGray(c->srcFormat) ? 1 : 3; |
int sixteen_bits = desc->comp[0].depth_minus1 >= 8; |
unsigned off = 1<<desc->comp[0].depth_minus1; |
unsigned shift = desc->comp[0].depth_minus1 + 1; |
unsigned max = (1<<shift) - 1; |
int target_table[2][3]; |
for (plane = 0; plane < plane_count; plane++) { |
int a = 0, b = 0; |
if (c->alphablend == SWS_ALPHA_BLEND_CHECKERBOARD) { |
a = (1<<desc->comp[0].depth_minus1)/2; |
b = 3*(1<<desc->comp[0].depth_minus1)/2; |
} |
target_table[0][plane] = plane && !(desc->flags & AV_PIX_FMT_FLAG_RGB) ? 1<<desc->comp[0].depth_minus1 : a; |
target_table[1][plane] = plane && !(desc->flags & AV_PIX_FMT_FLAG_RGB) ? 1<<desc->comp[0].depth_minus1 : b; |
} |
av_assert0(plane_count == nb_components - 1); |
if (desc->flags & AV_PIX_FMT_FLAG_PLANAR) { |
for (plane = 0; plane < plane_count; plane++) { |
int w = plane ? c->chrSrcW : c->srcW; |
int x_subsample = plane ? desc->log2_chroma_w: 0; |
int y_subsample = plane ? desc->log2_chroma_h: 0; |
for (y = srcSliceY >> y_subsample; y < FF_CEIL_RSHIFT(srcSliceH, y_subsample); y++) { |
if (x_subsample || y_subsample) { |
int alpha; |
unsigned u; |
if (sixteen_bits) { |
ptrdiff_t alpha_step = srcStride[plane_count] >> 1; |
const uint16_t *s = (const uint16_t *)(src[plane ] + srcStride[plane ] * y); |
const uint16_t *a = (const uint16_t *)(src[plane_count] + (srcStride[plane_count] * y << y_subsample)); |
uint16_t *d = ( uint16_t *)(dst[plane ] + dstStride[plane ] * y); |
if ((!isBE(c->srcFormat)) == !HAVE_BIGENDIAN) { |
for (x = 0; x < w; x++) { |
if (y_subsample) { |
alpha = (a[2*x] + a[2*x + 1] + 2 + |
a[2*x + alpha_step] + a[2*x + alpha_step + 1]) >> 2; |
} else |
alpha = (a[2*x] + a[2*x + 1]) >> 1; |
u = s[x]*alpha + target_table[((x^y)>>5)&1][plane]*(max-alpha) + off; |
d[x] = av_clip((u + (u >> shift)) >> shift, 0, max); |
} |
} else { |
for (x = 0; x < w; x++) { |
if (y_subsample) { |
alpha = (av_bswap16(a[2*x]) + av_bswap16(a[2*x + 1]) + 2 + |
av_bswap16(a[2*x + alpha_step]) + av_bswap16(a[2*x + alpha_step + 1])) >> 2; |
} else |
alpha = (av_bswap16(a[2*x]) + av_bswap16(a[2*x + 1])) >> 1; |
u = av_bswap16(s[x])*alpha + target_table[((x^y)>>5)&1][plane]*(max-alpha) + off; |
d[x] = av_clip((u + (u >> shift)) >> shift, 0, max); |
} |
} |
} else { |
ptrdiff_t alpha_step = srcStride[plane_count]; |
const uint8_t *s = src[plane ] + srcStride[plane] * y; |
const uint8_t *a = src[plane_count] + (srcStride[plane_count] * y << y_subsample); |
uint8_t *d = dst[plane ] + dstStride[plane] * y; |
for (x = 0; x < w; x++) { |
if (y_subsample) { |
alpha = (a[2*x] + a[2*x + 1] + 2 + |
a[2*x + alpha_step] + a[2*x + alpha_step + 1]) >> 2; |
} else |
alpha = (a[2*x] + a[2*x + 1]) >> 1; |
u = s[x]*alpha + target_table[((x^y)>>5)&1][plane]*(255-alpha) + 128; |
d[x] = (257*u) >> 16; |
} |
} |
} else { |
if (sixteen_bits) { |
const uint16_t *s = (const uint16_t *)(src[plane ] + srcStride[plane ] * y); |
const uint16_t *a = (const uint16_t *)(src[plane_count] + srcStride[plane_count] * y); |
uint16_t *d = ( uint16_t *)(dst[plane ] + dstStride[plane ] * y); |
if ((!isBE(c->srcFormat)) == !HAVE_BIGENDIAN) { |
for (x = 0; x < w; x++) { |
unsigned u = s[x]*a[x] + target_table[((x^y)>>5)&1][plane]*(max-a[x]) + off; |
d[x] = av_clip((u + (u >> shift)) >> shift, 0, max); |
} |
} else { |
for (x = 0; x < w; x++) { |
unsigned aswap = av_bswap16(a[x]);
unsigned u = av_bswap16(s[x])*aswap + target_table[((x^y)>>5)&1][plane]*(max-aswap) + off; |
d[x] = av_clip((u + (u >> shift)) >> shift, 0, max); |
} |
} |
} else { |
const uint8_t *s = src[plane ] + srcStride[plane] * y; |
const uint8_t *a = src[plane_count] + srcStride[plane_count] * y; |
uint8_t *d = dst[plane ] + dstStride[plane] * y; |
for (x = 0; x < w; x++) { |
unsigned u = s[x]*a[x] + target_table[((x^y)>>5)&1][plane]*(255-a[x]) + 128; |
d[x] = (257*u) >> 16; |
} |
} |
} |
} |
} |
} else { |
int alpha_pos = desc->comp[plane_count].offset_plus1 - 1; |
int w = c->srcW; |
for (y = srcSliceY; y < srcSliceH; y++) { |
if (sixteen_bits) { |
const uint16_t *s = (const uint16_t *)(src[0] + srcStride[0] * y + 2*!alpha_pos); |
const uint16_t *a = (const uint16_t *)(src[0] + srcStride[0] * y + alpha_pos); |
uint16_t *d = ( uint16_t *)(dst[0] + dstStride[0] * y); |
if ((!isBE(c->srcFormat)) == !HAVE_BIGENDIAN) { |
for (x = 0; x < w; x++) { |
for (plane = 0; plane < plane_count; plane++) { |
int x_index = (plane_count + 1) * x; |
unsigned u = s[x_index + plane]*a[x_index] + target_table[((x^y)>>5)&1][plane]*(max-a[x_index]) + off; |
d[plane_count*x + plane] = av_clip((u + (u >> shift)) >> shift, 0, max); |
} |
} |
} else { |
for (x = 0; x < w; x++) { |
for (plane = 0; plane < plane_count; plane++) { |
int x_index = (plane_count + 1) * x; |
unsigned aswap = av_bswap16(a[x_index]);
unsigned u = av_bswap16(s[x_index + plane])*aswap + target_table[((x^y)>>5)&1][plane]*(max-aswap) + off; |
d[plane_count*x + plane] = av_clip((u + (u >> shift)) >> shift, 0, max); |
} |
} |
} |
} else { |
const uint8_t *s = src[0] + srcStride[0] * y + !alpha_pos; |
const uint8_t *a = src[0] + srcStride[0] * y + alpha_pos; |
uint8_t *d = dst[0] + dstStride[0] * y; |
for (x = 0; x < w; x++) { |
for (plane = 0; plane < plane_count; plane++) { |
int x_index = (plane_count + 1) * x; |
unsigned u = s[x_index + plane]*a[x_index] + target_table[((x^y)>>5)&1][plane]*(255-a[x_index]) + 128; |
d[plane_count*x + plane] = (257*u) >> 16; |
} |
} |
} |
} |
} |
return 0; |
} |
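The blend above avoids a per-pixel division: for 8-bit data it forms u = s*alpha + target*(255-alpha) + 128 and replaces u/255 with (257*u) >> 16, which is exact round-to-nearest over this input range; the 16-bit path uses the analogous (u + (u >> shift)) >> shift, and ((x^y)>>5)&1 selects between the two checkerboard target values in 32x32 tiles. A standalone check of the 8-bit identity (editorial sketch, assuming only the C standard library):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Same arithmetic as the 8-bit branches of ff_sws_alphablendaway():
 * u = s*alpha + target*(255 - alpha) + 128, then (257*u) >> 16. */
static uint8_t blend_8bit(uint8_t s, uint8_t alpha, uint8_t target)
{
    unsigned u = s * alpha + target * (255 - alpha) + 128;
    return (uint8_t)((257 * u) >> 16);
}

int main(void)
{
    unsigned s, a, t;
    for (s = 0; s < 256; s++)
        for (a = 0; a < 256; a++)
            for (t = 0; t < 256; t += 85) {
                unsigned x = s * a + t * (255 - a);
                /* (2x + 255) / 510 is round-to-nearest of x / 255 */
                assert(blend_8bit(s, a, t) == (2 * x + 255) / 510);
            }
    printf("opaque: %u, transparent over target 16: %u\n",
           blend_8bit(200, 255, 16), blend_8bit(200, 0, 16));
    return 0;
}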
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/arm/Makefile |
---|
0,0 → 1,4 |
# OBJS += arm/swscale_unscaled.o |
# NEON-OBJS += arm/rgb2yuv_neon_32.o |
# NEON-OBJS += arm/rgb2yuv_neon_16.o |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/arm/rgb2yuv_neon_16.S |
---|
0,0 → 1,80 |
/* |
* Copyright (C) 2013 Xiaolei Yu <dreifachstein@gmail.com> |
* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
#include "rgb2yuv_neon_common.S" |
/* downsampled R16G16B16 x8 */ |
alias_qw r16x8, q7 |
alias_qw g16x8, q8 |
alias_qw b16x8, q9 |
alias n16x16_l, q11 |
alias n16x16_h, q12 |
alias y16x16_l, q13 |
alias y16x16_h, q14 |
alias_qw y8x16, q15 |
.macro init src |
vld3.i32 {q13_l, q14_l, q15_l}, [\src]! |
vld3.i32 {q13_h[0], q14_h[0], q15_h[0]}, [\src] |
vrshrn.i32 CO_R, q13, #7 |
vrshrn.i32 CO_G, q14, #7 |
vrshrn.i32 CO_B, q15, #7 |
vmov.u8 BIAS_Y, #16 |
vmov.u8 BIAS_U, #128 |
.endm |
.macro compute_y_16x1_step action, s8x16, coeff |
vmovl.u8 n16x16_l, \s8x16\()_l |
vmovl.u8 n16x16_h, \s8x16\()_h |
\action y16x16_l, n16x16_l, \coeff |
\action y16x16_h, n16x16_h, \coeff |
.endm |
.macro compute_y_16x1 |
compute_y_16x1_step vmul, r8x16, CO_RY |
compute_y_16x1_step vmla, g8x16, CO_GY |
compute_y_16x1_step vmla, b8x16, CO_BY |
vrshrn.i16 y8x16_l, y16x16_l, #8 |
vrshrn.i16 y8x16_h, y16x16_h, #8 |
vadd.u8 y8x16, y8x16, BIAS_Y |
.endm |
alias c16x8, q15 |
alias_qw c8x8x2, q10 |
.macro compute_chroma_8x1 c, C |
vmul c16x8, r16x8, CO_R\C |
vmla c16x8, g16x8, CO_G\C |
vmla c16x8, b16x8, CO_B\C |
vrshrn.i16 \c\()8x8, c16x8, #8 |
vadd.u8 \c\()8x8, \c\()8x8, BIAS_\C |
.endm |
loop_420sp rgbx, nv12, init, kernel_420_16x2, 16 |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/arm/rgb2yuv_neon_32.S |
---|
0,0 → 1,119 |
/* |
* Copyright (C) 2013 Xiaolei Yu <dreifachstein@gmail.com> |
* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
#include "rgb2yuv_neon_common.S" |
/* downsampled R16G16B16 x8 */ |
alias_qw r16x8, q7 |
alias_qw g16x8, q8 |
alias_qw b16x8, q9 |
alias n16x16_o, q11 |
alias n16x16_ol, q11_l |
alias n16x16_oh, q11_h |
alias y32x16_el, q12 |
alias y32x16_eh, q13 |
alias y32x16_ol, q14 |
alias y32x16_oh, q15 |
alias y16x16_e, q12 |
alias y16x16_el, q12_l |
alias y16x16_eh, q12_h |
alias y16x16_o, q13 |
alias y16x16_ol, q13_l |
alias y16x16_oh, q13_h |
alias y8x16, y16x16_e |
.macro init src |
// load s32x3x3, narrow to s16x3x3 |
vld3.i32 {q13_l, q14_l, q15_l}, [\src]! |
vld3.i32 {q13_h[0], q14_h[0], q15_h[0]}, [\src] |
vmovn.i32 CO_R, q13 |
vmovn.i32 CO_G, q14 |
vmovn.i32 CO_B, q15 |
vmov.u8 BIAS_Y, #16 |
vmov.u8 BIAS_U, #128 |
.endm |
.macro compute_y_16x1_step action, s8x16, coeff |
vmov.u8 n16x16_o, #0 |
vtrn.u8 \s8x16, n16x16_o |
\action y32x16_el, \s8x16\()_l, \coeff |
\action y32x16_eh, \s8x16\()_h, \coeff |
\action y32x16_ol, n16x16_ol, \coeff |
\action y32x16_oh, n16x16_oh, \coeff |
.endm |
/* |
* in: r8x16, g8x16, b8x16 |
* out: y8x16 |
* clobber: q11-q15, r8x16, g8x16, b8x16 |
*/ |
.macro compute_y_16x1 |
compute_y_16x1_step vmull, r8x16, CO_RY |
compute_y_16x1_step vmlal, g8x16, CO_GY |
compute_y_16x1_step vmlal, b8x16, CO_BY |
vrshrn.i32 y16x16_el, y32x16_el, #15 |
vrshrn.i32 y16x16_eh, y32x16_eh, #15 |
vrshrn.i32 y16x16_ol, y32x16_ol, #15 |
vrshrn.i32 y16x16_oh, y32x16_oh, #15 |
vtrn.8 y16x16_e, y16x16_o |
vadd.u8 y8x16, y8x16, BIAS_Y |
.endm |
alias c32x8_l, q14 |
alias c32x8_h, q15 |
alias_qw c16x8, q13 |
alias_qw c8x8x2, q10 |
.macro compute_chroma_8x1_step action, s16x8, coeff |
\action c32x8_l, \s16x8\()_l, \coeff |
\action c32x8_h, \s16x8\()_h, \coeff |
.endm |
/* |
* in: r16x8, g16x8, b16x8 |
* out: c8x8 |
* clobber: q14-q15 |
*/ |
.macro compute_chroma_8x1 c, C |
compute_chroma_8x1_step vmull, r16x8, CO_R\C |
compute_chroma_8x1_step vmlal, g16x8, CO_G\C |
compute_chroma_8x1_step vmlal, b16x8, CO_B\C |
vrshrn.i32 c16x8_l, c32x8_l, #15 |
vrshrn.i32 c16x8_h, c32x8_h, #15 |
vmovn.i16 \c\()8x8, c16x8 |
vadd.u8 \c\()8x8, \c\()8x8, BIAS_\C |
.endm |
loop_420sp rgbx, nv12, init, kernel_420_16x2, 32 |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/arm/rgb2yuv_neon_common.S |
---|
0,0 → 1,291 |
/* |
* Copyright (C) 2013 Xiaolei Yu <dreifachstein@gmail.com> |
* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
#include "libavutil/arm/asm.S" |
.macro alias name, tgt, set=1 |
.if \set != 0 |
\name .req \tgt |
.else |
.unreq \name |
.endif |
.endm |
.altmacro |
.macro alias_dw_all qw, dw_l, dw_h |
alias q\qw\()_l, d\dw_l |
alias q\qw\()_h, d\dw_h |
.if \qw < 15 |
alias_dw_all %(\qw + 1), %(\dw_l + 2), %(\dw_h + 2) |
.endif |
.endm |
alias_dw_all 0, 0, 1 |
.noaltmacro |
.macro alias_qw name, qw, set=1 |
alias \name\(), \qw, \set |
alias \name\()_l, \qw\()_l, \set |
alias \name\()_h, \qw\()_h, \set |
.endm |
.macro prologue |
push {r4-r12, lr} |
vpush {q4-q7} |
.endm |
.macro epilogue |
vpop {q4-q7} |
pop {r4-r12, pc} |
.endm |
.macro load_arg reg, ix |
ldr \reg, [sp, #((10 * 4 + 4 * 16) + (\ix - 4) * 4)] |
.endm |
/* ()_to_()_neon(const uint8_t *src, uint8_t *y, uint8_t *chroma,
* int width, int height, |
* int y_stride, int c_stride, int src_stride, |
* int32_t coeff_table[9]); |
*/ |
.macro alias_loop_420sp set=1 |
alias src, r0, \set |
alias src0, src, \set |
alias y, r1, \set |
alias y0, y, \set |
alias chroma, r2, \set |
alias width, r3, \set |
alias header, width, \set |
alias height, r4, \set |
alias y_stride, r5, \set |
alias c_stride, r6, \set |
alias c_padding, c_stride, \set |
alias src_stride, r7, \set |
alias y0_end, r8, \set |
alias src_padding,r9, \set |
alias y_padding, r10, \set |
alias src1, r11, \set |
alias y1, r12, \set |
alias coeff_table,r12, \set |
.endm |
.macro loop_420sp s_fmt, d_fmt, init, kernel, precision |
function \s_fmt\()_to_\d_fmt\()_neon_\precision, export=1 |
prologue |
alias_loop_420sp |
load_arg height, 4 |
load_arg y_stride, 5 |
load_arg c_stride, 6 |
load_arg src_stride, 7 |
load_arg coeff_table, 8 |
\init coeff_table |
sub y_padding, y_stride, width |
sub c_padding, c_stride, width |
sub src_padding, src_stride, width, LSL #2 |
add y0_end, y0, width |
and header, width, #15 |
add y1, y0, y_stride |
add src1, src0, src_stride |
0: |
cmp header, #0 |
beq 1f |
\kernel \s_fmt, \d_fmt, src0, src1, y0, y1, chroma, header |
1: |
\kernel \s_fmt, \d_fmt, src0, src1, y0, y1, chroma |
cmp y0, y0_end |
blt 1b |
2: |
add y0, y1, y_padding |
add y0_end, y1, y_stride |
add chroma, chroma, c_padding |
add src0, src1, src_padding |
add y1, y0, y_stride |
add src1, src0, src_stride |
subs height, height, #2 |
bgt 0b |
epilogue |
alias_loop_420sp 0 |
endfunc |
.endm |
.macro downsample |
vpaddl.u8 r16x8, r8x16 |
vpaddl.u8 g16x8, g8x16 |
vpaddl.u8 b16x8, b8x16 |
.endm |
/* accumulate and right shift by 2 */
.macro downsample_ars2 |
vpadal.u8 r16x8, r8x16 |
vpadal.u8 g16x8, g8x16 |
vpadal.u8 b16x8, b8x16 |
vrshr.u16 r16x8, r16x8, #2 |
vrshr.u16 g16x8, g16x8, #2 |
vrshr.u16 b16x8, b16x8, #2 |
.endm |
.macro store_y8_16x1 dst, count |
.ifc "\count","" |
vstmia \dst!, {y8x16} |
.else |
vstmia \dst, {y8x16} |
add \dst, \dst, \count |
.endif |
.endm |
.macro store_chroma_nv12_8x1 dst, count |
.ifc "\count","" |
vst2.i8 {u8x8, v8x8}, [\dst]! |
.else |
vst2.i8 {u8x8, v8x8}, [\dst], \count |
.endif |
.endm |
.macro store_chroma_nv21_8x1 dst, count |
.ifc "\count","" |
vst2.i8 {v8x8, u8x8}, [\dst]! |
.else |
vst2.i8 {v8x8, u8x8}, [\dst], \count |
.endif |
.endm |
.macro load_8888_16x1 a, b, c, d, src, count |
.ifc "\count","" |
vld4.8 {\a\()8x16_l, \b\()8x16_l, \c\()8x16_l, \d\()8x16_l}, [\src]! |
vld4.8 {\a\()8x16_h, \b\()8x16_h, \c\()8x16_h, \d\()8x16_h}, [\src]! |
.else |
vld4.8 {\a\()8x16_l, \b\()8x16_l, \c\()8x16_l, \d\()8x16_l}, [\src]! |
vld4.8 {\a\()8x16_h, \b\()8x16_h, \c\()8x16_h, \d\()8x16_h}, [\src] |
sub \src, \src, #32 |
add \src, \src, \count, LSL #2 |
.endif |
.endm |
.macro load_rgbx_16x1 src, count |
load_8888_16x1 r, g, b, x, \src, \count |
.endm |
.macro load_bgrx_16x1 src, count |
load_8888_16x1 b, g, r, x, \src, \count |
.endm |
.macro alias_src_rgbx set=1 |
alias_src_8888 r, g, b, x, \set |
.endm |
.macro alias_src_bgrx set=1 |
alias_src_8888 b, g, r, x, \set |
.endm |
.macro alias_dst_nv12 set=1 |
alias u8x8, c8x8x2_l, \set |
alias v8x8, c8x8x2_h, \set |
.endm |
.macro alias_dst_nv21 set=1 |
alias v8x8, c8x8x2_l, \set |
alias u8x8, c8x8x2_h, \set |
.endm |
// common aliases |
alias CO_R d0 |
CO_RY .dn d0.s16[0] |
CO_RU .dn d0.s16[1] |
CO_RV .dn d0.s16[2] |
alias CO_G d1 |
CO_GY .dn d1.s16[0] |
CO_GU .dn d1.s16[1] |
CO_GV .dn d1.s16[2] |
alias CO_B d2 |
CO_BY .dn d2.s16[0] |
CO_BU .dn d2.s16[1] |
CO_BV .dn d2.s16[2] |
alias BIAS_U, d3 |
alias BIAS_V, BIAS_U |
alias BIAS_Y, q2 |
/* q3-q6 R8G8B8X8 x16 */ |
.macro alias_src_8888 a, b, c, d, set |
alias_qw \a\()8x16, q3, \set |
alias_qw \b\()8x16, q4, \set |
alias_qw \c\()8x16, q5, \set |
alias_qw \d\()8x16, q6, \set |
.endm |
.macro kernel_420_16x2 rgb_fmt, yuv_fmt, rgb0, rgb1, y0, y1, chroma, count |
alias_src_\rgb_fmt |
alias_dst_\yuv_fmt |
load_\rgb_fmt\()_16x1 \rgb0, \count |
downsample |
compute_y_16x1 |
store_y8_16x1 \y0, \count |
load_\rgb_fmt\()_16x1 \rgb1, \count |
downsample_ars2 |
compute_y_16x1 |
store_y8_16x1 \y1, \count |
compute_chroma_8x1 u, U |
compute_chroma_8x1 v, V |
store_chroma_\yuv_fmt\()_8x1 \chroma, \count |
alias_dst_\yuv_fmt 0 |
alias_src_\rgb_fmt 0 |
.endm |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/arm/swscale_unscaled.c |
---|
0,0 → 1,79 |
/* |
* Copyright (C) 2013 Xiaolei Yu <dreifachstein@gmail.com> |
* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
#include "config.h" |
#include "libswscale/swscale.h" |
#include "libswscale/swscale_internal.h" |
#include "libavutil/arm/cpu.h" |
extern void rgbx_to_nv12_neon_32(const uint8_t *src, uint8_t *y, uint8_t *chroma, |
int width, int height, |
int y_stride, int c_stride, int src_stride, |
int32_t coeff_tbl[9]); |
extern void rgbx_to_nv12_neon_16(const uint8_t *src, uint8_t *y, uint8_t *chroma, |
int width, int height, |
int y_stride, int c_stride, int src_stride, |
int32_t coeff_tbl[9]); |
static int rgbx_to_nv12_neon_32_wrapper(SwsContext *context, const uint8_t *src[], |
int srcStride[], int srcSliceY, int srcSliceH, |
uint8_t *dst[], int dstStride[]) { |
rgbx_to_nv12_neon_32(src[0] + srcSliceY * srcStride[0], |
dst[0] + srcSliceY * dstStride[0], |
dst[1] + (srcSliceY / 2) * dstStride[1], |
context->srcW, srcSliceH, |
dstStride[0], dstStride[1], srcStride[0], |
context->input_rgb2yuv_table); |
return 0; |
} |
static int rgbx_to_nv12_neon_16_wrapper(SwsContext *context, const uint8_t *src[], |
int srcStride[], int srcSliceY, int srcSliceH, |
uint8_t *dst[], int dstStride[]) { |
rgbx_to_nv12_neon_16(src[0] + srcSliceY * srcStride[0], |
dst[0] + srcSliceY * dstStride[0], |
dst[1] + (srcSliceY / 2) * dstStride[1], |
context->srcW, srcSliceH, |
dstStride[0], dstStride[1], srcStride[0], |
context->input_rgb2yuv_table); |
return 0; |
} |
static void get_unscaled_swscale_neon(SwsContext *c) { |
int accurate_rnd = c->flags & SWS_ACCURATE_RND; |
if (c->srcFormat == AV_PIX_FMT_RGBA |
&& c->dstFormat == AV_PIX_FMT_NV12 |
&& (c->srcW >= 16)) { |
c->swscale = accurate_rnd ? rgbx_to_nv12_neon_32_wrapper |
: rgbx_to_nv12_neon_16_wrapper; |
} |
} |
void ff_get_unscaled_swscale_arm(SwsContext *c) |
{ |
int cpu_flags = av_get_cpu_flags(); |
if (have_neon(cpu_flags)) |
get_unscaled_swscale_neon(c); |
} |
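For reference, the fast path above is only installed for an unscaled RGBA to NV12 conversion with srcW >= 16, and SWS_ACCURATE_RND picks the 32-bit-precision kernel over the 16-bit one. A minimal caller (editorial sketch using only the public libswscale API; equal source and destination sizes keep the conversion unscaled):

#include <libswscale/swscale.h>

/* On an ARM build with NEON this context matches the condition in
 * get_unscaled_swscale_neon(): the scaler then dispatches to
 * rgbx_to_nv12_neon_32 (accurate nonzero) or rgbx_to_nv12_neon_16. */
static struct SwsContext *make_rgba_to_nv12(int w, int h, int accurate)
{
    int flags = SWS_BILINEAR | (accurate ? SWS_ACCURATE_RND : 0);
    return sws_getContext(w, h, AV_PIX_FMT_RGBA,
                          w, h, AV_PIX_FMT_NV12,
                          flags, NULL, NULL, NULL);
}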
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/bayer_template.c |
---|
0,0 → 1,236 |
/* |
* Bayer-to-RGB/YV12 template |
* Copyright (c) 2011-2014 Peter Ross <pross@xvid.org> |
* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
#if defined(BAYER_BGGR) || defined(BAYER_GBRG) |
#define BAYER_R 0 |
#define BAYER_G 1 |
#define BAYER_B 2 |
#endif |
#if defined(BAYER_RGGB) || defined(BAYER_GRBG) |
#define BAYER_R 2 |
#define BAYER_G 1 |
#define BAYER_B 0 |
#endif |
#if defined(BAYER_8) |
#define BAYER_READ(x) (x) |
#define BAYER_SIZEOF 1 |
#define BAYER_SHIFT 0 |
#endif |
#if defined(BAYER_16LE) |
#define BAYER_READ(x) AV_RL16(&(x)) |
#define BAYER_SIZEOF 2 |
#define BAYER_SHIFT 8 |
#endif |
#if defined(BAYER_16BE) |
#define BAYER_READ(x) AV_RB16(&(x)) |
#define BAYER_SIZEOF 2 |
#define BAYER_SHIFT 8 |
#endif |
#define S(y, x) BAYER_READ(src[(y)*src_stride + BAYER_SIZEOF*(x)]) |
#define T(y, x) (unsigned int)S(y, x) |
#define R(y, x) dst[(y)*dst_stride + (x)*3 + BAYER_R] |
#define G(y, x) dst[(y)*dst_stride + (x)*3 + BAYER_G] |
#define B(y, x) dst[(y)*dst_stride + (x)*3 + BAYER_B] |
#if defined(BAYER_BGGR) || defined(BAYER_RGGB) |
#define BAYER_TO_RGB24_COPY \ |
R(0, 0) = \ |
R(0, 1) = \ |
R(1, 1) = \ |
R(1, 0) = S(1, 1) >> BAYER_SHIFT; \ |
\ |
G(0, 1) = S(0, 1) >> BAYER_SHIFT; \ |
G(0, 0) = \ |
G(1, 1) = (T(0, 1) + T(1, 0)) >> (1 + BAYER_SHIFT); \ |
G(1, 0) = S(1, 0) >> BAYER_SHIFT; \ |
\ |
B(1, 1) = \ |
B(0, 0) = \ |
B(0, 1) = \ |
B(1, 0) = S(0, 0) >> BAYER_SHIFT; |
#define BAYER_TO_RGB24_INTERPOLATE \ |
R(0, 0) = (T(-1, -1) + T(-1, 1) + T(1, -1) + T(1, 1)) >> (2 + BAYER_SHIFT); \ |
G(0, 0) = (T(-1, 0) + T( 0, -1) + T(0, 1) + T(1, 0)) >> (2 + BAYER_SHIFT); \ |
B(0, 0) = S(0, 0) >> BAYER_SHIFT; \ |
\ |
R(0, 1) = (T(-1, 1) + T(1, 1)) >> (1 + BAYER_SHIFT); \ |
G(0, 1) = S(0, 1) >> BAYER_SHIFT; \ |
B(0, 1) = (T(0, 0) + T(0, 2)) >> (1 + BAYER_SHIFT); \ |
\ |
R(1, 0) = (T(1, -1) + T(1, 1)) >> (1 + BAYER_SHIFT); \ |
G(1, 0) = S(1, 0) >> BAYER_SHIFT; \ |
B(1, 0) = (T(0, 0) + T(2, 0)) >> (1 + BAYER_SHIFT); \ |
\ |
R(1, 1) = S(1, 1) >> BAYER_SHIFT; \ |
G(1, 1) = (T(0, 1) + T(1, 0) + T(1, 2) + T(2, 1)) >> (2 + BAYER_SHIFT); \ |
B(1, 1) = (T(0, 0) + T(0, 2) + T(2, 0) + T(2, 2)) >> (2 + BAYER_SHIFT); |
#else |
#define BAYER_TO_RGB24_COPY \ |
R(0, 0) = \ |
R(0, 1) = \ |
R(1, 1) = \ |
R(1, 0) = S(1, 0) >> BAYER_SHIFT; \ |
\ |
G(0, 0) = S(0, 0) >> BAYER_SHIFT; \ |
G(1, 1) = S(1, 1) >> BAYER_SHIFT; \ |
G(0, 1) = \ |
G(1, 0) = (T(0, 0) + T(1, 1)) >> (1 + BAYER_SHIFT); \ |
\ |
B(1, 1) = \ |
B(0, 0) = \ |
B(0, 1) = \ |
B(1, 0) = S(0, 1) >> BAYER_SHIFT; |
#define BAYER_TO_RGB24_INTERPOLATE \ |
R(0, 0) = (T(-1, 0) + T(1, 0)) >> (1 + BAYER_SHIFT); \ |
G(0, 0) = S(0, 0) >> BAYER_SHIFT; \ |
B(0, 0) = (T(0, -1) + T(0, 1)) >> (1 + BAYER_SHIFT); \ |
\ |
R(0, 1) = (T(-1, 0) + T(-1, 2) + T(1, 0) + T(1, 2)) >> (2 + BAYER_SHIFT); \ |
G(0, 1) = (T(-1, 1) + T(0, 0) + T(0, 2) + T(1, 1)) >> (2 + BAYER_SHIFT); \ |
B(0, 1) = S(0, 1) >> BAYER_SHIFT; \ |
\ |
R(1, 0) = S(1, 0) >> BAYER_SHIFT; \ |
G(1, 0) = (T(0, 0) + T(1, -1) + T(1, 1) + T(2, 0)) >> (2 + BAYER_SHIFT); \ |
B(1, 0) = (T(0, -1) + T(0, 1) + T(2, -1) + T(2, 1)) >> (2 + BAYER_SHIFT); \ |
\ |
R(1, 1) = (T(1, 0) + T(1, 2)) >> (1 + BAYER_SHIFT); \ |
G(1, 1) = S(1, 1) >> BAYER_SHIFT; \ |
B(1, 1) = (T(0, 1) + T(2, 1)) >> (1 + BAYER_SHIFT); |
#endif |
/** |
* invoke ff_rgb24toyv12 for 2x2 pixels |
*/ |
#define rgb24toyv12_2x2(src, dstY, dstU, dstV, luma_stride, src_stride, rgb2yuv) \ |
ff_rgb24toyv12(src, dstY, dstV, dstU, 2, 2, luma_stride, 0, src_stride, rgb2yuv) |
static void BAYER_RENAME(rgb24_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int width) |
{ |
int i; |
for (i = 0 ; i < width; i+= 2) { |
BAYER_TO_RGB24_COPY |
src += 2 * BAYER_SIZEOF; |
dst += 6; |
} |
} |
static void BAYER_RENAME(rgb24_interpolate)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int width) |
{ |
int i; |
BAYER_TO_RGB24_COPY |
src += 2 * BAYER_SIZEOF; |
dst += 6; |
for (i = 2 ; i < width - 2; i+= 2) { |
BAYER_TO_RGB24_INTERPOLATE |
src += 2 * BAYER_SIZEOF; |
dst += 6; |
} |
if (width > 2) { |
BAYER_TO_RGB24_COPY |
} |
} |
static void BAYER_RENAME(yv12_copy)(const uint8_t *src, int src_stride, uint8_t *dstY, uint8_t *dstU, uint8_t *dstV, int luma_stride, int width, int32_t *rgb2yuv) |
{ |
uint8_t dst[12]; |
const int dst_stride = 6; |
int i; |
for (i = 0 ; i < width; i+= 2) { |
BAYER_TO_RGB24_COPY |
rgb24toyv12_2x2(dst, dstY, dstU, dstV, luma_stride, dst_stride, rgb2yuv); |
src += 2 * BAYER_SIZEOF; |
dstY += 2; |
dstU++; |
dstV++; |
} |
} |
static void BAYER_RENAME(yv12_interpolate)(const uint8_t *src, int src_stride, uint8_t *dstY, uint8_t *dstU, uint8_t *dstV, int luma_stride, int width, int32_t *rgb2yuv) |
{ |
uint8_t dst[12]; |
const int dst_stride = 6; |
int i; |
BAYER_TO_RGB24_COPY |
rgb24toyv12_2x2(dst, dstY, dstU, dstV, luma_stride, dst_stride, rgb2yuv); |
src += 2 * BAYER_SIZEOF; |
dstY += 2; |
dstU++; |
dstV++; |
for (i = 2 ; i < width - 2; i+= 2) { |
BAYER_TO_RGB24_INTERPOLATE |
rgb24toyv12_2x2(dst, dstY, dstU, dstV, luma_stride, dst_stride, rgb2yuv); |
src += 2 * BAYER_SIZEOF; |
dstY += 2; |
dstU++; |
dstV++; |
} |
if (width > 2) { |
BAYER_TO_RGB24_COPY |
rgb24toyv12_2x2(dst, dstY, dstU, dstV, luma_stride, dst_stride, rgb2yuv); |
} |
} |
#undef S |
#undef T |
#undef R |
#undef G |
#undef B |
#undef BAYER_TO_RGB24_COPY |
#undef BAYER_TO_RGB24_INTERPOLATE |
#undef BAYER_RENAME |
#undef BAYER_R |
#undef BAYER_G |
#undef BAYER_B |
#undef BAYER_READ |
#undef BAYER_SIZEOF |
#undef BAYER_SHIFT |
#if defined(BAYER_BGGR) |
#undef BAYER_BGGR |
#endif |
#if defined(BAYER_RGGB) |
#undef BAYER_RGGB |
#endif |
#if defined(BAYER_GBRG) |
#undef BAYER_GBRG |
#endif |
#if defined(BAYER_GRBG) |
#undef BAYER_GRBG |
#endif |
#if defined(BAYER_8) |
#undef BAYER_8 |
#endif |
#if defined(BAYER_16LE) |
#undef BAYER_16LE |
#endif |
#if defined(BAYER_16BE) |
#undef BAYER_16BE |
#endif |
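To see what the template expands to, here is the COPY case written out by hand for one 8-bit BGGR cell (editorial sketch; bggr8_copy_2x2 is a hypothetical name, not part of the template): the cell's single red and blue samples are replicated across the 2x2 output block, and the two greens are averaged for the positions that lack one.

#include <stdint.h>

/* One 2x2 BGGR cell (row 0: B G, row 1: G R) to four RGB24 pixels,
 * mirroring BAYER_TO_RGB24_COPY with BAYER_8 defined (BAYER_SHIFT == 0). */
static void bggr8_copy_2x2(const uint8_t *src, int src_stride,
                           uint8_t *dst, int dst_stride)
{
    uint8_t b    = src[0];              /* S(0,0) */
    uint8_t g01  = src[1];              /* S(0,1) */
    uint8_t g10  = src[src_stride];     /* S(1,0) */
    uint8_t r    = src[src_stride + 1]; /* S(1,1) */
    uint8_t gavg = (g01 + g10) >> 1;    /* G(0,0) and G(1,1) */
    int x, y;
    for (y = 0; y < 2; y++)
        for (x = 0; x < 2; x++) {
            uint8_t *p = dst + y * dst_stride + 3 * x;
            p[0] = r;                                 /* BAYER_R == 0 */
            p[1] = (x == y) ? gavg : (y ? g10 : g01); /* BAYER_G == 1 */
            p[2] = b;                                 /* BAYER_B == 2 */
        }
}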
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/colorspace-test.c |
---|
0,0 → 1,170 |
/* |
* Copyright (C) 2002 Michael Niedermayer <michaelni@gmx.at> |
* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
#include <stdio.h> |
#include <string.h> /* for memset() */ |
#include <stdlib.h> |
#include <inttypes.h> |
#include "swscale.h" |
#include "rgb2rgb.h" |
#include "libavutil/mem.h" |
#define SIZE 1000 |
#define srcByte 0x55 |
#define dstByte 0xBB |
#define FUNC(s, d, n) { s, d, #n, n } |
int main(int argc, char **argv) |
{ |
int i, funcNum; |
uint8_t *srcBuffer = av_malloc(SIZE); |
uint8_t *dstBuffer = av_malloc(SIZE); |
int failedNum = 0; |
int passedNum = 0; |
if (!srcBuffer || !dstBuffer) |
return -1; |
av_log(NULL, AV_LOG_INFO, "memory corruption test ...\n"); |
sws_rgb2rgb_init(); |
for (funcNum = 0; ; funcNum++) { |
struct func_info_s { |
int src_bpp; |
int dst_bpp; |
const char *name; |
void (*func)(const uint8_t *src, uint8_t *dst, int src_size); |
} func_info[] = { |
FUNC(2, 2, rgb12to15), |
FUNC(2, 2, rgb15to16), |
FUNC(2, 3, rgb15to24), |
FUNC(2, 4, rgb15to32), |
FUNC(2, 3, rgb16to24), |
FUNC(2, 4, rgb16to32), |
FUNC(3, 2, rgb24to15), |
FUNC(3, 2, rgb24to16), |
FUNC(3, 4, rgb24to32), |
FUNC(4, 2, rgb32to15), |
FUNC(4, 2, rgb32to16), |
FUNC(4, 3, rgb32to24), |
FUNC(2, 2, rgb16to15), |
FUNC(2, 2, rgb12tobgr12), |
FUNC(2, 2, rgb15tobgr15), |
FUNC(2, 2, rgb15tobgr16), |
FUNC(2, 3, rgb15tobgr24), |
FUNC(2, 4, rgb15tobgr32), |
FUNC(2, 2, rgb16tobgr15), |
FUNC(2, 2, rgb16tobgr16), |
FUNC(2, 3, rgb16tobgr24), |
FUNC(2, 4, rgb16tobgr32), |
FUNC(3, 2, rgb24tobgr15), |
FUNC(3, 2, rgb24tobgr16), |
FUNC(3, 3, rgb24tobgr24), |
FUNC(3, 4, rgb24tobgr32), |
FUNC(4, 2, rgb32tobgr15), |
FUNC(4, 2, rgb32tobgr16), |
FUNC(4, 3, rgb32tobgr24), |
FUNC(4, 4, shuffle_bytes_2103), /* rgb32tobgr32 */ |
FUNC(6, 6, rgb48tobgr48_nobswap), |
FUNC(6, 6, rgb48tobgr48_bswap), |
FUNC(8, 6, rgb64to48_nobswap), |
FUNC(8, 6, rgb64to48_bswap), |
FUNC(8, 6, rgb64tobgr48_nobswap), |
FUNC(8, 6, rgb64tobgr48_bswap), |
FUNC(0, 0, NULL) |
}; |
int width; |
int failed = 0; |
int srcBpp = 0; |
int dstBpp = 0; |
if (!func_info[funcNum].func) |
break; |
av_log(NULL, AV_LOG_INFO, "."); |
memset(srcBuffer, srcByte, SIZE); |
for (width = 63; width > 0; width--) { |
int dstOffset; |
for (dstOffset = 128; dstOffset < 196; dstOffset += 4) { |
int srcOffset; |
memset(dstBuffer, dstByte, SIZE); |
for (srcOffset = 128; srcOffset < 196; srcOffset += 4) { |
uint8_t *src = srcBuffer + srcOffset; |
uint8_t *dst = dstBuffer + dstOffset; |
const char *name = NULL; |
// don't fill the screen with shit ... |
if (failed) |
break; |
srcBpp = func_info[funcNum].src_bpp; |
dstBpp = func_info[funcNum].dst_bpp; |
name = func_info[funcNum].name; |
func_info[funcNum].func(src, dst, width * srcBpp); |
if (!srcBpp) |
break; |
for (i = 0; i < SIZE; i++) { |
if (srcBuffer[i] != srcByte) { |
av_log(NULL, AV_LOG_INFO, |
"src damaged at %d w:%d src:%d dst:%d %s\n", |
i, width, srcOffset, dstOffset, name); |
failed = 1; |
break; |
} |
} |
for (i = 0; i < dstOffset; i++) { |
if (dstBuffer[i] != dstByte) { |
av_log(NULL, AV_LOG_INFO, |
"dst damaged at %d w:%d src:%d dst:%d %s\n", |
i, width, srcOffset, dstOffset, name); |
failed = 1; |
break; |
} |
} |
for (i = dstOffset + width * dstBpp; i < SIZE; i++) { |
if (dstBuffer[i] != dstByte) { |
av_log(NULL, AV_LOG_INFO, |
"dst damaged at %d w:%d src:%d dst:%d %s\n", |
i, width, srcOffset, dstOffset, name); |
failed = 1; |
break; |
} |
} |
} |
} |
} |
if (failed) |
failedNum++; |
else if (srcBpp) |
passedNum++; |
} |
av_log(NULL, AV_LOG_INFO, |
"\n%d converters passed, %d converters randomly overwrote memory\n", |
passedNum, failedNum); |
return failedNum; |
} |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/gamma.c |
---|
0,0 → 1,72 |
/* |
* Copyright (C) 2015 Pedro Arthur <bygrandao@gmail.com> |
* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
#include "swscale_internal.h" |
typedef struct GammaContext |
{ |
uint16_t *table; |
} GammaContext; |
// gamma_convert expects a 16-bit RGB format
// it writes directly into the src slice, so the slice must be modifiable (ensured through the cascade context)
static int gamma_convert(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int sliceH) |
{ |
GammaContext *instance = desc->instance; |
uint16_t *table = instance->table; |
int srcW = desc->src->width; |
int i; |
for (i = 0; i < sliceH; ++i) { |
uint8_t ** src = desc->src->plane[0].line; |
int src_pos = sliceY+i - desc->src->plane[0].sliceY; |
uint16_t *src1 = (uint16_t*)*(src+src_pos); |
int j; |
for (j = 0; j < srcW; ++j) { |
uint16_t r = AV_RL16(src1 + j*4 + 0); |
uint16_t g = AV_RL16(src1 + j*4 + 1); |
uint16_t b = AV_RL16(src1 + j*4 + 2); |
AV_WL16(src1 + j*4 + 0, table[r]); |
AV_WL16(src1 + j*4 + 1, table[g]); |
AV_WL16(src1 + j*4 + 2, table[b]); |
} |
} |
return sliceH; |
} |
int ff_init_gamma_convert(SwsFilterDescriptor *desc, SwsSlice * src, uint16_t *table) |
{ |
GammaContext *li = av_malloc(sizeof(GammaContext)); |
if (!li) |
return AVERROR(ENOMEM); |
li->table = table; |
desc->instance = li; |
desc->src = src; |
desc->dst = NULL; |
desc->process = &gamma_convert; |
return 0; |
} |
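gamma_convert indexes `table` directly with the 16-bit sample read by AV_RL16, so a table passed to ff_init_gamma_convert() needs one entry per possible value. A sketch of building such a table (editorial; the entry formula is an assumption modeled on a plain power-law curve, not copied from the swscale setup code):

#include <math.h>
#include <stdint.h>
#include <stdlib.h>

/* Full-range 16-bit power-law lookup table: table[v] = (v/65535)^gamma,
 * rescaled to 0..65535. Caller owns the allocation. */
static uint16_t *build_gamma_table(double gamma)
{
    uint16_t *table = malloc(65536 * sizeof(*table));
    int i;
    if (!table)
        return NULL;
    for (i = 0; i < 65536; i++)
        table[i] = (uint16_t)lrint(pow(i / 65535.0, gamma) * 65535.0);
    return table;
}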
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/hscale.c |
---|
0,0 → 1,274 |
/* |
* Copyright (C) 2015 Pedro Arthur <bygrandao@gmail.com> |
* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
#include "swscale_internal.h" |
static int lum_h_scale(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int sliceH) |
{ |
FilterContext *instance = desc->instance; |
int srcW = desc->src->width; |
int dstW = desc->dst->width; |
int xInc = instance->xInc; |
int i; |
for (i = 0; i < sliceH; ++i) { |
uint8_t ** src = desc->src->plane[0].line; |
uint8_t ** dst = desc->dst->plane[0].line; |
int src_pos = sliceY+i - desc->src->plane[0].sliceY; |
int dst_pos = sliceY+i - desc->dst->plane[0].sliceY; |
if (c->hyscale_fast) { |
c->hyscale_fast(c, (int16_t*)dst[dst_pos], dstW, src[src_pos], srcW, xInc); |
} else { |
c->hyScale(c, (int16_t*)dst[dst_pos], dstW, (const uint8_t *)src[src_pos], instance->filter, |
instance->filter_pos, instance->filter_size); |
} |
if (c->lumConvertRange) |
c->lumConvertRange((int16_t*)dst[dst_pos], dstW); |
desc->dst->plane[0].sliceH += 1; |
if (desc->alpha) { |
src = desc->src->plane[3].line; |
dst = desc->dst->plane[3].line; |
src_pos = sliceY+i - desc->src->plane[3].sliceY; |
dst_pos = sliceY+i - desc->dst->plane[3].sliceY; |
desc->dst->plane[3].sliceH += 1; |
if (c->hyscale_fast) { |
c->hyscale_fast(c, (int16_t*)dst[dst_pos], dstW, src[src_pos], srcW, xInc); |
} else { |
c->hyScale(c, (int16_t*)dst[dst_pos], dstW, (const uint8_t *)src[src_pos], instance->filter, |
instance->filter_pos, instance->filter_size); |
} |
} |
} |
return sliceH; |
} |
static int lum_convert(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int sliceH) |
{ |
int srcW = desc->src->width; |
ColorContext * instance = desc->instance; |
uint32_t * pal = instance->pal; |
int i; |
desc->dst->plane[0].sliceY = sliceY; |
desc->dst->plane[0].sliceH = sliceH; |
desc->dst->plane[3].sliceY = sliceY; |
desc->dst->plane[3].sliceH = sliceH; |
for (i = 0; i < sliceH; ++i) { |
int sp0 = sliceY+i - desc->src->plane[0].sliceY; |
int sp1 = ((sliceY+i) >> desc->src->v_chr_sub_sample) - desc->src->plane[1].sliceY; |
const uint8_t * src[4] = { desc->src->plane[0].line[sp0], |
desc->src->plane[1].line[sp1], |
desc->src->plane[2].line[sp1], |
desc->src->plane[3].line[sp0]}; |
uint8_t * dst = desc->dst->plane[0].line[i]; |
if (c->lumToYV12) { |
c->lumToYV12(dst, src[0], src[1], src[2], srcW, pal); |
} else if (c->readLumPlanar) { |
c->readLumPlanar(dst, src, srcW, c->input_rgb2yuv_table); |
} |
if (desc->alpha) { |
dst = desc->dst->plane[3].line[i]; |
if (c->alpToYV12) { |
c->alpToYV12(dst, src[3], src[1], src[2], srcW, pal); |
} else if (c->readAlpPlanar) { |
c->readAlpPlanar(dst, src, srcW, NULL); |
} |
} |
} |
return sliceH; |
} |
int ff_init_desc_fmt_convert(SwsFilterDescriptor *desc, SwsSlice * src, SwsSlice *dst, uint32_t *pal) |
{ |
ColorContext * li = av_malloc(sizeof(ColorContext)); |
if (!li) |
return AVERROR(ENOMEM); |
li->pal = pal; |
desc->instance = li; |
desc->alpha = isALPHA(src->fmt) && isALPHA(dst->fmt); |
desc->src = src;
desc->dst = dst; |
desc->process = &lum_convert; |
return 0; |
} |
int ff_init_desc_hscale(SwsFilterDescriptor *desc, SwsSlice *src, SwsSlice *dst, uint16_t *filter, int * filter_pos, int filter_size, int xInc) |
{ |
FilterContext *li = av_malloc(sizeof(FilterContext)); |
if (!li) |
return AVERROR(ENOMEM); |
li->filter = filter; |
li->filter_pos = filter_pos; |
li->filter_size = filter_size; |
li->xInc = xInc; |
desc->instance = li; |
desc->alpha = isALPHA(src->fmt) && isALPHA(dst->fmt); |
desc->src = src; |
desc->dst = dst; |
desc->process = &lum_h_scale; |
return 0; |
} |
static int chr_h_scale(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int sliceH) |
{ |
FilterContext *instance = desc->instance; |
int srcW = FF_CEIL_RSHIFT(desc->src->width, desc->src->h_chr_sub_sample); |
int dstW = FF_CEIL_RSHIFT(desc->dst->width, desc->dst->h_chr_sub_sample); |
int xInc = instance->xInc; |
uint8_t ** src1 = desc->src->plane[1].line; |
uint8_t ** dst1 = desc->dst->plane[1].line; |
uint8_t ** src2 = desc->src->plane[2].line; |
uint8_t ** dst2 = desc->dst->plane[2].line; |
int src_pos1 = sliceY - desc->src->plane[1].sliceY; |
int dst_pos1 = sliceY - desc->dst->plane[1].sliceY; |
int src_pos2 = sliceY - desc->src->plane[2].sliceY; |
int dst_pos2 = sliceY - desc->dst->plane[2].sliceY; |
int i; |
for (i = 0; i < sliceH; ++i) { |
if (c->hcscale_fast) { |
c->hcscale_fast(c, (uint16_t*)dst1[dst_pos1+i], (uint16_t*)dst2[dst_pos2+i], dstW, src1[src_pos1+i], src2[src_pos2+i], srcW, xInc); |
} else { |
c->hcScale(c, (uint16_t*)dst1[dst_pos1+i], dstW, src1[src_pos1+i], instance->filter, instance->filter_pos, instance->filter_size); |
c->hcScale(c, (uint16_t*)dst2[dst_pos2+i], dstW, src2[src_pos2+i], instance->filter, instance->filter_pos, instance->filter_size); |
} |
if (c->chrConvertRange) |
c->chrConvertRange((uint16_t*)dst1[dst_pos1+i], (uint16_t*)dst2[dst_pos2+i], dstW); |
desc->dst->plane[1].sliceH += 1; |
desc->dst->plane[2].sliceH += 1; |
} |
return sliceH; |
} |
static int chr_convert(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int sliceH) |
{ |
int srcW = FF_CEIL_RSHIFT(desc->src->width, desc->src->h_chr_sub_sample); |
ColorContext * instance = desc->instance; |
uint32_t * pal = instance->pal; |
int sp0 = (sliceY - (desc->src->plane[0].sliceY >> desc->src->v_chr_sub_sample)) << desc->src->v_chr_sub_sample; |
int sp1 = sliceY - desc->src->plane[1].sliceY; |
int i; |
desc->dst->plane[1].sliceY = sliceY; |
desc->dst->plane[1].sliceH = sliceH; |
desc->dst->plane[2].sliceY = sliceY; |
desc->dst->plane[2].sliceH = sliceH; |
for (i = 0; i < sliceH; ++i) { |
const uint8_t * src[4] = { desc->src->plane[0].line[sp0+i], |
desc->src->plane[1].line[sp1+i], |
desc->src->plane[2].line[sp1+i], |
desc->src->plane[3].line[sp0+i]}; |
uint8_t * dst1 = desc->dst->plane[1].line[i]; |
uint8_t * dst2 = desc->dst->plane[2].line[i]; |
if (c->chrToYV12) { |
c->chrToYV12(dst1, dst2, src[0], src[1], src[2], srcW, pal); |
} else if (c->readChrPlanar) { |
c->readChrPlanar(dst1, dst2, src, srcW, c->input_rgb2yuv_table); |
} |
} |
return sliceH; |
} |
int ff_init_desc_cfmt_convert(SwsFilterDescriptor *desc, SwsSlice * src, SwsSlice *dst, uint32_t *pal) |
{ |
ColorContext * li = av_malloc(sizeof(ColorContext)); |
if (!li) |
return AVERROR(ENOMEM); |
li->pal = pal; |
desc->instance = li; |
desc->src = src;
desc->dst = dst; |
desc->process = &chr_convert; |
return 0; |
} |
int ff_init_desc_chscale(SwsFilterDescriptor *desc, SwsSlice *src, SwsSlice *dst, uint16_t *filter, int * filter_pos, int filter_size, int xInc) |
{ |
FilterContext *li = av_malloc(sizeof(FilterContext)); |
if (!li) |
return AVERROR(ENOMEM); |
li->filter = filter; |
li->filter_pos = filter_pos; |
li->filter_size = filter_size; |
li->xInc = xInc; |
desc->instance = li; |
desc->alpha = isALPHA(src->fmt) && isALPHA(dst->fmt); |
desc->src = src; |
desc->dst = dst; |
desc->process = &chr_h_scale; |
return 0; |
} |
static int no_chr_scale(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int sliceH) |
{ |
desc->dst->plane[1].sliceY = sliceY + sliceH - desc->dst->plane[1].available_lines; |
desc->dst->plane[1].sliceH = desc->dst->plane[1].available_lines; |
desc->dst->plane[2].sliceY = sliceY + sliceH - desc->dst->plane[2].available_lines; |
desc->dst->plane[2].sliceH = desc->dst->plane[2].available_lines; |
return 0; |
} |
int ff_init_desc_no_chr(SwsFilterDescriptor *desc, SwsSlice * src, SwsSlice *dst) |
{ |
desc->src = src; |
desc->dst = dst; |
desc->alpha = 0; |
desc->instance = NULL; |
desc->process = &no_chr_scale; |
return 0; |
} |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/hscale_fast_bilinear.c |
---|
0,0 → 1,55 |
/* |
* Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at> |
* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
#include "swscale_internal.h" |
void ff_hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth, |
const uint8_t *src, int srcW, int xInc) |
{ |
int i; |
unsigned int xpos = 0; |
for (i = 0; i < dstWidth; i++) { |
register unsigned int xx = xpos >> 16; |
register unsigned int xalpha = (xpos & 0xFFFF) >> 9; |
dst[i] = (src[xx] << 7) + (src[xx + 1] - src[xx]) * xalpha; |
xpos += xInc; |
} |
for (i = dstWidth - 1; (i * xInc) >> 16 >= srcW - 1; i--)
dst[i] = src[srcW-1]*128; |
} |
void ff_hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2, |
int dstWidth, const uint8_t *src1, |
const uint8_t *src2, int srcW, int xInc) |
{ |
int i; |
unsigned int xpos = 0; |
for (i = 0; i < dstWidth; i++) { |
register unsigned int xx = xpos >> 16; |
register unsigned int xalpha = (xpos & 0xFFFF) >> 9; |
dst1[i] = (src1[xx] * (xalpha ^ 127) + src1[xx + 1] * xalpha); |
dst2[i] = (src2[xx] * (xalpha ^ 127) + src2[xx + 1] * xalpha); |
xpos += xInc; |
} |
for (i = dstWidth - 1; (i * xInc) >> 16 >= srcW - 1; i--) {
dst1[i] = src1[srcW-1]*128; |
dst2[i] = src2[srcW-1]*128; |
} |
} |
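Both loops above walk the source with a 16.16 fixed-point position: xx is the integer sample index, xalpha keeps the top 7 fraction bits, so each output is a 15-bit-scaled blend of two neighbours, and the trailing loop rewrites outputs whose right neighbour would fall off the row. A standalone sketch of the same arithmetic (editorial; the one byte of padding stands in for swscale's padded line buffers):

#include <stdint.h>
#include <stdio.h>

/* 16.16 fixed-point horizontal scale of an 8-bit row, as in
 * ff_hyscale_fast_c(); results are left-shifted by 7 (15-bit range). */
static void hscale_fast(int16_t *dst, int dstW, const uint8_t *src, int srcW)
{
    unsigned xInc = ((unsigned)srcW << 16) / dstW; /* source step per pixel */
    unsigned xpos = 0;
    int i;
    for (i = 0; i < dstW; i++) {
        unsigned xx     = xpos >> 16;           /* integer source index */
        unsigned xalpha = (xpos & 0xFFFF) >> 9; /* 7-bit blend weight   */
        dst[i] = (src[xx] << 7) + (src[xx + 1] - src[xx]) * xalpha;
        xpos += xInc;
    }
    for (i = dstW - 1; (int)((i * (int64_t)xInc) >> 16) >= srcW - 1; i--)
        dst[i] = src[srcW - 1] * 128; /* right neighbour out of range */
}

int main(void)
{
    const uint8_t row[5] = { 0, 64, 128, 255, 255 }; /* last byte: padding */
    int16_t out[8];
    int i;
    hscale_fast(out, 8, row, 4);
    for (i = 0; i < 8; i++)
        printf("%d ", out[i] >> 7); /* scale back down for display */
    putchar('\n');
    return 0;
}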
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/input.c |
---|
0,0 → 1,1441 |
/* |
* Copyright (C) 2001-2012 Michael Niedermayer <michaelni@gmx.at> |
* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
#include <math.h> |
#include <stdint.h> |
#include <stdio.h> |
#include <string.h> |
#include "libavutil/avutil.h" |
#include "libavutil/bswap.h" |
#include "libavutil/cpu.h" |
#include "libavutil/intreadwrite.h" |
#include "libavutil/mathematics.h" |
#include "libavutil/pixdesc.h" |
#include "libavutil/avassert.h" |
#include "config.h" |
#include "rgb2rgb.h" |
#include "swscale.h" |
#include "swscale_internal.h" |
#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos)) |
#define r ((origin == AV_PIX_FMT_BGR48BE || origin == AV_PIX_FMT_BGR48LE || origin == AV_PIX_FMT_BGRA64BE || origin == AV_PIX_FMT_BGRA64LE) ? b_r : r_b) |
#define b ((origin == AV_PIX_FMT_BGR48BE || origin == AV_PIX_FMT_BGR48LE || origin == AV_PIX_FMT_BGRA64BE || origin == AV_PIX_FMT_BGRA64LE) ? r_b : b_r) |
static av_always_inline void |
rgb64ToY_c_template(uint16_t *dst, const uint16_t *src, int width, |
enum AVPixelFormat origin, int32_t *rgb2yuv) |
{ |
int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; |
int i; |
for (i = 0; i < width; i++) { |
unsigned int r_b = input_pixel(&src[i*4+0]); |
unsigned int g = input_pixel(&src[i*4+1]); |
unsigned int b_r = input_pixel(&src[i*4+2]); |
dst[i] = (ry*r + gy*g + by*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; |
} |
} |
static av_always_inline void |
rgb64ToUV_c_template(uint16_t *dstU, uint16_t *dstV, |
const uint16_t *src1, const uint16_t *src2, |
int width, enum AVPixelFormat origin, int32_t *rgb2yuv) |
{ |
int i; |
int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; |
int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; |
av_assert1(src1==src2); |
for (i = 0; i < width; i++) { |
int r_b = input_pixel(&src1[i*4+0]); |
int g = input_pixel(&src1[i*4+1]); |
int b_r = input_pixel(&src1[i*4+2]); |
dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; |
dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; |
} |
} |
static av_always_inline void |
rgb64ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV, |
const uint16_t *src1, const uint16_t *src2, |
int width, enum AVPixelFormat origin, int32_t *rgb2yuv) |
{ |
int i; |
int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; |
int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; |
av_assert1(src1==src2); |
for (i = 0; i < width; i++) { |
int r_b = (input_pixel(&src1[8 * i + 0]) + input_pixel(&src1[8 * i + 4]) + 1) >> 1; |
int g = (input_pixel(&src1[8 * i + 1]) + input_pixel(&src1[8 * i + 5]) + 1) >> 1; |
int b_r = (input_pixel(&src1[8 * i + 2]) + input_pixel(&src1[8 * i + 6]) + 1) >> 1; |
dstU[i]= (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; |
dstV[i]= (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; |
} |
} |
#define rgb64funcs(pattern, BE_LE, origin) \ |
static void pattern ## 64 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused0, const uint8_t *unused1,\ |
int width, uint32_t *rgb2yuv) \ |
{ \ |
const uint16_t *src = (const uint16_t *) _src; \ |
uint16_t *dst = (uint16_t *) _dst; \ |
rgb64ToY_c_template(dst, src, width, origin, rgb2yuv); \ |
} \ |
\ |
static void pattern ## 64 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \ |
const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \ |
int width, uint32_t *rgb2yuv) \ |
{ \ |
const uint16_t *src1 = (const uint16_t *) _src1, \ |
*src2 = (const uint16_t *) _src2; \ |
uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \ |
rgb64ToUV_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \ |
} \ |
\ |
static void pattern ## 64 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \ |
const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \ |
int width, uint32_t *rgb2yuv) \ |
{ \ |
const uint16_t *src1 = (const uint16_t *) _src1, \ |
*src2 = (const uint16_t *) _src2; \ |
uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \ |
rgb64ToUV_half_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \ |
} |
rgb64funcs(rgb, LE, AV_PIX_FMT_RGBA64LE) |
rgb64funcs(rgb, BE, AV_PIX_FMT_RGBA64BE) |
rgb64funcs(bgr, LE, AV_PIX_FMT_BGRA64LE) |
rgb64funcs(bgr, BE, AV_PIX_FMT_BGRA64BE) |
static av_always_inline void rgb48ToY_c_template(uint16_t *dst, |
const uint16_t *src, int width, |
enum AVPixelFormat origin, |
int32_t *rgb2yuv) |
{ |
int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; |
int i; |
for (i = 0; i < width; i++) { |
unsigned int r_b = input_pixel(&src[i * 3 + 0]); |
unsigned int g = input_pixel(&src[i * 3 + 1]); |
unsigned int b_r = input_pixel(&src[i * 3 + 2]); |
dst[i] = (ry*r + gy*g + by*b + (0x2001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; |
} |
} |
static av_always_inline void rgb48ToUV_c_template(uint16_t *dstU, |
uint16_t *dstV, |
const uint16_t *src1, |
const uint16_t *src2, |
int width, |
enum AVPixelFormat origin, |
int32_t *rgb2yuv) |
{ |
int i; |
int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; |
int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; |
av_assert1(src1 == src2); |
for (i = 0; i < width; i++) { |
int r_b = input_pixel(&src1[i * 3 + 0]); |
int g = input_pixel(&src1[i * 3 + 1]); |
int b_r = input_pixel(&src1[i * 3 + 2]); |
dstU[i] = (ru*r + gu*g + bu*b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; |
dstV[i] = (rv*r + gv*g + bv*b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; |
} |
} |
static av_always_inline void rgb48ToUV_half_c_template(uint16_t *dstU, |
uint16_t *dstV, |
const uint16_t *src1, |
const uint16_t *src2, |
int width, |
enum AVPixelFormat origin, |
int32_t *rgb2yuv) |
{ |
int i; |
int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; |
int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; |
av_assert1(src1 == src2); |
for (i = 0; i < width; i++) { |
int r_b = (input_pixel(&src1[6 * i + 0]) + |
input_pixel(&src1[6 * i + 3]) + 1) >> 1; |
int g = (input_pixel(&src1[6 * i + 1]) + |
input_pixel(&src1[6 * i + 4]) + 1) >> 1; |
int b_r = (input_pixel(&src1[6 * i + 2]) + |
input_pixel(&src1[6 * i + 5]) + 1) >> 1; |
dstU[i] = (ru*r + gu*g + bu*b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; |
dstV[i] = (rv*r + gv*g + bv*b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; |
} |
} |
#undef r |
#undef b |
#undef input_pixel |
#define rgb48funcs(pattern, BE_LE, origin) \ |
static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, \ |
const uint8_t *_src, \ |
const uint8_t *unused0, const uint8_t *unused1,\ |
int width, \ |
uint32_t *rgb2yuv) \ |
{ \ |
const uint16_t *src = (const uint16_t *)_src; \ |
uint16_t *dst = (uint16_t *)_dst; \ |
rgb48ToY_c_template(dst, src, width, origin, rgb2yuv); \ |
} \ |
\ |
static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, \ |
uint8_t *_dstV, \ |
const uint8_t *unused0, \ |
const uint8_t *_src1, \ |
const uint8_t *_src2, \ |
int width, \ |
uint32_t *rgb2yuv) \ |
{ \ |
const uint16_t *src1 = (const uint16_t *)_src1, \ |
*src2 = (const uint16_t *)_src2; \ |
uint16_t *dstU = (uint16_t *)_dstU, \ |
*dstV = (uint16_t *)_dstV; \ |
rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \ |
} \ |
\ |
static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, \ |
uint8_t *_dstV, \ |
const uint8_t *unused0, \ |
const uint8_t *_src1, \ |
const uint8_t *_src2, \ |
int width, \ |
uint32_t *rgb2yuv) \ |
{ \ |
const uint16_t *src1 = (const uint16_t *)_src1, \ |
*src2 = (const uint16_t *)_src2; \ |
uint16_t *dstU = (uint16_t *)_dstU, \ |
*dstV = (uint16_t *)_dstV; \ |
rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \ |
} |
rgb48funcs(rgb, LE, AV_PIX_FMT_RGB48LE) |
rgb48funcs(rgb, BE, AV_PIX_FMT_RGB48BE) |
rgb48funcs(bgr, LE, AV_PIX_FMT_BGR48LE) |
rgb48funcs(bgr, BE, AV_PIX_FMT_BGR48BE) |
#define input_pixel(i) ((origin == AV_PIX_FMT_RGBA || \ |
origin == AV_PIX_FMT_BGRA || \ |
origin == AV_PIX_FMT_ARGB || \ |
origin == AV_PIX_FMT_ABGR) \ |
? AV_RN32A(&src[(i) * 4]) \ |
: (isBE(origin) ? AV_RB16(&src[(i) * 2]) \ |
: AV_RL16(&src[(i) * 2]))) |
static av_always_inline void rgb16_32ToY_c_template(int16_t *dst, |
const uint8_t *src, |
int width, |
enum AVPixelFormat origin, |
int shr, int shg, |
int shb, int shp, |
int maskr, int maskg, |
int maskb, int rsh, |
int gsh, int bsh, int S, |
int32_t *rgb2yuv) |
{ |
const int ry = rgb2yuv[RY_IDX]<<rsh, gy = rgb2yuv[GY_IDX]<<gsh, by = rgb2yuv[BY_IDX]<<bsh; |
const unsigned rnd = (32<<((S)-1)) + (1<<(S-7)); |
int i; |
for (i = 0; i < width; i++) { |
int px = input_pixel(i) >> shp; |
int b = (px & maskb) >> shb; |
int g = (px & maskg) >> shg; |
int r = (px & maskr) >> shr; |
dst[i] = (ry * r + gy * g + by * b + rnd) >> ((S)-6); |
} |
} |
static av_always_inline void rgb16_32ToUV_c_template(int16_t *dstU, |
int16_t *dstV, |
const uint8_t *src, |
int width, |
enum AVPixelFormat origin, |
int shr, int shg, |
int shb, int shp, |
int maskr, int maskg, |
int maskb, int rsh, |
int gsh, int bsh, int S, |
int32_t *rgb2yuv) |
{ |
const int ru = rgb2yuv[RU_IDX] << rsh, gu = rgb2yuv[GU_IDX] << gsh, bu = rgb2yuv[BU_IDX] << bsh, |
rv = rgb2yuv[RV_IDX] << rsh, gv = rgb2yuv[GV_IDX] << gsh, bv = rgb2yuv[BV_IDX] << bsh; |
const unsigned rnd = (256u<<((S)-1)) + (1<<(S-7)); |
int i; |
for (i = 0; i < width; i++) { |
int px = input_pixel(i) >> shp; |
int b = (px & maskb) >> shb; |
int g = (px & maskg) >> shg; |
int r = (px & maskr) >> shr; |
dstU[i] = (ru * r + gu * g + bu * b + rnd) >> ((S)-6); |
dstV[i] = (rv * r + gv * g + bv * b + rnd) >> ((S)-6); |
} |
} |
static av_always_inline void rgb16_32ToUV_half_c_template(int16_t *dstU, |
int16_t *dstV, |
const uint8_t *src, |
int width, |
enum AVPixelFormat origin, |
int shr, int shg, |
int shb, int shp, |
int maskr, int maskg, |
int maskb, int rsh, |
int gsh, int bsh, int S, |
int32_t *rgb2yuv) |
{ |
const int ru = rgb2yuv[RU_IDX] << rsh, gu = rgb2yuv[GU_IDX] << gsh, bu = rgb2yuv[BU_IDX] << bsh, |
rv = rgb2yuv[RV_IDX] << rsh, gv = rgb2yuv[GV_IDX] << gsh, bv = rgb2yuv[BV_IDX] << bsh, |
maskgx = ~(maskr | maskb); |
const unsigned rnd = (256U<<(S)) + (1<<(S-6)); |
int i; |
maskr |= maskr << 1; |
maskb |= maskb << 1; |
maskg |= maskg << 1; |
for (i = 0; i < width; i++) { |
unsigned px0 = input_pixel(2 * i + 0) >> shp; |
unsigned px1 = input_pixel(2 * i + 1) >> shp; |
int b, r, g = (px0 & maskgx) + (px1 & maskgx); |
int rb = px0 + px1 - g; |
b = (rb & maskb) >> shb; |
if (shp || |
origin == AV_PIX_FMT_BGR565LE || origin == AV_PIX_FMT_BGR565BE || |
origin == AV_PIX_FMT_RGB565LE || origin == AV_PIX_FMT_RGB565BE) { |
g >>= shg; |
} else { |
g = (g & maskg) >> shg; |
} |
r = (rb & maskr) >> shr; |
dstU[i] = (ru * r + gu * g + bu * b + (unsigned)rnd) >> ((S)-6+1); |
dstV[i] = (rv * r + gv * g + bv * b + (unsigned)rnd) >> ((S)-6+1); |
} |
} |
#undef input_pixel |
#define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \ |
maskg, maskb, rsh, gsh, bsh, S) \ |
static void name ## ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, \ |
int width, uint32_t *tab) \ |
{ \ |
rgb16_32ToY_c_template((int16_t*)dst, src, width, fmt, shr, shg, shb, shp, \ |
maskr, maskg, maskb, rsh, gsh, bsh, S, tab); \ |
} \ |
\ |
static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \ |
const uint8_t *unused0, const uint8_t *src, const uint8_t *dummy, \ |
int width, uint32_t *tab) \ |
{ \ |
rgb16_32ToUV_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \ |
shr, shg, shb, shp, \ |
maskr, maskg, maskb, rsh, gsh, bsh, S, tab);\ |
} \ |
\ |
static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \ |
const uint8_t *unused0, const uint8_t *src, \ |
const uint8_t *dummy, \ |
int width, uint32_t *tab) \ |
{ \ |
rgb16_32ToUV_half_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \ |
shr, shg, shb, shp, \ |
maskr, maskg, maskb, \ |
rsh, gsh, bsh, S, tab); \ |
} |
rgb16_32_wrapper(AV_PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT + 8) |
rgb16_32_wrapper(AV_PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT + 8) |
rgb16_32_wrapper(AV_PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT + 8) |
rgb16_32_wrapper(AV_PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT + 8) |
rgb16_32_wrapper(AV_PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT + 8) |
rgb16_32_wrapper(AV_PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT + 7) |
rgb16_32_wrapper(AV_PIX_FMT_BGR444LE, bgr12le, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, 8, 4, 0, RGB2YUV_SHIFT + 4) |
rgb16_32_wrapper(AV_PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT + 8) |
rgb16_32_wrapper(AV_PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT + 7) |
rgb16_32_wrapper(AV_PIX_FMT_RGB444LE, rgb12le, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT + 4) |
rgb16_32_wrapper(AV_PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT + 8) |
rgb16_32_wrapper(AV_PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT + 7) |
rgb16_32_wrapper(AV_PIX_FMT_BGR444BE, bgr12be, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, 8, 4, 0, RGB2YUV_SHIFT + 4) |
rgb16_32_wrapper(AV_PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT + 8) |
rgb16_32_wrapper(AV_PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT + 7) |
rgb16_32_wrapper(AV_PIX_FMT_RGB444BE, rgb12be, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT + 4) |
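/* Planar GBR at half horizontal chroma resolution: sum two adjacent |
 * samples from each plane; 0x4001<<(RGB2YUV_SHIFT-6) is the 128<<6 chroma |
 * offset plus rounding, scaled for the doubled inputs. */ |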
static void gbr24pToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, |
const uint8_t *gsrc, const uint8_t *bsrc, const uint8_t *rsrc, |
int width, uint32_t *rgb2yuv) |
{ |
uint16_t *dstU = (uint16_t *)_dstU; |
uint16_t *dstV = (uint16_t *)_dstV; |
int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; |
int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; |
int i; |
for (i = 0; i < width; i++) { |
unsigned int g = gsrc[2*i] + gsrc[2*i+1]; |
unsigned int b = bsrc[2*i] + bsrc[2*i+1]; |
unsigned int r = rsrc[2*i] + rsrc[2*i+1]; |
dstU[i] = (ru*r + gu*g + bu*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1); |
dstV[i] = (rv*r + gv*g + bv*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1); |
} |
} |
static void rgba64leToA_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, |
const uint8_t *unused2, int width, uint32_t *unused) |
{ |
int16_t *dst = (int16_t *)_dst; |
const uint16_t *src = (const uint16_t *)_src; |
int i; |
for (i = 0; i < width; i++) |
dst[i] = AV_RL16(src + 4 * i + 3); |
} |
static void rgba64beToA_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, |
const uint8_t *unused2, int width, uint32_t *unused) |
{ |
int16_t *dst = (int16_t *)_dst; |
const uint16_t *src = (const uint16_t *)_src; |
int i; |
for (i = 0; i < width; i++) |
dst[i] = AV_RB16(src + 4 * i + 3); |
} |
static void abgrToA_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) |
{ |
int16_t *dst = (int16_t *)_dst; |
int i; |
for (i=0; i<width; i++) { |
dst[i]= src[4*i]<<6; |
} |
} |
static void rgbaToA_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) |
{ |
int16_t *dst = (int16_t *)_dst; |
int i; |
for (i=0; i<width; i++) { |
dst[i]= src[4*i+3]<<6; |
} |
} |
static void palToA_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *pal) |
{ |
int16_t *dst = (int16_t *)_dst; |
int i; |
for (i=0; i<width; i++) { |
int d= src[i]; |
dst[i]= (pal[d] >> 24)<<6; |
} |
} |
static void palToY_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *pal) |
{ |
int16_t *dst = (int16_t *)_dst; |
int i; |
for (i = 0; i < width; i++) { |
int d = src[i]; |
dst[i] = (pal[d] & 0xFF)<<6; |
} |
} |
static void palToUV_c(uint8_t *_dstU, uint8_t *_dstV, |
const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, |
int width, uint32_t *pal) |
{ |
uint16_t *dstU = (uint16_t *)_dstU; |
uint16_t *dstV = (uint16_t *)_dstV; |
int i; |
av_assert1(src1 == src2); |
for (i = 0; i < width; i++) { |
int p = pal[src1[i]]; |
dstU[i] = (uint8_t)(p>> 8)<<6; |
dstV[i] = (uint8_t)(p>>16)<<6; |
} |
} |
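/* 1 bpp input: expand each bit of a source byte into a full-range luma |
 * sample (0 or 16383). MONOWHITE stores white as 0, so its bytes are |
 * inverted first. */ |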
static void monowhite2Y_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) |
{ |
int16_t *dst = (int16_t *)_dst; |
int i, j; |
width = (width + 7) >> 3; |
for (i = 0; i < width; i++) { |
int d = ~src[i]; |
for (j = 0; j < 8; j++) |
dst[8*i+j]= ((d>>(7-j))&1) * 16383; |
} |
if(width&7){ |
int d= ~src[i]; |
for (j = 0; j < (width&7); j++) |
dst[8*i+j]= ((d>>(7-j))&1) * 16383; |
} |
} |
static void monoblack2Y_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) |
{ |
int16_t *dst = (int16_t *)_dst; |
int i, j; |
width = (width + 7) >> 3; |
for (i = 0; i < width; i++) { |
int d = src[i]; |
for (j = 0; j < 8; j++) |
dst[8*i+j]= ((d>>(7-j))&1) * 16383; |
} |
if(width&7){ |
int d = src[i]; |
for (j = 0; j < (width&7); j++) |
dst[8*i+j] = ((d>>(7-j))&1) * 16383; |
} |
} |
static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, |
uint32_t *unused) |
{ |
int i; |
for (i = 0; i < width; i++) |
dst[i] = src[2 * i]; |
} |
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1, |
const uint8_t *src2, int width, uint32_t *unused) |
{ |
int i; |
for (i = 0; i < width; i++) { |
dstU[i] = src1[4 * i + 1]; |
dstV[i] = src1[4 * i + 3]; |
} |
av_assert1(src1 == src2); |
} |
static void yvy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1, |
const uint8_t *src2, int width, uint32_t *unused) |
{ |
int i; |
for (i = 0; i < width; i++) { |
dstV[i] = src1[4 * i + 1]; |
dstU[i] = src1[4 * i + 3]; |
} |
av_assert1(src1 == src2); |
} |
static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, const uint8_t *unused2, int width, |
uint32_t *unused) |
{ |
int i; |
const uint16_t *src = (const uint16_t *)_src; |
uint16_t *dst = (uint16_t *)_dst; |
for (i = 0; i < width; i++) |
dst[i] = av_bswap16(src[i]); |
} |
static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *_src1, |
const uint8_t *_src2, int width, uint32_t *unused) |
{ |
int i; |
const uint16_t *src1 = (const uint16_t *)_src1, |
*src2 = (const uint16_t *)_src2; |
uint16_t *dstU = (uint16_t *)_dstU, *dstV = (uint16_t *)_dstV; |
for (i = 0; i < width; i++) { |
dstU[i] = av_bswap16(src1[i]); |
dstV[i] = av_bswap16(src2[i]); |
} |
} |
static void read_ya16le_gray_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, |
uint32_t *unused) |
{ |
int i; |
for (i = 0; i < width; i++) |
AV_WN16(dst + i * 2, AV_RL16(src + i * 4)); |
} |
static void read_ya16le_alpha_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, |
uint32_t *unused) |
{ |
int i; |
for (i = 0; i < width; i++) |
AV_WN16(dst + i * 2, AV_RL16(src + i * 4 + 2)); |
} |
static void read_ya16be_gray_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, |
uint32_t *unused) |
{ |
int i; |
for (i = 0; i < width; i++) |
AV_WN16(dst + i * 2, AV_RB16(src + i * 4)); |
} |
static void read_ya16be_alpha_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, |
uint32_t *unused) |
{ |
int i; |
for (i = 0; i < width; i++) |
AV_WN16(dst + i * 2, AV_RB16(src + i * 4 + 2)); |
} |
static void read_ayuv64le_Y_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused0, const uint8_t *unused1, int width, |
uint32_t *unused2) |
{ |
int i; |
for (i = 0; i < width; i++) |
AV_WN16(dst + i * 2, AV_RL16(src + i * 8 + 2)); |
} |
static void read_ayuv64le_UV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src, |
const uint8_t *unused1, int width, uint32_t *unused2) |
{ |
int i; |
for (i = 0; i < width; i++) { |
AV_WN16(dstU + i * 2, AV_RL16(src + i * 8 + 4)); |
AV_WN16(dstV + i * 2, AV_RL16(src + i * 8 + 6)); |
} |
} |
static void read_ayuv64le_A_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused0, const uint8_t *unused1, int width, |
uint32_t *unused2) |
{ |
int i; |
for (i = 0; i < width; i++) |
AV_WN16(dst + i * 2, AV_RL16(src + i * 8)); |
} |
/* This is almost identical to the previous, and exists only because |
 * yuy2ToY/UV(dst, src + 1, ...) would have 100% unaligned accesses. */ |
static void uyvyToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, |
uint32_t *unused) |
{ |
int i; |
for (i = 0; i < width; i++) |
dst[i] = src[2 * i + 1]; |
} |
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1, |
const uint8_t *src2, int width, uint32_t *unused) |
{ |
int i; |
for (i = 0; i < width; i++) { |
dstU[i] = src1[4 * i + 0]; |
dstV[i] = src1[4 * i + 2]; |
} |
av_assert1(src1 == src2); |
} |
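/* NV12/NV21 store chroma interleaved in a single plane; split it into |
 * separate U and V lines (NV21 simply swaps the destination pointers). */ |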
static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2, |
const uint8_t *src, int width) |
{ |
int i; |
for (i = 0; i < width; i++) { |
dst1[i] = src[2 * i + 0]; |
dst2[i] = src[2 * i + 1]; |
} |
} |
static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV, |
const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, |
int width, uint32_t *unused) |
{ |
nvXXtoUV_c(dstU, dstV, src1, width); |
} |
static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV, |
const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, |
int width, uint32_t *unused) |
{ |
nvXXtoUV_c(dstV, dstU, src1, width); |
} |
#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos)) |
static void bgr24ToY_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, |
int width, uint32_t *rgb2yuv) |
{ |
int16_t *dst = (int16_t *)_dst; |
int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; |
int i; |
for (i = 0; i < width; i++) { |
int b = src[i * 3 + 0]; |
int g = src[i * 3 + 1]; |
int r = src[i * 3 + 2]; |
dst[i] = ((ry*r + gy*g + by*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6)); |
} |
} |
static void bgr24ToUV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *src1, |
const uint8_t *src2, int width, uint32_t *rgb2yuv) |
{ |
int16_t *dstU = (int16_t *)_dstU; |
int16_t *dstV = (int16_t *)_dstV; |
int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; |
int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; |
int i; |
for (i = 0; i < width; i++) { |
int b = src1[3 * i + 0]; |
int g = src1[3 * i + 1]; |
int r = src1[3 * i + 2]; |
dstU[i] = (ru*r + gu*g + bu*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); |
dstV[i] = (rv*r + gv*g + bv*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); |
} |
av_assert1(src1 == src2); |
} |
static void bgr24ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *src1, |
const uint8_t *src2, int width, uint32_t *rgb2yuv) |
{ |
int16_t *dstU = (int16_t *)_dstU; |
int16_t *dstV = (int16_t *)_dstV; |
int i; |
int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; |
int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; |
for (i = 0; i < width; i++) { |
int b = src1[6 * i + 0] + src1[6 * i + 3]; |
int g = src1[6 * i + 1] + src1[6 * i + 4]; |
int r = src1[6 * i + 2] + src1[6 * i + 5]; |
dstU[i] = (ru*r + gu*g + bu*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); |
dstV[i] = (rv*r + gv*g + bv*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); |
} |
av_assert1(src1 == src2); |
} |
static void rgb24ToY_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, |
uint32_t *rgb2yuv) |
{ |
int16_t *dst = (int16_t *)_dst; |
int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; |
int i; |
for (i = 0; i < width; i++) { |
int r = src[i * 3 + 0]; |
int g = src[i * 3 + 1]; |
int b = src[i * 3 + 2]; |
dst[i] = ((ry*r + gy*g + by*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6)); |
} |
} |
static void rgb24ToUV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *src1, |
const uint8_t *src2, int width, uint32_t *rgb2yuv) |
{ |
int16_t *dstU = (int16_t *)_dstU; |
int16_t *dstV = (int16_t *)_dstV; |
int i; |
int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; |
int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; |
av_assert1(src1 == src2); |
for (i = 0; i < width; i++) { |
int r = src1[3 * i + 0]; |
int g = src1[3 * i + 1]; |
int b = src1[3 * i + 2]; |
dstU[i] = (ru*r + gu*g + bu*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); |
dstV[i] = (rv*r + gv*g + bv*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); |
} |
} |
static void rgb24ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *src1, |
const uint8_t *src2, int width, uint32_t *rgb2yuv) |
{ |
int16_t *dstU = (int16_t *)_dstU; |
int16_t *dstV = (int16_t *)_dstV; |
int i; |
int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; |
int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; |
av_assert1(src1 == src2); |
for (i = 0; i < width; i++) { |
int r = src1[6 * i + 0] + src1[6 * i + 3]; |
int g = src1[6 * i + 1] + src1[6 * i + 4]; |
int b = src1[6 * i + 2] + src1[6 * i + 5]; |
dstU[i] = (ru*r + gu*g + bu*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); |
dstV[i] = (rv*r + gv*g + bv*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); |
} |
} |
static void planar_rgb_to_y(uint8_t *_dst, const uint8_t *src[4], int width, int32_t *rgb2yuv) |
{ |
uint16_t *dst = (uint16_t *)_dst; |
int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; |
int i; |
for (i = 0; i < width; i++) { |
int g = src[0][i]; |
int b = src[1][i]; |
int r = src[2][i]; |
dst[i] = (ry*r + gy*g + by*b + (0x801<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6); |
} |
} |
static void planar_rgb_to_a(uint8_t *_dst, const uint8_t *src[4], int width, int32_t *unused) |
{ |
uint16_t *dst = (uint16_t *)_dst; |
int i; |
for (i = 0; i < width; i++) |
dst[i] = src[3][i] << 6; |
} |
static void planar_rgb_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *src[4], int width, int32_t *rgb2yuv) |
{ |
uint16_t *dstU = (uint16_t *)_dstU; |
uint16_t *dstV = (uint16_t *)_dstV; |
int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; |
int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; |
int i; |
for (i = 0; i < width; i++) { |
int g = src[0][i]; |
int b = src[1][i]; |
int r = src[2][i]; |
dstU[i] = (ru*r + gu*g + bu*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6); |
dstV[i] = (rv*r + gv*g + bv*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6); |
} |
} |
#define rdpx(src) \ |
(is_be ? AV_RB16(src) : AV_RL16(src)) |
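/* Planar RGB at 9..16 bits per component: rdpx reads one component in the |
 * requested endianness and the final shift renormalizes the given bpc to |
 * the scaler's intermediate range (bpc 16 uses shift = 14). */ |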
static av_always_inline void planar_rgb16_to_y(uint8_t *_dst, const uint8_t *_src[4], |
int width, int bpc, int is_be, int32_t *rgb2yuv) |
{ |
int i; |
const uint16_t **src = (const uint16_t **)_src; |
uint16_t *dst = (uint16_t *)_dst; |
int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; |
int shift = bpc < 16 ? bpc : 14; |
for (i = 0; i < width; i++) { |
int g = rdpx(src[0] + i); |
int b = rdpx(src[1] + i); |
int r = rdpx(src[2] + i); |
dst[i] = ((ry*r + gy*g + by*b + (33 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14)); |
} |
} |
static av_always_inline void planar_rgb16_to_uv(uint8_t *_dstU, uint8_t *_dstV, |
const uint8_t *_src[4], int width, |
int bpc, int is_be, int32_t *rgb2yuv) |
{ |
int i; |
const uint16_t **src = (const uint16_t **)_src; |
uint16_t *dstU = (uint16_t *)_dstU; |
uint16_t *dstV = (uint16_t *)_dstV; |
int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; |
int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; |
int shift = bpc < 16 ? bpc : 14; |
for (i = 0; i < width; i++) { |
int g = rdpx(src[0] + i); |
int b = rdpx(src[1] + i); |
int r = rdpx(src[2] + i); |
dstU[i] = (ru*r + gu*g + bu*b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14); |
dstV[i] = (rv*r + gv*g + bv*b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14); |
} |
} |
#undef rdpx |
#define rgb9plus_planar_funcs_endian(nbits, endian_name, endian) \ |
static void planar_rgb##nbits##endian_name##_to_y(uint8_t *dst, const uint8_t *src[4], \ |
int w, int32_t *rgb2yuv) \ |
{ \ |
planar_rgb16_to_y(dst, src, w, nbits, endian, rgb2yuv); \ |
} \ |
static void planar_rgb##nbits##endian_name##_to_uv(uint8_t *dstU, uint8_t *dstV, \ |
const uint8_t *src[4], int w, int32_t *rgb2yuv) \ |
{ \ |
planar_rgb16_to_uv(dstU, dstV, src, w, nbits, endian, rgb2yuv); \ |
} |
#define rgb9plus_planar_funcs(nbits) \ |
rgb9plus_planar_funcs_endian(nbits, le, 0) \ |
rgb9plus_planar_funcs_endian(nbits, be, 1) |
rgb9plus_planar_funcs(9) |
rgb9plus_planar_funcs(10) |
rgb9plus_planar_funcs(12) |
rgb9plus_planar_funcs(14) |
rgb9plus_planar_funcs(16) |
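/* Pick the input (unpack) functions for c->srcFormat. lumToYV12/chrToYV12 |
 * read packed formats, readLumPlanar/readChrPlanar handle planar RGB, and |
 * bswap16Y_c/bswap16UV_c fix high-bit-depth planes whose endianness |
 * differs from the host. The *_half_c chroma readers are chosen when |
 * chroma is read at half horizontal resolution (c->chrSrcHSubSample). */ |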
av_cold void ff_sws_init_input_funcs(SwsContext *c) |
{ |
enum AVPixelFormat srcFormat = c->srcFormat; |
c->chrToYV12 = NULL; |
switch (srcFormat) { |
case AV_PIX_FMT_YUYV422: |
c->chrToYV12 = yuy2ToUV_c; |
break; |
case AV_PIX_FMT_YVYU422: |
c->chrToYV12 = yvy2ToUV_c; |
break; |
case AV_PIX_FMT_UYVY422: |
c->chrToYV12 = uyvyToUV_c; |
break; |
case AV_PIX_FMT_NV12: |
c->chrToYV12 = nv12ToUV_c; |
break; |
case AV_PIX_FMT_NV21: |
c->chrToYV12 = nv21ToUV_c; |
break; |
case AV_PIX_FMT_RGB8: |
case AV_PIX_FMT_BGR8: |
case AV_PIX_FMT_PAL8: |
case AV_PIX_FMT_BGR4_BYTE: |
case AV_PIX_FMT_RGB4_BYTE: |
c->chrToYV12 = palToUV_c; |
break; |
case AV_PIX_FMT_GBRP9LE: |
c->readChrPlanar = planar_rgb9le_to_uv; |
break; |
case AV_PIX_FMT_GBRP10LE: |
c->readChrPlanar = planar_rgb10le_to_uv; |
break; |
case AV_PIX_FMT_GBRP12LE: |
c->readChrPlanar = planar_rgb12le_to_uv; |
break; |
case AV_PIX_FMT_GBRP14LE: |
c->readChrPlanar = planar_rgb14le_to_uv; |
break; |
case AV_PIX_FMT_GBRAP16LE: |
case AV_PIX_FMT_GBRP16LE: |
c->readChrPlanar = planar_rgb16le_to_uv; |
break; |
case AV_PIX_FMT_GBRP9BE: |
c->readChrPlanar = planar_rgb9be_to_uv; |
break; |
case AV_PIX_FMT_GBRP10BE: |
c->readChrPlanar = planar_rgb10be_to_uv; |
break; |
case AV_PIX_FMT_GBRP12BE: |
c->readChrPlanar = planar_rgb12be_to_uv; |
break; |
case AV_PIX_FMT_GBRP14BE: |
c->readChrPlanar = planar_rgb14be_to_uv; |
break; |
case AV_PIX_FMT_GBRAP16BE: |
case AV_PIX_FMT_GBRP16BE: |
c->readChrPlanar = planar_rgb16be_to_uv; |
break; |
case AV_PIX_FMT_GBRAP: |
case AV_PIX_FMT_GBRP: |
c->readChrPlanar = planar_rgb_to_uv; |
break; |
#if HAVE_BIGENDIAN |
case AV_PIX_FMT_YUV444P9LE: |
case AV_PIX_FMT_YUV422P9LE: |
case AV_PIX_FMT_YUV420P9LE: |
case AV_PIX_FMT_YUV422P10LE: |
case AV_PIX_FMT_YUV440P10LE: |
case AV_PIX_FMT_YUV444P10LE: |
case AV_PIX_FMT_YUV420P10LE: |
case AV_PIX_FMT_YUV422P12LE: |
case AV_PIX_FMT_YUV440P12LE: |
case AV_PIX_FMT_YUV444P12LE: |
case AV_PIX_FMT_YUV420P12LE: |
case AV_PIX_FMT_YUV422P14LE: |
case AV_PIX_FMT_YUV444P14LE: |
case AV_PIX_FMT_YUV420P14LE: |
case AV_PIX_FMT_YUV420P16LE: |
case AV_PIX_FMT_YUV422P16LE: |
case AV_PIX_FMT_YUV444P16LE: |
case AV_PIX_FMT_YUVA444P9LE: |
case AV_PIX_FMT_YUVA422P9LE: |
case AV_PIX_FMT_YUVA420P9LE: |
case AV_PIX_FMT_YUVA444P10LE: |
case AV_PIX_FMT_YUVA422P10LE: |
case AV_PIX_FMT_YUVA420P10LE: |
case AV_PIX_FMT_YUVA420P16LE: |
case AV_PIX_FMT_YUVA422P16LE: |
case AV_PIX_FMT_YUVA444P16LE: |
c->chrToYV12 = bswap16UV_c; |
break; |
#else |
case AV_PIX_FMT_YUV444P9BE: |
case AV_PIX_FMT_YUV422P9BE: |
case AV_PIX_FMT_YUV420P9BE: |
case AV_PIX_FMT_YUV440P10BE: |
case AV_PIX_FMT_YUV444P10BE: |
case AV_PIX_FMT_YUV422P10BE: |
case AV_PIX_FMT_YUV420P10BE: |
case AV_PIX_FMT_YUV440P12BE: |
case AV_PIX_FMT_YUV444P12BE: |
case AV_PIX_FMT_YUV422P12BE: |
case AV_PIX_FMT_YUV420P12BE: |
case AV_PIX_FMT_YUV444P14BE: |
case AV_PIX_FMT_YUV422P14BE: |
case AV_PIX_FMT_YUV420P14BE: |
case AV_PIX_FMT_YUV420P16BE: |
case AV_PIX_FMT_YUV422P16BE: |
case AV_PIX_FMT_YUV444P16BE: |
case AV_PIX_FMT_YUVA444P9BE: |
case AV_PIX_FMT_YUVA422P9BE: |
case AV_PIX_FMT_YUVA420P9BE: |
case AV_PIX_FMT_YUVA444P10BE: |
case AV_PIX_FMT_YUVA422P10BE: |
case AV_PIX_FMT_YUVA420P10BE: |
case AV_PIX_FMT_YUVA420P16BE: |
case AV_PIX_FMT_YUVA422P16BE: |
case AV_PIX_FMT_YUVA444P16BE: |
c->chrToYV12 = bswap16UV_c; |
break; |
#endif |
case AV_PIX_FMT_AYUV64LE: |
c->chrToYV12 = read_ayuv64le_UV_c; |
break; |
} |
if (c->chrSrcHSubSample) { |
switch (srcFormat) { |
case AV_PIX_FMT_RGBA64BE: |
c->chrToYV12 = rgb64BEToUV_half_c; |
break; |
case AV_PIX_FMT_RGBA64LE: |
c->chrToYV12 = rgb64LEToUV_half_c; |
break; |
case AV_PIX_FMT_BGRA64BE: |
c->chrToYV12 = bgr64BEToUV_half_c; |
break; |
case AV_PIX_FMT_BGRA64LE: |
c->chrToYV12 = bgr64LEToUV_half_c; |
break; |
case AV_PIX_FMT_RGB48BE: |
c->chrToYV12 = rgb48BEToUV_half_c; |
break; |
case AV_PIX_FMT_RGB48LE: |
c->chrToYV12 = rgb48LEToUV_half_c; |
break; |
case AV_PIX_FMT_BGR48BE: |
c->chrToYV12 = bgr48BEToUV_half_c; |
break; |
case AV_PIX_FMT_BGR48LE: |
c->chrToYV12 = bgr48LEToUV_half_c; |
break; |
case AV_PIX_FMT_RGB32: |
c->chrToYV12 = bgr32ToUV_half_c; |
break; |
case AV_PIX_FMT_RGB32_1: |
c->chrToYV12 = bgr321ToUV_half_c; |
break; |
case AV_PIX_FMT_BGR24: |
c->chrToYV12 = bgr24ToUV_half_c; |
break; |
case AV_PIX_FMT_BGR565LE: |
c->chrToYV12 = bgr16leToUV_half_c; |
break; |
case AV_PIX_FMT_BGR565BE: |
c->chrToYV12 = bgr16beToUV_half_c; |
break; |
case AV_PIX_FMT_BGR555LE: |
c->chrToYV12 = bgr15leToUV_half_c; |
break; |
case AV_PIX_FMT_BGR555BE: |
c->chrToYV12 = bgr15beToUV_half_c; |
break; |
case AV_PIX_FMT_GBRAP: |
case AV_PIX_FMT_GBRP: |
c->chrToYV12 = gbr24pToUV_half_c; |
break; |
case AV_PIX_FMT_BGR444LE: |
c->chrToYV12 = bgr12leToUV_half_c; |
break; |
case AV_PIX_FMT_BGR444BE: |
c->chrToYV12 = bgr12beToUV_half_c; |
break; |
case AV_PIX_FMT_BGR32: |
c->chrToYV12 = rgb32ToUV_half_c; |
break; |
case AV_PIX_FMT_BGR32_1: |
c->chrToYV12 = rgb321ToUV_half_c; |
break; |
case AV_PIX_FMT_RGB24: |
c->chrToYV12 = rgb24ToUV_half_c; |
break; |
case AV_PIX_FMT_RGB565LE: |
c->chrToYV12 = rgb16leToUV_half_c; |
break; |
case AV_PIX_FMT_RGB565BE: |
c->chrToYV12 = rgb16beToUV_half_c; |
break; |
case AV_PIX_FMT_RGB555LE: |
c->chrToYV12 = rgb15leToUV_half_c; |
break; |
case AV_PIX_FMT_RGB555BE: |
c->chrToYV12 = rgb15beToUV_half_c; |
break; |
case AV_PIX_FMT_RGB444LE: |
c->chrToYV12 = rgb12leToUV_half_c; |
break; |
case AV_PIX_FMT_RGB444BE: |
c->chrToYV12 = rgb12beToUV_half_c; |
break; |
} |
} else { |
switch (srcFormat) { |
case AV_PIX_FMT_RGBA64BE: |
c->chrToYV12 = rgb64BEToUV_c; |
break; |
case AV_PIX_FMT_RGBA64LE: |
c->chrToYV12 = rgb64LEToUV_c; |
break; |
case AV_PIX_FMT_BGRA64BE: |
c->chrToYV12 = bgr64BEToUV_c; |
break; |
case AV_PIX_FMT_BGRA64LE: |
c->chrToYV12 = bgr64LEToUV_c; |
break; |
case AV_PIX_FMT_RGB48BE: |
c->chrToYV12 = rgb48BEToUV_c; |
break; |
case AV_PIX_FMT_RGB48LE: |
c->chrToYV12 = rgb48LEToUV_c; |
break; |
case AV_PIX_FMT_BGR48BE: |
c->chrToYV12 = bgr48BEToUV_c; |
break; |
case AV_PIX_FMT_BGR48LE: |
c->chrToYV12 = bgr48LEToUV_c; |
break; |
case AV_PIX_FMT_RGB32: |
c->chrToYV12 = bgr32ToUV_c; |
break; |
case AV_PIX_FMT_RGB32_1: |
c->chrToYV12 = bgr321ToUV_c; |
break; |
case AV_PIX_FMT_BGR24: |
c->chrToYV12 = bgr24ToUV_c; |
break; |
case AV_PIX_FMT_BGR565LE: |
c->chrToYV12 = bgr16leToUV_c; |
break; |
case AV_PIX_FMT_BGR565BE: |
c->chrToYV12 = bgr16beToUV_c; |
break; |
case AV_PIX_FMT_BGR555LE: |
c->chrToYV12 = bgr15leToUV_c; |
break; |
case AV_PIX_FMT_BGR555BE: |
c->chrToYV12 = bgr15beToUV_c; |
break; |
case AV_PIX_FMT_BGR444LE: |
c->chrToYV12 = bgr12leToUV_c; |
break; |
case AV_PIX_FMT_BGR444BE: |
c->chrToYV12 = bgr12beToUV_c; |
break; |
case AV_PIX_FMT_BGR32: |
c->chrToYV12 = rgb32ToUV_c; |
break; |
case AV_PIX_FMT_BGR32_1: |
c->chrToYV12 = rgb321ToUV_c; |
break; |
case AV_PIX_FMT_RGB24: |
c->chrToYV12 = rgb24ToUV_c; |
break; |
case AV_PIX_FMT_RGB565LE: |
c->chrToYV12 = rgb16leToUV_c; |
break; |
case AV_PIX_FMT_RGB565BE: |
c->chrToYV12 = rgb16beToUV_c; |
break; |
case AV_PIX_FMT_RGB555LE: |
c->chrToYV12 = rgb15leToUV_c; |
break; |
case AV_PIX_FMT_RGB555BE: |
c->chrToYV12 = rgb15beToUV_c; |
break; |
case AV_PIX_FMT_RGB444LE: |
c->chrToYV12 = rgb12leToUV_c; |
break; |
case AV_PIX_FMT_RGB444BE: |
c->chrToYV12 = rgb12beToUV_c; |
break; |
} |
} |
c->lumToYV12 = NULL; |
c->alpToYV12 = NULL; |
switch (srcFormat) { |
case AV_PIX_FMT_GBRP9LE: |
c->readLumPlanar = planar_rgb9le_to_y; |
break; |
case AV_PIX_FMT_GBRP10LE: |
c->readLumPlanar = planar_rgb10le_to_y; |
break; |
case AV_PIX_FMT_GBRP12LE: |
c->readLumPlanar = planar_rgb12le_to_y; |
break; |
case AV_PIX_FMT_GBRP14LE: |
c->readLumPlanar = planar_rgb14le_to_y; |
break; |
case AV_PIX_FMT_GBRAP16LE: |
case AV_PIX_FMT_GBRP16LE: |
c->readLumPlanar = planar_rgb16le_to_y; |
break; |
case AV_PIX_FMT_GBRP9BE: |
c->readLumPlanar = planar_rgb9be_to_y; |
break; |
case AV_PIX_FMT_GBRP10BE: |
c->readLumPlanar = planar_rgb10be_to_y; |
break; |
case AV_PIX_FMT_GBRP12BE: |
c->readLumPlanar = planar_rgb12be_to_y; |
break; |
case AV_PIX_FMT_GBRP14BE: |
c->readLumPlanar = planar_rgb14be_to_y; |
break; |
case AV_PIX_FMT_GBRAP16BE: |
case AV_PIX_FMT_GBRP16BE: |
c->readLumPlanar = planar_rgb16be_to_y; |
break; |
case AV_PIX_FMT_GBRAP: |
c->readAlpPlanar = planar_rgb_to_a; |
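/* fall through: GBRAP also needs the GBRP luma reader */ |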
case AV_PIX_FMT_GBRP: |
c->readLumPlanar = planar_rgb_to_y; |
break; |
#if HAVE_BIGENDIAN |
case AV_PIX_FMT_YUV444P9LE: |
case AV_PIX_FMT_YUV422P9LE: |
case AV_PIX_FMT_YUV420P9LE: |
case AV_PIX_FMT_YUV444P10LE: |
case AV_PIX_FMT_YUV440P10LE: |
case AV_PIX_FMT_YUV422P10LE: |
case AV_PIX_FMT_YUV420P10LE: |
case AV_PIX_FMT_YUV444P12LE: |
case AV_PIX_FMT_YUV440P12LE: |
case AV_PIX_FMT_YUV422P12LE: |
case AV_PIX_FMT_YUV420P12LE: |
case AV_PIX_FMT_YUV444P14LE: |
case AV_PIX_FMT_YUV422P14LE: |
case AV_PIX_FMT_YUV420P14LE: |
case AV_PIX_FMT_YUV420P16LE: |
case AV_PIX_FMT_YUV422P16LE: |
case AV_PIX_FMT_YUV444P16LE: |
case AV_PIX_FMT_GRAY16LE: |
c->lumToYV12 = bswap16Y_c; |
break; |
case AV_PIX_FMT_YUVA444P9LE: |
case AV_PIX_FMT_YUVA422P9LE: |
case AV_PIX_FMT_YUVA420P9LE: |
case AV_PIX_FMT_YUVA444P10LE: |
case AV_PIX_FMT_YUVA422P10LE: |
case AV_PIX_FMT_YUVA420P10LE: |
case AV_PIX_FMT_YUVA420P16LE: |
case AV_PIX_FMT_YUVA422P16LE: |
case AV_PIX_FMT_YUVA444P16LE: |
c->lumToYV12 = bswap16Y_c; |
c->alpToYV12 = bswap16Y_c; |
break; |
#else |
case AV_PIX_FMT_YUV444P9BE: |
case AV_PIX_FMT_YUV422P9BE: |
case AV_PIX_FMT_YUV420P9BE: |
case AV_PIX_FMT_YUV444P10BE: |
case AV_PIX_FMT_YUV440P10BE: |
case AV_PIX_FMT_YUV422P10BE: |
case AV_PIX_FMT_YUV420P10BE: |
case AV_PIX_FMT_YUV444P12BE: |
case AV_PIX_FMT_YUV440P12BE: |
case AV_PIX_FMT_YUV422P12BE: |
case AV_PIX_FMT_YUV420P12BE: |
case AV_PIX_FMT_YUV444P14BE: |
case AV_PIX_FMT_YUV422P14BE: |
case AV_PIX_FMT_YUV420P14BE: |
case AV_PIX_FMT_YUV420P16BE: |
case AV_PIX_FMT_YUV422P16BE: |
case AV_PIX_FMT_YUV444P16BE: |
case AV_PIX_FMT_GRAY16BE: |
c->lumToYV12 = bswap16Y_c; |
break; |
case AV_PIX_FMT_YUVA444P9BE: |
case AV_PIX_FMT_YUVA422P9BE: |
case AV_PIX_FMT_YUVA420P9BE: |
case AV_PIX_FMT_YUVA444P10BE: |
case AV_PIX_FMT_YUVA422P10BE: |
case AV_PIX_FMT_YUVA420P10BE: |
case AV_PIX_FMT_YUVA420P16BE: |
case AV_PIX_FMT_YUVA422P16BE: |
case AV_PIX_FMT_YUVA444P16BE: |
c->lumToYV12 = bswap16Y_c; |
c->alpToYV12 = bswap16Y_c; |
break; |
#endif |
case AV_PIX_FMT_YA16LE: |
c->lumToYV12 = read_ya16le_gray_c; |
break; |
case AV_PIX_FMT_YA16BE: |
c->lumToYV12 = read_ya16be_gray_c; |
break; |
case AV_PIX_FMT_AYUV64LE: |
c->lumToYV12 = read_ayuv64le_Y_c; |
break; |
case AV_PIX_FMT_YUYV422: |
case AV_PIX_FMT_YVYU422: |
case AV_PIX_FMT_YA8: |
c->lumToYV12 = yuy2ToY_c; |
break; |
case AV_PIX_FMT_UYVY422: |
c->lumToYV12 = uyvyToY_c; |
break; |
case AV_PIX_FMT_BGR24: |
c->lumToYV12 = bgr24ToY_c; |
break; |
case AV_PIX_FMT_BGR565LE: |
c->lumToYV12 = bgr16leToY_c; |
break; |
case AV_PIX_FMT_BGR565BE: |
c->lumToYV12 = bgr16beToY_c; |
break; |
case AV_PIX_FMT_BGR555LE: |
c->lumToYV12 = bgr15leToY_c; |
break; |
case AV_PIX_FMT_BGR555BE: |
c->lumToYV12 = bgr15beToY_c; |
break; |
case AV_PIX_FMT_BGR444LE: |
c->lumToYV12 = bgr12leToY_c; |
break; |
case AV_PIX_FMT_BGR444BE: |
c->lumToYV12 = bgr12beToY_c; |
break; |
case AV_PIX_FMT_RGB24: |
c->lumToYV12 = rgb24ToY_c; |
break; |
case AV_PIX_FMT_RGB565LE: |
c->lumToYV12 = rgb16leToY_c; |
break; |
case AV_PIX_FMT_RGB565BE: |
c->lumToYV12 = rgb16beToY_c; |
break; |
case AV_PIX_FMT_RGB555LE: |
c->lumToYV12 = rgb15leToY_c; |
break; |
case AV_PIX_FMT_RGB555BE: |
c->lumToYV12 = rgb15beToY_c; |
break; |
case AV_PIX_FMT_RGB444LE: |
c->lumToYV12 = rgb12leToY_c; |
break; |
case AV_PIX_FMT_RGB444BE: |
c->lumToYV12 = rgb12beToY_c; |
break; |
case AV_PIX_FMT_RGB8: |
case AV_PIX_FMT_BGR8: |
case AV_PIX_FMT_PAL8: |
case AV_PIX_FMT_BGR4_BYTE: |
case AV_PIX_FMT_RGB4_BYTE: |
c->lumToYV12 = palToY_c; |
break; |
case AV_PIX_FMT_MONOBLACK: |
c->lumToYV12 = monoblack2Y_c; |
break; |
case AV_PIX_FMT_MONOWHITE: |
c->lumToYV12 = monowhite2Y_c; |
break; |
case AV_PIX_FMT_RGB32: |
c->lumToYV12 = bgr32ToY_c; |
break; |
case AV_PIX_FMT_RGB32_1: |
c->lumToYV12 = bgr321ToY_c; |
break; |
case AV_PIX_FMT_BGR32: |
c->lumToYV12 = rgb32ToY_c; |
break; |
case AV_PIX_FMT_BGR32_1: |
c->lumToYV12 = rgb321ToY_c; |
break; |
case AV_PIX_FMT_RGB48BE: |
c->lumToYV12 = rgb48BEToY_c; |
break; |
case AV_PIX_FMT_RGB48LE: |
c->lumToYV12 = rgb48LEToY_c; |
break; |
case AV_PIX_FMT_BGR48BE: |
c->lumToYV12 = bgr48BEToY_c; |
break; |
case AV_PIX_FMT_BGR48LE: |
c->lumToYV12 = bgr48LEToY_c; |
break; |
case AV_PIX_FMT_RGBA64BE: |
c->lumToYV12 = rgb64BEToY_c; |
break; |
case AV_PIX_FMT_RGBA64LE: |
c->lumToYV12 = rgb64LEToY_c; |
break; |
case AV_PIX_FMT_BGRA64BE: |
c->lumToYV12 = bgr64BEToY_c; |
break; |
case AV_PIX_FMT_BGRA64LE: |
c->lumToYV12 = bgr64LEToY_c; |
break; |
} |
if (c->alpPixBuf) { |
if (is16BPS(srcFormat) || isNBPS(srcFormat)) { |
if (HAVE_BIGENDIAN == !isBE(srcFormat)) |
c->alpToYV12 = bswap16Y_c; |
} |
switch (srcFormat) { |
case AV_PIX_FMT_BGRA64LE: |
case AV_PIX_FMT_RGBA64LE: c->alpToYV12 = rgba64leToA_c; break; |
case AV_PIX_FMT_BGRA64BE: |
case AV_PIX_FMT_RGBA64BE: c->alpToYV12 = rgba64beToA_c; break; |
case AV_PIX_FMT_BGRA: |
case AV_PIX_FMT_RGBA: |
c->alpToYV12 = rgbaToA_c; |
break; |
case AV_PIX_FMT_ABGR: |
case AV_PIX_FMT_ARGB: |
c->alpToYV12 = abgrToA_c; |
break; |
case AV_PIX_FMT_YA8: |
c->alpToYV12 = uyvyToY_c; |
break; |
case AV_PIX_FMT_YA16LE: |
c->alpToYV12 = read_ya16le_alpha_c; |
break; |
case AV_PIX_FMT_YA16BE: |
c->alpToYV12 = read_ya16be_alpha_c; |
break; |
case AV_PIX_FMT_AYUV64LE: |
c->alpToYV12 = read_ayuv64le_A_c; |
break; |
case AV_PIX_FMT_PAL8 : |
c->alpToYV12 = palToA_c; |
break; |
} |
} |
} |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/libswscale.pc |
---|
0,0 → 1,14 |
prefix=/usr/local |
exec_prefix=${prefix} |
libdir=${prefix}/lib |
includedir=${prefix}/include |
Name: libswscale |
Description: FFmpeg image rescaling library |
Version: 3.1.101 |
Requires: |
Requires.private: libavutil >= 54.31.100 |
Conflicts: |
Libs: -L${libdir} -lswscale |
Libs.private: -lm |
Cflags: -I${includedir} |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/libswscale.v |
---|
0,0 → 1,4 |
LIBSWSCALE_$MAJOR { |
global: swscale_*; sws_*; |
local: *; |
}; |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/libswscale.ver |
---|
0,0 → 1,5 |
LIBSWSCALE_3 { |
global: DllStartup; |
swscale_*; sws_*; |
local: *; |
}; |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/log2_tab.c |
---|
0,0 → 1,0 |
#include "libavutil/log2_tab.c" |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/options.c |
---|
0,0 → 1,100 |
/* |
* Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> |
* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
#include "libavutil/avutil.h" |
#include "libavutil/opt.h" |
#include "libavutil/pixfmt.h" |
#include "swscale.h" |
#include "swscale_internal.h" |
static const char *sws_context_to_name(void *ptr) |
{ |
return "swscaler"; |
} |
#define OFFSET(x) offsetof(SwsContext, x) |
#define DEFAULT 0 |
#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM |
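/* Each entry maps an option name either onto a SwsContext field (via |
 * OFFSET) or onto a named constant of a parent option; the trailing |
 * string (e.g. "sws_flags") is the option's unit. Values are set through |
 * the generic AVOption API, e.g. av_opt_set(ctx, "sws_flags", "lanczos", 0). */ |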
static const AVOption swscale_options[] = { |
{ "sws_flags", "scaler flags", OFFSET(flags), AV_OPT_TYPE_FLAGS, { .i64 = SWS_BICUBIC }, 0, UINT_MAX, VE, "sws_flags" }, |
{ "fast_bilinear", "fast bilinear", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_FAST_BILINEAR }, INT_MIN, INT_MAX, VE, "sws_flags" }, |
{ "bilinear", "bilinear", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_BILINEAR }, INT_MIN, INT_MAX, VE, "sws_flags" }, |
{ "bicubic", "bicubic", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_BICUBIC }, INT_MIN, INT_MAX, VE, "sws_flags" }, |
{ "experimental", "experimental", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_X }, INT_MIN, INT_MAX, VE, "sws_flags" }, |
{ "neighbor", "nearest neighbor", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_POINT }, INT_MIN, INT_MAX, VE, "sws_flags" }, |
{ "area", "averaging area", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_AREA }, INT_MIN, INT_MAX, VE, "sws_flags" }, |
{ "bicublin", "luma bicubic, chroma bilinear", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_BICUBLIN }, INT_MIN, INT_MAX, VE, "sws_flags" }, |
{ "gauss", "gaussian", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_GAUSS }, INT_MIN, INT_MAX, VE, "sws_flags" }, |
{ "sinc", "sinc", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_SINC }, INT_MIN, INT_MAX, VE, "sws_flags" }, |
{ "lanczos", "lanczos", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_LANCZOS }, INT_MIN, INT_MAX, VE, "sws_flags" }, |
{ "spline", "natural bicubic spline", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_SPLINE }, INT_MIN, INT_MAX, VE, "sws_flags" }, |
{ "print_info", "print info", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_PRINT_INFO }, INT_MIN, INT_MAX, VE, "sws_flags" }, |
{ "accurate_rnd", "accurate rounding", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_ACCURATE_RND }, INT_MIN, INT_MAX, VE, "sws_flags" }, |
{ "full_chroma_int", "full chroma interpolation", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_FULL_CHR_H_INT }, INT_MIN, INT_MAX, VE, "sws_flags" }, |
{ "full_chroma_inp", "full chroma input", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_FULL_CHR_H_INP }, INT_MIN, INT_MAX, VE, "sws_flags" }, |
{ "bitexact", "", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_BITEXACT }, INT_MIN, INT_MAX, VE, "sws_flags" }, |
{ "error_diffusion", "error diffusion dither", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_ERROR_DIFFUSION}, INT_MIN, INT_MAX, VE, "sws_flags" }, |
{ "srcw", "source width", OFFSET(srcW), AV_OPT_TYPE_INT, { .i64 = 16 }, 1, INT_MAX, VE }, |
{ "srch", "source height", OFFSET(srcH), AV_OPT_TYPE_INT, { .i64 = 16 }, 1, INT_MAX, VE }, |
{ "dstw", "destination width", OFFSET(dstW), AV_OPT_TYPE_INT, { .i64 = 16 }, 1, INT_MAX, VE }, |
{ "dsth", "destination height", OFFSET(dstH), AV_OPT_TYPE_INT, { .i64 = 16 }, 1, INT_MAX, VE }, |
{ "src_format", "source format", OFFSET(srcFormat), AV_OPT_TYPE_INT, { .i64 = DEFAULT }, 0, AV_PIX_FMT_NB - 1, VE }, |
{ "dst_format", "destination format", OFFSET(dstFormat), AV_OPT_TYPE_INT, { .i64 = DEFAULT }, 0, AV_PIX_FMT_NB - 1, VE }, |
{ "src_range", "source range", OFFSET(srcRange), AV_OPT_TYPE_INT, { .i64 = DEFAULT }, 0, 1, VE }, |
{ "dst_range", "destination range", OFFSET(dstRange), AV_OPT_TYPE_INT, { .i64 = DEFAULT }, 0, 1, VE }, |
{ "param0", "scaler param 0", OFFSET(param[0]), AV_OPT_TYPE_DOUBLE, { .dbl = SWS_PARAM_DEFAULT }, INT_MIN, INT_MAX, VE }, |
{ "param1", "scaler param 1", OFFSET(param[1]), AV_OPT_TYPE_DOUBLE, { .dbl = SWS_PARAM_DEFAULT }, INT_MIN, INT_MAX, VE }, |
{ "src_v_chr_pos", "source vertical chroma position in luma grid/256" , OFFSET(src_v_chr_pos), AV_OPT_TYPE_INT, { .i64 = -513 }, -513, 512, VE }, |
{ "src_h_chr_pos", "source horizontal chroma position in luma grid/256", OFFSET(src_h_chr_pos), AV_OPT_TYPE_INT, { .i64 = -513 }, -513, 512, VE }, |
{ "dst_v_chr_pos", "destination vertical chroma position in luma grid/256" , OFFSET(dst_v_chr_pos), AV_OPT_TYPE_INT, { .i64 = -513 }, -513, 512, VE }, |
{ "dst_h_chr_pos", "destination horizontal chroma position in luma grid/256", OFFSET(dst_h_chr_pos), AV_OPT_TYPE_INT, { .i64 = -513 }, -513, 512, VE }, |
{ "sws_dither", "set dithering algorithm", OFFSET(dither), AV_OPT_TYPE_INT, { .i64 = SWS_DITHER_AUTO }, 0, NB_SWS_DITHER, VE, "sws_dither" }, |
{ "auto", "leave choice to sws", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_DITHER_AUTO }, INT_MIN, INT_MAX, VE, "sws_dither" }, |
{ "bayer", "bayer dither", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_DITHER_BAYER }, INT_MIN, INT_MAX, VE, "sws_dither" }, |
{ "ed", "error diffusion", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_DITHER_ED }, INT_MIN, INT_MAX, VE, "sws_dither" }, |
{ "a_dither", "arithmetic addition dither", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_DITHER_A_DITHER}, INT_MIN, INT_MAX, VE, "sws_dither" }, |
{ "x_dither", "arithmetic xor dither", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_DITHER_X_DITHER}, INT_MIN, INT_MAX, VE, "sws_dither" }, |
{ "gamma", "gamma correct scaling", OFFSET(gamma_flag), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE, "gamma" }, |
{ "true", "enable", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, INT_MIN, INT_MAX, VE, "gamma" }, |
{ "false", "disable", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, INT_MIN, INT_MAX, VE, "gamma" }, |
{ "alphablend", "mode for alpha -> non alpha", OFFSET(alphablend),AV_OPT_TYPE_INT, { .i64 = SWS_ALPHA_BLEND_NONE}, 0, SWS_ALPHA_BLEND_NB-1, VE, "alphablend" }, |
{ "none", "ignore alpha", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_ALPHA_BLEND_NONE}, INT_MIN, INT_MAX, VE, "alphablend" }, |
{ "uniform_color", "blend onto a uniform color", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_ALPHA_BLEND_UNIFORM},INT_MIN, INT_MAX, VE, "alphablend" }, |
{ "checkerboard", "blend onto a checkerboard", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_ALPHA_BLEND_CHECKERBOARD},INT_MIN, INT_MAX, VE, "alphablend" }, |
{ NULL } |
}; |
const AVClass sws_context_class = { |
.class_name = "SWScaler", |
.item_name = sws_context_to_name, |
.option = swscale_options, |
.category = AV_CLASS_CATEGORY_SWSCALER, |
.version = LIBAVUTIL_VERSION_INT, |
}; |
const AVClass *sws_get_class(void) |
{ |
return &sws_context_class; |
} |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/output.c |
---|
0,0 → 1,2513 |
/* |
* Copyright (C) 2001-2012 Michael Niedermayer <michaelni@gmx.at> |
* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
#include <math.h> |
#include <stdint.h> |
#include <stdio.h> |
#include <string.h> |
#include "libavutil/attributes.h" |
#include "libavutil/avutil.h" |
#include "libavutil/avassert.h" |
#include "libavutil/bswap.h" |
#include "libavutil/cpu.h" |
#include "libavutil/intreadwrite.h" |
#include "libavutil/mathematics.h" |
#include "libavutil/pixdesc.h" |
#include "config.h" |
#include "rgb2rgb.h" |
#include "swscale.h" |
#include "swscale_internal.h" |
DECLARE_ALIGNED(8, const uint8_t, ff_dither_2x2_4)[][8] = { |
{ 1, 3, 1, 3, 1, 3, 1, 3, }, |
{ 2, 0, 2, 0, 2, 0, 2, 0, }, |
{ 1, 3, 1, 3, 1, 3, 1, 3, }, |
}; |
DECLARE_ALIGNED(8, const uint8_t, ff_dither_2x2_8)[][8] = { |
{ 6, 2, 6, 2, 6, 2, 6, 2, }, |
{ 0, 4, 0, 4, 0, 4, 0, 4, }, |
{ 6, 2, 6, 2, 6, 2, 6, 2, }, |
}; |
DECLARE_ALIGNED(8, const uint8_t, ff_dither_4x4_16)[][8] = { |
{ 8, 4, 11, 7, 8, 4, 11, 7, }, |
{ 2, 14, 1, 13, 2, 14, 1, 13, }, |
{ 10, 6, 9, 5, 10, 6, 9, 5, }, |
{ 0, 12, 3, 15, 0, 12, 3, 15, }, |
{ 8, 4, 11, 7, 8, 4, 11, 7, }, |
}; |
DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_32)[][8] = { |
{ 17, 9, 23, 15, 16, 8, 22, 14, }, |
{ 5, 29, 3, 27, 4, 28, 2, 26, }, |
{ 21, 13, 19, 11, 20, 12, 18, 10, }, |
{ 0, 24, 6, 30, 1, 25, 7, 31, }, |
{ 16, 8, 22, 14, 17, 9, 23, 15, }, |
{ 4, 28, 2, 26, 5, 29, 3, 27, }, |
{ 20, 12, 18, 10, 21, 13, 19, 11, }, |
{ 1, 25, 7, 31, 0, 24, 6, 30, }, |
{ 17, 9, 23, 15, 16, 8, 22, 14, }, |
}; |
DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_73)[][8] = { |
{ 0, 55, 14, 68, 3, 58, 17, 72, }, |
{ 37, 18, 50, 32, 40, 22, 54, 35, }, |
{ 9, 64, 5, 59, 13, 67, 8, 63, }, |
{ 46, 27, 41, 23, 49, 31, 44, 26, }, |
{ 2, 57, 16, 71, 1, 56, 15, 70, }, |
{ 39, 21, 52, 34, 38, 19, 51, 33, }, |
{ 11, 66, 7, 62, 10, 65, 6, 60, }, |
{ 48, 30, 43, 25, 47, 29, 42, 24, }, |
{ 0, 55, 14, 68, 3, 58, 17, 72, }, |
}; |
#if 1 |
DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[][8] = { |
{117, 62, 158, 103, 113, 58, 155, 100, }, |
{ 34, 199, 21, 186, 31, 196, 17, 182, }, |
{144, 89, 131, 76, 141, 86, 127, 72, }, |
{ 0, 165, 41, 206, 10, 175, 52, 217, }, |
{110, 55, 151, 96, 120, 65, 162, 107, }, |
{ 28, 193, 14, 179, 38, 203, 24, 189, }, |
{138, 83, 124, 69, 148, 93, 134, 79, }, |
{ 7, 172, 48, 213, 3, 168, 45, 210, }, |
{117, 62, 158, 103, 113, 58, 155, 100, }, |
}; |
#elif 1 |
// tries to correct a gamma of 1.5 |
DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[][8] = { |
{ 0, 143, 18, 200, 2, 156, 25, 215, }, |
{ 78, 28, 125, 64, 89, 36, 138, 74, }, |
{ 10, 180, 3, 161, 16, 195, 8, 175, }, |
{109, 51, 93, 38, 121, 60, 105, 47, }, |
{ 1, 152, 23, 210, 0, 147, 20, 205, }, |
{ 85, 33, 134, 71, 81, 30, 130, 67, }, |
{ 14, 190, 6, 171, 12, 185, 5, 166, }, |
{117, 57, 101, 44, 113, 54, 97, 41, }, |
{ 0, 143, 18, 200, 2, 156, 25, 215, }, |
}; |
#elif 1 |
// tries to correct a gamma of 2.0 |
DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[][8] = { |
{ 0, 124, 8, 193, 0, 140, 12, 213, }, |
{ 55, 14, 104, 42, 66, 19, 119, 52, }, |
{ 3, 168, 1, 145, 6, 187, 3, 162, }, |
{ 86, 31, 70, 21, 99, 39, 82, 28, }, |
{ 0, 134, 11, 206, 0, 129, 9, 200, }, |
{ 62, 17, 114, 48, 58, 16, 109, 45, }, |
{ 5, 181, 2, 157, 4, 175, 1, 151, }, |
{ 95, 36, 78, 26, 90, 34, 74, 24, }, |
{ 0, 124, 8, 193, 0, 140, 12, 213, }, |
}; |
#else |
// tries to correct a gamma of 2.5 |
DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[][8] = { |
{ 0, 107, 3, 187, 0, 125, 6, 212, }, |
{ 39, 7, 86, 28, 49, 11, 102, 36, }, |
{ 1, 158, 0, 131, 3, 180, 1, 151, }, |
{ 68, 19, 52, 12, 81, 25, 64, 17, }, |
{ 0, 119, 5, 203, 0, 113, 4, 195, }, |
{ 45, 9, 96, 33, 42, 8, 91, 30, }, |
{ 2, 172, 1, 144, 2, 165, 0, 137, }, |
{ 77, 23, 60, 15, 72, 21, 56, 14, }, |
{ 0, 107, 3, 187, 0, 125, 6, 212, }, |
}; |
#endif |
#define output_pixel(pos, val, bias, signedness) \ |
if (big_endian) { \ |
AV_WB16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \ |
} else { \ |
AV_WL16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \ |
} |
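/* 16-bit output from the vertical scaler: sources are 19-bit |
 * intermediates held in 32 bits, so shift = 3 reduces them to 16 bits |
 * with rounding; output_pixel handles clipping and endianness. */ |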
static av_always_inline void |
yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW, |
int big_endian, int output_bits) |
{ |
int i; |
int shift = 3; |
av_assert0(output_bits == 16); |
for (i = 0; i < dstW; i++) { |
int val = src[i] + (1 << (shift - 1)); |
output_pixel(&dest[i], val, 0, uint); |
} |
} |
static av_always_inline void |
yuv2planeX_16_c_template(const int16_t *filter, int filterSize, |
const int32_t **src, uint16_t *dest, int dstW, |
int big_endian, int output_bits) |
{ |
int i; |
int shift = 15; |
av_assert0(output_bits == 16); |
for (i = 0; i < dstW; i++) { |
int val = 1 << (shift - 1); |
int j; |
/* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline |
* filters (or anything with negative coeffs), the range can be slightly |
* wider in both directions. To account for this overflow, we subtract |
* a constant so it always fits in the signed range (assuming a |
* reasonable filterSize), and re-add that at the end. */ |
val -= 0x40000000; |
for (j = 0; j < filterSize; j++) |
val += src[j][i] * (unsigned)filter[j]; |
output_pixel(&dest[i], val, 0x8000, int); |
} |
} |
#undef output_pixel |
#define output_pixel(pos, val) \ |
if (big_endian) { \ |
AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \ |
} else { \ |
AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \ |
} |
static av_always_inline void |
yuv2plane1_10_c_template(const int16_t *src, uint16_t *dest, int dstW, |
int big_endian, int output_bits) |
{ |
int i; |
int shift = 15 - output_bits; |
for (i = 0; i < dstW; i++) { |
int val = src[i] + (1 << (shift - 1)); |
output_pixel(&dest[i], val); |
} |
} |
static av_always_inline void |
yuv2planeX_10_c_template(const int16_t *filter, int filterSize, |
const int16_t **src, uint16_t *dest, int dstW, |
int big_endian, int output_bits) |
{ |
int i; |
int shift = 11 + 16 - output_bits; |
for (i = 0; i < dstW; i++) { |
int val = 1 << (shift - 1); |
int j; |
for (j = 0; j < filterSize; j++) |
val += src[j][i] * filter[j]; |
output_pixel(&dest[i], val); |
} |
} |
#undef output_pixel |
#define yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t) \ |
static void yuv2plane1_ ## bits ## BE_LE ## _c(const int16_t *src, \ |
uint8_t *dest, int dstW, \ |
const uint8_t *dither, int offset)\ |
{ \ |
yuv2plane1_ ## template_size ## _c_template((const typeX_t *) src, \ |
(uint16_t *) dest, dstW, is_be, bits); \ |
}\ |
static void yuv2planeX_ ## bits ## BE_LE ## _c(const int16_t *filter, int filterSize, \ |
const int16_t **src, uint8_t *dest, int dstW, \ |
const uint8_t *dither, int offset)\ |
{ \ |
yuv2planeX_## template_size ## _c_template(filter, \ |
filterSize, (const typeX_t **) src, \ |
(uint16_t *) dest, dstW, is_be, bits); \ |
} |
yuv2NBPS( 9, BE, 1, 10, int16_t) |
yuv2NBPS( 9, LE, 0, 10, int16_t) |
yuv2NBPS(10, BE, 1, 10, int16_t) |
yuv2NBPS(10, LE, 0, 10, int16_t) |
yuv2NBPS(12, BE, 1, 10, int16_t) |
yuv2NBPS(12, LE, 0, 10, int16_t) |
yuv2NBPS(14, BE, 1, 10, int16_t) |
yuv2NBPS(14, LE, 0, 10, int16_t) |
yuv2NBPS(16, BE, 1, 16, int32_t) |
yuv2NBPS(16, LE, 0, 16, int32_t) |
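/* 8-bit planar output: 15-bit intermediates times Q12 filter coefficients |
 * give a sum scaled by 2^19; the 8-bit dither values are added at 2^12 |
 * scale before the final >>19. */ |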
static void yuv2planeX_8_c(const int16_t *filter, int filterSize, |
const int16_t **src, uint8_t *dest, int dstW, |
const uint8_t *dither, int offset) |
{ |
int i; |
for (i=0; i<dstW; i++) { |
int val = dither[(i + offset) & 7] << 12; |
int j; |
for (j=0; j<filterSize; j++) |
val += src[j][i] * filter[j]; |
dest[i]= av_clip_uint8(val>>19); |
} |
} |
static void yuv2plane1_8_c(const int16_t *src, uint8_t *dest, int dstW, |
const uint8_t *dither, int offset) |
{ |
int i; |
for (i=0; i<dstW; i++) { |
int val = (src[i] + dither[(i + offset) & 7]) >> 7; |
dest[i]= av_clip_uint8(val); |
} |
} |
static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize, |
const int16_t **chrUSrc, const int16_t **chrVSrc, |
uint8_t *dest, int chrDstW) |
{ |
enum AVPixelFormat dstFormat = c->dstFormat; |
const uint8_t *chrDither = c->chrDither8; |
int i; |
if (dstFormat == AV_PIX_FMT_NV12) |
for (i=0; i<chrDstW; i++) { |
int u = chrDither[i & 7] << 12; |
int v = chrDither[(i + 3) & 7] << 12; |
int j; |
for (j=0; j<chrFilterSize; j++) { |
u += chrUSrc[j][i] * chrFilter[j]; |
v += chrVSrc[j][i] * chrFilter[j]; |
} |
dest[2*i]= av_clip_uint8(u>>19); |
dest[2*i+1]= av_clip_uint8(v>>19); |
} |
else |
for (i=0; i<chrDstW; i++) { |
int u = chrDither[i & 7] << 12; |
int v = chrDither[(i + 3) & 7] << 12; |
int j; |
for (j=0; j<chrFilterSize; j++) { |
u += chrUSrc[j][i] * chrFilter[j]; |
v += chrVSrc[j][i] * chrFilter[j]; |
} |
dest[2*i]= av_clip_uint8(v>>19); |
dest[2*i+1]= av_clip_uint8(u>>19); |
} |
} |
#define accumulate_bit(acc, val) \ |
acc <<= 1; \ |
acc |= (val) >= (128 + 110) |
#define output_pixel(pos, acc) \ |
if (target == AV_PIX_FMT_MONOBLACK) { \ |
pos = acc; \ |
} else { \ |
pos = ~acc; \ |
} |
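/* Pack 8 luma samples per output byte for the 1 bpp targets, either with |
 * the 8x8 ordered-dither matrix d128 or, for SWS_DITHER_ED, with |
 * Floyd-Steinberg-style error diffusion (the 7/1/5/3 weights below). */ |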
static av_always_inline void |
yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter, |
const int16_t **lumSrc, int lumFilterSize, |
const int16_t *chrFilter, const int16_t **chrUSrc, |
const int16_t **chrVSrc, int chrFilterSize, |
const int16_t **alpSrc, uint8_t *dest, int dstW, |
int y, enum AVPixelFormat target) |
{ |
const uint8_t * const d128 = ff_dither_8x8_220[y&7]; |
int i; |
unsigned acc = 0; |
int err = 0; |
for (i = 0; i < dstW; i += 2) { |
int j; |
int Y1 = 1 << 18; |
int Y2 = 1 << 18; |
for (j = 0; j < lumFilterSize; j++) { |
Y1 += lumSrc[j][i] * lumFilter[j]; |
Y2 += lumSrc[j][i+1] * lumFilter[j]; |
} |
Y1 >>= 19; |
Y2 >>= 19; |
if ((Y1 | Y2) & 0x100) { |
Y1 = av_clip_uint8(Y1); |
Y2 = av_clip_uint8(Y2); |
} |
if (c->dither == SWS_DITHER_ED) { |
Y1 += (7*err + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2] + 8 - 256)>>4; |
c->dither_error[0][i] = err; |
acc = 2*acc + (Y1 >= 128); |
Y1 -= 220*(acc&1); |
err = Y2 + ((7*Y1 + 1*c->dither_error[0][i+1] + 5*c->dither_error[0][i+2] + 3*c->dither_error[0][i+3] + 8 - 256)>>4); |
c->dither_error[0][i+1] = Y1; |
acc = 2*acc + (err >= 128); |
err -= 220*(acc&1); |
} else { |
accumulate_bit(acc, Y1 + d128[(i + 0) & 7]); |
accumulate_bit(acc, Y2 + d128[(i + 1) & 7]); |
} |
if ((i & 7) == 6) { |
output_pixel(*dest++, acc); |
} |
} |
c->dither_error[0][i] = err; |
if (i & 6) { |
output_pixel(*dest, acc); |
} |
} |
static av_always_inline void |
yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2], |
const int16_t *ubuf[2], const int16_t *vbuf[2], |
const int16_t *abuf[2], uint8_t *dest, int dstW, |
int yalpha, int uvalpha, int y, |
enum AVPixelFormat target) |
{ |
const int16_t *buf0 = buf[0], *buf1 = buf[1]; |
const uint8_t * const d128 = ff_dither_8x8_220[y & 7]; |
int yalpha1 = 4096 - yalpha; |
int i; |
if (c->dither == SWS_DITHER_ED) { |
int err = 0; |
int acc = 0; |
for (i = 0; i < dstW; i +=2) { |
int Y; |
Y = (buf0[i + 0] * yalpha1 + buf1[i + 0] * yalpha) >> 19; |
Y += (7*err + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2] + 8 - 256)>>4; |
c->dither_error[0][i] = err; |
acc = 2*acc + (Y >= 128); |
Y -= 220*(acc&1); |
err = (buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19; |
err += (7*Y + 1*c->dither_error[0][i+1] + 5*c->dither_error[0][i+2] + 3*c->dither_error[0][i+3] + 8 - 256)>>4; |
c->dither_error[0][i+1] = Y; |
acc = 2*acc + (err >= 128); |
err -= 220*(acc&1); |
if ((i & 7) == 6) |
output_pixel(*dest++, acc); |
} |
c->dither_error[0][i] = err; |
} else { |
for (i = 0; i < dstW; i += 8) { |
int Y, acc = 0; |
Y = (buf0[i + 0] * yalpha1 + buf1[i + 0] * yalpha) >> 19; |
accumulate_bit(acc, Y + d128[0]); |
Y = (buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19; |
accumulate_bit(acc, Y + d128[1]); |
Y = (buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19; |
accumulate_bit(acc, Y + d128[2]); |
Y = (buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19; |
accumulate_bit(acc, Y + d128[3]); |
Y = (buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19; |
accumulate_bit(acc, Y + d128[4]); |
Y = (buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19; |
accumulate_bit(acc, Y + d128[5]); |
Y = (buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19; |
accumulate_bit(acc, Y + d128[6]); |
Y = (buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19; |
accumulate_bit(acc, Y + d128[7]); |
output_pixel(*dest++, acc); |
} |
} |
} |
static av_always_inline void |
yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0, |
const int16_t *ubuf[2], const int16_t *vbuf[2], |
const int16_t *abuf0, uint8_t *dest, int dstW, |
int uvalpha, int y, enum AVPixelFormat target) |
{ |
const uint8_t * const d128 = ff_dither_8x8_220[y & 7]; |
int i; |
if (c->dither == SWS_DITHER_ED) { |
int err = 0; |
int acc = 0; |
for (i = 0; i < dstW; i +=2) { |
int Y; |
Y = ((buf0[i + 0] + 64) >> 7); |
Y += (7*err + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2] + 8 - 256)>>4; |
c->dither_error[0][i] = err; |
acc = 2*acc + (Y >= 128); |
Y -= 220*(acc&1); |
err = ((buf0[i + 1] + 64) >> 7); |
err += (7*Y + 1*c->dither_error[0][i+1] + 5*c->dither_error[0][i+2] + 3*c->dither_error[0][i+3] + 8 - 256)>>4; |
c->dither_error[0][i+1] = Y; |
acc = 2*acc + (err >= 128); |
err -= 220*(acc&1); |
if ((i & 7) == 6) |
output_pixel(*dest++, acc); |
} |
c->dither_error[0][i] = err; |
} else { |
for (i = 0; i < dstW; i += 8) { |
int acc = 0; |
accumulate_bit(acc, ((buf0[i + 0] + 64) >> 7) + d128[0]); |
accumulate_bit(acc, ((buf0[i + 1] + 64) >> 7) + d128[1]); |
accumulate_bit(acc, ((buf0[i + 2] + 64) >> 7) + d128[2]); |
accumulate_bit(acc, ((buf0[i + 3] + 64) >> 7) + d128[3]); |
accumulate_bit(acc, ((buf0[i + 4] + 64) >> 7) + d128[4]); |
accumulate_bit(acc, ((buf0[i + 5] + 64) >> 7) + d128[5]); |
accumulate_bit(acc, ((buf0[i + 6] + 64) >> 7) + d128[6]); |
accumulate_bit(acc, ((buf0[i + 7] + 64) >> 7) + d128[7]); |
output_pixel(*dest++, acc); |
} |
} |
} |
#undef output_pixel |
#undef accumulate_bit |
#define YUV2PACKEDWRAPPER(name, base, ext, fmt) \ |
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \ |
const int16_t **lumSrc, int lumFilterSize, \ |
const int16_t *chrFilter, const int16_t **chrUSrc, \ |
const int16_t **chrVSrc, int chrFilterSize, \ |
const int16_t **alpSrc, uint8_t *dest, int dstW, \ |
int y) \ |
{ \ |
name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \ |
chrFilter, chrUSrc, chrVSrc, chrFilterSize, \ |
alpSrc, dest, dstW, y, fmt); \ |
} \ |
\ |
static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \ |
const int16_t *ubuf[2], const int16_t *vbuf[2], \ |
const int16_t *abuf[2], uint8_t *dest, int dstW, \ |
int yalpha, int uvalpha, int y) \ |
{ \ |
name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \ |
dest, dstW, yalpha, uvalpha, y, fmt); \ |
} \ |
\ |
static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \ |
const int16_t *ubuf[2], const int16_t *vbuf[2], \ |
const int16_t *abuf0, uint8_t *dest, int dstW, \ |
int uvalpha, int y) \ |
{ \ |
name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \ |
abuf0, dest, dstW, uvalpha, \ |
y, fmt); \ |
} |
YUV2PACKEDWRAPPER(yuv2mono,, white, AV_PIX_FMT_MONOWHITE) |
YUV2PACKEDWRAPPER(yuv2mono,, black, AV_PIX_FMT_MONOBLACK) |
#define output_pixels(pos, Y1, U, Y2, V) \ |
if (target == AV_PIX_FMT_YUYV422) { \ |
dest[pos + 0] = Y1; \ |
dest[pos + 1] = U; \ |
dest[pos + 2] = Y2; \ |
dest[pos + 3] = V; \ |
} else if (target == AV_PIX_FMT_YVYU422) { \ |
dest[pos + 0] = Y1; \ |
dest[pos + 1] = V; \ |
dest[pos + 2] = Y2; \ |
dest[pos + 3] = U; \ |
} else { /* AV_PIX_FMT_UYVY422 */ \ |
dest[pos + 0] = U; \ |
dest[pos + 1] = Y1; \ |
dest[pos + 2] = V; \ |
dest[pos + 3] = Y2; \ |
} |
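/* Packed 4:2:2 output. YUV2PACKEDWRAPPER instantiates three variants per |
 * format from these templates: _X runs the full multi-tap vertical |
 * filter, _2 blends two source lines, and _1 emits a single line, |
 * averaging the two chroma buffers when uvalpha >= 2048. */ |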
static av_always_inline void |
yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter, |
const int16_t **lumSrc, int lumFilterSize, |
const int16_t *chrFilter, const int16_t **chrUSrc, |
const int16_t **chrVSrc, int chrFilterSize, |
const int16_t **alpSrc, uint8_t *dest, int dstW, |
int y, enum AVPixelFormat target) |
{ |
int i; |
for (i = 0; i < ((dstW + 1) >> 1); i++) { |
int j; |
int Y1 = 1 << 18; |
int Y2 = 1 << 18; |
int U = 1 << 18; |
int V = 1 << 18; |
for (j = 0; j < lumFilterSize; j++) { |
Y1 += lumSrc[j][i * 2] * lumFilter[j]; |
Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j]; |
} |
for (j = 0; j < chrFilterSize; j++) { |
U += chrUSrc[j][i] * chrFilter[j]; |
V += chrVSrc[j][i] * chrFilter[j]; |
} |
Y1 >>= 19; |
Y2 >>= 19; |
U >>= 19; |
V >>= 19; |
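        /* Fast out-of-range test: for the value range produced here, any
         * component outside [0,255] has bit 8 set, so a single OR + mask
         * replaces four compares in the common in-range case. */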
if ((Y1 | Y2 | U | V) & 0x100) { |
Y1 = av_clip_uint8(Y1); |
Y2 = av_clip_uint8(Y2); |
U = av_clip_uint8(U); |
V = av_clip_uint8(V); |
} |
output_pixels(4*i, Y1, U, Y2, V); |
} |
} |
static av_always_inline void |
yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2], |
const int16_t *ubuf[2], const int16_t *vbuf[2], |
const int16_t *abuf[2], uint8_t *dest, int dstW, |
int yalpha, int uvalpha, int y, |
enum AVPixelFormat target) |
{ |
const int16_t *buf0 = buf[0], *buf1 = buf[1], |
*ubuf0 = ubuf[0], *ubuf1 = ubuf[1], |
*vbuf0 = vbuf[0], *vbuf1 = vbuf[1]; |
int yalpha1 = 4096 - yalpha; |
int uvalpha1 = 4096 - uvalpha; |
int i; |
for (i = 0; i < ((dstW + 1) >> 1); i++) { |
int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19; |
int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19; |
int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19; |
int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19; |
if ((Y1 | Y2 | U | V) & 0x100) { |
Y1 = av_clip_uint8(Y1); |
Y2 = av_clip_uint8(Y2); |
U = av_clip_uint8(U); |
V = av_clip_uint8(V); |
} |
output_pixels(i * 4, Y1, U, Y2, V); |
} |
} |
static av_always_inline void |
yuv2422_1_c_template(SwsContext *c, const int16_t *buf0, |
const int16_t *ubuf[2], const int16_t *vbuf[2], |
const int16_t *abuf0, uint8_t *dest, int dstW, |
int uvalpha, int y, enum AVPixelFormat target) |
{ |
const int16_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0]; |
int i; |
if (uvalpha < 2048) { |
for (i = 0; i < ((dstW + 1) >> 1); i++) { |
int Y1 = (buf0[i * 2 ]+64) >> 7; |
int Y2 = (buf0[i * 2 + 1]+64) >> 7; |
int U = (ubuf0[i] +64) >> 7; |
int V = (vbuf0[i] +64) >> 7; |
if ((Y1 | Y2 | U | V) & 0x100) { |
Y1 = av_clip_uint8(Y1); |
Y2 = av_clip_uint8(Y2); |
U = av_clip_uint8(U); |
V = av_clip_uint8(V); |
} |
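            /* The unconditional clips below are presumably kept because the
             * bit-8 test above can miss larger excursions (e.g. 512, or very
             * negative values whose bit 8 happens to be clear); likewise in
             * the branch further below. */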
Y1 = av_clip_uint8(Y1); |
Y2 = av_clip_uint8(Y2); |
U = av_clip_uint8(U); |
V = av_clip_uint8(V); |
output_pixels(i * 4, Y1, U, Y2, V); |
} |
} else { |
const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1]; |
for (i = 0; i < ((dstW + 1) >> 1); i++) { |
int Y1 = (buf0[i * 2 ] + 64) >> 7; |
int Y2 = (buf0[i * 2 + 1] + 64) >> 7; |
int U = (ubuf0[i] + ubuf1[i]+128) >> 8; |
int V = (vbuf0[i] + vbuf1[i]+128) >> 8; |
if ((Y1 | Y2 | U | V) & 0x100) { |
Y1 = av_clip_uint8(Y1); |
Y2 = av_clip_uint8(Y2); |
U = av_clip_uint8(U); |
V = av_clip_uint8(V); |
} |
Y1 = av_clip_uint8(Y1); |
Y2 = av_clip_uint8(Y2); |
U = av_clip_uint8(U); |
V = av_clip_uint8(V); |
output_pixels(i * 4, Y1, U, Y2, V); |
} |
} |
} |
#undef output_pixels |
YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, AV_PIX_FMT_YUYV422) |
YUV2PACKEDWRAPPER(yuv2, 422, yvyu422, AV_PIX_FMT_YVYU422) |
YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, AV_PIX_FMT_UYVY422) |
#define R_B ((target == AV_PIX_FMT_RGB48LE || target == AV_PIX_FMT_RGB48BE || target == AV_PIX_FMT_RGBA64LE || target == AV_PIX_FMT_RGBA64BE) ? R : B) |
#define B_R ((target == AV_PIX_FMT_RGB48LE || target == AV_PIX_FMT_RGB48BE || target == AV_PIX_FMT_RGBA64LE || target == AV_PIX_FMT_RGBA64BE) ? B : R) |
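/* R_B/B_R pick red or blue depending on whether the target is an RGB- or
 * BGR-ordered 16-bit-per-component format, so one template body emits both
 * channel orders. */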
#define output_pixel(pos, val) \ |
if (isBE(target)) { \ |
AV_WB16(pos, val); \ |
} else { \ |
AV_WL16(pos, val); \ |
} |
static av_always_inline void |
yuv2rgba64_X_c_template(SwsContext *c, const int16_t *lumFilter, |
const int32_t **lumSrc, int lumFilterSize, |
const int16_t *chrFilter, const int32_t **chrUSrc, |
const int32_t **chrVSrc, int chrFilterSize, |
const int32_t **alpSrc, uint16_t *dest, int dstW, |
int y, enum AVPixelFormat target, int hasAlpha, int eightbytes) |
{ |
int i; |
int A1 = 0xffff<<14, A2 = 0xffff<<14; |
for (i = 0; i < ((dstW + 1) >> 1); i++) { |
int j; |
int Y1 = -0x40000000; |
int Y2 = -0x40000000; |
int U = -128 << 23; // 19 |
int V = -128 << 23; |
int R, G, B; |
for (j = 0; j < lumFilterSize; j++) { |
Y1 += lumSrc[j][i * 2] * (unsigned)lumFilter[j]; |
Y2 += lumSrc[j][i * 2 + 1] * (unsigned)lumFilter[j]; |
} |
        for (j = 0; j < chrFilterSize; j++) {
U += chrUSrc[j][i] * (unsigned)chrFilter[j]; |
V += chrVSrc[j][i] * (unsigned)chrFilter[j]; |
} |
if (hasAlpha) { |
A1 = -0x40000000; |
A2 = -0x40000000; |
for (j = 0; j < lumFilterSize; j++) { |
A1 += alpSrc[j][i * 2] * (unsigned)lumFilter[j]; |
A2 += alpSrc[j][i * 2 + 1] * (unsigned)lumFilter[j]; |
} |
A1 >>= 1; |
A1 += 0x20002000; |
A2 >>= 1; |
A2 += 0x20002000; |
} |
// 8bit: 12+15=27; 16-bit: 12+19=31 |
Y1 >>= 14; // 10 |
Y1 += 0x10000; |
Y2 >>= 14; |
Y2 += 0x10000; |
U >>= 14; |
V >>= 14; |
// 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit |
Y1 -= c->yuv2rgb_y_offset; |
Y2 -= c->yuv2rgb_y_offset; |
Y1 *= c->yuv2rgb_y_coeff; |
Y2 *= c->yuv2rgb_y_coeff; |
Y1 += 1 << 13; // 21 |
Y2 += 1 << 13; |
// 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit |
R = V * c->yuv2rgb_v2r_coeff; |
G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; |
B = U * c->yuv2rgb_u2b_coeff; |
// 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit |
output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14); |
output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14); |
output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14); |
if (eightbytes) { |
output_pixel(&dest[3], av_clip_uintp2(A1 , 30) >> 14); |
output_pixel(&dest[4], av_clip_uintp2(R_B + Y2, 30) >> 14); |
output_pixel(&dest[5], av_clip_uintp2( G + Y2, 30) >> 14); |
output_pixel(&dest[6], av_clip_uintp2(B_R + Y2, 30) >> 14); |
output_pixel(&dest[7], av_clip_uintp2(A2 , 30) >> 14); |
dest += 8; |
} else { |
output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14); |
output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14); |
output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14); |
dest += 6; |
} |
} |
} |
static av_always_inline void |
yuv2rgba64_2_c_template(SwsContext *c, const int32_t *buf[2], |
const int32_t *ubuf[2], const int32_t *vbuf[2], |
const int32_t *abuf[2], uint16_t *dest, int dstW, |
int yalpha, int uvalpha, int y, |
enum AVPixelFormat target, int hasAlpha, int eightbytes) |
{ |
const int32_t *buf0 = buf[0], *buf1 = buf[1], |
*ubuf0 = ubuf[0], *ubuf1 = ubuf[1], |
*vbuf0 = vbuf[0], *vbuf1 = vbuf[1], |
*abuf0 = hasAlpha ? abuf[0] : NULL, |
*abuf1 = hasAlpha ? abuf[1] : NULL; |
int yalpha1 = 4096 - yalpha; |
int uvalpha1 = 4096 - uvalpha; |
int i; |
int A1 = 0xffff<<14, A2 = 0xffff<<14; |
for (i = 0; i < ((dstW + 1) >> 1); i++) { |
int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14; |
int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14; |
int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14; |
int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14; |
int R, G, B; |
Y1 -= c->yuv2rgb_y_offset; |
Y2 -= c->yuv2rgb_y_offset; |
Y1 *= c->yuv2rgb_y_coeff; |
Y2 *= c->yuv2rgb_y_coeff; |
Y1 += 1 << 13; |
Y2 += 1 << 13; |
R = V * c->yuv2rgb_v2r_coeff; |
G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; |
B = U * c->yuv2rgb_u2b_coeff; |
if (hasAlpha) { |
A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 1; |
A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 1; |
A1 += 1 << 13; |
A2 += 1 << 13; |
} |
output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14); |
output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14); |
output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14); |
if (eightbytes) { |
output_pixel(&dest[3], av_clip_uintp2(A1 , 30) >> 14); |
output_pixel(&dest[4], av_clip_uintp2(R_B + Y2, 30) >> 14); |
output_pixel(&dest[5], av_clip_uintp2( G + Y2, 30) >> 14); |
output_pixel(&dest[6], av_clip_uintp2(B_R + Y2, 30) >> 14); |
output_pixel(&dest[7], av_clip_uintp2(A2 , 30) >> 14); |
dest += 8; |
} else { |
output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14); |
output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14); |
output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14); |
dest += 6; |
} |
} |
} |
static av_always_inline void |
yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0, |
const int32_t *ubuf[2], const int32_t *vbuf[2], |
const int32_t *abuf0, uint16_t *dest, int dstW, |
int uvalpha, int y, enum AVPixelFormat target, int hasAlpha, int eightbytes) |
{ |
const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0]; |
int i; |
    int A1 = 0xffff<<14, A2 = 0xffff<<14;
if (uvalpha < 2048) { |
for (i = 0; i < ((dstW + 1) >> 1); i++) { |
int Y1 = (buf0[i * 2] ) >> 2; |
int Y2 = (buf0[i * 2 + 1]) >> 2; |
int U = (ubuf0[i] + (-128 << 11)) >> 2; |
int V = (vbuf0[i] + (-128 << 11)) >> 2; |
int R, G, B; |
Y1 -= c->yuv2rgb_y_offset; |
Y2 -= c->yuv2rgb_y_offset; |
Y1 *= c->yuv2rgb_y_coeff; |
Y2 *= c->yuv2rgb_y_coeff; |
Y1 += 1 << 13; |
Y2 += 1 << 13; |
if (hasAlpha) { |
A1 = abuf0[i * 2 ] << 11; |
A2 = abuf0[i * 2 + 1] << 11; |
A1 += 1 << 13; |
A2 += 1 << 13; |
} |
R = V * c->yuv2rgb_v2r_coeff; |
G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; |
B = U * c->yuv2rgb_u2b_coeff; |
output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14); |
output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14); |
output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14); |
if (eightbytes) { |
output_pixel(&dest[3], av_clip_uintp2(A1 , 30) >> 14); |
output_pixel(&dest[4], av_clip_uintp2(R_B + Y2, 30) >> 14); |
output_pixel(&dest[5], av_clip_uintp2( G + Y2, 30) >> 14); |
output_pixel(&dest[6], av_clip_uintp2(B_R + Y2, 30) >> 14); |
output_pixel(&dest[7], av_clip_uintp2(A2 , 30) >> 14); |
dest += 8; |
} else { |
output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14); |
output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14); |
output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14); |
dest += 6; |
} |
} |
} else { |
const int32_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1]; |
for (i = 0; i < ((dstW + 1) >> 1); i++) { |
int Y1 = (buf0[i * 2] ) >> 2; |
int Y2 = (buf0[i * 2 + 1]) >> 2; |
int U = (ubuf0[i] + ubuf1[i] + (-128 << 12)) >> 3; |
int V = (vbuf0[i] + vbuf1[i] + (-128 << 12)) >> 3; |
int R, G, B; |
Y1 -= c->yuv2rgb_y_offset; |
Y2 -= c->yuv2rgb_y_offset; |
Y1 *= c->yuv2rgb_y_coeff; |
Y2 *= c->yuv2rgb_y_coeff; |
Y1 += 1 << 13; |
Y2 += 1 << 13; |
if (hasAlpha) { |
A1 = abuf0[i * 2 ] << 11; |
A2 = abuf0[i * 2 + 1] << 11; |
A1 += 1 << 13; |
A2 += 1 << 13; |
} |
R = V * c->yuv2rgb_v2r_coeff; |
G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; |
B = U * c->yuv2rgb_u2b_coeff; |
output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14); |
output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14); |
output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14); |
if (eightbytes) { |
output_pixel(&dest[3], av_clip_uintp2(A1 , 30) >> 14); |
output_pixel(&dest[4], av_clip_uintp2(R_B + Y2, 30) >> 14); |
output_pixel(&dest[5], av_clip_uintp2( G + Y2, 30) >> 14); |
output_pixel(&dest[6], av_clip_uintp2(B_R + Y2, 30) >> 14); |
output_pixel(&dest[7], av_clip_uintp2(A2 , 30) >> 14); |
dest += 8; |
} else { |
output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14); |
output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14); |
output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14); |
dest += 6; |
} |
} |
} |
} |
static av_always_inline void |
yuv2rgba64_full_X_c_template(SwsContext *c, const int16_t *lumFilter, |
const int32_t **lumSrc, int lumFilterSize, |
const int16_t *chrFilter, const int32_t **chrUSrc, |
const int32_t **chrVSrc, int chrFilterSize, |
const int32_t **alpSrc, uint16_t *dest, int dstW, |
int y, enum AVPixelFormat target, int hasAlpha, int eightbytes) |
{ |
int i; |
int A = 0xffff<<14; |
for (i = 0; i < dstW; i++) { |
int j; |
int Y = -0x40000000; |
int U = -128 << 23; // 19 |
int V = -128 << 23; |
int R, G, B; |
for (j = 0; j < lumFilterSize; j++) { |
Y += lumSrc[j][i] * (unsigned)lumFilter[j]; |
} |
        for (j = 0; j < chrFilterSize; j++) {
U += chrUSrc[j][i] * (unsigned)chrFilter[j]; |
V += chrVSrc[j][i] * (unsigned)chrFilter[j]; |
} |
if (hasAlpha) { |
A = -0x40000000; |
for (j = 0; j < lumFilterSize; j++) { |
A += alpSrc[j][i] * (unsigned)lumFilter[j]; |
} |
A >>= 1; |
A += 0x20002000; |
} |
// 8bit: 12+15=27; 16-bit: 12+19=31 |
Y >>= 14; // 10 |
Y += 0x10000; |
U >>= 14; |
V >>= 14; |
// 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit |
Y -= c->yuv2rgb_y_offset; |
Y *= c->yuv2rgb_y_coeff; |
Y += 1 << 13; // 21 |
// 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit |
R = V * c->yuv2rgb_v2r_coeff; |
G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; |
B = U * c->yuv2rgb_u2b_coeff; |
// 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit |
output_pixel(&dest[0], av_clip_uintp2(R_B + Y, 30) >> 14); |
output_pixel(&dest[1], av_clip_uintp2( G + Y, 30) >> 14); |
output_pixel(&dest[2], av_clip_uintp2(B_R + Y, 30) >> 14); |
if (eightbytes) { |
output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14); |
dest += 4; |
} else { |
dest += 3; |
} |
} |
} |
static av_always_inline void |
yuv2rgba64_full_2_c_template(SwsContext *c, const int32_t *buf[2], |
const int32_t *ubuf[2], const int32_t *vbuf[2], |
const int32_t *abuf[2], uint16_t *dest, int dstW, |
int yalpha, int uvalpha, int y, |
enum AVPixelFormat target, int hasAlpha, int eightbytes) |
{ |
const int32_t *buf0 = buf[0], *buf1 = buf[1], |
*ubuf0 = ubuf[0], *ubuf1 = ubuf[1], |
*vbuf0 = vbuf[0], *vbuf1 = vbuf[1], |
*abuf0 = hasAlpha ? abuf[0] : NULL, |
*abuf1 = hasAlpha ? abuf[1] : NULL; |
int yalpha1 = 4096 - yalpha; |
int uvalpha1 = 4096 - uvalpha; |
int i; |
int A = 0xffff<<14; |
for (i = 0; i < dstW; i++) { |
int Y = (buf0[i] * yalpha1 + buf1[i] * yalpha) >> 14; |
int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14; |
int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14; |
int R, G, B; |
Y -= c->yuv2rgb_y_offset; |
Y *= c->yuv2rgb_y_coeff; |
Y += 1 << 13; |
R = V * c->yuv2rgb_v2r_coeff; |
G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; |
B = U * c->yuv2rgb_u2b_coeff; |
if (hasAlpha) { |
A = (abuf0[i] * yalpha1 + abuf1[i] * yalpha) >> 1; |
A += 1 << 13; |
} |
output_pixel(&dest[0], av_clip_uintp2(R_B + Y, 30) >> 14); |
output_pixel(&dest[1], av_clip_uintp2( G + Y, 30) >> 14); |
output_pixel(&dest[2], av_clip_uintp2(B_R + Y, 30) >> 14); |
if (eightbytes) { |
output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14); |
dest += 4; |
} else { |
dest += 3; |
} |
} |
} |
static av_always_inline void |
yuv2rgba64_full_1_c_template(SwsContext *c, const int32_t *buf0, |
const int32_t *ubuf[2], const int32_t *vbuf[2], |
const int32_t *abuf0, uint16_t *dest, int dstW, |
int uvalpha, int y, enum AVPixelFormat target, int hasAlpha, int eightbytes) |
{ |
const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0]; |
int i; |
int A = 0xffff<<14; |
if (uvalpha < 2048) { |
for (i = 0; i < dstW; i++) { |
int Y = (buf0[i]) >> 2; |
int U = (ubuf0[i] + (-128 << 11)) >> 2; |
int V = (vbuf0[i] + (-128 << 11)) >> 2; |
int R, G, B; |
Y -= c->yuv2rgb_y_offset; |
Y *= c->yuv2rgb_y_coeff; |
Y += 1 << 13; |
if (hasAlpha) { |
A = abuf0[i] << 11; |
A += 1 << 13; |
} |
R = V * c->yuv2rgb_v2r_coeff; |
G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; |
B = U * c->yuv2rgb_u2b_coeff; |
output_pixel(&dest[0], av_clip_uintp2(R_B + Y, 30) >> 14); |
output_pixel(&dest[1], av_clip_uintp2( G + Y, 30) >> 14); |
output_pixel(&dest[2], av_clip_uintp2(B_R + Y, 30) >> 14); |
if (eightbytes) { |
output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14); |
dest += 4; |
} else { |
dest += 3; |
} |
} |
} else { |
const int32_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1]; |
for (i = 0; i < dstW; i++) { |
int Y = (buf0[i] ) >> 2; |
int U = (ubuf0[i] + ubuf1[i] + (-128 << 12)) >> 3; |
int V = (vbuf0[i] + vbuf1[i] + (-128 << 12)) >> 3; |
int R, G, B; |
Y -= c->yuv2rgb_y_offset; |
Y *= c->yuv2rgb_y_coeff; |
Y += 1 << 13; |
if (hasAlpha) { |
A = abuf0[i] << 11; |
A += 1 << 13; |
} |
R = V * c->yuv2rgb_v2r_coeff; |
G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; |
B = U * c->yuv2rgb_u2b_coeff; |
output_pixel(&dest[0], av_clip_uintp2(R_B + Y, 30) >> 14); |
output_pixel(&dest[1], av_clip_uintp2( G + Y, 30) >> 14); |
output_pixel(&dest[2], av_clip_uintp2(B_R + Y, 30) >> 14); |
if (eightbytes) { |
output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14); |
dest += 4; |
} else { |
dest += 3; |
} |
} |
} |
} |
#undef output_pixel |
#undef R_B
#undef B_R
#define YUV2PACKED16WRAPPER(name, base, ext, fmt, hasAlpha, eightbytes) \ |
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \ |
const int16_t **_lumSrc, int lumFilterSize, \ |
const int16_t *chrFilter, const int16_t **_chrUSrc, \ |
const int16_t **_chrVSrc, int chrFilterSize, \ |
const int16_t **_alpSrc, uint8_t *_dest, int dstW, \ |
int y) \ |
{ \ |
const int32_t **lumSrc = (const int32_t **) _lumSrc, \ |
**chrUSrc = (const int32_t **) _chrUSrc, \ |
**chrVSrc = (const int32_t **) _chrVSrc, \ |
**alpSrc = (const int32_t **) _alpSrc; \ |
uint16_t *dest = (uint16_t *) _dest; \ |
name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \ |
chrFilter, chrUSrc, chrVSrc, chrFilterSize, \ |
alpSrc, dest, dstW, y, fmt, hasAlpha, eightbytes); \ |
} \ |
\ |
static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \ |
const int16_t *_ubuf[2], const int16_t *_vbuf[2], \ |
const int16_t *_abuf[2], uint8_t *_dest, int dstW, \ |
int yalpha, int uvalpha, int y) \ |
{ \ |
const int32_t **buf = (const int32_t **) _buf, \ |
**ubuf = (const int32_t **) _ubuf, \ |
**vbuf = (const int32_t **) _vbuf, \ |
**abuf = (const int32_t **) _abuf; \ |
uint16_t *dest = (uint16_t *) _dest; \ |
name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \ |
dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha, eightbytes); \ |
} \ |
\ |
static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \ |
const int16_t *_ubuf[2], const int16_t *_vbuf[2], \ |
const int16_t *_abuf0, uint8_t *_dest, int dstW, \ |
int uvalpha, int y) \ |
{ \ |
const int32_t *buf0 = (const int32_t *) _buf0, \ |
**ubuf = (const int32_t **) _ubuf, \ |
**vbuf = (const int32_t **) _vbuf, \ |
*abuf0 = (const int32_t *) _abuf0; \ |
uint16_t *dest = (uint16_t *) _dest; \ |
name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \ |
dstW, uvalpha, y, fmt, hasAlpha, eightbytes); \ |
} |
YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48be, AV_PIX_FMT_RGB48BE, 0, 0) |
YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48le, AV_PIX_FMT_RGB48LE, 0, 0) |
YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48be, AV_PIX_FMT_BGR48BE, 0, 0) |
YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48le, AV_PIX_FMT_BGR48LE, 0, 0) |
YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64be, AV_PIX_FMT_RGBA64BE, 1, 1) |
YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64le, AV_PIX_FMT_RGBA64LE, 1, 1) |
YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64be, AV_PIX_FMT_RGBA64BE, 0, 1) |
YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64le, AV_PIX_FMT_RGBA64LE, 0, 1) |
YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64be, AV_PIX_FMT_BGRA64BE, 1, 1) |
YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64le, AV_PIX_FMT_BGRA64LE, 1, 1) |
YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64be, AV_PIX_FMT_BGRA64BE, 0, 1) |
YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64le, AV_PIX_FMT_BGRA64LE, 0, 1) |
YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48be_full, AV_PIX_FMT_RGB48BE, 0, 0) |
YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48le_full, AV_PIX_FMT_RGB48LE, 0, 0) |
YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48be_full, AV_PIX_FMT_BGR48BE, 0, 0) |
YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48le_full, AV_PIX_FMT_BGR48LE, 0, 0) |
YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64be_full, AV_PIX_FMT_RGBA64BE, 1, 1) |
YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64le_full, AV_PIX_FMT_RGBA64LE, 1, 1) |
YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64be_full, AV_PIX_FMT_RGBA64BE, 0, 1) |
YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64le_full, AV_PIX_FMT_RGBA64LE, 0, 1) |
YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64be_full, AV_PIX_FMT_BGRA64BE, 1, 1) |
YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64le_full, AV_PIX_FMT_BGRA64LE, 1, 1) |
YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64be_full, AV_PIX_FMT_BGRA64BE, 0, 1) |
YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64le_full, AV_PIX_FMT_BGRA64LE, 0, 1) |
/*
 * Write out 2 RGB pixels in the target pixel format. This function takes an
 * R/G/B LUT as generated by ff_yuv2rgb_c_init_tables(), which takes care of
 * things like endianness conversion and shifting. The caller takes care of
 * setting the correct offset in these tables from the chroma (U/V) values.
 * This function then uses the luminance (Y1/Y2) values to write out the
 * correct RGB values into the destination buffer.
 */
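/* Typical use (see yuv2rgb_X_c_template below): per pixel pair the caller
 * looks up
 *     r = c->table_rV[V + YUVRGB_TABLE_HEADROOM];
 *     g = c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM];
 *     b = c->table_bU[U + YUVRGB_TABLE_HEADROOM];
 * and this function adds the Y-indexed entries (plus dither offsets where the
 * format needs them) to form the packed pixels. */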
static av_always_inline void |
yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2, |
unsigned A1, unsigned A2, |
const void *_r, const void *_g, const void *_b, int y, |
enum AVPixelFormat target, int hasAlpha) |
{ |
if (target == AV_PIX_FMT_ARGB || target == AV_PIX_FMT_RGBA || |
target == AV_PIX_FMT_ABGR || target == AV_PIX_FMT_BGRA) { |
uint32_t *dest = (uint32_t *) _dest; |
const uint32_t *r = (const uint32_t *) _r; |
const uint32_t *g = (const uint32_t *) _g; |
const uint32_t *b = (const uint32_t *) _b; |
#if CONFIG_SMALL |
int sh = hasAlpha ? ((target == AV_PIX_FMT_RGB32_1 || target == AV_PIX_FMT_BGR32_1) ? 0 : 24) : 0; |
dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0); |
dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0); |
#else |
if (hasAlpha) { |
int sh = (target == AV_PIX_FMT_RGB32_1 || target == AV_PIX_FMT_BGR32_1) ? 0 : 24; |
av_assert2((((r[Y1] + g[Y1] + b[Y1]) >> sh) & 0xFF) == 0); |
dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh); |
dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh); |
} else { |
#if defined(ASSERT_LEVEL) && ASSERT_LEVEL > 1 |
int sh = (target == AV_PIX_FMT_RGB32_1 || target == AV_PIX_FMT_BGR32_1) ? 0 : 24; |
av_assert2((((r[Y1] + g[Y1] + b[Y1]) >> sh) & 0xFF) == 0xFF); |
#endif |
dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1]; |
dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2]; |
} |
#endif |
} else if (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) { |
uint8_t *dest = (uint8_t *) _dest; |
const uint8_t *r = (const uint8_t *) _r; |
const uint8_t *g = (const uint8_t *) _g; |
const uint8_t *b = (const uint8_t *) _b; |
#define r_b ((target == AV_PIX_FMT_RGB24) ? r : b) |
#define b_r ((target == AV_PIX_FMT_RGB24) ? b : r) |
dest[i * 6 + 0] = r_b[Y1]; |
dest[i * 6 + 1] = g[Y1]; |
dest[i * 6 + 2] = b_r[Y1]; |
dest[i * 6 + 3] = r_b[Y2]; |
dest[i * 6 + 4] = g[Y2]; |
dest[i * 6 + 5] = b_r[Y2]; |
#undef r_b |
#undef b_r |
} else if (target == AV_PIX_FMT_RGB565 || target == AV_PIX_FMT_BGR565 || |
target == AV_PIX_FMT_RGB555 || target == AV_PIX_FMT_BGR555 || |
target == AV_PIX_FMT_RGB444 || target == AV_PIX_FMT_BGR444) { |
uint16_t *dest = (uint16_t *) _dest; |
const uint16_t *r = (const uint16_t *) _r; |
const uint16_t *g = (const uint16_t *) _g; |
const uint16_t *b = (const uint16_t *) _b; |
int dr1, dg1, db1, dr2, dg2, db2; |
if (target == AV_PIX_FMT_RGB565 || target == AV_PIX_FMT_BGR565) { |
dr1 = ff_dither_2x2_8[ y & 1 ][0]; |
dg1 = ff_dither_2x2_4[ y & 1 ][0]; |
db1 = ff_dither_2x2_8[(y & 1) ^ 1][0]; |
dr2 = ff_dither_2x2_8[ y & 1 ][1]; |
dg2 = ff_dither_2x2_4[ y & 1 ][1]; |
db2 = ff_dither_2x2_8[(y & 1) ^ 1][1]; |
} else if (target == AV_PIX_FMT_RGB555 || target == AV_PIX_FMT_BGR555) { |
dr1 = ff_dither_2x2_8[ y & 1 ][0]; |
dg1 = ff_dither_2x2_8[ y & 1 ][1]; |
db1 = ff_dither_2x2_8[(y & 1) ^ 1][0]; |
dr2 = ff_dither_2x2_8[ y & 1 ][1]; |
dg2 = ff_dither_2x2_8[ y & 1 ][0]; |
db2 = ff_dither_2x2_8[(y & 1) ^ 1][1]; |
} else { |
dr1 = ff_dither_4x4_16[ y & 3 ][0]; |
dg1 = ff_dither_4x4_16[ y & 3 ][1]; |
db1 = ff_dither_4x4_16[(y & 3) ^ 3][0]; |
dr2 = ff_dither_4x4_16[ y & 3 ][1]; |
dg2 = ff_dither_4x4_16[ y & 3 ][0]; |
db2 = ff_dither_4x4_16[(y & 3) ^ 3][1]; |
} |
dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1]; |
dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]; |
} else /* 8/4-bit */ { |
uint8_t *dest = (uint8_t *) _dest; |
const uint8_t *r = (const uint8_t *) _r; |
const uint8_t *g = (const uint8_t *) _g; |
const uint8_t *b = (const uint8_t *) _b; |
int dr1, dg1, db1, dr2, dg2, db2; |
if (target == AV_PIX_FMT_RGB8 || target == AV_PIX_FMT_BGR8) { |
const uint8_t * const d64 = ff_dither_8x8_73[y & 7]; |
const uint8_t * const d32 = ff_dither_8x8_32[y & 7]; |
dr1 = dg1 = d32[(i * 2 + 0) & 7]; |
db1 = d64[(i * 2 + 0) & 7]; |
dr2 = dg2 = d32[(i * 2 + 1) & 7]; |
db2 = d64[(i * 2 + 1) & 7]; |
} else { |
const uint8_t * const d64 = ff_dither_8x8_73 [y & 7]; |
const uint8_t * const d128 = ff_dither_8x8_220[y & 7]; |
dr1 = db1 = d128[(i * 2 + 0) & 7]; |
dg1 = d64[(i * 2 + 0) & 7]; |
dr2 = db2 = d128[(i * 2 + 1) & 7]; |
dg2 = d64[(i * 2 + 1) & 7]; |
} |
if (target == AV_PIX_FMT_RGB4 || target == AV_PIX_FMT_BGR4) { |
dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] + |
((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4); |
} else { |
dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1]; |
dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]; |
} |
} |
} |
static av_always_inline void |
yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter, |
const int16_t **lumSrc, int lumFilterSize, |
const int16_t *chrFilter, const int16_t **chrUSrc, |
const int16_t **chrVSrc, int chrFilterSize, |
const int16_t **alpSrc, uint8_t *dest, int dstW, |
int y, enum AVPixelFormat target, int hasAlpha) |
{ |
int i; |
for (i = 0; i < ((dstW + 1) >> 1); i++) { |
int j, A1, A2; |
int Y1 = 1 << 18; |
int Y2 = 1 << 18; |
int U = 1 << 18; |
int V = 1 << 18; |
const void *r, *g, *b; |
for (j = 0; j < lumFilterSize; j++) { |
Y1 += lumSrc[j][i * 2] * lumFilter[j]; |
Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j]; |
} |
for (j = 0; j < chrFilterSize; j++) { |
U += chrUSrc[j][i] * chrFilter[j]; |
V += chrVSrc[j][i] * chrFilter[j]; |
} |
Y1 >>= 19; |
Y2 >>= 19; |
U >>= 19; |
V >>= 19; |
if (hasAlpha) { |
A1 = 1 << 18; |
A2 = 1 << 18; |
for (j = 0; j < lumFilterSize; j++) { |
A1 += alpSrc[j][i * 2 ] * lumFilter[j]; |
A2 += alpSrc[j][i * 2 + 1] * lumFilter[j]; |
} |
A1 >>= 19; |
A2 >>= 19; |
if ((A1 | A2) & 0x100) { |
A1 = av_clip_uint8(A1); |
A2 = av_clip_uint8(A2); |
} |
} |
r = c->table_rV[V + YUVRGB_TABLE_HEADROOM]; |
g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]); |
b = c->table_bU[U + YUVRGB_TABLE_HEADROOM]; |
yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0, |
r, g, b, y, target, hasAlpha); |
} |
} |
static av_always_inline void |
yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2], |
const int16_t *ubuf[2], const int16_t *vbuf[2], |
const int16_t *abuf[2], uint8_t *dest, int dstW, |
int yalpha, int uvalpha, int y, |
enum AVPixelFormat target, int hasAlpha) |
{ |
const int16_t *buf0 = buf[0], *buf1 = buf[1], |
*ubuf0 = ubuf[0], *ubuf1 = ubuf[1], |
*vbuf0 = vbuf[0], *vbuf1 = vbuf[1], |
*abuf0 = hasAlpha ? abuf[0] : NULL, |
*abuf1 = hasAlpha ? abuf[1] : NULL; |
int yalpha1 = 4096 - yalpha; |
int uvalpha1 = 4096 - uvalpha; |
int i; |
for (i = 0; i < ((dstW + 1) >> 1); i++) { |
int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19; |
int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19; |
int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19; |
int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19; |
int A1, A2; |
const void *r = c->table_rV[V + YUVRGB_TABLE_HEADROOM], |
*g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]), |
*b = c->table_bU[U + YUVRGB_TABLE_HEADROOM]; |
if (hasAlpha) { |
A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19; |
A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19; |
A1 = av_clip_uint8(A1); |
A2 = av_clip_uint8(A2); |
} |
yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0, |
r, g, b, y, target, hasAlpha); |
} |
} |
static av_always_inline void |
yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0, |
const int16_t *ubuf[2], const int16_t *vbuf[2], |
const int16_t *abuf0, uint8_t *dest, int dstW, |
int uvalpha, int y, enum AVPixelFormat target, |
int hasAlpha) |
{ |
const int16_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0]; |
int i; |
if (uvalpha < 2048) { |
for (i = 0; i < ((dstW + 1) >> 1); i++) { |
int Y1 = (buf0[i * 2 ] + 64) >> 7; |
int Y2 = (buf0[i * 2 + 1] + 64) >> 7; |
int U = (ubuf0[i] + 64) >> 7; |
int V = (vbuf0[i] + 64) >> 7; |
int A1, A2; |
const void *r = c->table_rV[V + YUVRGB_TABLE_HEADROOM], |
*g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]), |
*b = c->table_bU[U + YUVRGB_TABLE_HEADROOM]; |
if (hasAlpha) { |
            A1 = (abuf0[i * 2    ] * 255 + 16384) >> 15;
            A2 = (abuf0[i * 2 + 1] * 255 + 16384) >> 15;
A1 = av_clip_uint8(A1); |
A2 = av_clip_uint8(A2); |
} |
yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0, |
r, g, b, y, target, hasAlpha); |
} |
} else { |
const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1]; |
for (i = 0; i < ((dstW + 1) >> 1); i++) { |
int Y1 = (buf0[i * 2 ] + 64) >> 7; |
int Y2 = (buf0[i * 2 + 1] + 64) >> 7; |
int U = (ubuf0[i] + ubuf1[i] + 128) >> 8; |
int V = (vbuf0[i] + vbuf1[i] + 128) >> 8; |
int A1, A2; |
const void *r = c->table_rV[V + YUVRGB_TABLE_HEADROOM], |
*g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]), |
*b = c->table_bU[U + YUVRGB_TABLE_HEADROOM]; |
if (hasAlpha) { |
A1 = (abuf0[i * 2 ] + 64) >> 7; |
A2 = (abuf0[i * 2 + 1] + 64) >> 7; |
A1 = av_clip_uint8(A1); |
A2 = av_clip_uint8(A2); |
} |
yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0, |
r, g, b, y, target, hasAlpha); |
} |
} |
} |
#define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \ |
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \ |
const int16_t **lumSrc, int lumFilterSize, \ |
const int16_t *chrFilter, const int16_t **chrUSrc, \ |
const int16_t **chrVSrc, int chrFilterSize, \ |
const int16_t **alpSrc, uint8_t *dest, int dstW, \ |
int y) \ |
{ \ |
name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \ |
chrFilter, chrUSrc, chrVSrc, chrFilterSize, \ |
alpSrc, dest, dstW, y, fmt, hasAlpha); \ |
} |
#define YUV2RGBWRAPPERX2(name, base, ext, fmt, hasAlpha) \ |
YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \ |
static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \ |
const int16_t *ubuf[2], const int16_t *vbuf[2], \ |
const int16_t *abuf[2], uint8_t *dest, int dstW, \ |
int yalpha, int uvalpha, int y) \ |
{ \ |
name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \ |
dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \ |
} |
#define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \ |
YUV2RGBWRAPPERX2(name, base, ext, fmt, hasAlpha) \ |
static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \ |
const int16_t *ubuf[2], const int16_t *vbuf[2], \ |
const int16_t *abuf0, uint8_t *dest, int dstW, \ |
int uvalpha, int y) \ |
{ \ |
name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \ |
dstW, uvalpha, y, fmt, hasAlpha); \ |
} |
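/* Every packed output format gets up to three entry points: _X_c applies the
 * full vertical filter (lumFilterSize/chrFilterSize taps), _2_c blends exactly
 * two lines with the yalpha/uvalpha weights, and _1_c handles a single source
 * line (averaging two chroma lines when uvalpha >= 2048). */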
#if CONFIG_SMALL |
YUV2RGBWRAPPER(yuv2rgb,, 32_1, AV_PIX_FMT_RGB32_1, CONFIG_SWSCALE_ALPHA && c->alpPixBuf) |
YUV2RGBWRAPPER(yuv2rgb,, 32, AV_PIX_FMT_RGB32, CONFIG_SWSCALE_ALPHA && c->alpPixBuf) |
#else |
#if CONFIG_SWSCALE_ALPHA |
YUV2RGBWRAPPER(yuv2rgb,, a32_1, AV_PIX_FMT_RGB32_1, 1) |
YUV2RGBWRAPPER(yuv2rgb,, a32, AV_PIX_FMT_RGB32, 1) |
#endif |
YUV2RGBWRAPPER(yuv2rgb,, x32_1, AV_PIX_FMT_RGB32_1, 0) |
YUV2RGBWRAPPER(yuv2rgb,, x32, AV_PIX_FMT_RGB32, 0) |
#endif |
YUV2RGBWRAPPER(yuv2, rgb, rgb24, AV_PIX_FMT_RGB24, 0) |
YUV2RGBWRAPPER(yuv2, rgb, bgr24, AV_PIX_FMT_BGR24, 0) |
YUV2RGBWRAPPER(yuv2rgb,, 16, AV_PIX_FMT_RGB565, 0) |
YUV2RGBWRAPPER(yuv2rgb,, 15, AV_PIX_FMT_RGB555, 0) |
YUV2RGBWRAPPER(yuv2rgb,, 12, AV_PIX_FMT_RGB444, 0) |
YUV2RGBWRAPPER(yuv2rgb,, 8, AV_PIX_FMT_RGB8, 0) |
YUV2RGBWRAPPER(yuv2rgb,, 4, AV_PIX_FMT_RGB4, 0) |
YUV2RGBWRAPPER(yuv2rgb,, 4b, AV_PIX_FMT_RGB4_BYTE, 0) |
static av_always_inline void yuv2rgb_write_full(SwsContext *c, |
uint8_t *dest, int i, int Y, int A, int U, int V, |
int y, enum AVPixelFormat target, int hasAlpha, int err[4]) |
{ |
int R, G, B; |
int isrgb8 = target == AV_PIX_FMT_BGR8 || target == AV_PIX_FMT_RGB8; |
Y -= c->yuv2rgb_y_offset; |
Y *= c->yuv2rgb_y_coeff; |
Y += 1 << 21; |
R = Y + V*c->yuv2rgb_v2r_coeff; |
G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff; |
B = Y + U*c->yuv2rgb_u2b_coeff; |
if ((R | G | B) & 0xC0000000) { |
R = av_clip_uintp2(R, 30); |
G = av_clip_uintp2(G, 30); |
B = av_clip_uintp2(B, 30); |
} |
switch(target) { |
case AV_PIX_FMT_ARGB: |
dest[0] = hasAlpha ? A : 255; |
dest[1] = R >> 22; |
dest[2] = G >> 22; |
dest[3] = B >> 22; |
break; |
case AV_PIX_FMT_RGB24: |
dest[0] = R >> 22; |
dest[1] = G >> 22; |
dest[2] = B >> 22; |
break; |
case AV_PIX_FMT_RGBA: |
dest[0] = R >> 22; |
dest[1] = G >> 22; |
dest[2] = B >> 22; |
dest[3] = hasAlpha ? A : 255; |
break; |
case AV_PIX_FMT_ABGR: |
dest[0] = hasAlpha ? A : 255; |
dest[1] = B >> 22; |
dest[2] = G >> 22; |
dest[3] = R >> 22; |
break; |
case AV_PIX_FMT_BGR24: |
dest[0] = B >> 22; |
dest[1] = G >> 22; |
dest[2] = R >> 22; |
break; |
case AV_PIX_FMT_BGRA: |
dest[0] = B >> 22; |
dest[1] = G >> 22; |
dest[2] = R >> 22; |
dest[3] = hasAlpha ? A : 255; |
break; |
case AV_PIX_FMT_BGR4_BYTE: |
case AV_PIX_FMT_RGB4_BYTE: |
case AV_PIX_FMT_BGR8: |
case AV_PIX_FMT_RGB8: |
{ |
int r,g,b; |
switch (c->dither) { |
default: |
case SWS_DITHER_AUTO: |
case SWS_DITHER_ED: |
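            /* Floyd-Steinberg-style error diffusion: err[] carries the error
             * from the pixel to the left, c->dither_error[] the previous row;
             * the weights are 7, 1, 5 and 3 out of 16. */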
R >>= 22; |
G >>= 22; |
B >>= 22; |
R += (7*err[0] + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2])>>4; |
G += (7*err[1] + 1*c->dither_error[1][i] + 5*c->dither_error[1][i+1] + 3*c->dither_error[1][i+2])>>4; |
B += (7*err[2] + 1*c->dither_error[2][i] + 5*c->dither_error[2][i+1] + 3*c->dither_error[2][i+2])>>4; |
c->dither_error[0][i] = err[0]; |
c->dither_error[1][i] = err[1]; |
c->dither_error[2][i] = err[2]; |
r = R >> (isrgb8 ? 5 : 7); |
g = G >> (isrgb8 ? 5 : 6); |
b = B >> (isrgb8 ? 6 : 7); |
r = av_clip(r, 0, isrgb8 ? 7 : 1); |
g = av_clip(g, 0, isrgb8 ? 7 : 3); |
b = av_clip(b, 0, isrgb8 ? 3 : 1); |
err[0] = R - r*(isrgb8 ? 36 : 255); |
err[1] = G - g*(isrgb8 ? 36 : 85); |
err[2] = B - b*(isrgb8 ? 85 : 255); |
break; |
case SWS_DITHER_A_DITHER: |
if (isrgb8) { |
/* see http://pippin.gimp.org/a_dither/ for details/origin */ |
#define A_DITHER(u,v) (((((u)+((v)*236))*119)&0xff)) |
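                /* A_DITHER hashes the pixel coordinates into a pseudo-random
                 * threshold in 0..255 ("arithmetic dither"); X_DITHER further
                 * below is the XOR-based variant of the same idea. */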
                r = (((R >> 19) + A_DITHER(i,y) - 96)>>8);
                g = (((G >> 19) + A_DITHER(i + 17,y) - 96)>>8);
                b = (((B >> 20) + A_DITHER(i + 17*2,y) - 96)>>8);
r = av_clip_uintp2(r, 3); |
g = av_clip_uintp2(g, 3); |
b = av_clip_uintp2(b, 2); |
} else { |
r = (((R >> 21) + A_DITHER(i,y)-256)>>8); |
g = (((G >> 19) + A_DITHER(i + 17,y)-256)>>8); |
b = (((B >> 21) + A_DITHER(i + 17*2,y)-256)>>8); |
r = av_clip_uintp2(r, 1); |
g = av_clip_uintp2(g, 2); |
b = av_clip_uintp2(b, 1); |
} |
break; |
case SWS_DITHER_X_DITHER: |
if (isrgb8) { |
/* see http://pippin.gimp.org/a_dither/ for details/origin */ |
#define X_DITHER(u,v) (((((u)^((v)*237))*181)&0x1ff)/2) |
r = (((R >> 19) + X_DITHER(i,y) - 96)>>8); |
g = (((G >> 19) + X_DITHER(i + 17,y) - 96)>>8); |
b = (((B >> 20) + X_DITHER(i + 17*2,y) - 96)>>8); |
r = av_clip_uintp2(r, 3); |
g = av_clip_uintp2(g, 3); |
b = av_clip_uintp2(b, 2); |
} else { |
r = (((R >> 21) + X_DITHER(i,y)-256)>>8); |
g = (((G >> 19) + X_DITHER(i + 17,y)-256)>>8); |
b = (((B >> 21) + X_DITHER(i + 17*2,y)-256)>>8); |
r = av_clip_uintp2(r, 1); |
g = av_clip_uintp2(g, 2); |
b = av_clip_uintp2(b, 1); |
} |
break; |
} |
if(target == AV_PIX_FMT_BGR4_BYTE) { |
dest[0] = r + 2*g + 8*b; |
} else if(target == AV_PIX_FMT_RGB4_BYTE) { |
dest[0] = b + 2*g + 8*r; |
} else if(target == AV_PIX_FMT_BGR8) { |
dest[0] = r + 8*g + 64*b; |
} else if(target == AV_PIX_FMT_RGB8) { |
dest[0] = b + 4*g + 32*r; |
} else |
av_assert2(0); |
break;} |
} |
} |
static av_always_inline void |
yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter, |
const int16_t **lumSrc, int lumFilterSize, |
const int16_t *chrFilter, const int16_t **chrUSrc, |
const int16_t **chrVSrc, int chrFilterSize, |
const int16_t **alpSrc, uint8_t *dest, |
int dstW, int y, enum AVPixelFormat target, int hasAlpha) |
{ |
int i; |
int step = (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) ? 3 : 4; |
int err[4] = {0}; |
int A = 0; //init to silence warning |
if( target == AV_PIX_FMT_BGR4_BYTE || target == AV_PIX_FMT_RGB4_BYTE |
|| target == AV_PIX_FMT_BGR8 || target == AV_PIX_FMT_RGB8) |
step = 1; |
for (i = 0; i < dstW; i++) { |
int j; |
int Y = 1<<9; |
int U = (1<<9)-(128 << 19); |
int V = (1<<9)-(128 << 19); |
for (j = 0; j < lumFilterSize; j++) { |
Y += lumSrc[j][i] * lumFilter[j]; |
} |
for (j = 0; j < chrFilterSize; j++) { |
U += chrUSrc[j][i] * chrFilter[j]; |
V += chrVSrc[j][i] * chrFilter[j]; |
} |
Y >>= 10; |
U >>= 10; |
V >>= 10; |
if (hasAlpha) { |
A = 1 << 18; |
for (j = 0; j < lumFilterSize; j++) { |
A += alpSrc[j][i] * lumFilter[j]; |
} |
A >>= 19; |
if (A & 0x100) |
A = av_clip_uint8(A); |
} |
yuv2rgb_write_full(c, dest, i, Y, A, U, V, y, target, hasAlpha, err); |
dest += step; |
} |
c->dither_error[0][i] = err[0]; |
c->dither_error[1][i] = err[1]; |
c->dither_error[2][i] = err[2]; |
} |
static av_always_inline void |
yuv2rgb_full_2_c_template(SwsContext *c, const int16_t *buf[2], |
const int16_t *ubuf[2], const int16_t *vbuf[2], |
const int16_t *abuf[2], uint8_t *dest, int dstW, |
int yalpha, int uvalpha, int y, |
enum AVPixelFormat target, int hasAlpha) |
{ |
const int16_t *buf0 = buf[0], *buf1 = buf[1], |
*ubuf0 = ubuf[0], *ubuf1 = ubuf[1], |
*vbuf0 = vbuf[0], *vbuf1 = vbuf[1], |
*abuf0 = hasAlpha ? abuf[0] : NULL, |
*abuf1 = hasAlpha ? abuf[1] : NULL; |
int yalpha1 = 4096 - yalpha; |
int uvalpha1 = 4096 - uvalpha; |
int i; |
int step = (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) ? 3 : 4; |
int err[4] = {0}; |
    int A = 0; // init to silence warning
if( target == AV_PIX_FMT_BGR4_BYTE || target == AV_PIX_FMT_RGB4_BYTE |
|| target == AV_PIX_FMT_BGR8 || target == AV_PIX_FMT_RGB8) |
step = 1; |
for (i = 0; i < dstW; i++) { |
int Y = ( buf0[i] * yalpha1 + buf1[i] * yalpha ) >> 10; //FIXME rounding |
int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha-(128 << 19)) >> 10; |
int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha-(128 << 19)) >> 10; |
if (hasAlpha) { |
A = (abuf0[i] * yalpha1 + abuf1[i] * yalpha + (1<<18)) >> 19; |
if (A & 0x100) |
A = av_clip_uint8(A); |
} |
yuv2rgb_write_full(c, dest, i, Y, A, U, V, y, target, hasAlpha, err); |
dest += step; |
} |
c->dither_error[0][i] = err[0]; |
c->dither_error[1][i] = err[1]; |
c->dither_error[2][i] = err[2]; |
} |
static av_always_inline void |
yuv2rgb_full_1_c_template(SwsContext *c, const int16_t *buf0, |
const int16_t *ubuf[2], const int16_t *vbuf[2], |
const int16_t *abuf0, uint8_t *dest, int dstW, |
int uvalpha, int y, enum AVPixelFormat target, |
int hasAlpha) |
{ |
const int16_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0]; |
int i; |
int step = (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) ? 3 : 4; |
int err[4] = {0}; |
if( target == AV_PIX_FMT_BGR4_BYTE || target == AV_PIX_FMT_RGB4_BYTE |
|| target == AV_PIX_FMT_BGR8 || target == AV_PIX_FMT_RGB8) |
step = 1; |
if (uvalpha < 2048) { |
int A = 0; //init to silence warning |
for (i = 0; i < dstW; i++) { |
int Y = buf0[i] << 2; |
int U = (ubuf0[i] - (128<<7)) * 4; |
int V = (vbuf0[i] - (128<<7)) * 4; |
if (hasAlpha) { |
A = (abuf0[i] + 64) >> 7; |
if (A & 0x100) |
A = av_clip_uint8(A); |
} |
yuv2rgb_write_full(c, dest, i, Y, A, U, V, y, target, hasAlpha, err); |
dest += step; |
} |
} else { |
const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1]; |
int A = 0; //init to silence warning |
for (i = 0; i < dstW; i++) { |
int Y = buf0[i] << 2; |
int U = (ubuf0[i] + ubuf1[i] - (128<<8)) << 1; |
int V = (vbuf0[i] + vbuf1[i] - (128<<8)) << 1; |
if (hasAlpha) { |
A = (abuf0[i] + 64) >> 7; |
if (A & 0x100) |
A = av_clip_uint8(A); |
} |
yuv2rgb_write_full(c, dest, i, Y, A, U, V, y, target, hasAlpha, err); |
dest += step; |
} |
} |
c->dither_error[0][i] = err[0]; |
c->dither_error[1][i] = err[1]; |
c->dither_error[2][i] = err[2]; |
} |
#if CONFIG_SMALL |
YUV2RGBWRAPPER(yuv2, rgb_full, bgra32_full, AV_PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf) |
YUV2RGBWRAPPER(yuv2, rgb_full, abgr32_full, AV_PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf) |
YUV2RGBWRAPPER(yuv2, rgb_full, rgba32_full, AV_PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf) |
YUV2RGBWRAPPER(yuv2, rgb_full, argb32_full, AV_PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf) |
#else |
#if CONFIG_SWSCALE_ALPHA |
YUV2RGBWRAPPER(yuv2, rgb_full, bgra32_full, AV_PIX_FMT_BGRA, 1) |
YUV2RGBWRAPPER(yuv2, rgb_full, abgr32_full, AV_PIX_FMT_ABGR, 1) |
YUV2RGBWRAPPER(yuv2, rgb_full, rgba32_full, AV_PIX_FMT_RGBA, 1) |
YUV2RGBWRAPPER(yuv2, rgb_full, argb32_full, AV_PIX_FMT_ARGB, 1) |
#endif |
YUV2RGBWRAPPER(yuv2, rgb_full, bgrx32_full, AV_PIX_FMT_BGRA, 0) |
YUV2RGBWRAPPER(yuv2, rgb_full, xbgr32_full, AV_PIX_FMT_ABGR, 0) |
YUV2RGBWRAPPER(yuv2, rgb_full, rgbx32_full, AV_PIX_FMT_RGBA, 0) |
YUV2RGBWRAPPER(yuv2, rgb_full, xrgb32_full, AV_PIX_FMT_ARGB, 0) |
#endif |
YUV2RGBWRAPPER(yuv2, rgb_full, bgr24_full, AV_PIX_FMT_BGR24, 0) |
YUV2RGBWRAPPER(yuv2, rgb_full, rgb24_full, AV_PIX_FMT_RGB24, 0) |
YUV2RGBWRAPPER(yuv2, rgb_full, bgr4_byte_full, AV_PIX_FMT_BGR4_BYTE, 0) |
YUV2RGBWRAPPER(yuv2, rgb_full, rgb4_byte_full, AV_PIX_FMT_RGB4_BYTE, 0) |
YUV2RGBWRAPPER(yuv2, rgb_full, bgr8_full, AV_PIX_FMT_BGR8, 0) |
YUV2RGBWRAPPER(yuv2, rgb_full, rgb8_full, AV_PIX_FMT_RGB8, 0) |
static void |
yuv2gbrp_full_X_c(SwsContext *c, const int16_t *lumFilter, |
const int16_t **lumSrc, int lumFilterSize, |
const int16_t *chrFilter, const int16_t **chrUSrc, |
const int16_t **chrVSrc, int chrFilterSize, |
const int16_t **alpSrc, uint8_t **dest, |
int dstW, int y) |
{ |
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat); |
int i; |
int hasAlpha = (desc->flags & AV_PIX_FMT_FLAG_ALPHA) && alpSrc; |
uint16_t **dest16 = (uint16_t**)dest; |
int SH = 22 + 7 - desc->comp[0].depth_minus1; |
int A = 0; // init to silence warning |
for (i = 0; i < dstW; i++) { |
int j; |
int Y = 1 << 9; |
int U = (1 << 9) - (128 << 19); |
int V = (1 << 9) - (128 << 19); |
int R, G, B; |
for (j = 0; j < lumFilterSize; j++) |
Y += lumSrc[j][i] * lumFilter[j]; |
for (j = 0; j < chrFilterSize; j++) { |
U += chrUSrc[j][i] * chrFilter[j]; |
V += chrVSrc[j][i] * chrFilter[j]; |
} |
Y >>= 10; |
U >>= 10; |
V >>= 10; |
if (hasAlpha) { |
A = 1 << 18; |
for (j = 0; j < lumFilterSize; j++) |
A += alpSrc[j][i] * lumFilter[j]; |
A >>= 19; |
if (A & 0x100) |
A = av_clip_uint8(A); |
} |
Y -= c->yuv2rgb_y_offset; |
Y *= c->yuv2rgb_y_coeff; |
Y += 1 << 21; |
R = Y + V * c->yuv2rgb_v2r_coeff; |
G = Y + V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; |
B = Y + U * c->yuv2rgb_u2b_coeff; |
if ((R | G | B) & 0xC0000000) { |
R = av_clip_uintp2(R, 30); |
G = av_clip_uintp2(G, 30); |
B = av_clip_uintp2(B, 30); |
} |
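        /* GBRP-family formats store their planes in G, B, R (, A) order,
         * hence plane 0 receives G, plane 1 B and plane 2 R below. */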
if (SH != 22) { |
dest16[0][i] = G >> SH; |
dest16[1][i] = B >> SH; |
dest16[2][i] = R >> SH; |
if (hasAlpha) |
dest16[3][i] = A; |
} else { |
dest[0][i] = G >> 22; |
dest[1][i] = B >> 22; |
dest[2][i] = R >> 22; |
if (hasAlpha) |
dest[3][i] = A; |
} |
} |
if (SH != 22 && (!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) { |
for (i = 0; i < dstW; i++) { |
dest16[0][i] = av_bswap16(dest16[0][i]); |
dest16[1][i] = av_bswap16(dest16[1][i]); |
dest16[2][i] = av_bswap16(dest16[2][i]); |
if (hasAlpha) |
dest16[3][i] = av_bswap16(dest16[3][i]); |
} |
} |
} |
static void |
yuv2ya8_1_c(SwsContext *c, const int16_t *buf0, |
const int16_t *ubuf[2], const int16_t *vbuf[2], |
const int16_t *abuf0, uint8_t *dest, int dstW, |
int uvalpha, int y) |
{ |
int hasAlpha = !!abuf0; |
int i; |
for (i = 0; i < dstW; i++) { |
int Y = (buf0[i] + 64) >> 7; |
int A; |
Y = av_clip_uint8(Y); |
if (hasAlpha) { |
A = (abuf0[i] + 64) >> 7; |
if (A & 0x100) |
A = av_clip_uint8(A); |
} |
dest[i * 2 ] = Y; |
dest[i * 2 + 1] = hasAlpha ? A : 255; |
} |
} |
static void |
yuv2ya8_2_c(SwsContext *c, const int16_t *buf[2], |
const int16_t *ubuf[2], const int16_t *vbuf[2], |
const int16_t *abuf[2], uint8_t *dest, int dstW, |
int yalpha, int uvalpha, int y) |
{ |
int hasAlpha = abuf && abuf[0] && abuf[1]; |
const int16_t *buf0 = buf[0], *buf1 = buf[1], |
*abuf0 = hasAlpha ? abuf[0] : NULL, |
*abuf1 = hasAlpha ? abuf[1] : NULL; |
int yalpha1 = 4096 - yalpha; |
int i; |
for (i = 0; i < dstW; i++) { |
int Y = (buf0[i] * yalpha1 + buf1[i] * yalpha) >> 19; |
int A; |
Y = av_clip_uint8(Y); |
if (hasAlpha) { |
A = (abuf0[i] * yalpha1 + abuf1[i] * yalpha) >> 19; |
A = av_clip_uint8(A); |
} |
dest[i * 2 ] = Y; |
dest[i * 2 + 1] = hasAlpha ? A : 255; |
} |
} |
static void |
yuv2ya8_X_c(SwsContext *c, const int16_t *lumFilter, |
const int16_t **lumSrc, int lumFilterSize, |
const int16_t *chrFilter, const int16_t **chrUSrc, |
const int16_t **chrVSrc, int chrFilterSize, |
const int16_t **alpSrc, uint8_t *dest, int dstW, int y) |
{ |
int hasAlpha = !!alpSrc; |
int i; |
for (i = 0; i < dstW; i++) { |
int j; |
int Y = 1 << 18, A = 1 << 18; |
for (j = 0; j < lumFilterSize; j++) |
Y += lumSrc[j][i] * lumFilter[j]; |
Y >>= 19; |
if (Y & 0x100) |
Y = av_clip_uint8(Y); |
if (hasAlpha) { |
for (j = 0; j < lumFilterSize; j++) |
A += alpSrc[j][i] * lumFilter[j]; |
A >>= 19; |
if (A & 0x100) |
A = av_clip_uint8(A); |
} |
dest[2 * i ] = Y; |
dest[2 * i + 1] = hasAlpha ? A : 255; |
} |
} |
static void |
yuv2ayuv64le_X_c(SwsContext *c, const int16_t *lumFilter, |
const int16_t **_lumSrc, int lumFilterSize, |
const int16_t *chrFilter, const int16_t **_chrUSrc, |
const int16_t **_chrVSrc, int chrFilterSize, |
const int16_t **_alpSrc, uint8_t *dest, int dstW, int y) |
{ |
const int32_t **lumSrc = (const int32_t **) _lumSrc, |
**chrUSrc = (const int32_t **) _chrUSrc, |
**chrVSrc = (const int32_t **) _chrVSrc, |
**alpSrc = (const int32_t **) _alpSrc; |
int hasAlpha = !!alpSrc; |
int i; |
for (i = 0; i < dstW; i++) { |
int Y = 1 << 14, U = 1 << 14; |
int V = 1 << 14, A = 1 << 14; |
int j; |
Y -= 0x40000000; |
U -= 0x40000000; |
V -= 0x40000000; |
A -= 0x40000000; |
for (j = 0; j < lumFilterSize; j++) |
Y += lumSrc[j][i] * (unsigned)lumFilter[j]; |
for (j = 0; j < chrFilterSize; j++) |
U += chrUSrc[j][i] * (unsigned)chrFilter[j]; |
for (j = 0; j < chrFilterSize; j++) |
V += chrVSrc[j][i] * (unsigned)chrFilter[j]; |
if (hasAlpha) |
for (j = 0; j < lumFilterSize; j++) |
A += alpSrc[j][i] * (unsigned)lumFilter[j]; |
Y = 0x8000 + av_clip_int16(Y >> 15); |
U = 0x8000 + av_clip_int16(U >> 15); |
V = 0x8000 + av_clip_int16(V >> 15); |
A = 0x8000 + av_clip_int16(A >> 15); |
AV_WL16(dest + 8 * i, hasAlpha ? A : 65535); |
AV_WL16(dest + 8 * i + 2, Y); |
AV_WL16(dest + 8 * i + 4, U); |
AV_WL16(dest + 8 * i + 6, V); |
} |
} |
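/* Pick the output functions for the destination format: planar writers are
 * selected by bit depth, packed and full-chroma-interpolation writers by
 * dstFormat, and the final switch covers formats that take the same path in
 * both modes. */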
av_cold void ff_sws_init_output_funcs(SwsContext *c, |
yuv2planar1_fn *yuv2plane1, |
yuv2planarX_fn *yuv2planeX, |
yuv2interleavedX_fn *yuv2nv12cX, |
yuv2packed1_fn *yuv2packed1, |
yuv2packed2_fn *yuv2packed2, |
yuv2packedX_fn *yuv2packedX, |
yuv2anyX_fn *yuv2anyX) |
{ |
enum AVPixelFormat dstFormat = c->dstFormat; |
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(dstFormat); |
if (is16BPS(dstFormat)) { |
*yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c : yuv2planeX_16LE_c; |
*yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c : yuv2plane1_16LE_c; |
} else if (is9_OR_10BPS(dstFormat)) { |
if (desc->comp[0].depth_minus1 == 8) { |
*yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_c : yuv2planeX_9LE_c; |
*yuv2plane1 = isBE(dstFormat) ? yuv2plane1_9BE_c : yuv2plane1_9LE_c; |
} else if (desc->comp[0].depth_minus1 == 9) { |
*yuv2planeX = isBE(dstFormat) ? yuv2planeX_10BE_c : yuv2planeX_10LE_c; |
*yuv2plane1 = isBE(dstFormat) ? yuv2plane1_10BE_c : yuv2plane1_10LE_c; |
} else if (desc->comp[0].depth_minus1 == 11) { |
*yuv2planeX = isBE(dstFormat) ? yuv2planeX_12BE_c : yuv2planeX_12LE_c; |
*yuv2plane1 = isBE(dstFormat) ? yuv2plane1_12BE_c : yuv2plane1_12LE_c; |
} else if (desc->comp[0].depth_minus1 == 13) { |
*yuv2planeX = isBE(dstFormat) ? yuv2planeX_14BE_c : yuv2planeX_14LE_c; |
*yuv2plane1 = isBE(dstFormat) ? yuv2plane1_14BE_c : yuv2plane1_14LE_c; |
} else |
av_assert0(0); |
} else { |
*yuv2plane1 = yuv2plane1_8_c; |
*yuv2planeX = yuv2planeX_8_c; |
if (dstFormat == AV_PIX_FMT_NV12 || dstFormat == AV_PIX_FMT_NV21) |
*yuv2nv12cX = yuv2nv12cX_c; |
} |
if(c->flags & SWS_FULL_CHR_H_INT) { |
switch (dstFormat) { |
case AV_PIX_FMT_RGBA: |
#if CONFIG_SMALL |
*yuv2packedX = yuv2rgba32_full_X_c; |
*yuv2packed2 = yuv2rgba32_full_2_c; |
*yuv2packed1 = yuv2rgba32_full_1_c; |
#else |
#if CONFIG_SWSCALE_ALPHA |
if (c->alpPixBuf) { |
*yuv2packedX = yuv2rgba32_full_X_c; |
*yuv2packed2 = yuv2rgba32_full_2_c; |
*yuv2packed1 = yuv2rgba32_full_1_c; |
} else |
#endif /* CONFIG_SWSCALE_ALPHA */ |
{ |
*yuv2packedX = yuv2rgbx32_full_X_c; |
*yuv2packed2 = yuv2rgbx32_full_2_c; |
*yuv2packed1 = yuv2rgbx32_full_1_c; |
} |
#endif /* !CONFIG_SMALL */ |
break; |
case AV_PIX_FMT_ARGB: |
#if CONFIG_SMALL |
*yuv2packedX = yuv2argb32_full_X_c; |
*yuv2packed2 = yuv2argb32_full_2_c; |
*yuv2packed1 = yuv2argb32_full_1_c; |
#else |
#if CONFIG_SWSCALE_ALPHA |
if (c->alpPixBuf) { |
*yuv2packedX = yuv2argb32_full_X_c; |
*yuv2packed2 = yuv2argb32_full_2_c; |
*yuv2packed1 = yuv2argb32_full_1_c; |
} else |
#endif /* CONFIG_SWSCALE_ALPHA */ |
{ |
*yuv2packedX = yuv2xrgb32_full_X_c; |
*yuv2packed2 = yuv2xrgb32_full_2_c; |
*yuv2packed1 = yuv2xrgb32_full_1_c; |
} |
#endif /* !CONFIG_SMALL */ |
break; |
case AV_PIX_FMT_BGRA: |
#if CONFIG_SMALL |
*yuv2packedX = yuv2bgra32_full_X_c; |
*yuv2packed2 = yuv2bgra32_full_2_c; |
*yuv2packed1 = yuv2bgra32_full_1_c; |
#else |
#if CONFIG_SWSCALE_ALPHA |
if (c->alpPixBuf) { |
*yuv2packedX = yuv2bgra32_full_X_c; |
*yuv2packed2 = yuv2bgra32_full_2_c; |
*yuv2packed1 = yuv2bgra32_full_1_c; |
} else |
#endif /* CONFIG_SWSCALE_ALPHA */ |
{ |
*yuv2packedX = yuv2bgrx32_full_X_c; |
*yuv2packed2 = yuv2bgrx32_full_2_c; |
*yuv2packed1 = yuv2bgrx32_full_1_c; |
} |
#endif /* !CONFIG_SMALL */ |
break; |
case AV_PIX_FMT_ABGR: |
#if CONFIG_SMALL |
*yuv2packedX = yuv2abgr32_full_X_c; |
*yuv2packed2 = yuv2abgr32_full_2_c; |
*yuv2packed1 = yuv2abgr32_full_1_c; |
#else |
#if CONFIG_SWSCALE_ALPHA |
if (c->alpPixBuf) { |
*yuv2packedX = yuv2abgr32_full_X_c; |
*yuv2packed2 = yuv2abgr32_full_2_c; |
*yuv2packed1 = yuv2abgr32_full_1_c; |
} else |
#endif /* CONFIG_SWSCALE_ALPHA */ |
{ |
*yuv2packedX = yuv2xbgr32_full_X_c; |
*yuv2packed2 = yuv2xbgr32_full_2_c; |
*yuv2packed1 = yuv2xbgr32_full_1_c; |
} |
#endif /* !CONFIG_SMALL */ |
break; |
case AV_PIX_FMT_RGBA64LE: |
#if CONFIG_SWSCALE_ALPHA |
if (c->alpPixBuf) { |
*yuv2packedX = yuv2rgba64le_full_X_c; |
*yuv2packed2 = yuv2rgba64le_full_2_c; |
*yuv2packed1 = yuv2rgba64le_full_1_c; |
} else |
#endif /* CONFIG_SWSCALE_ALPHA */ |
{ |
*yuv2packedX = yuv2rgbx64le_full_X_c; |
*yuv2packed2 = yuv2rgbx64le_full_2_c; |
*yuv2packed1 = yuv2rgbx64le_full_1_c; |
} |
break; |
case AV_PIX_FMT_RGBA64BE: |
#if CONFIG_SWSCALE_ALPHA |
if (c->alpPixBuf) { |
*yuv2packedX = yuv2rgba64be_full_X_c; |
*yuv2packed2 = yuv2rgba64be_full_2_c; |
*yuv2packed1 = yuv2rgba64be_full_1_c; |
} else |
#endif /* CONFIG_SWSCALE_ALPHA */ |
{ |
*yuv2packedX = yuv2rgbx64be_full_X_c; |
*yuv2packed2 = yuv2rgbx64be_full_2_c; |
*yuv2packed1 = yuv2rgbx64be_full_1_c; |
} |
break; |
case AV_PIX_FMT_BGRA64LE: |
#if CONFIG_SWSCALE_ALPHA |
if (c->alpPixBuf) { |
*yuv2packedX = yuv2bgra64le_full_X_c; |
*yuv2packed2 = yuv2bgra64le_full_2_c; |
*yuv2packed1 = yuv2bgra64le_full_1_c; |
} else |
#endif /* CONFIG_SWSCALE_ALPHA */ |
{ |
*yuv2packedX = yuv2bgrx64le_full_X_c; |
*yuv2packed2 = yuv2bgrx64le_full_2_c; |
*yuv2packed1 = yuv2bgrx64le_full_1_c; |
} |
break; |
case AV_PIX_FMT_BGRA64BE: |
#if CONFIG_SWSCALE_ALPHA |
if (c->alpPixBuf) { |
*yuv2packedX = yuv2bgra64be_full_X_c; |
*yuv2packed2 = yuv2bgra64be_full_2_c; |
*yuv2packed1 = yuv2bgra64be_full_1_c; |
} else |
#endif /* CONFIG_SWSCALE_ALPHA */ |
{ |
*yuv2packedX = yuv2bgrx64be_full_X_c; |
*yuv2packed2 = yuv2bgrx64be_full_2_c; |
*yuv2packed1 = yuv2bgrx64be_full_1_c; |
} |
break; |
case AV_PIX_FMT_RGB24: |
*yuv2packedX = yuv2rgb24_full_X_c; |
*yuv2packed2 = yuv2rgb24_full_2_c; |
*yuv2packed1 = yuv2rgb24_full_1_c; |
break; |
case AV_PIX_FMT_BGR24: |
*yuv2packedX = yuv2bgr24_full_X_c; |
*yuv2packed2 = yuv2bgr24_full_2_c; |
*yuv2packed1 = yuv2bgr24_full_1_c; |
break; |
case AV_PIX_FMT_RGB48LE: |
*yuv2packedX = yuv2rgb48le_full_X_c; |
*yuv2packed2 = yuv2rgb48le_full_2_c; |
*yuv2packed1 = yuv2rgb48le_full_1_c; |
break; |
case AV_PIX_FMT_BGR48LE: |
*yuv2packedX = yuv2bgr48le_full_X_c; |
*yuv2packed2 = yuv2bgr48le_full_2_c; |
*yuv2packed1 = yuv2bgr48le_full_1_c; |
break; |
case AV_PIX_FMT_RGB48BE: |
*yuv2packedX = yuv2rgb48be_full_X_c; |
*yuv2packed2 = yuv2rgb48be_full_2_c; |
*yuv2packed1 = yuv2rgb48be_full_1_c; |
break; |
case AV_PIX_FMT_BGR48BE: |
*yuv2packedX = yuv2bgr48be_full_X_c; |
*yuv2packed2 = yuv2bgr48be_full_2_c; |
*yuv2packed1 = yuv2bgr48be_full_1_c; |
break; |
case AV_PIX_FMT_BGR4_BYTE: |
*yuv2packedX = yuv2bgr4_byte_full_X_c; |
*yuv2packed2 = yuv2bgr4_byte_full_2_c; |
*yuv2packed1 = yuv2bgr4_byte_full_1_c; |
break; |
case AV_PIX_FMT_RGB4_BYTE: |
*yuv2packedX = yuv2rgb4_byte_full_X_c; |
*yuv2packed2 = yuv2rgb4_byte_full_2_c; |
*yuv2packed1 = yuv2rgb4_byte_full_1_c; |
break; |
case AV_PIX_FMT_BGR8: |
*yuv2packedX = yuv2bgr8_full_X_c; |
*yuv2packed2 = yuv2bgr8_full_2_c; |
*yuv2packed1 = yuv2bgr8_full_1_c; |
break; |
case AV_PIX_FMT_RGB8: |
*yuv2packedX = yuv2rgb8_full_X_c; |
*yuv2packed2 = yuv2rgb8_full_2_c; |
*yuv2packed1 = yuv2rgb8_full_1_c; |
break; |
case AV_PIX_FMT_GBRP: |
case AV_PIX_FMT_GBRP9BE: |
case AV_PIX_FMT_GBRP9LE: |
case AV_PIX_FMT_GBRP10BE: |
case AV_PIX_FMT_GBRP10LE: |
case AV_PIX_FMT_GBRP12BE: |
case AV_PIX_FMT_GBRP12LE: |
case AV_PIX_FMT_GBRP14BE: |
case AV_PIX_FMT_GBRP14LE: |
case AV_PIX_FMT_GBRP16BE: |
case AV_PIX_FMT_GBRP16LE: |
case AV_PIX_FMT_GBRAP: |
*yuv2anyX = yuv2gbrp_full_X_c; |
break; |
} |
if (!*yuv2packedX && !*yuv2anyX) |
goto YUV_PACKED; |
} else { |
YUV_PACKED: |
switch (dstFormat) { |
case AV_PIX_FMT_RGBA64LE: |
#if CONFIG_SWSCALE_ALPHA |
if (c->alpPixBuf) { |
*yuv2packed1 = yuv2rgba64le_1_c; |
*yuv2packed2 = yuv2rgba64le_2_c; |
*yuv2packedX = yuv2rgba64le_X_c; |
} else |
#endif /* CONFIG_SWSCALE_ALPHA */ |
{ |
*yuv2packed1 = yuv2rgbx64le_1_c; |
*yuv2packed2 = yuv2rgbx64le_2_c; |
*yuv2packedX = yuv2rgbx64le_X_c; |
} |
break; |
case AV_PIX_FMT_RGBA64BE: |
#if CONFIG_SWSCALE_ALPHA |
if (c->alpPixBuf) { |
*yuv2packed1 = yuv2rgba64be_1_c; |
*yuv2packed2 = yuv2rgba64be_2_c; |
*yuv2packedX = yuv2rgba64be_X_c; |
} else |
#endif /* CONFIG_SWSCALE_ALPHA */ |
{ |
*yuv2packed1 = yuv2rgbx64be_1_c; |
*yuv2packed2 = yuv2rgbx64be_2_c; |
*yuv2packedX = yuv2rgbx64be_X_c; |
} |
break; |
case AV_PIX_FMT_BGRA64LE: |
#if CONFIG_SWSCALE_ALPHA |
if (c->alpPixBuf) { |
*yuv2packed1 = yuv2bgra64le_1_c; |
*yuv2packed2 = yuv2bgra64le_2_c; |
*yuv2packedX = yuv2bgra64le_X_c; |
} else |
#endif /* CONFIG_SWSCALE_ALPHA */ |
{ |
*yuv2packed1 = yuv2bgrx64le_1_c; |
*yuv2packed2 = yuv2bgrx64le_2_c; |
*yuv2packedX = yuv2bgrx64le_X_c; |
} |
break; |
case AV_PIX_FMT_BGRA64BE: |
#if CONFIG_SWSCALE_ALPHA |
if (c->alpPixBuf) { |
*yuv2packed1 = yuv2bgra64be_1_c; |
*yuv2packed2 = yuv2bgra64be_2_c; |
*yuv2packedX = yuv2bgra64be_X_c; |
} else |
#endif /* CONFIG_SWSCALE_ALPHA */ |
{ |
*yuv2packed1 = yuv2bgrx64be_1_c; |
*yuv2packed2 = yuv2bgrx64be_2_c; |
*yuv2packedX = yuv2bgrx64be_X_c; |
} |
break; |
case AV_PIX_FMT_RGB48LE: |
*yuv2packed1 = yuv2rgb48le_1_c; |
*yuv2packed2 = yuv2rgb48le_2_c; |
*yuv2packedX = yuv2rgb48le_X_c; |
break; |
case AV_PIX_FMT_RGB48BE: |
*yuv2packed1 = yuv2rgb48be_1_c; |
*yuv2packed2 = yuv2rgb48be_2_c; |
*yuv2packedX = yuv2rgb48be_X_c; |
break; |
case AV_PIX_FMT_BGR48LE: |
*yuv2packed1 = yuv2bgr48le_1_c; |
*yuv2packed2 = yuv2bgr48le_2_c; |
*yuv2packedX = yuv2bgr48le_X_c; |
break; |
case AV_PIX_FMT_BGR48BE: |
*yuv2packed1 = yuv2bgr48be_1_c; |
*yuv2packed2 = yuv2bgr48be_2_c; |
*yuv2packedX = yuv2bgr48be_X_c; |
break; |
case AV_PIX_FMT_RGB32: |
case AV_PIX_FMT_BGR32: |
#if CONFIG_SMALL |
*yuv2packed1 = yuv2rgb32_1_c; |
*yuv2packed2 = yuv2rgb32_2_c; |
*yuv2packedX = yuv2rgb32_X_c; |
#else |
#if CONFIG_SWSCALE_ALPHA |
if (c->alpPixBuf) { |
*yuv2packed1 = yuv2rgba32_1_c; |
*yuv2packed2 = yuv2rgba32_2_c; |
*yuv2packedX = yuv2rgba32_X_c; |
} else |
#endif /* CONFIG_SWSCALE_ALPHA */ |
{ |
*yuv2packed1 = yuv2rgbx32_1_c; |
*yuv2packed2 = yuv2rgbx32_2_c; |
*yuv2packedX = yuv2rgbx32_X_c; |
} |
#endif /* !CONFIG_SMALL */ |
break; |
case AV_PIX_FMT_RGB32_1: |
case AV_PIX_FMT_BGR32_1: |
#if CONFIG_SMALL |
*yuv2packed1 = yuv2rgb32_1_1_c; |
*yuv2packed2 = yuv2rgb32_1_2_c; |
*yuv2packedX = yuv2rgb32_1_X_c; |
#else |
#if CONFIG_SWSCALE_ALPHA |
if (c->alpPixBuf) { |
*yuv2packed1 = yuv2rgba32_1_1_c; |
*yuv2packed2 = yuv2rgba32_1_2_c; |
*yuv2packedX = yuv2rgba32_1_X_c; |
} else |
#endif /* CONFIG_SWSCALE_ALPHA */ |
{ |
*yuv2packed1 = yuv2rgbx32_1_1_c; |
*yuv2packed2 = yuv2rgbx32_1_2_c; |
*yuv2packedX = yuv2rgbx32_1_X_c; |
} |
#endif /* !CONFIG_SMALL */ |
break; |
case AV_PIX_FMT_RGB24: |
*yuv2packed1 = yuv2rgb24_1_c; |
*yuv2packed2 = yuv2rgb24_2_c; |
*yuv2packedX = yuv2rgb24_X_c; |
break; |
case AV_PIX_FMT_BGR24: |
*yuv2packed1 = yuv2bgr24_1_c; |
*yuv2packed2 = yuv2bgr24_2_c; |
*yuv2packedX = yuv2bgr24_X_c; |
break; |
case AV_PIX_FMT_RGB565LE: |
case AV_PIX_FMT_RGB565BE: |
case AV_PIX_FMT_BGR565LE: |
case AV_PIX_FMT_BGR565BE: |
*yuv2packed1 = yuv2rgb16_1_c; |
*yuv2packed2 = yuv2rgb16_2_c; |
*yuv2packedX = yuv2rgb16_X_c; |
break; |
case AV_PIX_FMT_RGB555LE: |
case AV_PIX_FMT_RGB555BE: |
case AV_PIX_FMT_BGR555LE: |
case AV_PIX_FMT_BGR555BE: |
*yuv2packed1 = yuv2rgb15_1_c; |
*yuv2packed2 = yuv2rgb15_2_c; |
*yuv2packedX = yuv2rgb15_X_c; |
break; |
case AV_PIX_FMT_RGB444LE: |
case AV_PIX_FMT_RGB444BE: |
case AV_PIX_FMT_BGR444LE: |
case AV_PIX_FMT_BGR444BE: |
*yuv2packed1 = yuv2rgb12_1_c; |
*yuv2packed2 = yuv2rgb12_2_c; |
*yuv2packedX = yuv2rgb12_X_c; |
break; |
case AV_PIX_FMT_RGB8: |
case AV_PIX_FMT_BGR8: |
*yuv2packed1 = yuv2rgb8_1_c; |
*yuv2packed2 = yuv2rgb8_2_c; |
*yuv2packedX = yuv2rgb8_X_c; |
break; |
case AV_PIX_FMT_RGB4: |
case AV_PIX_FMT_BGR4: |
*yuv2packed1 = yuv2rgb4_1_c; |
*yuv2packed2 = yuv2rgb4_2_c; |
*yuv2packedX = yuv2rgb4_X_c; |
break; |
case AV_PIX_FMT_RGB4_BYTE: |
case AV_PIX_FMT_BGR4_BYTE: |
*yuv2packed1 = yuv2rgb4b_1_c; |
*yuv2packed2 = yuv2rgb4b_2_c; |
*yuv2packedX = yuv2rgb4b_X_c; |
break; |
} |
} |
switch (dstFormat) { |
case AV_PIX_FMT_MONOWHITE: |
*yuv2packed1 = yuv2monowhite_1_c; |
*yuv2packed2 = yuv2monowhite_2_c; |
*yuv2packedX = yuv2monowhite_X_c; |
break; |
case AV_PIX_FMT_MONOBLACK: |
*yuv2packed1 = yuv2monoblack_1_c; |
*yuv2packed2 = yuv2monoblack_2_c; |
*yuv2packedX = yuv2monoblack_X_c; |
break; |
case AV_PIX_FMT_YUYV422: |
*yuv2packed1 = yuv2yuyv422_1_c; |
*yuv2packed2 = yuv2yuyv422_2_c; |
*yuv2packedX = yuv2yuyv422_X_c; |
break; |
case AV_PIX_FMT_YVYU422: |
*yuv2packed1 = yuv2yvyu422_1_c; |
*yuv2packed2 = yuv2yvyu422_2_c; |
*yuv2packedX = yuv2yvyu422_X_c; |
break; |
case AV_PIX_FMT_UYVY422: |
*yuv2packed1 = yuv2uyvy422_1_c; |
*yuv2packed2 = yuv2uyvy422_2_c; |
*yuv2packedX = yuv2uyvy422_X_c; |
break; |
case AV_PIX_FMT_YA8: |
*yuv2packed1 = yuv2ya8_1_c; |
*yuv2packed2 = yuv2ya8_2_c; |
*yuv2packedX = yuv2ya8_X_c; |
break; |
case AV_PIX_FMT_AYUV64LE: |
*yuv2packedX = yuv2ayuv64le_X_c; |
break; |
} |
} |
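/* Editorial sketch (not part of the original file): the writers selected |
 * above come in three arities — *_1_c takes a single input line, *_2_c |
 * blends two lines for bilinear vertical scaling, and *_X_c applies an |
 * N-tap vertical filter. A minimal scalar model of the 2-line blend, |
 * assuming the usual 12-bit intermediates and yalpha in [0, 4096): */ |
#if 0 |
static uint8_t blend2_model(int16_t buf0, int16_t buf1, int yalpha) |
{ |
    /* 12-bit samples times 12-bit weights; >> 19 returns to 8 bits, then clip */ |
    int val = (buf0 * (4096 - yalpha) + buf1 * yalpha) >> 19; |
    return (uint8_t)(val < 0 ? 0 : (val > 255 ? 255 : val)); |
} |
#endif |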
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/ppc/Makefile |
---|
0,0 → 1,3 |
OBJS += ppc/swscale_altivec.o \ |
ppc/yuv2rgb_altivec.o \ |
ppc/yuv2yuv_altivec.o \ |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/ppc/swscale_altivec.c |
---|
0,0 → 1,371 |
/* |
* AltiVec-enhanced yuv2yuvX |
* |
* Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org> |
* based on the equivalent C code in swscale.c |
* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
#include <inttypes.h> |
#include "config.h" |
#include "libswscale/swscale.h" |
#include "libswscale/swscale_internal.h" |
#include "libavutil/attributes.h" |
#include "libavutil/cpu.h" |
#include "yuv2rgb_altivec.h" |
#include "libavutil/ppc/util_altivec.h" |
#if HAVE_ALTIVEC |
#define vzero vec_splat_s32(0) |
#if HAVE_BIGENDIAN |
#define GET_LS(a,b,c,s) {\ |
vector signed short l2 = vec_ld(((b) << 1) + 16, s);\ |
ls = vec_perm(a, l2, c);\ |
a = l2;\ |
} |
#else |
#define GET_LS(a,b,c,s) {\ |
ls = a;\ |
a = vec_vsx_ld(((b) << 1) + 16, s);\ |
} |
#endif |
#define yuv2planeX_8(d1, d2, l1, src, x, perm, filter) do {\ |
vector signed short ls;\ |
GET_LS(l1, x, perm, src);\ |
vector signed int i1 = vec_mule(filter, ls);\ |
vector signed int i2 = vec_mulo(filter, ls);\ |
vector signed int vf1, vf2;\ |
vf1 = vec_mergeh(i1, i2);\ |
vf2 = vec_mergel(i1, i2);\ |
d1 = vec_add(d1, vf1);\ |
d2 = vec_add(d2, vf2);\ |
} while (0) |
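/* Editorial sketch (not part of the original file): vec_mule/vec_mulo |
 * produce the even- and odd-indexed 32-bit products, and the mergeh/mergel |
 * pair in yuv2planeX_8 restores source element order. Scalar model: */ |
#if 0 |
static void mule_mulo_merge_model(const int16_t f[8], const int16_t s[8], |
                                  int32_t out[8]) |
{ |
    int32_t even[4], odd[4]; |
    int k; |
    for (k = 0; k < 4; k++) { |
        even[k] = f[2 * k]     * s[2 * k];     /* vec_mule */ |
        odd[k]  = f[2 * k + 1] * s[2 * k + 1]; /* vec_mulo */ |
    } |
    for (k = 0; k < 4; k++) { /* vec_mergeh/vec_mergel interleave */ |
        out[2 * k]     = even[k]; |
        out[2 * k + 1] = odd[k]; |
    } |
} |
#endif |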
#if HAVE_BIGENDIAN |
#define LOAD_FILTER(vf,f) {\ |
vector unsigned char perm0 = vec_lvsl(joffset, f);\ |
vf = vec_ld(joffset, f);\ |
vf = vec_perm(vf, vf, perm0);\ |
} |
#define LOAD_L1(ll1,s,p){\ |
p = vec_lvsl(xoffset, s);\ |
ll1 = vec_ld(xoffset, s);\ |
} |
#else |
#define LOAD_FILTER(vf,f) {\ |
vf = vec_vsx_ld(joffset, f);\ |
} |
#define LOAD_L1(ll1,s,p){\ |
ll1 = vec_vsx_ld(xoffset, s);\ |
} |
#endif |
static void yuv2planeX_16_altivec(const int16_t *filter, int filterSize, |
const int16_t **src, uint8_t *dest, |
const uint8_t *dither, int offset, int x) |
{ |
register int i, j; |
LOCAL_ALIGNED(16, int, val, [16]); |
vector signed int vo1, vo2, vo3, vo4; |
vector unsigned short vs1, vs2; |
vector unsigned char vf; |
vector unsigned int altivec_vectorShiftInt19 = |
vec_add(vec_splat_u32(10), vec_splat_u32(9)); |
for (i = 0; i < 16; i++) |
val[i] = dither[(x + i + offset) & 7] << 12; |
vo1 = vec_ld(0, val); |
vo2 = vec_ld(16, val); |
vo3 = vec_ld(32, val); |
vo4 = vec_ld(48, val); |
for (j = 0; j < filterSize; j++) { |
unsigned int joffset=j<<1; |
unsigned int xoffset=x<<1; |
vector unsigned char perm; |
vector signed short l1,vLumFilter; |
LOAD_FILTER(vLumFilter,filter); |
vLumFilter = vec_splat(vLumFilter, 0); |
LOAD_L1(l1,src[j],perm); |
yuv2planeX_8(vo1, vo2, l1, src[j], x, perm, vLumFilter); |
yuv2planeX_8(vo3, vo4, l1, src[j], x + 8, perm, vLumFilter); |
} |
vo1 = vec_sra(vo1, altivec_vectorShiftInt19); |
vo2 = vec_sra(vo2, altivec_vectorShiftInt19); |
vo3 = vec_sra(vo3, altivec_vectorShiftInt19); |
vo4 = vec_sra(vo4, altivec_vectorShiftInt19); |
vs1 = vec_packsu(vo1, vo2); |
vs2 = vec_packsu(vo3, vo4); |
vf = vec_packsu(vs1, vs2); |
VEC_ST(vf, 0, dest); |
} |
static inline void yuv2planeX_u(const int16_t *filter, int filterSize, |
const int16_t **src, uint8_t *dest, int dstW, |
const uint8_t *dither, int offset, int x) |
{ |
int i, j; |
for (i = x; i < dstW; i++) { |
int t = dither[(i + offset) & 7] << 12; |
for (j = 0; j < filterSize; j++) |
t += src[j][i] * filter[j]; |
dest[i] = av_clip_uint8(t >> 19); |
} |
} |
static void yuv2planeX_altivec(const int16_t *filter, int filterSize, |
const int16_t **src, uint8_t *dest, int dstW, |
const uint8_t *dither, int offset) |
{ |
int dst_u = -(uintptr_t)dest & 15; |
int i; |
yuv2planeX_u(filter, filterSize, src, dest, dst_u, dither, offset, 0); |
for (i = dst_u; i < dstW - 15; i += 16) |
yuv2planeX_16_altivec(filter, filterSize, src, dest + i, dither, |
offset, i); |
yuv2planeX_u(filter, filterSize, src, dest, dstW, dither, offset, i); |
} |
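/* Editorial sketch (not part of the original file): the "-(uintptr_t)dest & 15" |
 * in yuv2planeX_altivec above is the usual distance-to-alignment idiom, used |
 * to run the scalar head until the destination is 16-byte aligned: */ |
#if 0 |
#include <stdint.h> |
static int bytes_until_aligned16(const void *p) |
{ |
    /* 0 when p is already 16-byte aligned, otherwise bytes to the boundary */ |
    return (int)(-(uintptr_t)p & 15); |
} |
#endif |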
#if HAVE_BIGENDIAN |
// The shift by 3 in GET_VF4 below is 2 (filterSize == 4) + 1 (sizeof(short) == 2). |
// The neat trick: We only care for half the elements, |
// high or low depending on (i<<3)%16 (it's 0 or 8 here), |
// and we're going to use vec_mule, so we choose |
// carefully how to "unpack" the elements into the even slots. |
#define GET_VF4(a, vf, f) {\ |
vf = vec_ld(a<< 3, f);\ |
if ((a << 3) % 16)\ |
vf = vec_mergel(vf, (vector signed short)vzero);\ |
else\ |
vf = vec_mergeh(vf, (vector signed short)vzero);\ |
} |
#define FIRST_LOAD(sv, pos, s, per) {\ |
sv = vec_ld(pos, s);\ |
per = vec_lvsl(pos, s);\ |
} |
#define UPDATE_PTR(s0, d0, s1, d1) {\ |
d0 = s0;\ |
d1 = s1;\ |
} |
#define LOAD_SRCV(pos, a, s, per, v0, v1, vf) {\ |
v1 = vec_ld(pos + a + 16, s);\ |
vf = vec_perm(v0, v1, per);\ |
} |
#define LOAD_SRCV8(pos, a, s, per, v0, v1, vf) {\ |
if ((((uintptr_t)s + pos) % 16) > 8) {\ |
v1 = vec_ld(pos + a + 16, s);\ |
}\ |
vf = vec_perm(v0, v1, per);\ |
} |
#define GET_VFD(a, b, f, vf0, vf1, per, vf, off) {\ |
vf1 = vec_ld((a * 2 * filterSize) + (b * 2) + 16 + off, f);\ |
vf = vec_perm(vf0, vf1, per);\ |
} |
#else /* else of #if HAVE_BIGENDIAN */ |
#define GET_VF4(a, vf, f) {\ |
vf = (vector signed short)vec_vsx_ld(a << 3, f);\ |
vf = vec_mergeh(vf, (vector signed short)vzero);\ |
} |
#define FIRST_LOAD(sv, pos, s, per) {} |
#define UPDATE_PTR(s0, d0, s1, d1) {} |
#define LOAD_SRCV(pos, a, s, per, v0, v1, vf) {\ |
vf = vec_vsx_ld(pos + a, s);\ |
} |
#define LOAD_SRCV8(pos, a, s, per, v0, v1, vf) LOAD_SRCV(pos, a, s, per, v0, v1, vf) |
#define GET_VFD(a, b, f, vf0, vf1, per, vf, off) {\ |
vf = vec_vsx_ld((a * 2 * filterSize) + (b * 2) + off, f);\ |
} |
#endif /* end of #if HAVE_BIGENDIAN */ |
static void hScale_altivec_real(SwsContext *c, int16_t *dst, int dstW, |
const uint8_t *src, const int16_t *filter, |
const int32_t *filterPos, int filterSize) |
{ |
register int i; |
LOCAL_ALIGNED(16, int, tempo, [4]); |
if (filterSize % 4) { |
for (i = 0; i < dstW; i++) { |
register int j; |
register int srcPos = filterPos[i]; |
register int val = 0; |
for (j = 0; j < filterSize; j++) |
val += ((int)src[srcPos + j]) * filter[filterSize * i + j]; |
dst[i] = FFMIN(val >> 7, (1 << 15) - 1); |
} |
} else |
switch (filterSize) { |
case 4: |
for (i = 0; i < dstW; i++) { |
register int srcPos = filterPos[i]; |
vector unsigned char src_vF = unaligned_load(srcPos, src); |
vector signed short src_v, filter_v; |
vector signed int val_vEven, val_s; |
src_v = // vec_unpackh sign-extends... |
(vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); |
// now put our elements in the even slots |
src_v = vec_mergeh(src_v, (vector signed short)vzero); |
GET_VF4(i, filter_v, filter); |
val_vEven = vec_mule(src_v, filter_v); |
val_s = vec_sums(val_vEven, vzero); |
vec_st(val_s, 0, tempo); |
dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); |
} |
break; |
case 8: |
for (i = 0; i < dstW; i++) { |
register int srcPos = filterPos[i]; |
vector unsigned char src_vF, src_v0, src_v1; |
vector unsigned char permS; |
vector signed short src_v, filter_v; |
vector signed int val_v, val_s; |
FIRST_LOAD(src_v0, srcPos, src, permS); |
LOAD_SRCV8(srcPos, 0, src, permS, src_v0, src_v1, src_vF); |
src_v = // vec_unpackh sign-extends... |
(vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); |
filter_v = vec_ld(i << 4, filter); |
val_v = vec_msums(src_v, filter_v, (vector signed int)vzero); |
val_s = vec_sums(val_v, vzero); |
vec_st(val_s, 0, tempo); |
dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); |
} |
break; |
case 16: |
for (i = 0; i < dstW; i++) { |
register int srcPos = filterPos[i]; |
vector unsigned char src_vF = unaligned_load(srcPos, src); |
vector signed short src_vA = // vec_unpackh sign-extends... |
(vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); |
vector signed short src_vB = // vec_unpackh sign-extends... |
(vector signed short)(VEC_MERGEL((vector unsigned char)vzero, src_vF)); |
vector signed short filter_v0 = vec_ld(i << 5, filter); |
vector signed short filter_v1 = vec_ld((i << 5) + 16, filter); |
vector signed int val_acc = vec_msums(src_vA, filter_v0, (vector signed int)vzero); |
vector signed int val_v = vec_msums(src_vB, filter_v1, val_acc); |
vector signed int val_s = vec_sums(val_v, vzero); |
VEC_ST(val_s, 0, tempo); |
dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); |
} |
break; |
default: |
for (i = 0; i < dstW; i++) { |
register int j, offset = i * 2 * filterSize; |
register int srcPos = filterPos[i]; |
vector signed int val_s, val_v = (vector signed int)vzero; |
vector signed short filter_v0R; |
vector unsigned char permF, src_v0, permS; |
FIRST_LOAD(filter_v0R, offset, filter, permF); |
FIRST_LOAD(src_v0, srcPos, src, permS); |
for (j = 0; j < filterSize - 15; j += 16) { |
vector unsigned char src_v1, src_vF; |
vector signed short filter_v1R, filter_v2R, filter_v0, filter_v1; |
LOAD_SRCV(srcPos, j, src, permS, src_v0, src_v1, src_vF); |
vector signed short src_vA = // vec_unpackh sign-extends... |
(vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); |
vector signed short src_vB = // vec_unpackh sign-extends... |
(vector signed short)(VEC_MERGEL((vector unsigned char)vzero, src_vF)); |
GET_VFD(i, j, filter, filter_v0R, filter_v1R, permF, filter_v0, 0); |
GET_VFD(i, j, filter, filter_v1R, filter_v2R, permF, filter_v1, 16); |
vector signed int val_acc = vec_msums(src_vA, filter_v0, val_v); |
val_v = vec_msums(src_vB, filter_v1, val_acc); |
UPDATE_PTR(filter_v2R, filter_v0R, src_v1, src_v0); |
} |
if (j < filterSize - 7) { |
// loading src_v0 is useless, it's already done above |
vector unsigned char src_v1, src_vF; |
vector signed short src_v, filter_v1R, filter_v; |
LOAD_SRCV8(srcPos, j, src, permS, src_v0, src_v1, src_vF); |
src_v = // vec_unpackh sign-extends... |
(vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); |
GET_VFD(i, j, filter, filter_v0R, filter_v1R, permF, filter_v, 0); |
val_v = vec_msums(src_v, filter_v, val_v); |
} |
val_s = vec_sums(val_v, vzero); |
VEC_ST(val_s, 0, tempo); |
dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); |
} |
} |
} |
#endif /* HAVE_ALTIVEC */ |
av_cold void ff_sws_init_swscale_ppc(SwsContext *c) |
{ |
#if HAVE_ALTIVEC |
enum AVPixelFormat dstFormat = c->dstFormat; |
if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC)) |
return; |
if (c->srcBpc == 8 && c->dstBpc <= 14) { |
c->hyScale = c->hcScale = hScale_altivec_real; |
} |
if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) && |
dstFormat != AV_PIX_FMT_NV12 && dstFormat != AV_PIX_FMT_NV21 && |
!c->alpPixBuf) { |
c->yuv2planeX = yuv2planeX_altivec; |
} |
/* The following list of supported dstFormat values should |
* match what's found in the body of ff_yuv2packedX_altivec() */ |
if (!(c->flags & (SWS_BITEXACT | SWS_FULL_CHR_H_INT)) && !c->alpPixBuf) { |
switch (c->dstFormat) { |
case AV_PIX_FMT_ABGR: |
c->yuv2packedX = ff_yuv2abgr_X_altivec; |
break; |
case AV_PIX_FMT_BGRA: |
c->yuv2packedX = ff_yuv2bgra_X_altivec; |
break; |
case AV_PIX_FMT_ARGB: |
c->yuv2packedX = ff_yuv2argb_X_altivec; |
break; |
case AV_PIX_FMT_RGBA: |
c->yuv2packedX = ff_yuv2rgba_X_altivec; |
break; |
case AV_PIX_FMT_BGR24: |
c->yuv2packedX = ff_yuv2bgr24_X_altivec; |
break; |
case AV_PIX_FMT_RGB24: |
c->yuv2packedX = ff_yuv2rgb24_X_altivec; |
break; |
} |
} |
#endif /* HAVE_ALTIVEC */ |
} |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/ppc/yuv2rgb_altivec.c |
---|
0,0 → 1,873 |
/* |
* AltiVec acceleration for colorspace conversion |
* |
* copyright (C) 2004 Marc Hoffman <marc.hoffman@analog.com> |
* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
/* |
* Convert I420/YV12 to RGB in various formats. It rejects images that are |
* not in a 420 format, that don't have widths that are multiples of 16, or |
* that don't have heights that are multiples of 2. A rejection defers to |
* the C simulation code. |
* |
* Lots of optimizations to be done here. |
* |
* 1. Need to fix saturation code. I just couldn't get it to fly with packs |
* and adds, so we currently use max/min to clip. |
* |
* 2. The inefficient use of chroma loading needs a bit of brushing up. |
* |
* 3. Analysis of pipeline stalls needs to be done. Use Shark to identify |
* pipeline stalls. |
* |
* |
* MODIFIED to calculate coeffs from currently selected color space. |
* MODIFIED core to be a macro where you specify the output format. |
* ADDED UYVY conversion which is never called due to something in swscale. |
* CORRECTED algorithm selection to be strict on input formats. |
* ADDED runtime detection of AltiVec. |
* |
* ADDED altivec_yuv2packedX vertical scl + RGB converter |
* |
* March 27,2004 |
* PERFORMANCE ANALYSIS |
* |
* The C version uses 25% of the processor, or ~250 Mips, with D1 rawvideo |
* used as the test. |
* The AltiVec version uses 10% of the processor, or ~100 Mips, for the |
* same D1 video sequence. |
* |
* 720 * 480 * 30 is ~10 MP/s, |
* |
* so we have roughly 10 clocks per pixel. This is too high, something has |
* to be wrong. |
* |
* OPTIMIZED clip codes to utilize vec_max and vec_packs removing the |
* need for vec_min. |
* |
* OPTIMIZED DST OUTPUT cache/DMA controls. We are pretty much guaranteed to |
* have the input video frame; it was just decompressed, so it probably resides |
* in the L1 cache. However, we are creating the output video stream. This needs |
* to use the DSTST instruction to optimize for the cache. We couple this with |
* the fact that we are not going to be visiting the input buffer again, so we |
* mark it Least Recently Used. This shaves 25% of the processor cycles off. |
* |
* Now memcpy is the largest mips consumer in the system, probably due |
* to the inefficient X11 stuff. |
* |
* GL libraries seem to be very slow on this machine (a 1.33GHz PB running |
* Jaguar); this is not the case for my 1GHz PB. I thought it might be |
* a versioning issue, however I have libGL.1.2.dylib on both |
* machines. (We need to figure this out now.) |
* |
* GL2 libraries work now with patch for RGB32. |
* |
* NOTE: quartz vo driver ARGB32_to_RGB24 consumes 30% of the processor. |
* |
* Integrated luma prescaling adjustment for saturation/contrast/brightness |
* adjustment. |
*/ |
#include <stdio.h> |
#include <stdlib.h> |
#include <string.h> |
#include <inttypes.h> |
#include <assert.h> |
#include "config.h" |
#include "libswscale/rgb2rgb.h" |
#include "libswscale/swscale.h" |
#include "libswscale/swscale_internal.h" |
#include "libavutil/attributes.h" |
#include "libavutil/cpu.h" |
#include "libavutil/pixdesc.h" |
#include "yuv2rgb_altivec.h" |
#if HAVE_ALTIVEC |
#undef PROFILE_THE_BEAST |
#undef INC_SCALING |
typedef unsigned char ubyte; |
typedef signed char sbyte; |
/* RGB interleaver, 16 planar pels 8-bit samples per channel in |
* homogeneous vector registers x0,x1,x2 are interleaved with the |
* following technique: |
* |
* o0 = vec_mergeh(x0, x1); |
* o1 = vec_perm(o0, x2, perm_rgb_0); |
* o2 = vec_perm(o0, x2, perm_rgb_1); |
* o3 = vec_mergel(x0, x1); |
* o4 = vec_perm(o3, o2, perm_rgb_2); |
* o5 = vec_perm(o3, o2, perm_rgb_3); |
* |
* perm_rgb_0: o0(RG).h v1(B) --> o1* |
* 0 1 2 3 4 |
* rgbr|gbrg|brgb|rgbr |
* 0010 0100 1001 0010 |
* 0102 3145 2673 894A |
* |
* perm_rgb_1: o0(RG).h v1(B) --> o2 |
* 0 1 2 3 4 |
* gbrg|brgb|bbbb|bbbb |
* 0100 1001 1111 1111 |
* B5CD 6EF7 89AB CDEF |
* |
* perm_rgb_2: o3(RG).l o2(rgbB.l) --> o4* |
* 0 1 2 3 4 |
* gbrg|brgb|rgbr|gbrg |
* 1111 1111 0010 0100 |
* 89AB CDEF 0182 3945 |
* |
* perm_rgb_2: o3(RG).l o2(rgbB.l) ---> o5* |
* 0 1 2 3 4 |
* brgb|rgbr|gbrg|brgb |
* 1001 0010 0100 1001 |
* a67b 89cA BdCD eEFf |
* |
*/ |
static const vector unsigned char |
perm_rgb_0 = { 0x00, 0x01, 0x10, 0x02, 0x03, 0x11, 0x04, 0x05, |
0x12, 0x06, 0x07, 0x13, 0x08, 0x09, 0x14, 0x0a }, |
perm_rgb_1 = { 0x0b, 0x15, 0x0c, 0x0d, 0x16, 0x0e, 0x0f, 0x17, |
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f }, |
perm_rgb_2 = { 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, |
0x00, 0x01, 0x18, 0x02, 0x03, 0x19, 0x04, 0x05 }, |
perm_rgb_3 = { 0x1a, 0x06, 0x07, 0x1b, 0x08, 0x09, 0x1c, 0x0a, |
0x0b, 0x1d, 0x0c, 0x0d, 0x1e, 0x0e, 0x0f, 0x1f }; |
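/* Editorial sketch (not part of the original file): the net effect of the |
 * vec_merge3 permute network defined below is a plain 3-way interleave of |
 * 16 R, 16 G and 16 B bytes into 48 packed bytes. Scalar model: */ |
#if 0 |
static void merge3_model(const uint8_t r[16], const uint8_t g[16], |
                         const uint8_t b[16], uint8_t out[48]) |
{ |
    int i; |
    for (i = 0; i < 16; i++) { |
        out[3 * i + 0] = r[i]; |
        out[3 * i + 1] = g[i]; |
        out[3 * i + 2] = b[i]; |
    } |
} |
#endif |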
#define vec_merge3(x2, x1, x0, y0, y1, y2) \ |
do { \ |
__typeof__(x0) o0, o2, o3; \ |
o0 = vec_mergeh(x0, x1); \ |
y0 = vec_perm(o0, x2, perm_rgb_0); \ |
o2 = vec_perm(o0, x2, perm_rgb_1); \ |
o3 = vec_mergel(x0, x1); \ |
y1 = vec_perm(o3, o2, perm_rgb_2); \ |
y2 = vec_perm(o3, o2, perm_rgb_3); \ |
} while (0) |
#define vec_mstbgr24(x0, x1, x2, ptr) \ |
do { \ |
__typeof__(x0) _0, _1, _2; \ |
vec_merge3(x0, x1, x2, _0, _1, _2); \ |
vec_st(_0, 0, ptr++); \ |
vec_st(_1, 0, ptr++); \ |
vec_st(_2, 0, ptr++); \ |
} while (0) |
#define vec_mstrgb24(x0, x1, x2, ptr) \ |
do { \ |
__typeof__(x0) _0, _1, _2; \ |
vec_merge3(x2, x1, x0, _0, _1, _2); \ |
vec_st(_0, 0, ptr++); \ |
vec_st(_1, 0, ptr++); \ |
vec_st(_2, 0, ptr++); \ |
} while (0) |
/* pack the pixels in rgb0 format |
* msb R |
* lsb 0 |
*/ |
#define vec_mstrgb32(T, x0, x1, x2, x3, ptr) \ |
do { \ |
T _0, _1, _2, _3; \ |
_0 = vec_mergeh(x0, x1); \ |
_1 = vec_mergeh(x2, x3); \ |
_2 = (T) vec_mergeh((vector unsigned short) _0, \ |
(vector unsigned short) _1); \ |
_3 = (T) vec_mergel((vector unsigned short) _0, \ |
(vector unsigned short) _1); \ |
vec_st(_2, 0 * 16, (T *) ptr); \ |
vec_st(_3, 1 * 16, (T *) ptr); \ |
_0 = vec_mergel(x0, x1); \ |
_1 = vec_mergel(x2, x3); \ |
_2 = (T) vec_mergeh((vector unsigned short) _0, \ |
(vector unsigned short) _1); \ |
_3 = (T) vec_mergel((vector unsigned short) _0, \ |
(vector unsigned short) _1); \ |
vec_st(_2, 2 * 16, (T *) ptr); \ |
vec_st(_3, 3 * 16, (T *) ptr); \ |
ptr += 4; \ |
} while (0) |
/* |
* 1 0 1.4021 | | Y | |
* 1 -0.3441 -0.7142 |x| Cb| |
* 1 1.7718 0 | | Cr| |
* |
* |
* Y: [-128 127] |
* Cb/Cr : [-128 127] |
* |
* typical YUV conversion works on Y: 0-255 this version has been |
* optimized for JPEG decoding. |
*/ |
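/* Editorial sketch (not part of the original file): the matrix above in |
 * plain floating point, before the fixed-point formulation used below: */ |
#if 0 |
static void yuv2rgb_float_model(double y, double cb, double cr, |
                                double *r, double *g, double *b) |
{ |
    *r = y + 1.4021 * cr; |
    *g = y - 0.3441 * cb - 0.7142 * cr; |
    *b = y + 1.7718 * cb; |
} |
#endif |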
#if HAVE_BIGENDIAN |
#define vec_unh(x) \ |
(vector signed short) \ |
vec_perm(x, (__typeof__(x)) { 0 }, \ |
((vector unsigned char) { \ |
0x10, 0x00, 0x10, 0x01, 0x10, 0x02, 0x10, 0x03, \ |
0x10, 0x04, 0x10, 0x05, 0x10, 0x06, 0x10, 0x07 })) |
#define vec_unl(x) \ |
(vector signed short) \ |
vec_perm(x, (__typeof__(x)) { 0 }, \ |
((vector unsigned char) { \ |
0x10, 0x08, 0x10, 0x09, 0x10, 0x0A, 0x10, 0x0B, \ |
0x10, 0x0C, 0x10, 0x0D, 0x10, 0x0E, 0x10, 0x0F })) |
#else |
#define vec_unh(x)(vector signed short) vec_mergeh(x,(__typeof__(x)) { 0 }) |
#define vec_unl(x)(vector signed short) vec_mergel(x,(__typeof__(x)) { 0 }) |
#endif |
#define vec_clip_s16(x) \ |
vec_max(vec_min(x, ((vector signed short) { \ |
235, 235, 235, 235, 235, 235, 235, 235 })), \ |
((vector signed short) { 16, 16, 16, 16, 16, 16, 16, 16 })) |
#define vec_packclp(x, y) \ |
(vector unsigned char) \ |
vec_packs((vector unsigned short) \ |
vec_max(x, ((vector signed short) { 0 })), \ |
(vector unsigned short) \ |
vec_max(y, ((vector signed short) { 0 }))) |
static inline void cvtyuvtoRGB(SwsContext *c, vector signed short Y, |
vector signed short U, vector signed short V, |
vector signed short *R, vector signed short *G, |
vector signed short *B) |
{ |
vector signed short vx, ux, uvx; |
Y = vec_mradds(Y, c->CY, c->OY); |
U = vec_sub(U, (vector signed short) |
vec_splat((vector signed short) { 128 }, 0)); |
V = vec_sub(V, (vector signed short) |
vec_splat((vector signed short) { 128 }, 0)); |
// ux = (CBU * (u << c->CSHIFT) + 0x4000) >> 15; |
ux = vec_sl(U, c->CSHIFT); |
*B = vec_mradds(ux, c->CBU, Y); |
// vx = (CRV * (v << c->CSHIFT) + 0x4000) >> 15; |
vx = vec_sl(V, c->CSHIFT); |
*R = vec_mradds(vx, c->CRV, Y); |
// uvx = ((CGU * u) + (CGV * v)) >> 15; |
uvx = vec_mradds(U, c->CGU, Y); |
*G = vec_mradds(V, c->CGV, uvx); |
} |
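/* Editorial sketch (not part of the original file): per 16-bit lane, |
 * vec_mradds(a, b, c) computes saturate(((a * b + 0x4000) >> 15) + c); |
 * cvtyuvtoRGB above leans on that rounding multiply to keep every |
 * intermediate inside signed 16-bit lanes. Scalar model: */ |
#if 0 |
static int16_t mradds_model(int16_t a, int16_t b, int16_t c) |
{ |
    int32_t v = ((a * b + 0x4000) >> 15) + c; |
    if (v >  32767) v =  32767; |
    if (v < -32768) v = -32768; |
    return (int16_t)v; |
} |
#endif |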
/* |
* ------------------------------------------------------------------------------ |
* CS converters |
* ------------------------------------------------------------------------------ |
*/ |
#define DEFCSP420_CVT(name, out_pixels) \ |
static int altivec_ ## name(SwsContext *c, const unsigned char **in, \ |
int *instrides, int srcSliceY, int srcSliceH, \ |
unsigned char **oplanes, int *outstrides) \ |
{ \ |
int w = c->srcW; \ |
int h = srcSliceH; \ |
int i, j; \ |
int instrides_scl[3]; \ |
vector unsigned char y0, y1; \ |
\ |
vector signed char u, v; \ |
\ |
vector signed short Y0, Y1, Y2, Y3; \ |
vector signed short U, V; \ |
vector signed short vx, ux, uvx; \ |
vector signed short vx0, ux0, uvx0; \ |
vector signed short vx1, ux1, uvx1; \ |
vector signed short R0, G0, B0; \ |
vector signed short R1, G1, B1; \ |
vector unsigned char R, G, B; \ |
\ |
const vector unsigned char *y1ivP, *y2ivP, *uivP, *vivP; \ |
vector unsigned char align_perm; \ |
\ |
vector signed short lCY = c->CY; \ |
vector signed short lOY = c->OY; \ |
vector signed short lCRV = c->CRV; \ |
vector signed short lCBU = c->CBU; \ |
vector signed short lCGU = c->CGU; \ |
vector signed short lCGV = c->CGV; \ |
vector unsigned short lCSHIFT = c->CSHIFT; \ |
\ |
const ubyte *y1i = in[0]; \ |
const ubyte *y2i = in[0] + instrides[0]; \ |
const ubyte *ui = in[1]; \ |
const ubyte *vi = in[2]; \ |
\ |
vector unsigned char *oute, *outo; \ |
\ |
/* loop moves y{1, 2}i by w */ \ |
instrides_scl[0] = instrides[0] * 2 - w; \ |
/* loop moves ui by w / 2 */ \ |
instrides_scl[1] = instrides[1] - w / 2; \ |
/* loop moves vi by w / 2 */ \ |
instrides_scl[2] = instrides[2] - w / 2; \ |
\ |
for (i = 0; i < h / 2; i++) { \ |
oute = (vector unsigned char *)(oplanes[0] + outstrides[0] * \ |
(srcSliceY + i * 2)); \ |
outo = oute + (outstrides[0] >> 4); \ |
vec_dstst(outo, (0x02000002 | (((w * 3 + 32) / 32) << 16)), 0); \ |
vec_dstst(oute, (0x02000002 | (((w * 3 + 32) / 32) << 16)), 1); \ |
\ |
for (j = 0; j < w / 16; j++) { \ |
y1ivP = (const vector unsigned char *) y1i; \ |
y2ivP = (const vector unsigned char *) y2i; \ |
uivP = (const vector unsigned char *) ui; \ |
vivP = (const vector unsigned char *) vi; \ |
\ |
align_perm = vec_lvsl(0, y1i); \ |
y0 = (vector unsigned char) \ |
vec_perm(y1ivP[0], y1ivP[1], align_perm); \ |
\ |
align_perm = vec_lvsl(0, y2i); \ |
y1 = (vector unsigned char) \ |
vec_perm(y2ivP[0], y2ivP[1], align_perm); \ |
\ |
align_perm = vec_lvsl(0, ui); \ |
u = (vector signed char) \ |
vec_perm(uivP[0], uivP[1], align_perm); \ |
\ |
align_perm = vec_lvsl(0, vi); \ |
v = (vector signed char) \ |
vec_perm(vivP[0], vivP[1], align_perm); \ |
\ |
u = (vector signed char) \ |
vec_sub(u, \ |
(vector signed char) \ |
vec_splat((vector signed char) { 128 }, 0)); \ |
v = (vector signed char) \ |
vec_sub(v, \ |
(vector signed char) \ |
vec_splat((vector signed char) { 128 }, 0)); \ |
\ |
U = vec_unpackh(u); \ |
V = vec_unpackh(v); \ |
\ |
Y0 = vec_unh(y0); \ |
Y1 = vec_unl(y0); \ |
Y2 = vec_unh(y1); \ |
Y3 = vec_unl(y1); \ |
\ |
Y0 = vec_mradds(Y0, lCY, lOY); \ |
Y1 = vec_mradds(Y1, lCY, lOY); \ |
Y2 = vec_mradds(Y2, lCY, lOY); \ |
Y3 = vec_mradds(Y3, lCY, lOY); \ |
\ |
/* ux = (CBU * (u << CSHIFT) + 0x4000) >> 15 */ \ |
ux = vec_sl(U, lCSHIFT); \ |
ux = vec_mradds(ux, lCBU, (vector signed short) { 0 }); \ |
ux0 = vec_mergeh(ux, ux); \ |
ux1 = vec_mergel(ux, ux); \ |
\ |
/* vx = (CRV * (v << CSHIFT) + 0x4000) >> 15; */ \ |
vx = vec_sl(V, lCSHIFT); \ |
vx = vec_mradds(vx, lCRV, (vector signed short) { 0 }); \ |
vx0 = vec_mergeh(vx, vx); \ |
vx1 = vec_mergel(vx, vx); \ |
\ |
/* uvx = ((CGU * u) + (CGV * v)) >> 15 */ \ |
uvx = vec_mradds(U, lCGU, (vector signed short) { 0 }); \ |
uvx = vec_mradds(V, lCGV, uvx); \ |
uvx0 = vec_mergeh(uvx, uvx); \ |
uvx1 = vec_mergel(uvx, uvx); \ |
\ |
R0 = vec_add(Y0, vx0); \ |
G0 = vec_add(Y0, uvx0); \ |
B0 = vec_add(Y0, ux0); \ |
R1 = vec_add(Y1, vx1); \ |
G1 = vec_add(Y1, uvx1); \ |
B1 = vec_add(Y1, ux1); \ |
\ |
R = vec_packclp(R0, R1); \ |
G = vec_packclp(G0, G1); \ |
B = vec_packclp(B0, B1); \ |
\ |
out_pixels(R, G, B, oute); \ |
\ |
R0 = vec_add(Y2, vx0); \ |
G0 = vec_add(Y2, uvx0); \ |
B0 = vec_add(Y2, ux0); \ |
R1 = vec_add(Y3, vx1); \ |
G1 = vec_add(Y3, uvx1); \ |
B1 = vec_add(Y3, ux1); \ |
R = vec_packclp(R0, R1); \ |
G = vec_packclp(G0, G1); \ |
B = vec_packclp(B0, B1); \ |
\ |
\ |
out_pixels(R, G, B, outo); \ |
\ |
y1i += 16; \ |
y2i += 16; \ |
ui += 8; \ |
vi += 8; \ |
} \ |
\ |
ui += instrides_scl[1]; \ |
vi += instrides_scl[2]; \ |
y1i += instrides_scl[0]; \ |
y2i += instrides_scl[0]; \ |
} \ |
return srcSliceH; \ |
} |
#define out_abgr(a, b, c, ptr) \ |
vec_mstrgb32(__typeof__(a), ((__typeof__(a)) { 255 }), c, b, a, ptr) |
#define out_bgra(a, b, c, ptr) \ |
vec_mstrgb32(__typeof__(a), c, b, a, ((__typeof__(a)) { 255 }), ptr) |
#define out_rgba(a, b, c, ptr) \ |
vec_mstrgb32(__typeof__(a), a, b, c, ((__typeof__(a)) { 255 }), ptr) |
#define out_argb(a, b, c, ptr) \ |
vec_mstrgb32(__typeof__(a), ((__typeof__(a)) { 255 }), a, b, c, ptr) |
#define out_rgb24(a, b, c, ptr) vec_mstrgb24(a, b, c, ptr) |
#define out_bgr24(a, b, c, ptr) vec_mstbgr24(a, b, c, ptr) |
DEFCSP420_CVT(yuv2_abgr, out_abgr) |
DEFCSP420_CVT(yuv2_bgra, out_bgra) |
DEFCSP420_CVT(yuv2_rgba, out_rgba) |
DEFCSP420_CVT(yuv2_argb, out_argb) |
DEFCSP420_CVT(yuv2_rgb24, out_rgb24) |
DEFCSP420_CVT(yuv2_bgr24, out_bgr24) |
// uyvy|uyvy|uyvy|uyvy |
// 0123 4567 89ab cdef |
static const vector unsigned char |
demux_u = { 0x10, 0x00, 0x10, 0x00, |
0x10, 0x04, 0x10, 0x04, |
0x10, 0x08, 0x10, 0x08, |
0x10, 0x0c, 0x10, 0x0c }, |
demux_v = { 0x10, 0x02, 0x10, 0x02, |
0x10, 0x06, 0x10, 0x06, |
0x10, 0x0A, 0x10, 0x0A, |
0x10, 0x0E, 0x10, 0x0E }, |
demux_y = { 0x10, 0x01, 0x10, 0x03, |
0x10, 0x05, 0x10, 0x07, |
0x10, 0x09, 0x10, 0x0B, |
0x10, 0x0D, 0x10, 0x0F }; |
/* |
* this is so I can play live CCIR raw video |
*/ |
static int altivec_uyvy_rgb32(SwsContext *c, const unsigned char **in, |
int *instrides, int srcSliceY, int srcSliceH, |
unsigned char **oplanes, int *outstrides) |
{ |
int w = c->srcW; |
int h = srcSliceH; |
int i, j; |
vector unsigned char uyvy; |
vector signed short Y, U, V; |
vector signed short R0, G0, B0, R1, G1, B1; |
vector unsigned char R, G, B; |
vector unsigned char *out; |
const ubyte *img; |
img = in[0]; |
out = (vector unsigned char *) (oplanes[0] + srcSliceY * outstrides[0]); |
for (i = 0; i < h; i++) |
for (j = 0; j < w / 16; j++) { |
uyvy = vec_ld(0, img); |
U = (vector signed short) |
vec_perm(uyvy, (vector unsigned char) { 0 }, demux_u); |
V = (vector signed short) |
vec_perm(uyvy, (vector unsigned char) { 0 }, demux_v); |
Y = (vector signed short) |
vec_perm(uyvy, (vector unsigned char) { 0 }, demux_y); |
cvtyuvtoRGB(c, Y, U, V, &R0, &G0, &B0); |
uyvy = vec_ld(16, img); |
U = (vector signed short) |
vec_perm(uyvy, (vector unsigned char) { 0 }, demux_u); |
V = (vector signed short) |
vec_perm(uyvy, (vector unsigned char) { 0 }, demux_v); |
Y = (vector signed short) |
vec_perm(uyvy, (vector unsigned char) { 0 }, demux_y); |
cvtyuvtoRGB(c, Y, U, V, &R1, &G1, &B1); |
R = vec_packclp(R0, R1); |
G = vec_packclp(G0, G1); |
B = vec_packclp(B0, B1); |
// vec_mstbgr24 (R,G,B, out); |
out_rgba(R, G, B, out); |
img += 32; |
} |
return srcSliceH; |
} |
#endif /* HAVE_ALTIVEC */ |
/* OK, currently the acceleration routine only supports |
* inputs whose width is a multiple of 16 |
* and whose height is a multiple of 2. |
* |
* So we just fall back to the C code for everything else. |
*/ |
av_cold SwsFunc ff_yuv2rgb_init_ppc(SwsContext *c) |
{ |
#if HAVE_ALTIVEC |
if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC)) |
return NULL; |
/* |
* This seems not to matter too much; I tried a bunch of |
* videos with abnormal widths and MPlayer crashed elsewhere, e.g. |
* mplayer -vo x11 -rawvideo on:w=350:h=240 raw-350x240.eyuv |
* goes boom with an X11 bad match. |
* |
*/ |
if ((c->srcW & 0xf) != 0) |
return NULL; |
switch (c->srcFormat) { |
case AV_PIX_FMT_YUV410P: |
case AV_PIX_FMT_YUV420P: |
/*case IMGFMT_CLPL: ??? */ |
case AV_PIX_FMT_GRAY8: |
case AV_PIX_FMT_NV12: |
case AV_PIX_FMT_NV21: |
if ((c->srcH & 0x1) != 0) |
return NULL; |
switch (c->dstFormat) { |
case AV_PIX_FMT_RGB24: |
av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space RGB24\n"); |
return altivec_yuv2_rgb24; |
case AV_PIX_FMT_BGR24: |
av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space BGR24\n"); |
return altivec_yuv2_bgr24; |
case AV_PIX_FMT_ARGB: |
av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space ARGB\n"); |
return altivec_yuv2_argb; |
case AV_PIX_FMT_ABGR: |
av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space ABGR\n"); |
return altivec_yuv2_abgr; |
case AV_PIX_FMT_RGBA: |
av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space RGBA\n"); |
return altivec_yuv2_rgba; |
case AV_PIX_FMT_BGRA: |
av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space BGRA\n"); |
return altivec_yuv2_bgra; |
default: return NULL; |
} |
break; |
case AV_PIX_FMT_UYVY422: |
switch (c->dstFormat) { |
case AV_PIX_FMT_BGR32: |
av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space UYVY -> RGB32\n"); |
return altivec_uyvy_rgb32; |
default: return NULL; |
} |
break; |
} |
#endif /* HAVE_ALTIVEC */ |
return NULL; |
} |
av_cold void ff_yuv2rgb_init_tables_ppc(SwsContext *c, |
const int inv_table[4], |
int brightness, |
int contrast, |
int saturation) |
{ |
#if HAVE_ALTIVEC |
union { |
DECLARE_ALIGNED(16, signed short, tmp)[8]; |
vector signed short vec; |
} buf; |
if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC)) |
return; |
buf.tmp[0] = ((0xffffLL) * contrast >> 8) >> 9; // cy |
buf.tmp[1] = -256 * brightness; // oy |
buf.tmp[2] = (inv_table[0] >> 3) * (contrast >> 16) * (saturation >> 16); // crv |
buf.tmp[3] = (inv_table[1] >> 3) * (contrast >> 16) * (saturation >> 16); // cbu |
buf.tmp[4] = -((inv_table[2] >> 1) * (contrast >> 16) * (saturation >> 16)); // cgu |
buf.tmp[5] = -((inv_table[3] >> 1) * (contrast >> 16) * (saturation >> 16)); // cgv |
c->CSHIFT = (vector unsigned short) vec_splat_u16(2); |
c->CY = vec_splat((vector signed short) buf.vec, 0); |
c->OY = vec_splat((vector signed short) buf.vec, 1); |
c->CRV = vec_splat((vector signed short) buf.vec, 2); |
c->CBU = vec_splat((vector signed short) buf.vec, 3); |
c->CGU = vec_splat((vector signed short) buf.vec, 4); |
c->CGV = vec_splat((vector signed short) buf.vec, 5); |
return; |
#endif /* HAVE_ALTIVEC */ |
} |
#if HAVE_ALTIVEC |
static av_always_inline void yuv2packedX_altivec(SwsContext *c, |
const int16_t *lumFilter, |
const int16_t **lumSrc, |
int lumFilterSize, |
const int16_t *chrFilter, |
const int16_t **chrUSrc, |
const int16_t **chrVSrc, |
int chrFilterSize, |
const int16_t **alpSrc, |
uint8_t *dest, |
int dstW, int dstY, |
enum AVPixelFormat target) |
{ |
int i, j; |
vector signed short X, X0, X1, Y0, U0, V0, Y1, U1, V1, U, V; |
vector signed short R0, G0, B0, R1, G1, B1; |
vector unsigned char R, G, B; |
vector unsigned char *out, *nout; |
vector signed short RND = vec_splat_s16(1 << 3); |
vector unsigned short SCL = vec_splat_u16(4); |
DECLARE_ALIGNED(16, unsigned int, scratch)[16]; |
vector signed short *YCoeffs, *CCoeffs; |
YCoeffs = c->vYCoeffsBank + dstY * lumFilterSize; |
CCoeffs = c->vCCoeffsBank + dstY * chrFilterSize; |
out = (vector unsigned char *) dest; |
for (i = 0; i < dstW - 15; i += 16) { |
Y0 = RND; |
Y1 = RND; |
/* extract 16 coeffs from lumSrc */ |
for (j = 0; j < lumFilterSize; j++) { |
X0 = vec_ld(0, &lumSrc[j][i]); |
X1 = vec_ld(16, &lumSrc[j][i]); |
Y0 = vec_mradds(X0, YCoeffs[j], Y0); |
Y1 = vec_mradds(X1, YCoeffs[j], Y1); |
} |
U = RND; |
V = RND; |
/* extract 8 coeffs from U,V */ |
for (j = 0; j < chrFilterSize; j++) { |
X = vec_ld(0, &chrUSrc[j][i / 2]); |
U = vec_mradds(X, CCoeffs[j], U); |
X = vec_ld(0, &chrVSrc[j][i / 2]); |
V = vec_mradds(X, CCoeffs[j], V); |
} |
/* scale and clip signals */ |
Y0 = vec_sra(Y0, SCL); |
Y1 = vec_sra(Y1, SCL); |
U = vec_sra(U, SCL); |
V = vec_sra(V, SCL); |
Y0 = vec_clip_s16(Y0); |
Y1 = vec_clip_s16(Y1); |
U = vec_clip_s16(U); |
V = vec_clip_s16(V); |
/* now we have |
* Y0 = y0 y1 y2 y3 y4 y5 y6 y7 Y1 = y8 y9 y10 y11 y12 y13 y14 y15 |
* U = u0 u1 u2 u3 u4 u5 u6 u7 V = v0 v1 v2 v3 v4 v5 v6 v7 |
* |
* Y0 = y0 y1 y2 y3 y4 y5 y6 y7 Y1 = y8 y9 y10 y11 y12 y13 y14 y15 |
* U0 = u0 u0 u1 u1 u2 u2 u3 u3 U1 = u4 u4 u5 u5 u6 u6 u7 u7 |
* V0 = v0 v0 v1 v1 v2 v2 v3 v3 V1 = v4 v4 v5 v5 v6 v6 v7 v7 |
*/ |
U0 = vec_mergeh(U, U); |
V0 = vec_mergeh(V, V); |
U1 = vec_mergel(U, U); |
V1 = vec_mergel(V, V); |
cvtyuvtoRGB(c, Y0, U0, V0, &R0, &G0, &B0); |
cvtyuvtoRGB(c, Y1, U1, V1, &R1, &G1, &B1); |
R = vec_packclp(R0, R1); |
G = vec_packclp(G0, G1); |
B = vec_packclp(B0, B1); |
switch (target) { |
case AV_PIX_FMT_ABGR: |
out_abgr(R, G, B, out); |
break; |
case AV_PIX_FMT_BGRA: |
out_bgra(R, G, B, out); |
break; |
case AV_PIX_FMT_RGBA: |
out_rgba(R, G, B, out); |
break; |
case AV_PIX_FMT_ARGB: |
out_argb(R, G, B, out); |
break; |
case AV_PIX_FMT_RGB24: |
out_rgb24(R, G, B, out); |
break; |
case AV_PIX_FMT_BGR24: |
out_bgr24(R, G, B, out); |
break; |
default: |
{ |
/* If this is reached, the caller should have called yuv2packedXinC |
* instead. */ |
static int printed_error_message; |
if (!printed_error_message) { |
av_log(c, AV_LOG_ERROR, |
"altivec_yuv2packedX doesn't support %s output\n", |
av_get_pix_fmt_name(c->dstFormat)); |
printed_error_message = 1; |
} |
return; |
} |
} |
} |
if (i < dstW) { |
/* final partial block: render 16 pixels into scratch, then copy |
 * only the bytes that are actually inside the destination line */ |
Y0 = RND; |
Y1 = RND; |
/* extract 16 coeffs from lumSrc */ |
for (j = 0; j < lumFilterSize; j++) { |
X0 = vec_ld(0, &lumSrc[j][i]); |
X1 = vec_ld(16, &lumSrc[j][i]); |
Y0 = vec_mradds(X0, YCoeffs[j], Y0); |
Y1 = vec_mradds(X1, YCoeffs[j], Y1); |
} |
U = RND; |
V = RND; |
/* extract 8 coeffs from U,V */ |
for (j = 0; j < chrFilterSize; j++) { |
X = vec_ld(0, &chrUSrc[j][i / 2]); |
U = vec_mradds(X, CCoeffs[j], U); |
X = vec_ld(0, &chrVSrc[j][i / 2]); |
V = vec_mradds(X, CCoeffs[j], V); |
} |
/* scale and clip signals */ |
Y0 = vec_sra(Y0, SCL); |
Y1 = vec_sra(Y1, SCL); |
U = vec_sra(U, SCL); |
V = vec_sra(V, SCL); |
Y0 = vec_clip_s16(Y0); |
Y1 = vec_clip_s16(Y1); |
U = vec_clip_s16(U); |
V = vec_clip_s16(V); |
/* now we have |
* Y0 = y0 y1 y2 y3 y4 y5 y6 y7 Y1 = y8 y9 y10 y11 y12 y13 y14 y15 |
* U = u0 u1 u2 u3 u4 u5 u6 u7 V = v0 v1 v2 v3 v4 v5 v6 v7 |
* |
* Y0 = y0 y1 y2 y3 y4 y5 y6 y7 Y1 = y8 y9 y10 y11 y12 y13 y14 y15 |
* U0 = u0 u0 u1 u1 u2 u2 u3 u3 U1 = u4 u4 u5 u5 u6 u6 u7 u7 |
* V0 = v0 v0 v1 v1 v2 v2 v3 v3 V1 = v4 v4 v5 v5 v6 v6 v7 v7 |
*/ |
U0 = vec_mergeh(U, U); |
V0 = vec_mergeh(V, V); |
U1 = vec_mergel(U, U); |
V1 = vec_mergel(V, V); |
cvtyuvtoRGB(c, Y0, U0, V0, &R0, &G0, &B0); |
cvtyuvtoRGB(c, Y1, U1, V1, &R1, &G1, &B1); |
R = vec_packclp(R0, R1); |
G = vec_packclp(G0, G1); |
B = vec_packclp(B0, B1); |
nout = (vector unsigned char *) scratch; |
switch (target) { |
case AV_PIX_FMT_ABGR: |
out_abgr(R, G, B, nout); |
break; |
case AV_PIX_FMT_BGRA: |
out_bgra(R, G, B, nout); |
break; |
case AV_PIX_FMT_RGBA: |
out_rgba(R, G, B, nout); |
break; |
case AV_PIX_FMT_ARGB: |
out_argb(R, G, B, nout); |
break; |
case AV_PIX_FMT_RGB24: |
out_rgb24(R, G, B, nout); |
break; |
case AV_PIX_FMT_BGR24: |
out_bgr24(R, G, B, nout); |
break; |
default: |
/* Unreachable, I think. */ |
av_log(c, AV_LOG_ERROR, |
"altivec_yuv2packedX doesn't support %s output\n", |
av_get_pix_fmt_name(c->dstFormat)); |
return; |
} |
/* 4 bytes per pixel for the RGB32-family targets; the 24-bit |
 * targets would need 3-byte addressing here */ |
memcpy(&((uint32_t *) dest)[i], scratch, (dstW - i) * 4); |
} |
} |
#define YUV2PACKEDX_WRAPPER(suffix, pixfmt) \ |
void ff_yuv2 ## suffix ## _X_altivec(SwsContext *c, \ |
const int16_t *lumFilter, \ |
const int16_t **lumSrc, \ |
int lumFilterSize, \ |
const int16_t *chrFilter, \ |
const int16_t **chrUSrc, \ |
const int16_t **chrVSrc, \ |
int chrFilterSize, \ |
const int16_t **alpSrc, \ |
uint8_t *dest, int dstW, int dstY) \ |
{ \ |
yuv2packedX_altivec(c, lumFilter, lumSrc, lumFilterSize, \ |
chrFilter, chrUSrc, chrVSrc, \ |
chrFilterSize, alpSrc, \ |
dest, dstW, dstY, pixfmt); \ |
} |
YUV2PACKEDX_WRAPPER(abgr, AV_PIX_FMT_ABGR); |
YUV2PACKEDX_WRAPPER(bgra, AV_PIX_FMT_BGRA); |
YUV2PACKEDX_WRAPPER(argb, AV_PIX_FMT_ARGB); |
YUV2PACKEDX_WRAPPER(rgba, AV_PIX_FMT_RGBA); |
YUV2PACKEDX_WRAPPER(rgb24, AV_PIX_FMT_RGB24); |
YUV2PACKEDX_WRAPPER(bgr24, AV_PIX_FMT_BGR24); |
#endif /* HAVE_ALTIVEC */ |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/ppc/yuv2rgb_altivec.h |
---|
0,0 → 1,51 |
/* |
* AltiVec-enhanced yuv2yuvX |
* |
* Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org> |
* based on the equivalent C code in swscale.c |
* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
#ifndef SWSCALE_PPC_YUV2RGB_ALTIVEC_H |
#define SWSCALE_PPC_YUV2RGB_ALTIVEC_H |
#include <stdint.h> |
#include "libswscale/swscale_internal.h" |
#define YUV2PACKEDX_HEADER(suffix) \ |
void ff_yuv2 ## suffix ## _X_altivec(SwsContext *c, \ |
const int16_t *lumFilter, \ |
const int16_t **lumSrc, \ |
int lumFilterSize, \ |
const int16_t *chrFilter, \ |
const int16_t **chrUSrc, \ |
const int16_t **chrVSrc, \ |
int chrFilterSize, \ |
const int16_t **alpSrc, \ |
uint8_t *dest, \ |
int dstW, int dstY); |
YUV2PACKEDX_HEADER(abgr); |
YUV2PACKEDX_HEADER(bgra); |
YUV2PACKEDX_HEADER(argb); |
YUV2PACKEDX_HEADER(rgba); |
YUV2PACKEDX_HEADER(rgb24); |
YUV2PACKEDX_HEADER(bgr24); |
#endif /* SWSCALE_PPC_YUV2RGB_ALTIVEC_H */ |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/ppc/yuv2yuv_altivec.c |
---|
0,0 → 1,204 |
/* |
* AltiVec-enhanced yuv-to-yuv conversion routines. |
* |
* Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org> |
* based on the equivalent C code in swscale.c |
* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
#include <inttypes.h> |
#include "config.h" |
#include "libavutil/attributes.h" |
#include "libavutil/cpu.h" |
#include "libswscale/swscale.h" |
#include "libswscale/swscale_internal.h" |
#if HAVE_ALTIVEC |
static int yv12toyuy2_unscaled_altivec(SwsContext *c, const uint8_t *src[], |
int srcStride[], int srcSliceY, |
int srcSliceH, uint8_t *dstParam[], |
int dstStride_a[]) |
{ |
uint8_t *dst = dstParam[0] + dstStride_a[0] * srcSliceY; |
// yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, |
// srcStride[0], srcStride[1], dstStride[0]); |
const uint8_t *ysrc = src[0]; |
const uint8_t *usrc = src[1]; |
const uint8_t *vsrc = src[2]; |
const int width = c->srcW; |
const int height = srcSliceH; |
const int lumStride = srcStride[0]; |
const int chromStride = srcStride[1]; |
const int dstStride = dstStride_a[0]; |
const vector unsigned char yperm = vec_lvsl(0, ysrc); |
const int vertLumPerChroma = 2; |
register unsigned int y; |
/* This code assumes: |
* |
* 1) dst is 16-byte aligned |
* 2) dstStride is a multiple of 16 |
* 3) width is a multiple of 16 |
* 4) lum & chrom stride are multiples of 8 |
*/ |
for (y = 0; y < height; y++) { |
int i; |
for (i = 0; i < width - 31; i += 32) { |
const unsigned int j = i >> 1; |
vector unsigned char v_yA = vec_ld(i, ysrc); |
vector unsigned char v_yB = vec_ld(i + 16, ysrc); |
vector unsigned char v_yC = vec_ld(i + 32, ysrc); |
vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm); |
vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm); |
vector unsigned char v_uA = vec_ld(j, usrc); |
vector unsigned char v_uB = vec_ld(j + 16, usrc); |
vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc)); |
vector unsigned char v_vA = vec_ld(j, vsrc); |
vector unsigned char v_vB = vec_ld(j + 16, vsrc); |
vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc)); |
vector unsigned char v_uv_a = vec_mergeh(v_u, v_v); |
vector unsigned char v_uv_b = vec_mergel(v_u, v_v); |
vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a); |
vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a); |
vector unsigned char v_yuy2_2 = vec_mergeh(v_y2, v_uv_b); |
vector unsigned char v_yuy2_3 = vec_mergel(v_y2, v_uv_b); |
vec_st(v_yuy2_0, (i << 1), dst); |
vec_st(v_yuy2_1, (i << 1) + 16, dst); |
vec_st(v_yuy2_2, (i << 1) + 32, dst); |
vec_st(v_yuy2_3, (i << 1) + 48, dst); |
} |
if (i < width) { |
const unsigned int j = i >> 1; |
vector unsigned char v_y1 = vec_ld(i, ysrc); |
vector unsigned char v_u = vec_ld(j, usrc); |
vector unsigned char v_v = vec_ld(j, vsrc); |
vector unsigned char v_uv_a = vec_mergeh(v_u, v_v); |
vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a); |
vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a); |
vec_st(v_yuy2_0, (i << 1), dst); |
vec_st(v_yuy2_1, (i << 1) + 16, dst); |
} |
if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) { |
usrc += chromStride; |
vsrc += chromStride; |
} |
ysrc += lumStride; |
dst += dstStride; |
} |
return srcSliceH; |
} |
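/* Editorial sketch (not part of the original file): the scalar layout the |
 * merges above implement — two Y samples share one U/V pair, packed in |
 * Y0 U Y1 V order per output pair of pixels: */ |
#if 0 |
static void yv12_to_yuy2_row_model(const uint8_t *y, const uint8_t *u, |
                                   const uint8_t *v, uint8_t *dst, int width) |
{ |
    int i; |
    for (i = 0; i < width / 2; i++) { |
        dst[4 * i + 0] = y[2 * i]; |
        dst[4 * i + 1] = u[i]; |
        dst[4 * i + 2] = y[2 * i + 1]; |
        dst[4 * i + 3] = v[i]; |
    } |
} |
#endif |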
static int yv12touyvy_unscaled_altivec(SwsContext *c, const uint8_t *src[], |
int srcStride[], int srcSliceY, |
int srcSliceH, uint8_t *dstParam[], |
int dstStride_a[]) |
{ |
uint8_t *dst = dstParam[0] + dstStride_a[0] * srcSliceY; |
// yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, |
// srcStride[0], srcStride[1], dstStride[0]); |
const uint8_t *ysrc = src[0]; |
const uint8_t *usrc = src[1]; |
const uint8_t *vsrc = src[2]; |
const int width = c->srcW; |
const int height = srcSliceH; |
const int lumStride = srcStride[0]; |
const int chromStride = srcStride[1]; |
const int dstStride = dstStride_a[0]; |
const int vertLumPerChroma = 2; |
const vector unsigned char yperm = vec_lvsl(0, ysrc); |
register unsigned int y; |
/* This code assumes: |
* |
* 1) dst is 16-byte aligned |
* 2) dstStride is a multiple of 16 |
* 3) width is a multiple of 16 |
* 4) lum & chrom stride are multiples of 8 |
*/ |
for (y = 0; y < height; y++) { |
int i; |
for (i = 0; i < width - 31; i += 32) { |
const unsigned int j = i >> 1; |
vector unsigned char v_yA = vec_ld(i, ysrc); |
vector unsigned char v_yB = vec_ld(i + 16, ysrc); |
vector unsigned char v_yC = vec_ld(i + 32, ysrc); |
vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm); |
vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm); |
vector unsigned char v_uA = vec_ld(j, usrc); |
vector unsigned char v_uB = vec_ld(j + 16, usrc); |
vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc)); |
vector unsigned char v_vA = vec_ld(j, vsrc); |
vector unsigned char v_vB = vec_ld(j + 16, vsrc); |
vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc)); |
vector unsigned char v_uv_a = vec_mergeh(v_u, v_v); |
vector unsigned char v_uv_b = vec_mergel(v_u, v_v); |
vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1); |
vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1); |
vector unsigned char v_uyvy_2 = vec_mergeh(v_uv_b, v_y2); |
vector unsigned char v_uyvy_3 = vec_mergel(v_uv_b, v_y2); |
vec_st(v_uyvy_0, (i << 1), dst); |
vec_st(v_uyvy_1, (i << 1) + 16, dst); |
vec_st(v_uyvy_2, (i << 1) + 32, dst); |
vec_st(v_uyvy_3, (i << 1) + 48, dst); |
} |
if (i < width) { |
const unsigned int j = i >> 1; |
vector unsigned char v_y1 = vec_ld(i, ysrc); |
vector unsigned char v_u = vec_ld(j, usrc); |
vector unsigned char v_v = vec_ld(j, vsrc); |
vector unsigned char v_uv_a = vec_mergeh(v_u, v_v); |
vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1); |
vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1); |
vec_st(v_uyvy_0, (i << 1), dst); |
vec_st(v_uyvy_1, (i << 1) + 16, dst); |
} |
if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) { |
usrc += chromStride; |
vsrc += chromStride; |
} |
ysrc += lumStride; |
dst += dstStride; |
} |
return srcSliceH; |
} |
#endif /* HAVE_ALTIVEC */ |
av_cold void ff_get_unscaled_swscale_ppc(SwsContext *c) |
{ |
#if HAVE_ALTIVEC |
if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC)) |
return; |
if (!(c->srcW & 15) && !(c->flags & SWS_BITEXACT) && |
c->srcFormat == AV_PIX_FMT_YUV420P) { |
enum AVPixelFormat dstFormat = c->dstFormat; |
// unscaled YV12 -> packed YUV, we want speed |
if (dstFormat == AV_PIX_FMT_YUYV422) |
c->swscale = yv12toyuy2_unscaled_altivec; |
else if (dstFormat == AV_PIX_FMT_UYVY422) |
c->swscale = yv12touyvy_unscaled_altivec; |
} |
#endif /* HAVE_ALTIVEC */ |
} |
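/* Editorial sketch (not part of the original file): how a caller reaches |
 * the unscaled fast path above through the public API — same geometry on |
 * both sides, YUV420P in, YUYV422 out, and no SWS_BITEXACT flag. Error |
 * handling is omitted for brevity. */ |
#if 0 |
#include "libswscale/swscale.h" |
static void convert_frame_model(const uint8_t *const src[3], |
                                const int srcStride[3], |
                                uint8_t *const dst[1], |
                                const int dstStride[1], int w, int h) |
{ |
    struct SwsContext *ctx = |
        sws_getContext(w, h, AV_PIX_FMT_YUV420P, |
                       w, h, AV_PIX_FMT_YUYV422, |
                       SWS_FAST_BILINEAR, NULL, NULL, NULL); |
    sws_scale(ctx, src, srcStride, 0, h, dst, dstStride); |
    sws_freeContext(ctx); |
} |
#endif |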
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/rgb2rgb.c |
---|
0,0 → 1,393 |
/* |
* software RGB to RGB converter |
* software PAL8 to RGB converter |
* software YUV to YUV converter |
* software YUV to RGB converter |
* Written by Nick Kurshev. |
* palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at) |
* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
#include <inttypes.h> |
#include "libavutil/attributes.h" |
#include "libavutil/bswap.h" |
#include "config.h" |
#include "rgb2rgb.h" |
#include "swscale.h" |
#include "swscale_internal.h" |
void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size); |
void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int src_size); |
void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int src_size); |
void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size); |
void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int src_size); |
void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int src_size); |
void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int src_size); |
void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int src_size); |
void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size); |
void (*rgb32to16)(const uint8_t *src, uint8_t *dst, int src_size); |
void (*rgb32to15)(const uint8_t *src, uint8_t *dst, int src_size); |
void (*rgb24to16)(const uint8_t *src, uint8_t *dst, int src_size); |
void (*rgb24to15)(const uint8_t *src, uint8_t *dst, int src_size); |
void (*rgb16to32)(const uint8_t *src, uint8_t *dst, int src_size); |
void (*rgb16to15)(const uint8_t *src, uint8_t *dst, int src_size); |
void (*rgb15to16)(const uint8_t *src, uint8_t *dst, int src_size); |
void (*rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size); |
void (*shuffle_bytes_0321)(const uint8_t *src, uint8_t *dst, int src_size); |
void (*shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size); |
void (*yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, |
const uint8_t *vsrc, uint8_t *dst, |
int width, int height, |
int lumStride, int chromStride, int dstStride); |
void (*yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, |
const uint8_t *vsrc, uint8_t *dst, |
int width, int height, |
int lumStride, int chromStride, int dstStride); |
void (*yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, |
const uint8_t *vsrc, uint8_t *dst, |
int width, int height, |
int lumStride, int chromStride, int dstStride); |
void (*yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, |
const uint8_t *vsrc, uint8_t *dst, |
int width, int height, |
int lumStride, int chromStride, int dstStride); |
void (*yuy2toyv12)(const uint8_t *src, uint8_t *ydst, |
uint8_t *udst, uint8_t *vdst, |
int width, int height, |
int lumStride, int chromStride, int srcStride); |
void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst, |
uint8_t *udst, uint8_t *vdst, |
int width, int height, |
int lumStride, int chromStride, int srcStride, |
int32_t *rgb2yuv); |
void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height, |
int srcStride, int dstStride); |
void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst, |
int width, int height, int src1Stride, |
int src2Stride, int dstStride); |
void (*deinterleaveBytes)(const uint8_t *src, uint8_t *dst1, uint8_t *dst2, |
int width, int height, int srcStride, |
int dst1Stride, int dst2Stride); |
void (*vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, |
uint8_t *dst1, uint8_t *dst2, |
int width, int height, |
int srcStride1, int srcStride2, |
int dstStride1, int dstStride2); |
void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, |
const uint8_t *src3, uint8_t *dst, |
int width, int height, |
int srcStride1, int srcStride2, |
int srcStride3, int dstStride); |
void (*uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, |
const uint8_t *src, int width, int height, |
int lumStride, int chromStride, int srcStride); |
void (*uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, |
const uint8_t *src, int width, int height, |
int lumStride, int chromStride, int srcStride); |
void (*yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, |
const uint8_t *src, int width, int height, |
int lumStride, int chromStride, int srcStride); |
void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, |
const uint8_t *src, int width, int height, |
int lumStride, int chromStride, int srcStride); |
#define BY ((int)( 0.098 * (1 << RGB2YUV_SHIFT) + 0.5)) |
#define BV ((int)(-0.071 * (1 << RGB2YUV_SHIFT) + 0.5)) |
#define BU ((int)( 0.439 * (1 << RGB2YUV_SHIFT) + 0.5)) |
#define GY ((int)( 0.504 * (1 << RGB2YUV_SHIFT) + 0.5)) |
#define GV ((int)(-0.368 * (1 << RGB2YUV_SHIFT) + 0.5)) |
#define GU ((int)(-0.291 * (1 << RGB2YUV_SHIFT) + 0.5)) |
#define RY ((int)( 0.257 * (1 << RGB2YUV_SHIFT) + 0.5)) |
#define RV ((int)( 0.439 * (1 << RGB2YUV_SHIFT) + 0.5)) |
#define RU ((int)(-0.148 * (1 << RGB2YUV_SHIFT) + 0.5)) |
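/* Editorial sketch (not part of the original file): these are BT.601 |
 * studio-range coefficients in fixed point (RGB2YUV_SHIFT is defined as 15 |
 * elsewhere in swscale — an assumption here). Scalar model of the luma |
 * computation they feed, Y = 0.257 R + 0.504 G + 0.098 B + 16: */ |
#if 0 |
static int rgb_to_y_model(int r, int g, int b) |
{ |
    return ((RY * r + GY * g + BY * b) >> RGB2YUV_SHIFT) + 16; |
} |
#endif |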
//plain C versions |
#include "rgb2rgb_template.c" |
/* |
* RGB15->RGB16 original by Strepto/Astral |
* ported to gcc & bugfixed : A'rpi |
* MMXEXT, 3DNOW optimization by Nick Kurshev |
* 32-bit C version and the and&add trick by Michael Niedermayer |
*/ |
av_cold void sws_rgb2rgb_init(void) |
{ |
rgb2rgb_init_c(); |
if (ARCH_X86) |
rgb2rgb_init_x86(); |
} |
void rgb32to24(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
int i, num_pixels = src_size >> 2; |
for (i = 0; i < num_pixels; i++) { |
#if HAVE_BIGENDIAN |
/* RGB32 (= A,B,G,R) -> BGR24 (= B,G,R) */ |
dst[3 * i + 0] = src[4 * i + 1]; |
dst[3 * i + 1] = src[4 * i + 2]; |
dst[3 * i + 2] = src[4 * i + 3]; |
#else |
dst[3 * i + 0] = src[4 * i + 2]; |
dst[3 * i + 1] = src[4 * i + 1]; |
dst[3 * i + 2] = src[4 * i + 0]; |
#endif |
} |
} |
void rgb24to32(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
int i; |
for (i = 0; 3 * i < src_size; i++) { |
#if HAVE_BIGENDIAN |
/* RGB24 (= R, G, B) -> BGR32 (= A, R, G, B) */ |
dst[4 * i + 0] = 255; |
dst[4 * i + 1] = src[3 * i + 0]; |
dst[4 * i + 2] = src[3 * i + 1]; |
dst[4 * i + 3] = src[3 * i + 2]; |
#else |
dst[4 * i + 0] = src[3 * i + 2]; |
dst[4 * i + 1] = src[3 * i + 1]; |
dst[4 * i + 2] = src[3 * i + 0]; |
dst[4 * i + 3] = 255; |
#endif |
} |
} |
void rgb16tobgr32(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
uint8_t *d = dst; |
const uint16_t *s = (const uint16_t *)src; |
const uint16_t *end = s + src_size / 2; |
while (s < end) { |
register uint16_t bgr = *s++; |
#if HAVE_BIGENDIAN |
*d++ = 255; |
*d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2); |
*d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9); |
*d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); |
#else |
*d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); |
*d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9); |
*d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2); |
*d++ = 255; |
#endif |
} |
} |
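/* Editorial sketch (not part of the original file): the (x << 3) | (x >> 2) |
 * pattern above widens a 5-bit channel to 8 bits by replicating the top |
 * bits into the low bits, so 0 maps to 0 and 31 maps to 255; the 6-bit |
 * green field uses the same trick with << 2 and >> 4. */ |
#if 0 |
static uint8_t expand5to8_model(uint8_t x5) /* x5 in [0, 31] */ |
{ |
    return (uint8_t)((x5 << 3) | (x5 >> 2)); |
} |
#endif |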
void rgb12to15(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
uint16_t rgb, r, g, b; |
uint16_t *d = (uint16_t *)dst; |
const uint16_t *s = (const uint16_t *)src; |
const uint16_t *end = s + src_size / 2; |
while (s < end) { |
rgb = *s++; |
r = rgb & 0xF00; |
g = rgb & 0x0F0; |
b = rgb & 0x00F; |
r = (r << 3) | ((r & 0x800) >> 1); |
g = (g << 2) | ((g & 0x080) >> 2); |
b = (b << 1) | ( b >> 3); |
*d++ = r | g | b; |
} |
} |
void rgb16to24(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
uint8_t *d = dst; |
const uint16_t *s = (const uint16_t *)src; |
const uint16_t *end = s + src_size / 2; |
while (s < end) { |
register uint16_t bgr = *s++; |
*d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); |
*d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9); |
*d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2); |
} |
} |
void rgb16tobgr16(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
int i, num_pixels = src_size >> 1; |
for (i = 0; i < num_pixels; i++) { |
unsigned rgb = ((const uint16_t *)src)[i]; |
((uint16_t *)dst)[i] = (rgb >> 11) | (rgb & 0x7E0) | (rgb << 11); |
} |
} |
void rgb16tobgr15(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
int i, num_pixels = src_size >> 1; |
for (i = 0; i < num_pixels; i++) { |
unsigned rgb = ((const uint16_t *)src)[i]; |
((uint16_t *)dst)[i] = (rgb >> 11) | ((rgb & 0x7C0) >> 1) | ((rgb & 0x1F) << 10); |
} |
} |
void rgb15tobgr32(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
uint8_t *d = dst; |
const uint16_t *s = (const uint16_t *)src; |
const uint16_t *end = s + src_size / 2; |
while (s < end) { |
register uint16_t bgr = *s++; |
#if HAVE_BIGENDIAN |
*d++ = 255; |
*d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2); |
*d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7); |
*d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); |
#else |
*d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); |
*d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7); |
*d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2); |
*d++ = 255; |
#endif |
} |
} |
void rgb15to24(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
uint8_t *d = dst; |
const uint16_t *s = (const uint16_t *)src; |
const uint16_t *end = s + src_size / 2; |
while (s < end) { |
register uint16_t bgr = *s++; |
*d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); |
*d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7); |
*d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2); |
} |
} |
void rgb15tobgr16(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
int i, num_pixels = src_size >> 1; |
for (i = 0; i < num_pixels; i++) { |
unsigned rgb = ((const uint16_t *)src)[i]; |
((uint16_t *)dst)[i] = ((rgb & 0x7C00) >> 10) | ((rgb & 0x3E0) << 1) | (rgb << 11); |
} |
} |
void rgb15tobgr15(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
int i, num_pixels = src_size >> 1; |
for (i = 0; i < num_pixels; i++) { |
unsigned rgb = ((const uint16_t *)src)[i]; |
unsigned br = rgb & 0x7C1F; |
((uint16_t *)dst)[i] = (br >> 10) | (rgb & 0x3E0) | (br << 10); |
} |
} |
void rgb12tobgr12(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
uint16_t *d = (uint16_t *)dst; |
uint16_t *s = (uint16_t *)src; |
int i, num_pixels = src_size >> 1; |
for (i = 0; i < num_pixels; i++) { |
unsigned rgb = s[i]; |
d[i] = (rgb << 8 | rgb & 0xF0 | rgb >> 8) & 0xFFF; |
} |
} |
#define DEFINE_SHUFFLE_BYTES(a, b, c, d) \ |
void shuffle_bytes_ ## a ## b ## c ## d(const uint8_t *src, \ |
uint8_t *dst, int src_size) \ |
{ \ |
int i; \ |
\ |
for (i = 0; i < src_size; i += 4) { \ |
dst[i + 0] = src[i + a]; \ |
dst[i + 1] = src[i + b]; \ |
dst[i + 2] = src[i + c]; \ |
dst[i + 3] = src[i + d]; \ |
} \ |
} |
DEFINE_SHUFFLE_BYTES(1, 2, 3, 0) |
DEFINE_SHUFFLE_BYTES(3, 0, 1, 2) |
DEFINE_SHUFFLE_BYTES(3, 2, 1, 0) |
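/* Editor's note: each instantiation permutes the four bytes of every |
 * 32-bit pixel by the template indices; shuffle_bytes_3210, for example, |
 * reverses the byte order. Illustrative sketch, not original code: */ |
#if 0 |
static void shuffle_example(void) |
{ |
    static const uint8_t in[8] = { 'R', 'G', 'B', 'A', 'R', 'G', 'B', 'A' }; |
    uint8_t out[8]; |
    shuffle_bytes_3210(in, out, sizeof(in)); /* out = "ABGRABGR" */ |
} |
#endif |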
#define DEFINE_RGB48TOBGR48(need_bswap, swap) \ |
void rgb48tobgr48_ ## need_bswap(const uint8_t *src, \ |
uint8_t *dst, int src_size) \ |
{ \ |
uint16_t *d = (uint16_t *)dst; \ |
uint16_t *s = (uint16_t *)src; \ |
int i, num_pixels = src_size >> 1; \ |
\ |
for (i = 0; i < num_pixels; i += 3) { \ |
d[i ] = swap ? av_bswap16(s[i + 2]) : s[i + 2]; \ |
d[i + 1] = swap ? av_bswap16(s[i + 1]) : s[i + 1]; \ |
d[i + 2] = swap ? av_bswap16(s[i ]) : s[i ]; \ |
} \ |
} |
DEFINE_RGB48TOBGR48(nobswap, 0) |
DEFINE_RGB48TOBGR48(bswap, 1) |
#define DEFINE_RGB64TOBGR48(need_bswap, swap) \ |
void rgb64tobgr48_ ## need_bswap(const uint8_t *src, \ |
uint8_t *dst, int src_size) \ |
{ \ |
uint16_t *d = (uint16_t *)dst; \ |
uint16_t *s = (uint16_t *)src; \ |
int i, num_pixels = src_size >> 3; \ |
\ |
for (i = 0; i < num_pixels; i++) { \ |
d[3 * i ] = swap ? av_bswap16(s[4 * i + 2]) : s[4 * i + 2]; \ |
d[3 * i + 1] = swap ? av_bswap16(s[4 * i + 1]) : s[4 * i + 1]; \ |
d[3 * i + 2] = swap ? av_bswap16(s[4 * i ]) : s[4 * i ]; \ |
} \ |
} |
DEFINE_RGB64TOBGR48(nobswap, 0) |
DEFINE_RGB64TOBGR48(bswap, 1) |
#define DEFINE_RGB64TO48(need_bswap, swap) \ |
void rgb64to48_ ## need_bswap(const uint8_t *src, \ |
uint8_t *dst, int src_size) \ |
{ \ |
uint16_t *d = (uint16_t *)dst; \ |
uint16_t *s = (uint16_t *)src; \ |
int i, num_pixels = src_size >> 3; \ |
\ |
for (i = 0; i < num_pixels; i++) { \ |
d[3 * i ] = swap ? av_bswap16(s[4 * i ]) : s[4 * i ]; \ |
d[3 * i + 1] = swap ? av_bswap16(s[4 * i + 1]) : s[4 * i + 1]; \ |
d[3 * i + 2] = swap ? av_bswap16(s[4 * i + 2]) : s[4 * i + 2]; \ |
} \ |
} |
DEFINE_RGB64TO48(nobswap, 0) |
DEFINE_RGB64TO48(bswap, 1) |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/rgb2rgb.h |
---|
0,0 → 1,171 |
/* |
* software RGB to RGB converter |
 * plus a software PAL8 to RGB converter |
 * software YUV to YUV converter |
 * software YUV to RGB converter |
* Written by Nick Kurshev. |
* YUV & runtime CPU stuff by Michael (michaelni@gmx.at) |
* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
#ifndef SWSCALE_RGB2RGB_H |
#define SWSCALE_RGB2RGB_H |
#include <inttypes.h> |
#include "libavutil/avutil.h" |
#include "swscale.h" |
/* A full collection of RGB to RGB(BGR) converters */ |
extern void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size); |
extern void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int src_size); |
extern void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int src_size); |
extern void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size); |
extern void (*rgb32to16)(const uint8_t *src, uint8_t *dst, int src_size); |
extern void (*rgb32to15)(const uint8_t *src, uint8_t *dst, int src_size); |
extern void (*rgb15to16)(const uint8_t *src, uint8_t *dst, int src_size); |
extern void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size); |
extern void (*rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size); |
extern void (*rgb16to15)(const uint8_t *src, uint8_t *dst, int src_size); |
extern void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int src_size); |
extern void (*rgb16to32)(const uint8_t *src, uint8_t *dst, int src_size); |
extern void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int src_size); |
extern void (*rgb24to16)(const uint8_t *src, uint8_t *dst, int src_size); |
extern void (*rgb24to15)(const uint8_t *src, uint8_t *dst, int src_size); |
extern void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int src_size); |
extern void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int src_size); |
extern void (*shuffle_bytes_0321)(const uint8_t *src, uint8_t *dst, int src_size); |
extern void (*shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size); |
void rgb64tobgr48_nobswap(const uint8_t *src, uint8_t *dst, int src_size); |
void rgb64tobgr48_bswap(const uint8_t *src, uint8_t *dst, int src_size); |
void rgb48tobgr48_nobswap(const uint8_t *src, uint8_t *dst, int src_size); |
void rgb48tobgr48_bswap(const uint8_t *src, uint8_t *dst, int src_size); |
void rgb64to48_nobswap(const uint8_t *src, uint8_t *dst, int src_size); |
void rgb64to48_bswap(const uint8_t *src, uint8_t *dst, int src_size); |
void rgb24to32(const uint8_t *src, uint8_t *dst, int src_size); |
void rgb32to24(const uint8_t *src, uint8_t *dst, int src_size); |
void rgb16tobgr32(const uint8_t *src, uint8_t *dst, int src_size); |
void rgb16to24(const uint8_t *src, uint8_t *dst, int src_size); |
void rgb16tobgr16(const uint8_t *src, uint8_t *dst, int src_size); |
void rgb16tobgr15(const uint8_t *src, uint8_t *dst, int src_size); |
void rgb15tobgr32(const uint8_t *src, uint8_t *dst, int src_size); |
void rgb15to24(const uint8_t *src, uint8_t *dst, int src_size); |
void rgb15tobgr16(const uint8_t *src, uint8_t *dst, int src_size); |
void rgb15tobgr15(const uint8_t *src, uint8_t *dst, int src_size); |
void rgb12tobgr12(const uint8_t *src, uint8_t *dst, int src_size); |
void rgb12to15(const uint8_t *src, uint8_t *dst, int src_size); |
void shuffle_bytes_1230(const uint8_t *src, uint8_t *dst, int src_size); |
void shuffle_bytes_3012(const uint8_t *src, uint8_t *dst, int src_size); |
void shuffle_bytes_3210(const uint8_t *src, uint8_t *dst, int src_size); |
void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, |
uint8_t *vdst, int width, int height, int lumStride, |
int chromStride, int srcStride, int32_t *rgb2yuv); |
/** |
* Height should be a multiple of 2 and width should be a multiple of 16. |
* (If this is a problem for anyone then tell me, and I will fix it.) |
*/ |
extern void (*yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, |
int width, int height, |
int lumStride, int chromStride, int dstStride); |
/** |
* Width should be a multiple of 16. |
*/ |
extern void (*yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, |
int width, int height, |
int lumStride, int chromStride, int dstStride); |
/** |
* Height should be a multiple of 2 and width should be a multiple of 16. |
* (If this is a problem for anyone then tell me, and I will fix it.) |
*/ |
extern void (*yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, |
int width, int height, |
int lumStride, int chromStride, int srcStride); |
/** |
* Height should be a multiple of 2 and width should be a multiple of 16. |
* (If this is a problem for anyone then tell me, and I will fix it.) |
*/ |
extern void (*yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, |
int width, int height, |
int lumStride, int chromStride, int dstStride); |
/** |
* Width should be a multiple of 16. |
*/ |
extern void (*yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, |
int width, int height, |
int lumStride, int chromStride, int dstStride); |
/** |
* Height should be a multiple of 2 and width should be a multiple of 2. |
* (If this is a problem for anyone then tell me, and I will fix it.) |
* Chrominance data is only taken from every second line, others are ignored. |
* FIXME: Write high quality version. |
*/ |
extern void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, |
int width, int height, |
int lumStride, int chromStride, int srcStride, |
int32_t *rgb2yuv); |
extern void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height, |
int srcStride, int dstStride); |
extern void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst, |
int width, int height, int src1Stride, |
int src2Stride, int dstStride); |
extern void (*deinterleaveBytes)(const uint8_t *src, uint8_t *dst1, uint8_t *dst2, |
int width, int height, int srcStride, |
int dst1Stride, int dst2Stride); |
extern void (*vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, |
uint8_t *dst1, uint8_t *dst2, |
int width, int height, |
int srcStride1, int srcStride2, |
int dstStride1, int dstStride2); |
extern void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, |
uint8_t *dst, |
int width, int height, |
int srcStride1, int srcStride2, |
int srcStride3, int dstStride); |
extern void (*uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, |
int width, int height, |
int lumStride, int chromStride, int srcStride); |
extern void (*uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, |
int width, int height, |
int lumStride, int chromStride, int srcStride); |
extern void (*yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, |
int width, int height, |
int lumStride, int chromStride, int srcStride); |
extern void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, |
int width, int height, |
int lumStride, int chromStride, int srcStride); |
void sws_rgb2rgb_init(void); |
void rgb2rgb_init_x86(void); |
#endif /* SWSCALE_RGB2RGB_H */ |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/rgb2rgb_template.c |
---|
0,0 → 1,971 |
/* |
* software RGB to RGB converter |
 * plus a software PAL8 to RGB converter |
* software YUV to YUV converter |
* software YUV to RGB converter |
* Written by Nick Kurshev. |
* palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at) |
 * lots of big-endian byte-order fixes by Alex Beregszaszi |
* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
#include <stddef.h> |
#include "libavutil/attributes.h" |
static inline void rgb24tobgr32_c(const uint8_t *src, uint8_t *dst, |
int src_size) |
{ |
uint8_t *dest = dst; |
const uint8_t *s = src; |
const uint8_t *end = s + src_size; |
while (s < end) { |
#if HAVE_BIGENDIAN |
/* RGB24 (= R, G, B) -> RGB32 (= A, B, G, R) */ |
*dest++ = 255; |
*dest++ = s[2]; |
*dest++ = s[1]; |
*dest++ = s[0]; |
s += 3; |
#else |
*dest++ = *s++; |
*dest++ = *s++; |
*dest++ = *s++; |
*dest++ = 255; |
#endif |
} |
} |
static inline void rgb32tobgr24_c(const uint8_t *src, uint8_t *dst, |
int src_size) |
{ |
uint8_t *dest = dst; |
const uint8_t *s = src; |
const uint8_t *end = s + src_size; |
while (s < end) { |
#if HAVE_BIGENDIAN |
/* RGB32 (= A, B, G, R) -> RGB24 (= R, G, B) */ |
s++; |
dest[2] = *s++; |
dest[1] = *s++; |
dest[0] = *s++; |
dest += 3; |
#else |
*dest++ = *s++; |
*dest++ = *s++; |
*dest++ = *s++; |
s++; |
#endif |
} |
} |
/* |
* original by Strepto/Astral |
* ported to gcc & bugfixed: A'rpi |
* MMXEXT, 3DNOW optimization by Nick Kurshev |
 * 32-bit C version, and the and&add trick by Michael Niedermayer |
*/ |
static inline void rgb15to16_c(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
register uint8_t *d = dst; |
register const uint8_t *s = src; |
register const uint8_t *end = s + src_size; |
const uint8_t *mm_end = end - 3; |
while (s < mm_end) { |
register unsigned x = *((const uint32_t *)s); |
*((uint32_t *)d) = (x & 0x7FFF7FFF) + (x & 0x7FE07FE0); |
d += 4; |
s += 4; |
} |
if (s < end) { |
register unsigned short x = *((const uint16_t *)s); |
*((uint16_t *)d) = (x & 0x7FFF) + (x & 0x7FE0); |
} |
} |
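/* |
 * Editor's note, the and&add trick worked on one RGB555 pixel: |
 *   (0x7FFF & 0x7FFF) + (0x7FFF & 0x7FE0) = 0x7FFF + 0x7FE0 = 0xFFDF |
 * Adding the 0x7FE0 portion a second time doubles bits 5..14, shifting |
 * R and G one bit left into their RGB565 positions while B stays put. |
 * The per-pixel sum never reaches 1 << 16, so two pixels share one |
 * 32-bit word without a carry crossing the pixel boundary. |
 */ |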
static inline void rgb16to15_c(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
register uint8_t *d = dst; |
register const uint8_t *s = src; |
register const uint8_t *end = s + src_size; |
const uint8_t *mm_end = end - 3; |
while (s < mm_end) { |
register uint32_t x = *((const uint32_t *)s); |
*((uint32_t *)d) = ((x >> 1) & 0x7FE07FE0) | (x & 0x001F001F); |
s += 4; |
d += 4; |
} |
if (s < end) { |
register uint16_t x = *((const uint16_t *)s); |
*((uint16_t *)d) = ((x >> 1) & 0x7FE0) | (x & 0x001F); |
} |
} |
static inline void rgb32to16_c(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
uint16_t *d = (uint16_t *)dst; |
const uint8_t *s = src; |
const uint8_t *end = s + src_size; |
while (s < end) { |
register int rgb = *(const uint32_t *)s; |
s += 4; |
*d++ = ((rgb & 0xFF) >> 3) + |
((rgb & 0xFC00) >> 5) + |
((rgb & 0xF80000) >> 8); |
} |
} |
static inline void rgb32tobgr16_c(const uint8_t *src, uint8_t *dst, |
int src_size) |
{ |
uint16_t *d = (uint16_t *)dst; |
const uint8_t *s = src; |
const uint8_t *end = s + src_size; |
while (s < end) { |
register int rgb = *(const uint32_t *)s; |
s += 4; |
*d++ = ((rgb & 0xF8) << 8) + |
((rgb & 0xFC00) >> 5) + |
((rgb & 0xF80000) >> 19); |
} |
} |
static inline void rgb32to15_c(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
uint16_t *d = (uint16_t *)dst; |
const uint8_t *s = src; |
const uint8_t *end = s + src_size; |
while (s < end) { |
register int rgb = *(const uint32_t *)s; |
s += 4; |
*d++ = ((rgb & 0xFF) >> 3) + |
((rgb & 0xF800) >> 6) + |
((rgb & 0xF80000) >> 9); |
} |
} |
static inline void rgb32tobgr15_c(const uint8_t *src, uint8_t *dst, |
int src_size) |
{ |
uint16_t *d = (uint16_t *)dst; |
const uint8_t *s = src; |
const uint8_t *end = s + src_size; |
while (s < end) { |
register int rgb = *(const uint32_t *)s; |
s += 4; |
*d++ = ((rgb & 0xF8) << 7) + |
((rgb & 0xF800) >> 6) + |
((rgb & 0xF80000) >> 19); |
} |
} |
static inline void rgb24tobgr16_c(const uint8_t *src, uint8_t *dst, |
int src_size) |
{ |
uint16_t *d = (uint16_t *)dst; |
const uint8_t *s = src; |
const uint8_t *end = s + src_size; |
while (s < end) { |
const int b = *s++; |
const int g = *s++; |
const int r = *s++; |
*d++ = (b >> 3) | ((g & 0xFC) << 3) | ((r & 0xF8) << 8); |
} |
} |
static inline void rgb24to16_c(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
uint16_t *d = (uint16_t *)dst; |
const uint8_t *s = src; |
const uint8_t *end = s + src_size; |
while (s < end) { |
const int r = *s++; |
const int g = *s++; |
const int b = *s++; |
*d++ = (b >> 3) | ((g & 0xFC) << 3) | ((r & 0xF8) << 8); |
} |
} |
static inline void rgb24tobgr15_c(const uint8_t *src, uint8_t *dst, |
int src_size) |
{ |
uint16_t *d = (uint16_t *)dst; |
const uint8_t *s = src; |
const uint8_t *end = s + src_size; |
while (s < end) { |
const int b = *s++; |
const int g = *s++; |
const int r = *s++; |
*d++ = (b >> 3) | ((g & 0xF8) << 2) | ((r & 0xF8) << 7); |
} |
} |
static inline void rgb24to15_c(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
uint16_t *d = (uint16_t *)dst; |
const uint8_t *s = src; |
const uint8_t *end = s + src_size; |
while (s < end) { |
const int r = *s++; |
const int g = *s++; |
const int b = *s++; |
*d++ = (b >> 3) | ((g & 0xF8) << 2) | ((r & 0xF8) << 7); |
} |
} |
static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst, |
int src_size) |
{ |
uint8_t *d = dst; |
const uint16_t *s = (const uint16_t *)src; |
const uint16_t *end = s + src_size / 2; |
while (s < end) { |
register uint16_t bgr = *s++; |
*d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2); |
*d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7); |
*d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); |
} |
} |
static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst, |
int src_size) |
{ |
uint8_t *d = (uint8_t *)dst; |
const uint16_t *s = (const uint16_t *)src; |
const uint16_t *end = s + src_size / 2; |
while (s < end) { |
register uint16_t bgr = *s++; |
*d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2); |
*d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9); |
*d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); |
} |
} |
static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
uint8_t *d = dst; |
const uint16_t *s = (const uint16_t *)src; |
const uint16_t *end = s + src_size / 2; |
while (s < end) { |
register uint16_t bgr = *s++; |
#if HAVE_BIGENDIAN |
*d++ = 255; |
*d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); |
*d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7); |
*d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2); |
#else |
*d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2); |
*d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7); |
*d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); |
*d++ = 255; |
#endif |
} |
} |
static inline void rgb16to32_c(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
uint8_t *d = dst; |
const uint16_t *s = (const uint16_t *)src; |
const uint16_t *end = s + src_size / 2; |
while (s < end) { |
register uint16_t bgr = *s++; |
#if HAVE_BIGENDIAN |
*d++ = 255; |
*d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); |
*d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9); |
*d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2); |
#else |
*d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2); |
*d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9); |
*d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); |
*d++ = 255; |
#endif |
} |
} |
static inline void shuffle_bytes_2103_c(const uint8_t *src, uint8_t *dst, |
int src_size) |
{ |
int idx = 15 - src_size; |
const uint8_t *s = src - idx; |
uint8_t *d = dst - idx; |
for (; idx < 15; idx += 4) { |
register unsigned v = *(const uint32_t *)&s[idx], g = v & 0xff00ff00; |
v &= 0xff00ff; |
*(uint32_t *)&d[idx] = (v >> 16) + g + (v << 16); |
} |
} |
static inline void shuffle_bytes_0321_c(const uint8_t *src, uint8_t *dst, |
int src_size) |
{ |
int idx = 15 - src_size; |
const uint8_t *s = src - idx; |
uint8_t *d = dst - idx; |
for (; idx < 15; idx += 4) { |
register unsigned v = *(const uint32_t *)&s[idx], g = v & 0x00ff00ff; |
v &= 0xff00ff00; |
*(uint32_t *)&d[idx] = (v >> 16) + g + (v << 16); |
} |
} |
static inline void rgb24tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
unsigned i; |
for (i = 0; i < src_size; i += 3) { |
register uint8_t x = src[i + 2]; |
dst[i + 1] = src[i + 1]; |
dst[i + 2] = src[i + 0]; |
dst[i + 0] = x; |
} |
} |
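/** |
 * Pack planar YUV into interleaved YUYV. vertLumPerChroma is the number |
 * of luma lines sharing one chroma line: 2 for 4:2:0 input (yv12toyuy2_c |
 * below), 1 for 4:2:2 input (yuv422ptoyuy2_c). |
 */ |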
static inline void yuvPlanartoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc, |
const uint8_t *vsrc, uint8_t *dst, |
int width, int height, |
int lumStride, int chromStride, |
int dstStride, int vertLumPerChroma) |
{ |
int y, i; |
const int chromWidth = width >> 1; |
for (y = 0; y < height; y++) { |
#if HAVE_FAST_64BIT |
uint64_t *ldst = (uint64_t *)dst; |
const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; |
for (i = 0; i < chromWidth; i += 2) { |
uint64_t k = yc[0] + (uc[0] << 8) + |
(yc[1] << 16) + ((unsigned) vc[0] << 24); |
uint64_t l = yc[2] + (uc[1] << 8) + |
(yc[3] << 16) + ((unsigned) vc[1] << 24); |
*ldst++ = k + (l << 32); |
yc += 4; |
uc += 2; |
vc += 2; |
} |
#else |
int *idst = (int32_t *)dst; |
const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; |
for (i = 0; i < chromWidth; i++) { |
#if HAVE_BIGENDIAN |
*idst++ = (yc[0] << 24) + (uc[0] << 16) + |
(yc[1] << 8) + (vc[0] << 0); |
#else |
*idst++ = yc[0] + (uc[0] << 8) + |
(yc[1] << 16) + (vc[0] << 24); |
#endif |
yc += 2; |
uc++; |
vc++; |
} |
#endif |
if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) { |
usrc += chromStride; |
vsrc += chromStride; |
} |
ysrc += lumStride; |
dst += dstStride; |
} |
} |
/** |
* Height should be a multiple of 2 and width should be a multiple of 16. |
* (If this is a problem for anyone then tell me, and I will fix it.) |
*/ |
static inline void yv12toyuy2_c(const uint8_t *ysrc, const uint8_t *usrc, |
const uint8_t *vsrc, uint8_t *dst, |
int width, int height, int lumStride, |
int chromStride, int dstStride) |
{ |
//FIXME interpolate chroma |
yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst, width, height, lumStride, |
chromStride, dstStride, 2); |
} |
static inline void yuvPlanartouyvy_c(const uint8_t *ysrc, const uint8_t *usrc, |
const uint8_t *vsrc, uint8_t *dst, |
int width, int height, |
int lumStride, int chromStride, |
int dstStride, int vertLumPerChroma) |
{ |
int y, i; |
const int chromWidth = width >> 1; |
for (y = 0; y < height; y++) { |
#if HAVE_FAST_64BIT |
uint64_t *ldst = (uint64_t *)dst; |
const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; |
for (i = 0; i < chromWidth; i += 2) { |
uint64_t k = uc[0] + (yc[0] << 8) + |
(vc[0] << 16) + ((unsigned) yc[1] << 24); |
uint64_t l = uc[1] + (yc[2] << 8) + |
(vc[1] << 16) + ((unsigned) yc[3] << 24); |
*ldst++ = k + (l << 32); |
yc += 4; |
uc += 2; |
vc += 2; |
} |
#else |
int *idst = (int32_t *)dst; |
const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; |
for (i = 0; i < chromWidth; i++) { |
#if HAVE_BIGENDIAN |
*idst++ = (uc[0] << 24) + (yc[0] << 16) + |
(vc[0] << 8) + (yc[1] << 0); |
#else |
*idst++ = uc[0] + (yc[0] << 8) + |
(vc[0] << 16) + (yc[1] << 24); |
#endif |
yc += 2; |
uc++; |
vc++; |
} |
#endif |
if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) { |
usrc += chromStride; |
vsrc += chromStride; |
} |
ysrc += lumStride; |
dst += dstStride; |
} |
} |
/** |
 * Height should be a multiple of 2 and width should be a multiple of 16. |
* (If this is a problem for anyone then tell me, and I will fix it.) |
*/ |
static inline void yv12touyvy_c(const uint8_t *ysrc, const uint8_t *usrc, |
const uint8_t *vsrc, uint8_t *dst, |
int width, int height, int lumStride, |
int chromStride, int dstStride) |
{ |
//FIXME interpolate chroma |
yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst, width, height, lumStride, |
chromStride, dstStride, 2); |
} |
/** |
* Width should be a multiple of 16. |
*/ |
static inline void yuv422ptouyvy_c(const uint8_t *ysrc, const uint8_t *usrc, |
const uint8_t *vsrc, uint8_t *dst, |
int width, int height, int lumStride, |
int chromStride, int dstStride) |
{ |
yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst, width, height, lumStride, |
chromStride, dstStride, 1); |
} |
/** |
* Width should be a multiple of 16. |
*/ |
static inline void yuv422ptoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc, |
const uint8_t *vsrc, uint8_t *dst, |
int width, int height, int lumStride, |
int chromStride, int dstStride) |
{ |
yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst, width, height, lumStride, |
chromStride, dstStride, 1); |
} |
/** |
* Height should be a multiple of 2 and width should be a multiple of 16. |
* (If this is a problem for anyone then tell me, and I will fix it.) |
*/ |
static inline void yuy2toyv12_c(const uint8_t *src, uint8_t *ydst, |
uint8_t *udst, uint8_t *vdst, |
int width, int height, int lumStride, |
int chromStride, int srcStride) |
{ |
int y; |
const int chromWidth = width >> 1; |
for (y = 0; y < height; y += 2) { |
int i; |
for (i = 0; i < chromWidth; i++) { |
ydst[2 * i + 0] = src[4 * i + 0]; |
udst[i] = src[4 * i + 1]; |
ydst[2 * i + 1] = src[4 * i + 2]; |
vdst[i] = src[4 * i + 3]; |
} |
ydst += lumStride; |
src += srcStride; |
for (i = 0; i < chromWidth; i++) { |
ydst[2 * i + 0] = src[4 * i + 0]; |
ydst[2 * i + 1] = src[4 * i + 2]; |
} |
udst += chromStride; |
vdst += chromStride; |
ydst += lumStride; |
src += srcStride; |
} |
} |
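/** |
 * Upscale one plane to twice the width and height using fixed 3:1 |
 * bilinear weights; the border rows and columns are replicated. |
 */ |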
static inline void planar2x_c(const uint8_t *src, uint8_t *dst, int srcWidth, |
int srcHeight, int srcStride, int dstStride) |
{ |
int x, y; |
dst[0] = src[0]; |
// first line |
for (x = 0; x < srcWidth - 1; x++) { |
dst[2 * x + 1] = (3 * src[x] + src[x + 1]) >> 2; |
dst[2 * x + 2] = (src[x] + 3 * src[x + 1]) >> 2; |
} |
dst[2 * srcWidth - 1] = src[srcWidth - 1]; |
dst += dstStride; |
for (y = 1; y < srcHeight; y++) { |
const int mmxSize = 1; |
dst[0] = (src[0] * 3 + src[srcStride]) >> 2; |
dst[dstStride] = (src[0] + 3 * src[srcStride]) >> 2; |
for (x = mmxSize - 1; x < srcWidth - 1; x++) { |
dst[2 * x + 1] = (src[x + 0] * 3 + src[x + srcStride + 1]) >> 2; |
dst[2 * x + dstStride + 2] = (src[x + 0] + 3 * src[x + srcStride + 1]) >> 2; |
dst[2 * x + dstStride + 1] = (src[x + 1] + 3 * src[x + srcStride]) >> 2; |
dst[2 * x + 2] = (src[x + 1] * 3 + src[x + srcStride]) >> 2; |
} |
dst[srcWidth * 2 - 1] = (src[srcWidth - 1] * 3 + src[srcWidth - 1 + srcStride]) >> 2; |
dst[srcWidth * 2 - 1 + dstStride] = (src[srcWidth - 1] + 3 * src[srcWidth - 1 + srcStride]) >> 2; |
dst += dstStride * 2; |
src += srcStride; |
} |
// last line |
dst[0] = src[0]; |
for (x = 0; x < srcWidth - 1; x++) { |
dst[2 * x + 1] = (src[x] * 3 + src[x + 1]) >> 2; |
dst[2 * x + 2] = (src[x] + 3 * src[x + 1]) >> 2; |
} |
dst[2 * srcWidth - 1] = src[srcWidth - 1]; |
} |
/** |
* Height should be a multiple of 2 and width should be a multiple of 16. |
* (If this is a problem for anyone then tell me, and I will fix it.) |
* Chrominance data is only taken from every second line, others are ignored. |
* FIXME: Write HQ version. |
*/ |
static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst, |
uint8_t *udst, uint8_t *vdst, |
int width, int height, int lumStride, |
int chromStride, int srcStride) |
{ |
int y; |
const int chromWidth = width >> 1; |
for (y = 0; y < height; y += 2) { |
int i; |
for (i = 0; i < chromWidth; i++) { |
udst[i] = src[4 * i + 0]; |
ydst[2 * i + 0] = src[4 * i + 1]; |
vdst[i] = src[4 * i + 2]; |
ydst[2 * i + 1] = src[4 * i + 3]; |
} |
ydst += lumStride; |
src += srcStride; |
for (i = 0; i < chromWidth; i++) { |
ydst[2 * i + 0] = src[4 * i + 1]; |
ydst[2 * i + 1] = src[4 * i + 3]; |
} |
udst += chromStride; |
vdst += chromStride; |
ydst += lumStride; |
src += srcStride; |
} |
} |
/** |
* Height should be a multiple of 2 and width should be a multiple of 2. |
* (If this is a problem for anyone then tell me, and I will fix it.) |
* Chrominance data is only taken from every second line, |
* others are ignored in the C version. |
* FIXME: Write HQ version. |
*/ |
void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, |
uint8_t *vdst, int width, int height, int lumStride, |
int chromStride, int srcStride, int32_t *rgb2yuv) |
{ |
int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; |
int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; |
int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; |
int y; |
const int chromWidth = width >> 1; |
for (y = 0; y < height; y += 2) { |
int i; |
for (i = 0; i < chromWidth; i++) { |
unsigned int b = src[6 * i + 0]; |
unsigned int g = src[6 * i + 1]; |
unsigned int r = src[6 * i + 2]; |
unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; |
unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128; |
unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128; |
udst[i] = U; |
vdst[i] = V; |
ydst[2 * i] = Y; |
b = src[6 * i + 3]; |
g = src[6 * i + 4]; |
r = src[6 * i + 5]; |
Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; |
ydst[2 * i + 1] = Y; |
} |
ydst += lumStride; |
src += srcStride; |
if (y+1 == height) |
break; |
for (i = 0; i < chromWidth; i++) { |
unsigned int b = src[6 * i + 0]; |
unsigned int g = src[6 * i + 1]; |
unsigned int r = src[6 * i + 2]; |
unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; |
ydst[2 * i] = Y; |
b = src[6 * i + 3]; |
g = src[6 * i + 4]; |
r = src[6 * i + 5]; |
Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; |
ydst[2 * i + 1] = Y; |
} |
udst += chromStride; |
vdst += chromStride; |
ydst += lumStride; |
src += srcStride; |
} |
} |
static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2, |
uint8_t *dest, int width, int height, |
int src1Stride, int src2Stride, int dstStride) |
{ |
int h; |
for (h = 0; h < height; h++) { |
int w; |
for (w = 0; w < width; w++) { |
dest[2 * w + 0] = src1[w]; |
dest[2 * w + 1] = src2[w]; |
} |
dest += dstStride; |
src1 += src1Stride; |
src2 += src2Stride; |
} |
} |
static void deinterleaveBytes_c(const uint8_t *src, uint8_t *dst1, uint8_t *dst2, |
int width, int height, int srcStride, |
int dst1Stride, int dst2Stride) |
{ |
int h; |
for (h = 0; h < height; h++) { |
int w; |
for (w = 0; w < width; w++) { |
dst1[w] = src[2 * w + 0]; |
dst2[w] = src[2 * w + 1]; |
} |
src += srcStride; |
dst1 += dst1Stride; |
dst2 += dst2Stride; |
} |
} |
static inline void vu9_to_vu12_c(const uint8_t *src1, const uint8_t *src2, |
uint8_t *dst1, uint8_t *dst2, |
int width, int height, |
int srcStride1, int srcStride2, |
int dstStride1, int dstStride2) |
{ |
int x, y; |
int w = width / 2; |
int h = height / 2; |
for (y = 0; y < h; y++) { |
const uint8_t *s1 = src1 + srcStride1 * (y >> 1); |
uint8_t *d = dst1 + dstStride1 * y; |
for (x = 0; x < w; x++) |
d[2 * x] = d[2 * x + 1] = s1[x]; |
} |
for (y = 0; y < h; y++) { |
const uint8_t *s2 = src2 + srcStride2 * (y >> 1); |
uint8_t *d = dst2 + dstStride2 * y; |
for (x = 0; x < w; x++) |
d[2 * x] = d[2 * x + 1] = s2[x]; |
} |
} |
static inline void yvu9_to_yuy2_c(const uint8_t *src1, const uint8_t *src2, |
const uint8_t *src3, uint8_t *dst, |
int width, int height, |
int srcStride1, int srcStride2, |
int srcStride3, int dstStride) |
{ |
int x, y; |
int w = width / 2; |
int h = height; |
for (y = 0; y < h; y++) { |
const uint8_t *yp = src1 + srcStride1 * y; |
const uint8_t *up = src2 + srcStride2 * (y >> 2); |
const uint8_t *vp = src3 + srcStride3 * (y >> 2); |
uint8_t *d = dst + dstStride * y; |
for (x = 0; x < w; x++) { |
const int x2 = x << 2; |
d[8 * x + 0] = yp[x2]; |
d[8 * x + 1] = up[x]; |
d[8 * x + 2] = yp[x2 + 1]; |
d[8 * x + 3] = vp[x]; |
d[8 * x + 4] = yp[x2 + 2]; |
d[8 * x + 5] = up[x]; |
d[8 * x + 6] = yp[x2 + 3]; |
d[8 * x + 7] = vp[x]; |
} |
} |
} |
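/* The extract_* helpers below bias the pointers by count and run the |
 * index from -count up to 0, so a single counter advances source and |
 * destination together. */ |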
static void extract_even_c(const uint8_t *src, uint8_t *dst, int count) |
{ |
dst += count; |
src += count * 2; |
count = -count; |
while (count < 0) { |
dst[count] = src[2 * count]; |
count++; |
} |
} |
static void extract_even2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, |
int count) |
{ |
dst0 += count; |
dst1 += count; |
src += count * 4; |
count = -count; |
while (count < 0) { |
dst0[count] = src[4 * count + 0]; |
dst1[count] = src[4 * count + 2]; |
count++; |
} |
} |
static void extract_even2avg_c(const uint8_t *src0, const uint8_t *src1, |
uint8_t *dst0, uint8_t *dst1, int count) |
{ |
dst0 += count; |
dst1 += count; |
src0 += count * 4; |
src1 += count * 4; |
count = -count; |
while (count < 0) { |
dst0[count] = (src0[4 * count + 0] + src1[4 * count + 0]) >> 1; |
dst1[count] = (src0[4 * count + 2] + src1[4 * count + 2]) >> 1; |
count++; |
} |
} |
static void extract_odd2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, |
int count) |
{ |
dst0 += count; |
dst1 += count; |
src += count * 4; |
count = -count; |
src++; |
while (count < 0) { |
dst0[count] = src[4 * count + 0]; |
dst1[count] = src[4 * count + 2]; |
count++; |
} |
} |
static void extract_odd2avg_c(const uint8_t *src0, const uint8_t *src1, |
uint8_t *dst0, uint8_t *dst1, int count) |
{ |
dst0 += count; |
dst1 += count; |
src0 += count * 4; |
src1 += count * 4; |
count = -count; |
src0++; |
src1++; |
while (count < 0) { |
dst0[count] = (src0[4 * count + 0] + src1[4 * count + 0]) >> 1; |
dst1[count] = (src0[4 * count + 2] + src1[4 * count + 2]) >> 1; |
count++; |
} |
} |
static void yuyvtoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, |
const uint8_t *src, int width, int height, |
int lumStride, int chromStride, int srcStride) |
{ |
int y; |
const int chromWidth = FF_CEIL_RSHIFT(width, 1); |
for (y = 0; y < height; y++) { |
extract_even_c(src, ydst, width); |
if (y & 1) { |
extract_odd2avg_c(src - srcStride, src, udst, vdst, chromWidth); |
udst += chromStride; |
vdst += chromStride; |
} |
src += srcStride; |
ydst += lumStride; |
} |
} |
static void yuyvtoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, |
const uint8_t *src, int width, int height, |
int lumStride, int chromStride, int srcStride) |
{ |
int y; |
const int chromWidth = FF_CEIL_RSHIFT(width, 1); |
for (y = 0; y < height; y++) { |
extract_even_c(src, ydst, width); |
extract_odd2_c(src, udst, vdst, chromWidth); |
src += srcStride; |
ydst += lumStride; |
udst += chromStride; |
vdst += chromStride; |
} |
} |
static void uyvytoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, |
const uint8_t *src, int width, int height, |
int lumStride, int chromStride, int srcStride) |
{ |
int y; |
const int chromWidth = FF_CEIL_RSHIFT(width, 1); |
for (y = 0; y < height; y++) { |
extract_even_c(src + 1, ydst, width); |
if (y & 1) { |
extract_even2avg_c(src - srcStride, src, udst, vdst, chromWidth); |
udst += chromStride; |
vdst += chromStride; |
} |
src += srcStride; |
ydst += lumStride; |
} |
} |
static void uyvytoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, |
const uint8_t *src, int width, int height, |
int lumStride, int chromStride, int srcStride) |
{ |
int y; |
const int chromWidth = FF_CEIL_RSHIFT(width, 1); |
for (y = 0; y < height; y++) { |
extract_even_c(src + 1, ydst, width); |
extract_even2_c(src, udst, vdst, chromWidth); |
src += srcStride; |
ydst += lumStride; |
udst += chromStride; |
vdst += chromStride; |
} |
} |
static av_cold void rgb2rgb_init_c(void) |
{ |
rgb15to16 = rgb15to16_c; |
rgb15tobgr24 = rgb15tobgr24_c; |
rgb15to32 = rgb15to32_c; |
rgb16tobgr24 = rgb16tobgr24_c; |
rgb16to32 = rgb16to32_c; |
rgb16to15 = rgb16to15_c; |
rgb24tobgr16 = rgb24tobgr16_c; |
rgb24tobgr15 = rgb24tobgr15_c; |
rgb24tobgr32 = rgb24tobgr32_c; |
rgb32to16 = rgb32to16_c; |
rgb32to15 = rgb32to15_c; |
rgb32tobgr24 = rgb32tobgr24_c; |
rgb24to15 = rgb24to15_c; |
rgb24to16 = rgb24to16_c; |
rgb24tobgr24 = rgb24tobgr24_c; |
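    /* the shufflers load 32 bits at a time and therefore see bytes in |
     * host order, so on big-endian hosts the 0321 and 2103 variants |
     * trade places */ |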
#if HAVE_BIGENDIAN |
shuffle_bytes_0321 = shuffle_bytes_2103_c; |
shuffle_bytes_2103 = shuffle_bytes_0321_c; |
#else |
shuffle_bytes_0321 = shuffle_bytes_0321_c; |
shuffle_bytes_2103 = shuffle_bytes_2103_c; |
#endif |
rgb32tobgr16 = rgb32tobgr16_c; |
rgb32tobgr15 = rgb32tobgr15_c; |
yv12toyuy2 = yv12toyuy2_c; |
yv12touyvy = yv12touyvy_c; |
yuv422ptoyuy2 = yuv422ptoyuy2_c; |
yuv422ptouyvy = yuv422ptouyvy_c; |
yuy2toyv12 = yuy2toyv12_c; |
planar2x = planar2x_c; |
ff_rgb24toyv12 = ff_rgb24toyv12_c; |
interleaveBytes = interleaveBytes_c; |
deinterleaveBytes = deinterleaveBytes_c; |
vu9_to_vu12 = vu9_to_vu12_c; |
yvu9_to_yuy2 = yvu9_to_yuy2_c; |
uyvytoyuv420 = uyvytoyuv420_c; |
uyvytoyuv422 = uyvytoyuv422_c; |
yuyvtoyuv420 = yuyvtoyuv420_c; |
yuyvtoyuv422 = yuyvtoyuv422_c; |
} |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/slice.c |
---|
0,0 → 1,349 |
/* |
* Copyright (C) 2015 Pedro Arthur <bygrandao@gmail.com> |
* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
#include "swscale_internal.h" |
static void free_lines(SwsSlice *s) |
{ |
int i; |
for (i = 0; i < 2; ++i) { |
int n = s->plane[i].available_lines; |
int j; |
for (j = 0; j < n; ++j) { |
av_freep(&s->plane[i].line[j]); |
if (s->is_ring) |
s->plane[i].line[j+n] = NULL; |
} |
} |
for (i = 0; i < 4; ++i) |
memset(s->plane[i].line, 0, sizeof(uint8_t*) * s->plane[i].available_lines * (s->is_ring ? 3 : 1)); |
s->should_free_lines = 0; |
} |
/* |
    slice lines contain extra bytes for vector code, thus @size |
    is the allocated memory size and @width is the number of pixels |
*/ |
static int alloc_lines(SwsSlice *s, int size, int width) |
{ |
int i; |
int idx[2] = {3, 2}; |
s->should_free_lines = 1; |
s->width = width; |
for (i = 0; i < 2; ++i) { |
int n = s->plane[i].available_lines; |
int j; |
int ii = idx[i]; |
av_assert0(n == s->plane[ii].available_lines); |
for (j = 0; j < n; ++j) { |
            // the U and V chroma plane lines are expected to be contiguous |
            // in memory by the MMX vertical scaler code |
s->plane[i].line[j] = av_malloc(size * 2 + 32); |
if (!s->plane[i].line[j]) { |
free_lines(s); |
return AVERROR(ENOMEM); |
} |
s->plane[ii].line[j] = s->plane[i].line[j] + size + 16; |
if (s->is_ring) { |
s->plane[i].line[j+n] = s->plane[i].line[j]; |
s->plane[ii].line[j+n] = s->plane[ii].line[j]; |
} |
} |
} |
return 0; |
} |
static int alloc_slice(SwsSlice *s, enum AVPixelFormat fmt, int lumLines, int chrLines, int h_sub_sample, int v_sub_sample, int ring) |
{ |
int i; |
int size[4] = { lumLines, |
chrLines, |
chrLines, |
lumLines }; |
s->h_chr_sub_sample = h_sub_sample; |
s->v_chr_sub_sample = v_sub_sample; |
s->fmt = fmt; |
s->is_ring = ring; |
s->should_free_lines = 0; |
for (i = 0; i < 4; ++i) { |
int n = size[i] * ( ring == 0 ? 1 : 3); |
s->plane[i].line = av_mallocz_array(sizeof(uint8_t*), n); |
if (!s->plane[i].line) |
return AVERROR(ENOMEM); |
s->plane[i].tmp = ring ? s->plane[i].line + size[i] * 2 : NULL; |
s->plane[i].available_lines = size[i]; |
s->plane[i].sliceY = 0; |
s->plane[i].sliceH = 0; |
} |
return 0; |
} |
static void free_slice(SwsSlice *s) |
{ |
int i; |
if (s) { |
if (s->should_free_lines) |
free_lines(s); |
for (i = 0; i < 4; ++i) { |
av_freep(&s->plane[i].line); |
s->plane[i].tmp = NULL; |
} |
} |
} |
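/** |
 * Advance the ring-buffered slice window: once the requested line (lum |
 * for planes 0/3, chr for planes 1/2) is at least two buffer lengths |
 * past sliceY, slide the window forward by available_lines so the ring |
 * wraps in place without copying lines. |
 */ |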
int ff_rotate_slice(SwsSlice *s, int lum, int chr) |
{ |
int i; |
if (lum) { |
for (i = 0; i < 4; i+=3) { |
int n = s->plane[i].available_lines; |
int l = lum - s->plane[i].sliceY; |
if (l >= n * 2) { |
s->plane[i].sliceY += n; |
s->plane[i].sliceH -= n; |
} |
} |
} |
if (chr) { |
for (i = 1; i < 3; ++i) { |
int n = s->plane[i].available_lines; |
int l = chr - s->plane[i].sliceY; |
if (l >= n * 2) { |
s->plane[i].sliceY += n; |
s->plane[i].sliceH -= n; |
} |
} |
} |
return 0; |
} |
int ff_init_slice_from_src(SwsSlice * s, uint8_t *src[4], int stride[4], int srcW, int lumY, int lumH, int chrY, int chrH, int relative) |
{ |
int i = 0; |
const int start[4] = {lumY, |
chrY, |
chrY, |
lumY}; |
const int end[4] = {lumY +lumH, |
chrY + chrH, |
chrY + chrH, |
lumY + lumH}; |
const uint8_t *src_[4] = {src[0] + (relative ? 0 : start[0]) * stride[0], |
                              src[1] + (relative ? 0 : start[1]) * stride[1], |
                              src[2] + (relative ? 0 : start[2]) * stride[2], |
                              src[3] + (relative ? 0 : start[3]) * stride[3]}; |
s->width = srcW; |
for (i = 0; i < 4; ++i) { |
int j; |
int first = s->plane[i].sliceY; |
int n = s->plane[i].available_lines; |
int lines = end[i] - start[i]; |
int tot_lines = end[i] - first; |
if (start[i] >= first && n >= tot_lines) { |
s->plane[i].sliceH = FFMAX(tot_lines, s->plane[i].sliceH); |
for (j = 0; j < lines; j+= 1) |
s->plane[i].line[start[i] - first + j] = src_[i] + j * stride[i]; |
} else { |
s->plane[i].sliceY = start[i]; |
lines = lines > n ? n : lines; |
s->plane[i].sliceH = lines; |
for (j = 0; j < lines; j+= 1) |
s->plane[i].line[j] = src_[i] + j * stride[i]; |
} |
} |
return 0; |
} |
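/* Pre-fill slice lines with 1 << 14 (or 1 << 18 on the 16-bits-per- |
 * component path), evidently the fixed-point 1.0 of the intermediate |
 * formats, so lines read before being written hold a neutral value. */ |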
static void fill_ones(SwsSlice *s, int n, int is16bit) |
{ |
int i; |
for (i = 0; i < 4; ++i) { |
int j; |
int size = s->plane[i].available_lines; |
for (j = 0; j < size; ++j) { |
int k; |
int end = is16bit ? n>>1: n; |
            // also fill one extra element |
end += 1; |
if (is16bit) |
for (k = 0; k < end; ++k) |
((int32_t*)(s->plane[i].line[j]))[k] = 1<<18; |
else |
for (k = 0; k < end; ++k) |
((int16_t*)(s->plane[i].line[j]))[k] = 1<<14; |
} |
} |
} |
int ff_init_filters(SwsContext * c) |
{ |
int i; |
int index; |
int num_ydesc; |
int num_cdesc; |
int num_vdesc = isPlanarYUV(c->dstFormat) && !isGray(c->dstFormat) ? 2 : 1; |
int need_lum_conv = c->lumToYV12 || c->readLumPlanar || c->alpToYV12 || c->readAlpPlanar; |
int need_chr_conv = c->chrToYV12 || c->readChrPlanar; |
int need_gamma = c->is_internal_gamma; |
int srcIdx, dstIdx; |
int dst_stride = FFALIGN(c->dstW * sizeof(int16_t) + 66, 16); |
uint32_t * pal = usePal(c->srcFormat) ? c->pal_yuv : (uint32_t*)c->input_rgb2yuv_table; |
int res = 0; |
if (c->dstBpc == 16) |
dst_stride <<= 1; |
num_ydesc = need_lum_conv ? 2 : 1; |
num_cdesc = need_chr_conv ? 2 : 1; |
c->numSlice = FFMAX(num_ydesc, num_cdesc) + 2; |
c->numDesc = num_ydesc + num_cdesc + num_vdesc + (need_gamma ? 2 : 0); |
c->descIndex[0] = num_ydesc + (need_gamma ? 1 : 0); |
c->descIndex[1] = num_ydesc + num_cdesc + (need_gamma ? 1 : 0); |
c->desc = av_mallocz_array(sizeof(SwsFilterDescriptor), c->numDesc); |
if (!c->desc) |
return AVERROR(ENOMEM); |
    c->slice = av_mallocz_array(sizeof(SwsSlice), c->numSlice); |
    if (!c->slice) { |
        res = AVERROR(ENOMEM); |
        goto cleanup; |
    } |
res = alloc_slice(&c->slice[0], c->srcFormat, c->srcH, c->chrSrcH, c->chrSrcHSubSample, c->chrSrcVSubSample, 0); |
if (res < 0) goto cleanup; |
for (i = 1; i < c->numSlice-2; ++i) { |
res = alloc_slice(&c->slice[i], c->srcFormat, c->vLumFilterSize + MAX_LINES_AHEAD, c->vChrFilterSize + MAX_LINES_AHEAD, c->chrSrcHSubSample, c->chrSrcVSubSample, 0); |
if (res < 0) goto cleanup; |
res = alloc_lines(&c->slice[i], FFALIGN(c->srcW*2+78, 16), c->srcW); |
if (res < 0) goto cleanup; |
} |
// horizontal scaler output |
res = alloc_slice(&c->slice[i], c->srcFormat, c->vLumFilterSize + MAX_LINES_AHEAD, c->vChrFilterSize + MAX_LINES_AHEAD, c->chrDstHSubSample, c->chrDstVSubSample, 1); |
if (res < 0) goto cleanup; |
res = alloc_lines(&c->slice[i], dst_stride, c->dstW); |
if (res < 0) goto cleanup; |
fill_ones(&c->slice[i], dst_stride>>1, c->dstBpc == 16); |
// vertical scaler output |
++i; |
res = alloc_slice(&c->slice[i], c->dstFormat, c->dstH, c->chrDstH, c->chrDstHSubSample, c->chrDstVSubSample, 0); |
if (res < 0) goto cleanup; |
index = 0; |
srcIdx = 0; |
dstIdx = 1; |
if (need_gamma) { |
res = ff_init_gamma_convert(c->desc + index, c->slice + srcIdx, c->inv_gamma); |
if (res < 0) goto cleanup; |
++index; |
} |
if (need_lum_conv) { |
res = ff_init_desc_fmt_convert(&c->desc[index], &c->slice[srcIdx], &c->slice[dstIdx], pal); |
if (res < 0) goto cleanup; |
c->desc[index].alpha = c->alpPixBuf != 0; |
++index; |
srcIdx = dstIdx; |
} |
dstIdx = FFMAX(num_ydesc, num_cdesc); |
res = ff_init_desc_hscale(&c->desc[index], &c->slice[srcIdx], &c->slice[dstIdx], c->hLumFilter, c->hLumFilterPos, c->hLumFilterSize, c->lumXInc); |
if (res < 0) goto cleanup; |
c->desc[index].alpha = c->alpPixBuf != 0; |
++index; |
{ |
srcIdx = 0; |
dstIdx = 1; |
if (need_chr_conv) { |
res = ff_init_desc_cfmt_convert(&c->desc[index], &c->slice[srcIdx], &c->slice[dstIdx], pal); |
if (res < 0) goto cleanup; |
++index; |
srcIdx = dstIdx; |
} |
dstIdx = FFMAX(num_ydesc, num_cdesc); |
if (c->needs_hcscale) |
res = ff_init_desc_chscale(&c->desc[index], &c->slice[srcIdx], &c->slice[dstIdx], c->hChrFilter, c->hChrFilterPos, c->hChrFilterSize, c->chrXInc); |
else |
res = ff_init_desc_no_chr(&c->desc[index], &c->slice[srcIdx], &c->slice[dstIdx]); |
if (res < 0) goto cleanup; |
} |
++index; |
{ |
srcIdx = c->numSlice - 2; |
dstIdx = c->numSlice - 1; |
res = ff_init_vscale(c, c->desc + index, c->slice + srcIdx, c->slice + dstIdx); |
if (res < 0) goto cleanup; |
} |
++index; |
if (need_gamma) { |
res = ff_init_gamma_convert(c->desc + index, c->slice + dstIdx, c->gamma); |
if (res < 0) goto cleanup; |
} |
return 0; |
cleanup: |
ff_free_filters(c); |
return res; |
} |
int ff_free_filters(SwsContext *c) |
{ |
int i; |
if (c->desc) { |
for (i = 0; i < c->numDesc; ++i) |
av_freep(&c->desc[i].instance); |
av_freep(&c->desc); |
} |
if (c->slice) { |
for (i = 0; i < c->numSlice; ++i) |
free_slice(&c->slice[i]); |
av_freep(&c->slice); |
} |
return 0; |
} |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/swscale-3.def |
---|
0,0 → 1,38 |
EXPORTS |
DllStartup |
sws_addVec |
sws_allocVec |
sws_alloc_context |
sws_alloc_set_opts |
sws_cloneVec |
sws_context_class DATA |
sws_convVec |
sws_convertPalette8ToPacked24 |
sws_convertPalette8ToPacked32 |
sws_freeContext |
sws_freeFilter |
sws_freeVec |
sws_getCachedContext |
sws_getCoefficients |
sws_getColorspaceDetails |
sws_getConstVec |
sws_getContext |
sws_getDefaultFilter |
sws_getGaussianVec |
sws_getIdentityVec |
sws_get_class |
sws_init_context |
sws_isSupportedEndiannessConversion |
sws_isSupportedInput |
sws_isSupportedOutput |
sws_normalizeVec |
sws_printVec2 |
sws_rgb2rgb_init |
sws_scale |
sws_scaleVec |
sws_setColorspaceDetails |
sws_shiftVec |
sws_subVec |
swscale_configuration |
swscale_license |
swscale_version |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/swscale-3.orig.def |
---|
0,0 → 1,38 |
EXPORTS |
DllStartup @1 |
sws_addVec @2 |
sws_allocVec @3 |
sws_alloc_context @4 |
sws_alloc_set_opts @5 |
sws_cloneVec @6 |
sws_context_class @7 DATA |
sws_convVec @8 |
sws_convertPalette8ToPacked24 @9 |
sws_convertPalette8ToPacked32 @10 |
sws_freeContext @11 |
sws_freeFilter @12 |
sws_freeVec @13 |
sws_getCachedContext @14 |
sws_getCoefficients @15 |
sws_getColorspaceDetails @16 |
sws_getConstVec @17 |
sws_getContext @18 |
sws_getDefaultFilter @19 |
sws_getGaussianVec @20 |
sws_getIdentityVec @21 |
sws_get_class @22 |
sws_init_context @23 |
sws_isSupportedEndiannessConversion @24 |
sws_isSupportedInput @25 |
sws_isSupportedOutput @26 |
sws_normalizeVec @27 |
sws_printVec2 @28 |
sws_rgb2rgb_init @29 |
sws_scale @30 |
sws_scaleVec @31 |
sws_setColorspaceDetails @32 |
sws_shiftVec @33 |
sws_subVec @34 |
swscale_configuration @35 |
swscale_license @36 |
swscale_version @37 |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/swscale-test.c |
---|
0,0 → 1,417 |
/* |
* Copyright (C) 2003-2011 Michael Niedermayer <michaelni@gmx.at> |
* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
#include <stdio.h> |
#include <stdlib.h> |
#include <string.h> |
#include <inttypes.h> |
#include <stdarg.h> |
#undef HAVE_AV_CONFIG_H |
#include "libavutil/imgutils.h" |
#include "libavutil/mem.h" |
#include "libavutil/avutil.h" |
#include "libavutil/crc.h" |
#include "libavutil/pixdesc.h" |
#include "libavutil/lfg.h" |
#include "swscale.h" |
/* HACK Duplicated from swscale_internal.h. |
* Should be removed when a cleaner pixel format system exists. */ |
#define isGray(x) \ |
((x) == AV_PIX_FMT_GRAY8 || \ |
(x) == AV_PIX_FMT_YA8 || \ |
(x) == AV_PIX_FMT_GRAY16BE || \ |
(x) == AV_PIX_FMT_GRAY16LE || \ |
(x) == AV_PIX_FMT_YA16BE || \ |
(x) == AV_PIX_FMT_YA16LE) |
#define hasChroma(x) \ |
(!(isGray(x) || \ |
(x) == AV_PIX_FMT_MONOBLACK || \ |
(x) == AV_PIX_FMT_MONOWHITE)) |
#define isALPHA(x) \ |
((x) == AV_PIX_FMT_BGR32 || \ |
(x) == AV_PIX_FMT_BGR32_1 || \ |
(x) == AV_PIX_FMT_RGB32 || \ |
(x) == AV_PIX_FMT_RGB32_1 || \ |
(x) == AV_PIX_FMT_YUVA420P) |
static uint64_t getSSD(const uint8_t *src1, const uint8_t *src2, int stride1, |
int stride2, int w, int h) |
{ |
int x, y; |
uint64_t ssd = 0; |
for (y = 0; y < h; y++) { |
for (x = 0; x < w; x++) { |
int d = src1[x + y * stride1] - src2[x + y * stride2]; |
ssd += d * d; |
} |
} |
return ssd; |
} |
struct Results { |
uint64_t ssdY; |
uint64_t ssdU; |
uint64_t ssdV; |
uint64_t ssdA; |
uint32_t crc; |
}; |
// test by ref -> src -> dst -> out & compare out against ref |
// ref & out are YV12 |
static int doTest(uint8_t *ref[4], int refStride[4], int w, int h, |
enum AVPixelFormat srcFormat, enum AVPixelFormat dstFormat, |
int srcW, int srcH, int dstW, int dstH, int flags, |
struct Results *r) |
{ |
const AVPixFmtDescriptor *desc_yuva420p = av_pix_fmt_desc_get(AV_PIX_FMT_YUVA420P); |
const AVPixFmtDescriptor *desc_src = av_pix_fmt_desc_get(srcFormat); |
const AVPixFmtDescriptor *desc_dst = av_pix_fmt_desc_get(dstFormat); |
static enum AVPixelFormat cur_srcFormat; |
static int cur_srcW, cur_srcH; |
static uint8_t *src[4]; |
static int srcStride[4]; |
uint8_t *dst[4] = { 0 }; |
uint8_t *out[4] = { 0 }; |
int dstStride[4] = {0}; |
int i; |
uint64_t ssdY, ssdU = 0, ssdV = 0, ssdA = 0; |
struct SwsContext *dstContext = NULL, *outContext = NULL; |
uint32_t crc = 0; |
int res = 0; |
if (cur_srcFormat != srcFormat || cur_srcW != srcW || cur_srcH != srcH) { |
struct SwsContext *srcContext = NULL; |
int p; |
for (p = 0; p < 4; p++) |
av_freep(&src[p]); |
av_image_fill_linesizes(srcStride, srcFormat, srcW); |
for (p = 0; p < 4; p++) { |
srcStride[p] = FFALIGN(srcStride[p], 16); |
if (srcStride[p]) |
src[p] = av_mallocz(srcStride[p] * srcH + 16); |
if (srcStride[p] && !src[p]) { |
perror("Malloc"); |
res = -1; |
goto end; |
} |
} |
srcContext = sws_getContext(w, h, AV_PIX_FMT_YUVA420P, srcW, srcH, |
srcFormat, SWS_BILINEAR, NULL, NULL, NULL); |
if (!srcContext) { |
fprintf(stderr, "Failed to get %s ---> %s\n", |
desc_yuva420p->name, |
desc_src->name); |
res = -1; |
goto end; |
} |
sws_scale(srcContext, (const uint8_t * const*)ref, refStride, 0, h, src, srcStride); |
sws_freeContext(srcContext); |
cur_srcFormat = srcFormat; |
cur_srcW = srcW; |
cur_srcH = srcH; |
} |
av_image_fill_linesizes(dstStride, dstFormat, dstW); |
for (i = 0; i < 4; i++) { |
/* Image buffers passed into libswscale can be allocated any way you |
* prefer, as long as they're aligned enough for the architecture, and |
* they're freed appropriately (such as using av_free for buffers |
* allocated with av_malloc). */ |
/* An extra 16 bytes is being allocated because some scalers may write |
* out of bounds. */ |
dstStride[i] = FFALIGN(dstStride[i], 16); |
if (dstStride[i]) |
dst[i] = av_mallocz(dstStride[i] * dstH + 16); |
if (dstStride[i] && !dst[i]) { |
perror("Malloc"); |
res = -1; |
goto end; |
} |
} |
dstContext = sws_getContext(srcW, srcH, srcFormat, dstW, dstH, dstFormat, |
flags, NULL, NULL, NULL); |
if (!dstContext) { |
fprintf(stderr, "Failed to get %s ---> %s\n", |
desc_src->name, desc_dst->name); |
res = -1; |
goto end; |
} |
printf(" %s %dx%d -> %s %3dx%3d flags=%2d", |
desc_src->name, srcW, srcH, |
desc_dst->name, dstW, dstH, |
flags); |
fflush(stdout); |
sws_scale(dstContext, (const uint8_t * const*)src, srcStride, 0, srcH, dst, dstStride); |
for (i = 0; i < 4 && dstStride[i]; i++) |
crc = av_crc(av_crc_get_table(AV_CRC_32_IEEE), crc, dst[i], |
dstStride[i] * dstH); |
if (r && crc == r->crc) { |
ssdY = r->ssdY; |
ssdU = r->ssdU; |
ssdV = r->ssdV; |
ssdA = r->ssdA; |
} else { |
for (i = 0; i < 4; i++) { |
refStride[i] = FFALIGN(refStride[i], 16); |
if (refStride[i]) |
out[i] = av_mallocz(refStride[i] * h); |
if (refStride[i] && !out[i]) { |
perror("Malloc"); |
res = -1; |
goto end; |
} |
} |
outContext = sws_getContext(dstW, dstH, dstFormat, w, h, |
AV_PIX_FMT_YUVA420P, SWS_BILINEAR, |
NULL, NULL, NULL); |
if (!outContext) { |
fprintf(stderr, "Failed to get %s ---> %s\n", |
desc_dst->name, |
desc_yuva420p->name); |
res = -1; |
goto end; |
} |
sws_scale(outContext, (const uint8_t * const*)dst, dstStride, 0, dstH, out, refStride); |
ssdY = getSSD(ref[0], out[0], refStride[0], refStride[0], w, h); |
if (hasChroma(srcFormat) && hasChroma(dstFormat)) { |
//FIXME check that output is really gray |
ssdU = getSSD(ref[1], out[1], refStride[1], refStride[1], |
(w + 1) >> 1, (h + 1) >> 1); |
ssdV = getSSD(ref[2], out[2], refStride[2], refStride[2], |
(w + 1) >> 1, (h + 1) >> 1); |
} |
if (isALPHA(srcFormat) && isALPHA(dstFormat)) |
ssdA = getSSD(ref[3], out[3], refStride[3], refStride[3], w, h); |
ssdY /= w * h; |
ssdU /= w * h / 4; |
ssdV /= w * h / 4; |
ssdA /= w * h; |
sws_freeContext(outContext); |
for (i = 0; i < 4; i++) |
if (refStride[i]) |
av_free(out[i]); |
} |
printf(" CRC=%08x SSD=%5"PRId64 ",%5"PRId64 ",%5"PRId64 ",%5"PRId64 "\n", |
crc, ssdY, ssdU, ssdV, ssdA); |
end: |
sws_freeContext(dstContext); |
for (i = 0; i < 4; i++) |
if (dstStride[i]) |
av_free(dst[i]); |
return res; |
} |
static void selfTest(uint8_t *ref[4], int refStride[4], int w, int h, |
enum AVPixelFormat srcFormat_in, |
enum AVPixelFormat dstFormat_in) |
{ |
const int flags[] = { SWS_FAST_BILINEAR, SWS_BILINEAR, SWS_BICUBIC, |
SWS_X, SWS_POINT, SWS_AREA, 0 }; |
const int srcW = w; |
const int srcH = h; |
const int dstW[] = { srcW - srcW / 3, srcW, srcW + srcW / 3, 0 }; |
const int dstH[] = { srcH - srcH / 3, srcH, srcH + srcH / 3, 0 }; |
enum AVPixelFormat srcFormat, dstFormat; |
const AVPixFmtDescriptor *desc_src, *desc_dst; |
for (srcFormat = srcFormat_in != AV_PIX_FMT_NONE ? srcFormat_in : 0; |
srcFormat < AV_PIX_FMT_NB; srcFormat++) { |
if (!sws_isSupportedInput(srcFormat) || |
!sws_isSupportedOutput(srcFormat)) |
continue; |
desc_src = av_pix_fmt_desc_get(srcFormat); |
for (dstFormat = dstFormat_in != AV_PIX_FMT_NONE ? dstFormat_in : 0; |
dstFormat < AV_PIX_FMT_NB; dstFormat++) { |
int i, j, k; |
int res = 0; |
if (!sws_isSupportedInput(dstFormat) || |
!sws_isSupportedOutput(dstFormat)) |
continue; |
desc_dst = av_pix_fmt_desc_get(dstFormat); |
printf("%s -> %s\n", desc_src->name, desc_dst->name); |
fflush(stdout); |
for (k = 0; flags[k] && !res; k++) |
for (i = 0; dstW[i] && !res; i++) |
for (j = 0; dstH[j] && !res; j++) |
res = doTest(ref, refStride, w, h, |
srcFormat, dstFormat, |
srcW, srcH, dstW[i], dstH[j], flags[k], |
NULL); |
if (dstFormat_in != AV_PIX_FMT_NONE) |
break; |
} |
if (srcFormat_in != AV_PIX_FMT_NONE) |
break; |
} |
} |
static int fileTest(uint8_t *ref[4], int refStride[4], int w, int h, FILE *fp, |
enum AVPixelFormat srcFormat_in, |
enum AVPixelFormat dstFormat_in) |
{ |
char buf[256]; |
while (fgets(buf, sizeof(buf), fp)) { |
struct Results r; |
enum AVPixelFormat srcFormat; |
char srcStr[12]; |
int srcW, srcH; |
enum AVPixelFormat dstFormat; |
char dstStr[12]; |
int dstW, dstH; |
int flags; |
int ret; |
ret = sscanf(buf, |
" %12s %dx%d -> %12s %dx%d flags=%d CRC=%x" |
" SSD=%"SCNd64 ", %"SCNd64 ", %"SCNd64 ", %"SCNd64 "\n", |
srcStr, &srcW, &srcH, dstStr, &dstW, &dstH, |
&flags, &r.crc, &r.ssdY, &r.ssdU, &r.ssdV, &r.ssdA); |
if (ret != 12) { |
srcStr[0] = dstStr[0] = 0; |
ret = sscanf(buf, "%12s -> %12s\n", srcStr, dstStr); |
} |
srcFormat = av_get_pix_fmt(srcStr); |
dstFormat = av_get_pix_fmt(dstStr); |
if (srcFormat == AV_PIX_FMT_NONE || dstFormat == AV_PIX_FMT_NONE || |
srcW > 8192U || srcH > 8192U || dstW > 8192U || dstH > 8192U) { |
fprintf(stderr, "malformed input file\n"); |
return -1; |
} |
if ((srcFormat_in != AV_PIX_FMT_NONE && srcFormat_in != srcFormat) || |
(dstFormat_in != AV_PIX_FMT_NONE && dstFormat_in != dstFormat)) |
continue; |
if (ret != 12) { |
printf("%s", buf); |
continue; |
} |
doTest(ref, refStride, w, h, |
srcFormat, dstFormat, |
srcW, srcH, dstW, dstH, flags, |
&r); |
} |
return 0; |
} |
#define W 96 |
#define H 96 |
int main(int argc, char **argv) |
{ |
enum AVPixelFormat srcFormat = AV_PIX_FMT_NONE; |
enum AVPixelFormat dstFormat = AV_PIX_FMT_NONE; |
uint8_t *rgb_data = av_malloc(W * H * 4); |
const uint8_t * const rgb_src[4] = { rgb_data, NULL, NULL, NULL }; |
int rgb_stride[4] = { 4 * W, 0, 0, 0 }; |
uint8_t *data = av_malloc(4 * W * H); |
uint8_t *src[4] = { data, data + W * H, data + W * H * 2, data + W * H * 3 }; |
int stride[4] = { W, W, W, W }; |
int x, y; |
struct SwsContext *sws; |
AVLFG rand; |
int res = -1; |
int i; |
FILE *fp = NULL; |
if (!rgb_data || !data) |
return -1; |
for (i = 1; i < argc; i += 2) { |
if (argv[i][0] != '-' || i + 1 == argc) |
goto bad_option; |
if (!strcmp(argv[i], "-ref")) { |
fp = fopen(argv[i + 1], "r"); |
if (!fp) { |
fprintf(stderr, "could not open '%s'\n", argv[i + 1]); |
goto error; |
} |
} else if (!strcmp(argv[i], "-src")) { |
srcFormat = av_get_pix_fmt(argv[i + 1]); |
if (srcFormat == AV_PIX_FMT_NONE) { |
fprintf(stderr, "invalid pixel format %s\n", argv[i + 1]); |
return -1; |
} |
} else if (!strcmp(argv[i], "-dst")) { |
dstFormat = av_get_pix_fmt(argv[i + 1]); |
if (dstFormat == AV_PIX_FMT_NONE) { |
fprintf(stderr, "invalid pixel format %s\n", argv[i + 1]); |
return -1; |
} |
} else { |
bad_option: |
fprintf(stderr, "bad option or argument missing (%s)\n", argv[i]); |
goto error; |
} |
} |
sws = sws_getContext(W / 12, H / 12, AV_PIX_FMT_RGB32, W, H, |
AV_PIX_FMT_YUVA420P, SWS_BILINEAR, NULL, NULL, NULL); |
av_lfg_init(&rand, 1); |
for (y = 0; y < H; y++) |
for (x = 0; x < W * 4; x++) |
rgb_data[ x + y * 4 * W] = av_lfg_get(&rand); |
sws_scale(sws, rgb_src, rgb_stride, 0, H / 12, src, stride); |
sws_freeContext(sws); |
av_free(rgb_data); |
if(fp) { |
res = fileTest(src, stride, W, H, fp, srcFormat, dstFormat); |
fclose(fp); |
} else { |
selfTest(src, stride, W, H, srcFormat, dstFormat); |
res = 0; |
} |
error: |
av_free(data); |
return res; |
} |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/swscale.c |
---|
0,0 → 1,1276 |
/* |
* Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at> |
* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
#include <inttypes.h> |
#include <math.h> |
#include <stdio.h> |
#include <string.h> |
#include "libavutil/avassert.h" |
#include "libavutil/avutil.h" |
#include "libavutil/bswap.h" |
#include "libavutil/cpu.h" |
#include "libavutil/imgutils.h" |
#include "libavutil/intreadwrite.h" |
#include "libavutil/mathematics.h" |
#include "libavutil/pixdesc.h" |
#include "config.h" |
#include "rgb2rgb.h" |
#include "swscale_internal.h" |
#include "swscale.h" |
DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_128)[9][8] = { |
{ 36, 68, 60, 92, 34, 66, 58, 90, }, |
{ 100, 4, 124, 28, 98, 2, 122, 26, }, |
{ 52, 84, 44, 76, 50, 82, 42, 74, }, |
{ 116, 20, 108, 12, 114, 18, 106, 10, }, |
{ 32, 64, 56, 88, 38, 70, 62, 94, }, |
{ 96, 0, 120, 24, 102, 6, 126, 30, }, |
{ 48, 80, 40, 72, 54, 86, 46, 78, }, |
{ 112, 16, 104, 8, 118, 22, 110, 14, }, |
{ 36, 68, 60, 92, 34, 66, 58, 90, }, |
}; |
DECLARE_ALIGNED(8, static const uint8_t, sws_pb_64)[8] = { |
64, 64, 64, 64, 64, 64, 64, 64 |
}; |
static void gamma_convert(uint8_t * src[], int width, uint16_t *gamma) |
{ |
int i; |
uint16_t *src1 = (uint16_t*)src[0]; |
for (i = 0; i < width; ++i) { |
uint16_t r = AV_RL16(src1 + i*4 + 0); |
uint16_t g = AV_RL16(src1 + i*4 + 1); |
uint16_t b = AV_RL16(src1 + i*4 + 2); |
AV_WL16(src1 + i*4 + 0, gamma[r]); |
AV_WL16(src1 + i*4 + 1, gamma[g]); |
AV_WL16(src1 + i*4 + 2, gamma[b]); |
} |
} |
static av_always_inline void fillPlane(uint8_t *plane, int stride, int width, |
int height, int y, uint8_t val) |
{ |
int i; |
uint8_t *ptr = plane + stride * y; |
for (i = 0; i < height; i++) { |
memset(ptr, val, width); |
ptr += stride; |
} |
} |
static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, |
const uint8_t *_src, const int16_t *filter, |
const int32_t *filterPos, int filterSize) |
{ |
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat); |
int i; |
int32_t *dst = (int32_t *) _dst; |
const uint16_t *src = (const uint16_t *) _src; |
int bits = desc->comp[0].depth_minus1; |
int sh = bits - 4; |
if((isAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8) && desc->comp[0].depth_minus1<15) |
sh= 9; |
for (i = 0; i < dstW; i++) { |
int j; |
int srcPos = filterPos[i]; |
int val = 0; |
for (j = 0; j < filterSize; j++) { |
val += src[srcPos + j] * filter[filterSize * i + j]; |
} |
// filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit |
dst[i] = FFMIN(val >> sh, (1 << 19) - 1); |
} |
} |
static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, |
const uint8_t *_src, const int16_t *filter, |
const int32_t *filterPos, int filterSize) |
{ |
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat); |
int i; |
const uint16_t *src = (const uint16_t *) _src; |
int sh = desc->comp[0].depth_minus1; |
if(sh<15) |
sh= isAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8 ? 13 : desc->comp[0].depth_minus1; |
for (i = 0; i < dstW; i++) { |
int j; |
int srcPos = filterPos[i]; |
int val = 0; |
for (j = 0; j < filterSize; j++) { |
val += src[srcPos + j] * filter[filterSize * i + j]; |
} |
// filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit |
dst[i] = FFMIN(val >> sh, (1 << 15) - 1); |
} |
} |
// bilinear / bicubic scaling |
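/* Each output sample below is a dot product over filterSize neighbouring |
* input samples: |
*   dst[i] = min((sum_j src[filterPos[i]+j] * filter[i*filterSize+j]) >> 7, |
*                (1 << 15) - 1) |
* i.e. 8-bit input times 14-bit coefficients, renormalized to the 15-bit |
* intermediate format consumed by the vertical scaler. */ |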
static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, |
const uint8_t *src, const int16_t *filter, |
const int32_t *filterPos, int filterSize) |
{ |
int i; |
for (i = 0; i < dstW; i++) { |
int j; |
int srcPos = filterPos[i]; |
int val = 0; |
for (j = 0; j < filterSize; j++) { |
val += ((int)src[srcPos + j]) * filter[filterSize * i + j]; |
} |
dst[i] = FFMIN(val >> 7, (1 << 15) - 1); // the cubic equation does overflow ... |
} |
} |
static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, |
const uint8_t *src, const int16_t *filter, |
const int32_t *filterPos, int filterSize) |
{ |
int i; |
int32_t *dst = (int32_t *) _dst; |
for (i = 0; i < dstW; i++) { |
int j; |
int srcPos = filterPos[i]; |
int val = 0; |
for (j = 0; j < filterSize; j++) { |
val += ((int)src[srcPos + j]) * filter[filterSize * i + j]; |
} |
dst[i] = FFMIN(val >> 3, (1 << 19) - 1); // the cubic equation does overflow ... |
} |
} |
// FIXME all pal and rgb srcFormats could do this conversion as well |
// FIXME all scalers more complex than bilinear could do half of this transform |
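/* The range converters below rescale the fixed-point intermediate samples |
* between limited ("mpeg", 16..235 luma / 16..240 chroma) and full |
* ("jpeg", 0..255) range; the magic constants are the scale factor and |
* level offset pre-multiplied into that fixed-point domain. */ |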
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width) |
{ |
int i; |
for (i = 0; i < width; i++) { |
dstU[i] = (FFMIN(dstU[i], 30775) * 4663 - 9289992) >> 12; // -264 |
dstV[i] = (FFMIN(dstV[i], 30775) * 4663 - 9289992) >> 12; // -264 |
} |
} |
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width) |
{ |
int i; |
for (i = 0; i < width; i++) { |
dstU[i] = (dstU[i] * 1799 + 4081085) >> 11; // 1469 |
dstV[i] = (dstV[i] * 1799 + 4081085) >> 11; // 1469 |
} |
} |
static void lumRangeToJpeg_c(int16_t *dst, int width) |
{ |
int i; |
for (i = 0; i < width; i++) |
dst[i] = (FFMIN(dst[i], 30189) * 19077 - 39057361) >> 14; |
} |
static void lumRangeFromJpeg_c(int16_t *dst, int width) |
{ |
int i; |
for (i = 0; i < width; i++) |
dst[i] = (dst[i] * 14071 + 33561947) >> 14; |
} |
static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width) |
{ |
int i; |
int32_t *dstU = (int32_t *) _dstU; |
int32_t *dstV = (int32_t *) _dstV; |
for (i = 0; i < width; i++) { |
dstU[i] = (FFMIN(dstU[i], 30775 << 4) * 4663 - (9289992 << 4)) >> 12; // -264 |
dstV[i] = (FFMIN(dstV[i], 30775 << 4) * 4663 - (9289992 << 4)) >> 12; // -264 |
} |
} |
static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width) |
{ |
int i; |
int32_t *dstU = (int32_t *) _dstU; |
int32_t *dstV = (int32_t *) _dstV; |
for (i = 0; i < width; i++) { |
dstU[i] = (dstU[i] * 1799 + (4081085 << 4)) >> 11; // 1469 |
dstV[i] = (dstV[i] * 1799 + (4081085 << 4)) >> 11; // 1469 |
} |
} |
static void lumRangeToJpeg16_c(int16_t *_dst, int width) |
{ |
int i; |
int32_t *dst = (int32_t *) _dst; |
for (i = 0; i < width; i++) { |
dst[i] = ((int)(FFMIN(dst[i], 30189 << 4) * 4769U - (39057361 << 2))) >> 12; |
} |
} |
static void lumRangeFromJpeg16_c(int16_t *_dst, int width) |
{ |
int i; |
int32_t *dst = (int32_t *) _dst; |
for (i = 0; i < width; i++) |
dst[i] = (dst[i]*(14071/4) + (33561947<<4)/4)>>12; |
} |
// *** horizontal scale Y line to temp buffer |
static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth, |
const uint8_t *src_in[4], |
int srcW, int xInc, |
const int16_t *hLumFilter, |
const int32_t *hLumFilterPos, |
int hLumFilterSize, |
uint8_t *formatConvBuffer, |
uint32_t *pal, int isAlpha) |
{ |
void (*toYV12)(uint8_t *, const uint8_t *, const uint8_t *, const uint8_t *, int, uint32_t *) = |
isAlpha ? c->alpToYV12 : c->lumToYV12; |
void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange; |
const uint8_t *src = src_in[isAlpha ? 3 : 0]; |
if (toYV12) { |
toYV12(formatConvBuffer, src, src_in[1], src_in[2], srcW, pal); |
src = formatConvBuffer; |
} else if (c->readLumPlanar && !isAlpha) { |
c->readLumPlanar(formatConvBuffer, src_in, srcW, c->input_rgb2yuv_table); |
src = formatConvBuffer; |
} else if (c->readAlpPlanar && isAlpha) { |
c->readAlpPlanar(formatConvBuffer, src_in, srcW, NULL); |
src = formatConvBuffer; |
} |
if (!c->hyscale_fast) { |
c->hyScale(c, dst, dstWidth, src, hLumFilter, |
hLumFilterPos, hLumFilterSize); |
} else { // fast bilinear upscale / crap downscale |
c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc); |
} |
if (convertRange) |
convertRange(dst, dstWidth); |
} |
static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, |
int16_t *dst2, int dstWidth, |
const uint8_t *src_in[4], |
int srcW, int xInc, |
const int16_t *hChrFilter, |
const int32_t *hChrFilterPos, |
int hChrFilterSize, |
uint8_t *formatConvBuffer, uint32_t *pal) |
{ |
const uint8_t *src1 = src_in[1], *src2 = src_in[2]; |
if (c->chrToYV12) { |
uint8_t *buf2 = formatConvBuffer + |
FFALIGN(srcW*2+78, 16); |
c->chrToYV12(formatConvBuffer, buf2, src_in[0], src1, src2, srcW, pal); |
src1= formatConvBuffer; |
src2= buf2; |
} else if (c->readChrPlanar) { |
uint8_t *buf2 = formatConvBuffer + |
FFALIGN(srcW*2+78, 16); |
c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW, c->input_rgb2yuv_table); |
src1 = formatConvBuffer; |
src2 = buf2; |
} |
if (!c->hcscale_fast) { |
c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize); |
c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize); |
} else { // fast bilinear upscale / crap downscale |
c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc); |
} |
if (c->chrConvertRange) |
c->chrConvertRange(dst1, dst2, dstWidth); |
} |
#define DEBUG_SWSCALE_BUFFERS 0 |
#define DEBUG_BUFFERS(...) \ |
if (DEBUG_SWSCALE_BUFFERS) \ |
av_log(c, AV_LOG_DEBUG, __VA_ARGS__) |
static int swscale(SwsContext *c, const uint8_t *src[], |
int srcStride[], int srcSliceY, |
int srcSliceH, uint8_t *dst[], int dstStride[]) |
{ |
/* load a few things into local vars to make the code more readable |
* and faster */ |
#ifndef NEW_FILTER |
const int srcW = c->srcW; |
#endif |
const int dstW = c->dstW; |
const int dstH = c->dstH; |
#ifndef NEW_FILTER |
const int chrDstW = c->chrDstW; |
const int chrSrcW = c->chrSrcW; |
const int lumXInc = c->lumXInc; |
const int chrXInc = c->chrXInc; |
#endif |
const enum AVPixelFormat dstFormat = c->dstFormat; |
const int flags = c->flags; |
int32_t *vLumFilterPos = c->vLumFilterPos; |
int32_t *vChrFilterPos = c->vChrFilterPos; |
#ifndef NEW_FILTER |
int32_t *hLumFilterPos = c->hLumFilterPos; |
int32_t *hChrFilterPos = c->hChrFilterPos; |
int16_t *hLumFilter = c->hLumFilter; |
int16_t *hChrFilter = c->hChrFilter; |
int32_t *lumMmxFilter = c->lumMmxFilter; |
int32_t *chrMmxFilter = c->chrMmxFilter; |
#endif |
const int vLumFilterSize = c->vLumFilterSize; |
const int vChrFilterSize = c->vChrFilterSize; |
#ifndef NEW_FILTER |
const int hLumFilterSize = c->hLumFilterSize; |
const int hChrFilterSize = c->hChrFilterSize; |
int16_t **lumPixBuf = c->lumPixBuf; |
int16_t **chrUPixBuf = c->chrUPixBuf; |
int16_t **chrVPixBuf = c->chrVPixBuf; |
#endif |
int16_t **alpPixBuf = c->alpPixBuf; |
const int vLumBufSize = c->vLumBufSize; |
const int vChrBufSize = c->vChrBufSize; |
#ifndef NEW_FILTER |
uint8_t *formatConvBuffer = c->formatConvBuffer; |
uint32_t *pal = c->pal_yuv; |
int perform_gamma = c->is_internal_gamma; |
#endif |
yuv2planar1_fn yuv2plane1 = c->yuv2plane1; |
yuv2planarX_fn yuv2planeX = c->yuv2planeX; |
yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX; |
yuv2packed1_fn yuv2packed1 = c->yuv2packed1; |
yuv2packed2_fn yuv2packed2 = c->yuv2packed2; |
yuv2packedX_fn yuv2packedX = c->yuv2packedX; |
yuv2anyX_fn yuv2anyX = c->yuv2anyX; |
const int chrSrcSliceY = srcSliceY >> c->chrSrcVSubSample; |
const int chrSrcSliceH = FF_CEIL_RSHIFT(srcSliceH, c->chrSrcVSubSample); |
int should_dither = is9_OR_10BPS(c->srcFormat) || |
is16BPS(c->srcFormat); |
int lastDstY; |
/* vars which will change and which we need to store back in the context */ |
int dstY = c->dstY; |
int lumBufIndex = c->lumBufIndex; |
int chrBufIndex = c->chrBufIndex; |
int lastInLumBuf = c->lastInLumBuf; |
int lastInChrBuf = c->lastInChrBuf; |
#ifdef NEW_FILTER |
int lumStart = 0; |
int lumEnd = c->descIndex[0]; |
int chrStart = lumEnd; |
int chrEnd = c->descIndex[1]; |
int vStart = chrEnd; |
int vEnd = c->numDesc; |
SwsSlice *src_slice = &c->slice[lumStart]; |
SwsSlice *hout_slice = &c->slice[c->numSlice-2]; |
SwsSlice *vout_slice = &c->slice[c->numSlice-1]; |
SwsFilterDescriptor *desc = c->desc; |
int hasLumHoles = 1; |
int hasChrHoles = 1; |
#endif |
#ifndef NEW_FILTER |
if (!usePal(c->srcFormat)) { |
pal = c->input_rgb2yuv_table; |
} |
#endif |
if (isPacked(c->srcFormat)) { |
src[0] = |
src[1] = |
src[2] = |
src[3] = src[0]; |
srcStride[0] = |
srcStride[1] = |
srcStride[2] = |
srcStride[3] = srcStride[0]; |
} |
srcStride[1] <<= c->vChrDrop; |
srcStride[2] <<= c->vChrDrop; |
DEBUG_BUFFERS("swscale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n", |
src[0], srcStride[0], src[1], srcStride[1], |
src[2], srcStride[2], src[3], srcStride[3], |
dst[0], dstStride[0], dst[1], dstStride[1], |
dst[2], dstStride[2], dst[3], dstStride[3]); |
DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n", |
srcSliceY, srcSliceH, dstY, dstH); |
DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n", |
vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize); |
if (dstStride[0]&15 || dstStride[1]&15 || |
dstStride[2]&15 || dstStride[3]&15) { |
static int warnedAlready = 0; // FIXME maybe move this into the context |
if (flags & SWS_PRINT_INFO && !warnedAlready) { |
av_log(c, AV_LOG_WARNING, |
"Warning: dstStride is not aligned!\n" |
" ->cannot do aligned memory accesses anymore\n"); |
warnedAlready = 1; |
} |
} |
if ( (uintptr_t)dst[0]&15 || (uintptr_t)dst[1]&15 || (uintptr_t)dst[2]&15 |
|| (uintptr_t)src[0]&15 || (uintptr_t)src[1]&15 || (uintptr_t)src[2]&15 |
|| dstStride[0]&15 || dstStride[1]&15 || dstStride[2]&15 || dstStride[3]&15 |
|| srcStride[0]&15 || srcStride[1]&15 || srcStride[2]&15 || srcStride[3]&15 |
) { |
static int warnedAlready=0; |
int cpu_flags = av_get_cpu_flags(); |
if (HAVE_MMXEXT && (cpu_flags & AV_CPU_FLAG_SSE2) && !warnedAlready){ |
av_log(c, AV_LOG_WARNING, "Warning: data is not aligned! This can lead to a speedloss\n"); |
warnedAlready=1; |
} |
} |
/* Note that the user may start scaling in the middle of the picture, in |
* which case this will not be executed. This is not really intended, but |
* it currently works, so people may rely on it. */ |
if (srcSliceY == 0) { |
lumBufIndex = -1; |
chrBufIndex = -1; |
dstY = 0; |
lastInLumBuf = -1; |
lastInChrBuf = -1; |
} |
if (!should_dither) { |
c->chrDither8 = c->lumDither8 = sws_pb_64; |
} |
lastDstY = dstY; |
#ifdef NEW_FILTER |
ff_init_vscale_pfn(c, yuv2plane1, yuv2planeX, yuv2nv12cX, |
yuv2packed1, yuv2packed2, yuv2packedX, yuv2anyX, c->use_mmx_vfilter); |
ff_init_slice_from_src(src_slice, (uint8_t**)src, srcStride, c->srcW, |
srcSliceY, srcSliceH, chrSrcSliceY, chrSrcSliceH, 1); |
ff_init_slice_from_src(vout_slice, (uint8_t**)dst, dstStride, c->dstW, |
dstY, dstH, dstY >> c->chrDstVSubSample, |
FF_CEIL_RSHIFT(dstH, c->chrDstVSubSample), 0); |
if (srcSliceY == 0) { |
hout_slice->plane[0].sliceY = lastInLumBuf + 1; |
hout_slice->plane[1].sliceY = lastInChrBuf + 1; |
hout_slice->plane[2].sliceY = lastInChrBuf + 1; |
hout_slice->plane[3].sliceY = lastInLumBuf + 1; |
hout_slice->plane[0].sliceH = |
hout_slice->plane[1].sliceH = |
hout_slice->plane[2].sliceH = |
hout_slice->plane[3].sliceH = 0; |
hout_slice->width = dstW; |
} |
#endif |
for (; dstY < dstH; dstY++) { |
const int chrDstY = dstY >> c->chrDstVSubSample; |
#ifndef NEW_FILTER |
uint8_t *dest[4] = { |
dst[0] + dstStride[0] * dstY, |
dst[1] + dstStride[1] * chrDstY, |
dst[2] + dstStride[2] * chrDstY, |
(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL, |
}; |
#endif |
int use_mmx_vfilter= c->use_mmx_vfilter; |
// First line needed as input |
const int firstLumSrcY = FFMAX(1 - vLumFilterSize, vLumFilterPos[dstY]); |
const int firstLumSrcY2 = FFMAX(1 - vLumFilterSize, vLumFilterPos[FFMIN(dstY | ((1 << c->chrDstVSubSample) - 1), dstH - 1)]); |
// First line needed as input |
const int firstChrSrcY = FFMAX(1 - vChrFilterSize, vChrFilterPos[chrDstY]); |
// Last line needed as input |
int lastLumSrcY = FFMIN(c->srcH, firstLumSrcY + vLumFilterSize) - 1; |
int lastLumSrcY2 = FFMIN(c->srcH, firstLumSrcY2 + vLumFilterSize) - 1; |
int lastChrSrcY = FFMIN(c->chrSrcH, firstChrSrcY + vChrFilterSize) - 1; |
int enough_lines; |
#ifdef NEW_FILTER |
int i; |
int posY, cPosY, firstPosY, lastPosY, firstCPosY, lastCPosY; |
#endif |
// handle holes (FAST_BILINEAR & weird filters) |
if (firstLumSrcY > lastInLumBuf) { |
#ifdef NEW_FILTER |
hasLumHoles = lastInLumBuf != firstLumSrcY - 1; |
if (hasLumHoles) { |
hout_slice->plane[0].sliceY = firstLumSrcY; |
hout_slice->plane[3].sliceY = firstLumSrcY; |
hout_slice->plane[0].sliceH = |
hout_slice->plane[3].sliceH = 0; |
} |
#endif |
lastInLumBuf = firstLumSrcY - 1; |
} |
if (firstChrSrcY > lastInChrBuf) { |
#ifdef NEW_FILTER |
hasChrHoles = lastInChrBuf != firstChrSrcY - 1; |
if (hasChrHoles) { |
hout_slice->plane[1].sliceY = firstChrSrcY; |
hout_slice->plane[2].sliceY = firstChrSrcY; |
hout_slice->plane[1].sliceH = |
hout_slice->plane[2].sliceH = 0; |
} |
#endif |
lastInChrBuf = firstChrSrcY - 1; |
} |
av_assert0(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1); |
av_assert0(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1); |
DEBUG_BUFFERS("dstY: %d\n", dstY); |
DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n", |
firstLumSrcY, lastLumSrcY, lastInLumBuf); |
DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n", |
firstChrSrcY, lastChrSrcY, lastInChrBuf); |
// Do we have enough lines in this slice to output the dstY line |
enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && |
lastChrSrcY < FF_CEIL_RSHIFT(srcSliceY + srcSliceH, c->chrSrcVSubSample); |
if (!enough_lines) { |
lastLumSrcY = srcSliceY + srcSliceH - 1; |
lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1; |
DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n", |
lastLumSrcY, lastChrSrcY); |
} |
#ifdef NEW_FILTER |
posY = hout_slice->plane[0].sliceY + hout_slice->plane[0].sliceH; |
if (posY <= lastLumSrcY && !hasLumHoles) { |
firstPosY = FFMAX(firstLumSrcY, posY); |
lastPosY = FFMIN(lastLumSrcY + MAX_LINES_AHEAD, srcSliceY + srcSliceH - 1); |
} else { |
firstPosY = lastInLumBuf + 1; |
lastPosY = lastLumSrcY; |
} |
cPosY = hout_slice->plane[1].sliceY + hout_slice->plane[1].sliceH; |
if (cPosY <= lastChrSrcY && !hasChrHoles) { |
firstCPosY = FFMAX(firstChrSrcY, cPosY); |
lastCPosY = FFMIN(lastChrSrcY + MAX_LINES_AHEAD, FF_CEIL_RSHIFT(srcSliceY + srcSliceH, c->chrSrcVSubSample) - 1); |
} else { |
firstCPosY = lastInChrBuf + 1; |
lastCPosY = lastChrSrcY; |
} |
ff_rotate_slice(hout_slice, lastPosY, lastCPosY); |
if (posY < lastLumSrcY + 1) { |
for (i = lumStart; i < lumEnd; ++i) |
desc[i].process(c, &desc[i], firstPosY, lastPosY - firstPosY + 1); |
} |
lumBufIndex += lastLumSrcY - lastInLumBuf; |
lastInLumBuf = lastLumSrcY; |
if (cPosY < lastChrSrcY + 1) { |
for (i = chrStart; i < chrEnd; ++i) |
desc[i].process(c, &desc[i], firstCPosY, lastCPosY - firstCPosY + 1); |
} |
chrBufIndex += lastChrSrcY - lastInChrBuf; |
lastInChrBuf = lastChrSrcY; |
#else |
// Do horizontal scaling |
while (lastInLumBuf < lastLumSrcY) { |
const uint8_t *src1[4] = { |
src[0] + (lastInLumBuf + 1 - srcSliceY) * srcStride[0], |
src[1] + (lastInLumBuf + 1 - srcSliceY) * srcStride[1], |
src[2] + (lastInLumBuf + 1 - srcSliceY) * srcStride[2], |
src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3], |
}; |
lumBufIndex++; |
av_assert0(lumBufIndex < 2 * vLumBufSize); |
av_assert0(lastInLumBuf + 1 - srcSliceY < srcSliceH); |
av_assert0(lastInLumBuf + 1 - srcSliceY >= 0); |
if (perform_gamma) |
gamma_convert((uint8_t **)src1, srcW, c->inv_gamma); |
hyscale(c, lumPixBuf[lumBufIndex], dstW, src1, srcW, lumXInc, |
hLumFilter, hLumFilterPos, hLumFilterSize, |
formatConvBuffer, pal, 0); |
if (CONFIG_SWSCALE_ALPHA && alpPixBuf) |
hyscale(c, alpPixBuf[lumBufIndex], dstW, src1, srcW, |
lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize, |
formatConvBuffer, pal, 1); |
lastInLumBuf++; |
DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n", |
lumBufIndex, lastInLumBuf); |
} |
while (lastInChrBuf < lastChrSrcY) { |
const uint8_t *src1[4] = { |
src[0] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[0], |
src[1] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[1], |
src[2] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[2], |
src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3], |
}; |
chrBufIndex++; |
av_assert0(chrBufIndex < 2 * vChrBufSize); |
av_assert0(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH)); |
av_assert0(lastInChrBuf + 1 - chrSrcSliceY >= 0); |
// FIXME replace parameters through context struct (some at least) |
if (c->needs_hcscale) |
hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex], |
chrDstW, src1, chrSrcW, chrXInc, |
hChrFilter, hChrFilterPos, hChrFilterSize, |
formatConvBuffer, pal); |
lastInChrBuf++; |
DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n", |
chrBufIndex, lastInChrBuf); |
} |
#endif |
// wrap buf index around to stay inside the ring buffer |
if (lumBufIndex >= vLumBufSize) |
lumBufIndex -= vLumBufSize; |
if (chrBufIndex >= vChrBufSize) |
chrBufIndex -= vChrBufSize; |
if (!enough_lines) |
break; // we can't output a dstY line so let's try with the next slice |
#if HAVE_MMX_INLINE |
ff_updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, |
lastInLumBuf, lastInChrBuf); |
#endif |
if (should_dither) { |
c->chrDither8 = ff_dither_8x8_128[chrDstY & 7]; |
c->lumDither8 = ff_dither_8x8_128[dstY & 7]; |
} |
if (dstY >= dstH - 2) { |
/* it looks like we cannot use MMX here without overwriting |
* this array's tail */ |
ff_sws_init_output_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX, |
&yuv2packed1, &yuv2packed2, &yuv2packedX, &yuv2anyX); |
use_mmx_vfilter= 0; |
ff_init_vscale_pfn(c, yuv2plane1, yuv2planeX, yuv2nv12cX, |
yuv2packed1, yuv2packed2, yuv2packedX, yuv2anyX, use_mmx_vfilter); |
} |
{ |
#ifdef NEW_FILTER |
for (i = vStart; i < vEnd; ++i) |
desc[i].process(c, &desc[i], dstY, 1); |
#else |
const int16_t **lumSrcPtr = (const int16_t **)(void*) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; |
const int16_t **chrUSrcPtr = (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; |
const int16_t **chrVSrcPtr = (const int16_t **)(void*) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; |
const int16_t **alpSrcPtr = (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? |
(const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; |
int16_t *vLumFilter = c->vLumFilter; |
int16_t *vChrFilter = c->vChrFilter; |
if (isPlanarYUV(dstFormat) || |
(isGray(dstFormat) && !isALPHA(dstFormat))) { // YV12 like |
const int chrSkipMask = (1 << c->chrDstVSubSample) - 1; |
vLumFilter += dstY * vLumFilterSize; |
vChrFilter += chrDstY * vChrFilterSize; |
// av_assert0(use_mmx_vfilter != ( |
// yuv2planeX == yuv2planeX_10BE_c |
// || yuv2planeX == yuv2planeX_10LE_c |
// || yuv2planeX == yuv2planeX_9BE_c |
// || yuv2planeX == yuv2planeX_9LE_c |
// || yuv2planeX == yuv2planeX_16BE_c |
// || yuv2planeX == yuv2planeX_16LE_c |
// || yuv2planeX == yuv2planeX_8_c) || !ARCH_X86); |
if(use_mmx_vfilter){ |
vLumFilter= (int16_t *)c->lumMmxFilter; |
vChrFilter= (int16_t *)c->chrMmxFilter; |
} |
if (vLumFilterSize == 1) { |
yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0); |
} else { |
yuv2planeX(vLumFilter, vLumFilterSize, |
lumSrcPtr, dest[0], |
dstW, c->lumDither8, 0); |
} |
if (!((dstY & chrSkipMask) || isGray(dstFormat))) { |
if (yuv2nv12cX) { |
yuv2nv12cX(c, vChrFilter, |
vChrFilterSize, chrUSrcPtr, chrVSrcPtr, |
dest[1], chrDstW); |
} else if (vChrFilterSize == 1) { |
yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0); |
yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3); |
} else { |
yuv2planeX(vChrFilter, |
vChrFilterSize, chrUSrcPtr, dest[1], |
chrDstW, c->chrDither8, 0); |
yuv2planeX(vChrFilter, |
vChrFilterSize, chrVSrcPtr, dest[2], |
chrDstW, c->chrDither8, use_mmx_vfilter ? (c->uv_offx2 >> 1) : 3); |
} |
} |
if (CONFIG_SWSCALE_ALPHA && alpPixBuf) { |
if(use_mmx_vfilter){ |
vLumFilter= (int16_t *)c->alpMmxFilter; |
} |
if (vLumFilterSize == 1) { |
yuv2plane1(alpSrcPtr[0], dest[3], dstW, |
c->lumDither8, 0); |
} else { |
yuv2planeX(vLumFilter, |
vLumFilterSize, alpSrcPtr, dest[3], |
dstW, c->lumDither8, 0); |
} |
} |
} else if (yuv2packedX) { |
av_assert1(lumSrcPtr + vLumFilterSize - 1 < (const int16_t **)lumPixBuf + vLumBufSize * 2); |
av_assert1(chrUSrcPtr + vChrFilterSize - 1 < (const int16_t **)chrUPixBuf + vChrBufSize * 2); |
if (c->yuv2packed1 && vLumFilterSize == 1 && |
vChrFilterSize <= 2) { // unscaled RGB |
int chrAlpha = vChrFilterSize == 1 ? 0 : vChrFilter[2 * dstY + 1]; |
yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr, |
alpPixBuf ? *alpSrcPtr : NULL, |
dest[0], dstW, chrAlpha, dstY); |
} else if (c->yuv2packed2 && vLumFilterSize == 2 && |
vChrFilterSize == 2) { // bilinear upscale RGB |
int lumAlpha = vLumFilter[2 * dstY + 1]; |
int chrAlpha = vChrFilter[2 * dstY + 1]; |
lumMmxFilter[2] = |
lumMmxFilter[3] = vLumFilter[2 * dstY] * 0x10001; |
chrMmxFilter[2] = |
chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001; |
yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr, |
alpPixBuf ? alpSrcPtr : NULL, |
dest[0], dstW, lumAlpha, chrAlpha, dstY); |
} else { // general RGB |
yuv2packedX(c, vLumFilter + dstY * vLumFilterSize, |
lumSrcPtr, vLumFilterSize, |
vChrFilter + dstY * vChrFilterSize, |
chrUSrcPtr, chrVSrcPtr, vChrFilterSize, |
alpSrcPtr, dest[0], dstW, dstY); |
} |
} else { |
av_assert1(!yuv2packed1 && !yuv2packed2); |
yuv2anyX(c, vLumFilter + dstY * vLumFilterSize, |
lumSrcPtr, vLumFilterSize, |
vChrFilter + dstY * vChrFilterSize, |
chrUSrcPtr, chrVSrcPtr, vChrFilterSize, |
alpSrcPtr, dest, dstW, dstY); |
} |
if (perform_gamma) |
gamma_convert(dest, dstW, c->gamma); |
#endif |
} |
} |
if (isPlanar(dstFormat) && isALPHA(dstFormat) && !alpPixBuf) { |
int length = dstW; |
int height = dstY - lastDstY; |
if (is16BPS(dstFormat) || isNBPS(dstFormat)) { |
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(dstFormat); |
fillPlane16(dst[3], dstStride[3], length, height, lastDstY, |
1, desc->comp[3].depth_minus1, |
isBE(dstFormat)); |
} else |
fillPlane(dst[3], dstStride[3], length, height, lastDstY, 255); |
} |
#if HAVE_MMXEXT_INLINE |
if (av_get_cpu_flags() & AV_CPU_FLAG_MMXEXT) |
__asm__ volatile ("sfence" ::: "memory"); |
#endif |
emms_c(); |
/* store changed local vars back in the context */ |
c->dstY = dstY; |
c->lumBufIndex = lumBufIndex; |
c->chrBufIndex = chrBufIndex; |
c->lastInLumBuf = lastInLumBuf; |
c->lastInChrBuf = lastInChrBuf; |
return dstY - lastDstY; |
} |
av_cold void ff_sws_init_range_convert(SwsContext *c) |
{ |
c->lumConvertRange = NULL; |
c->chrConvertRange = NULL; |
if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) { |
if (c->dstBpc <= 14) { |
if (c->srcRange) { |
c->lumConvertRange = lumRangeFromJpeg_c; |
c->chrConvertRange = chrRangeFromJpeg_c; |
} else { |
c->lumConvertRange = lumRangeToJpeg_c; |
c->chrConvertRange = chrRangeToJpeg_c; |
} |
} else { |
if (c->srcRange) { |
c->lumConvertRange = lumRangeFromJpeg16_c; |
c->chrConvertRange = chrRangeFromJpeg16_c; |
} else { |
c->lumConvertRange = lumRangeToJpeg16_c; |
c->chrConvertRange = chrRangeToJpeg16_c; |
} |
} |
} |
} |
static av_cold void sws_init_swscale(SwsContext *c) |
{ |
enum AVPixelFormat srcFormat = c->srcFormat; |
ff_sws_init_output_funcs(c, &c->yuv2plane1, &c->yuv2planeX, |
&c->yuv2nv12cX, &c->yuv2packed1, |
&c->yuv2packed2, &c->yuv2packedX, &c->yuv2anyX); |
ff_sws_init_input_funcs(c); |
if (c->srcBpc == 8) { |
if (c->dstBpc <= 14) { |
c->hyScale = c->hcScale = hScale8To15_c; |
if (c->flags & SWS_FAST_BILINEAR) { |
c->hyscale_fast = ff_hyscale_fast_c; |
c->hcscale_fast = ff_hcscale_fast_c; |
} |
} else { |
c->hyScale = c->hcScale = hScale8To19_c; |
} |
} else { |
c->hyScale = c->hcScale = c->dstBpc > 14 ? hScale16To19_c |
: hScale16To15_c; |
} |
ff_sws_init_range_convert(c); |
if (!(isGray(srcFormat) || isGray(c->dstFormat) || |
srcFormat == AV_PIX_FMT_MONOBLACK || srcFormat == AV_PIX_FMT_MONOWHITE)) |
c->needs_hcscale = 1; |
} |
SwsFunc ff_getSwsFunc(SwsContext *c) |
{ |
sws_init_swscale(c); |
if (ARCH_PPC) |
ff_sws_init_swscale_ppc(c); |
if (ARCH_X86) |
ff_sws_init_swscale_x86(c); |
return swscale; |
} |
static void reset_ptr(const uint8_t *src[], enum AVPixelFormat format) |
{ |
if (!isALPHA(format)) |
src[3] = NULL; |
if (!isPlanar(format)) { |
src[3] = src[2] = NULL; |
if (!usePal(format)) |
src[1] = NULL; |
} |
} |
static int check_image_pointers(const uint8_t * const data[4], enum AVPixelFormat pix_fmt, |
const int linesizes[4]) |
{ |
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); |
int i; |
av_assert2(desc); |
for (i = 0; i < 4; i++) { |
int plane = desc->comp[i].plane; |
if (!data[plane] || !linesizes[plane]) |
return 0; |
} |
return 1; |
} |
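/* xyz12Torgb48() converts 12-bit-in-16 X'Y'Z' input to 16-bit R'G'B': |
* linearize through the xyzgamma LUT, apply the 3x3 XYZ->RGB matrix in |
* Q12 fixed point, clip to 12 bits, then re-apply the RGB transfer curve |
* and scale back to 16 bits; rgb48Toxyz12() below is the inverse. */ |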
static void xyz12Torgb48(struct SwsContext *c, uint16_t *dst, |
const uint16_t *src, int stride, int h) |
{ |
int xp,yp; |
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat); |
for (yp=0; yp<h; yp++) { |
for (xp=0; xp+2<stride; xp+=3) { |
int x, y, z, r, g, b; |
if (desc->flags & AV_PIX_FMT_FLAG_BE) { |
x = AV_RB16(src + xp + 0); |
y = AV_RB16(src + xp + 1); |
z = AV_RB16(src + xp + 2); |
} else { |
x = AV_RL16(src + xp + 0); |
y = AV_RL16(src + xp + 1); |
z = AV_RL16(src + xp + 2); |
} |
x = c->xyzgamma[x>>4]; |
y = c->xyzgamma[y>>4]; |
z = c->xyzgamma[z>>4]; |
// convert from XYZlinear to sRGBlinear |
r = c->xyz2rgb_matrix[0][0] * x + |
c->xyz2rgb_matrix[0][1] * y + |
c->xyz2rgb_matrix[0][2] * z >> 12; |
g = c->xyz2rgb_matrix[1][0] * x + |
c->xyz2rgb_matrix[1][1] * y + |
c->xyz2rgb_matrix[1][2] * z >> 12; |
b = c->xyz2rgb_matrix[2][0] * x + |
c->xyz2rgb_matrix[2][1] * y + |
c->xyz2rgb_matrix[2][2] * z >> 12; |
// limit values to 12-bit depth |
r = av_clip_uintp2(r, 12); |
g = av_clip_uintp2(g, 12); |
b = av_clip_uintp2(b, 12); |
// convert from sRGBlinear to RGB and scale from 12bit to 16bit |
if (desc->flags & AV_PIX_FMT_FLAG_BE) { |
AV_WB16(dst + xp + 0, c->rgbgamma[r] << 4); |
AV_WB16(dst + xp + 1, c->rgbgamma[g] << 4); |
AV_WB16(dst + xp + 2, c->rgbgamma[b] << 4); |
} else { |
AV_WL16(dst + xp + 0, c->rgbgamma[r] << 4); |
AV_WL16(dst + xp + 1, c->rgbgamma[g] << 4); |
AV_WL16(dst + xp + 2, c->rgbgamma[b] << 4); |
} |
} |
src += stride; |
dst += stride; |
} |
} |
static void rgb48Toxyz12(struct SwsContext *c, uint16_t *dst, |
const uint16_t *src, int stride, int h) |
{ |
int xp,yp; |
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat); |
for (yp=0; yp<h; yp++) { |
for (xp=0; xp+2<stride; xp+=3) { |
int x, y, z, r, g, b; |
if (desc->flags & AV_PIX_FMT_FLAG_BE) { |
r = AV_RB16(src + xp + 0); |
g = AV_RB16(src + xp + 1); |
b = AV_RB16(src + xp + 2); |
} else { |
r = AV_RL16(src + xp + 0); |
g = AV_RL16(src + xp + 1); |
b = AV_RL16(src + xp + 2); |
} |
r = c->rgbgammainv[r>>4]; |
g = c->rgbgammainv[g>>4]; |
b = c->rgbgammainv[b>>4]; |
// convert from sRGBlinear to XYZlinear |
x = c->rgb2xyz_matrix[0][0] * r + |
c->rgb2xyz_matrix[0][1] * g + |
c->rgb2xyz_matrix[0][2] * b >> 12; |
y = c->rgb2xyz_matrix[1][0] * r + |
c->rgb2xyz_matrix[1][1] * g + |
c->rgb2xyz_matrix[1][2] * b >> 12; |
z = c->rgb2xyz_matrix[2][0] * r + |
c->rgb2xyz_matrix[2][1] * g + |
c->rgb2xyz_matrix[2][2] * b >> 12; |
// limit values to 12-bit depth |
x = av_clip_uintp2(x, 12); |
y = av_clip_uintp2(y, 12); |
z = av_clip_uintp2(z, 12); |
// convert from XYZlinear to X'Y'Z' and scale from 12bit to 16bit |
if (desc->flags & AV_PIX_FMT_FLAG_BE) { |
AV_WB16(dst + xp + 0, c->xyzgammainv[x] << 4); |
AV_WB16(dst + xp + 1, c->xyzgammainv[y] << 4); |
AV_WB16(dst + xp + 2, c->xyzgammainv[z] << 4); |
} else { |
AV_WL16(dst + xp + 0, c->xyzgammainv[x] << 4); |
AV_WL16(dst + xp + 1, c->xyzgammainv[y] << 4); |
AV_WL16(dst + xp + 2, c->xyzgammainv[z] << 4); |
} |
} |
src += stride; |
dst += stride; |
} |
} |
/** |
* swscale wrapper, so we don't need to export the SwsContext. |
* Assumes planar YUV to be in YUV order instead of YVU. |
*/ |
int attribute_align_arg sws_scale(struct SwsContext *c, |
const uint8_t * const srcSlice[], |
const int srcStride[], int srcSliceY, |
int srcSliceH, uint8_t *const dst[], |
const int dstStride[]) |
{ |
int i, ret; |
const uint8_t *src2[4]; |
uint8_t *dst2[4]; |
uint8_t *rgb0_tmp = NULL; |
if (!srcStride || !dstStride || !dst || !srcSlice) { |
av_log(c, AV_LOG_ERROR, "One of the input parameters to sws_scale() is NULL, please check the calling code\n"); |
return 0; |
} |
if (c->gamma_flag && c->cascaded_context[0]) { |
ret = sws_scale(c->cascaded_context[0], |
srcSlice, srcStride, srcSliceY, srcSliceH, |
c->cascaded_tmp, c->cascaded_tmpStride); |
if (ret < 0) |
return ret; |
if (c->cascaded_context[2]) |
ret = sws_scale(c->cascaded_context[1], (const uint8_t * const *)c->cascaded_tmp, c->cascaded_tmpStride, srcSliceY, srcSliceH, c->cascaded1_tmp, c->cascaded1_tmpStride); |
else |
ret = sws_scale(c->cascaded_context[1], (const uint8_t * const *)c->cascaded_tmp, c->cascaded_tmpStride, srcSliceY, srcSliceH, dst, dstStride); |
if (ret < 0) |
return ret; |
if (c->cascaded_context[2]) { |
ret = sws_scale(c->cascaded_context[2], |
(const uint8_t * const *)c->cascaded1_tmp, c->cascaded1_tmpStride, c->cascaded_context[1]->dstY - ret, c->cascaded_context[1]->dstY, |
dst, dstStride); |
} |
return ret; |
} |
if (c->cascaded_context[0] && srcSliceY == 0 && srcSliceH == c->cascaded_context[0]->srcH) { |
ret = sws_scale(c->cascaded_context[0], |
srcSlice, srcStride, srcSliceY, srcSliceH, |
c->cascaded_tmp, c->cascaded_tmpStride); |
if (ret < 0) |
return ret; |
ret = sws_scale(c->cascaded_context[1], |
(const uint8_t * const * )c->cascaded_tmp, c->cascaded_tmpStride, 0, c->cascaded_context[0]->dstH, |
dst, dstStride); |
return ret; |
} |
memcpy(src2, srcSlice, sizeof(src2)); |
memcpy(dst2, dst, sizeof(dst2)); |
// do not mess up sliceDir if we have a "trailing" 0-size slice |
if (srcSliceH == 0) |
return 0; |
if (!check_image_pointers(srcSlice, c->srcFormat, srcStride)) { |
av_log(c, AV_LOG_ERROR, "bad src image pointers\n"); |
return 0; |
} |
if (!check_image_pointers((const uint8_t* const*)dst, c->dstFormat, dstStride)) { |
av_log(c, AV_LOG_ERROR, "bad dst image pointers\n"); |
return 0; |
} |
if (c->sliceDir == 0 && srcSliceY != 0 && srcSliceY + srcSliceH != c->srcH) { |
av_log(c, AV_LOG_ERROR, "Slices start in the middle!\n"); |
return 0; |
} |
if (c->sliceDir == 0) { |
if (srcSliceY == 0) c->sliceDir = 1; else c->sliceDir = -1; |
} |
if (usePal(c->srcFormat)) { |
for (i = 0; i < 256; i++) { |
int r, g, b, y, u, v, a = 0xff; |
if (c->srcFormat == AV_PIX_FMT_PAL8) { |
uint32_t p = ((const uint32_t *)(srcSlice[1]))[i]; |
a = (p >> 24) & 0xFF; |
r = (p >> 16) & 0xFF; |
g = (p >> 8) & 0xFF; |
b = p & 0xFF; |
} else if (c->srcFormat == AV_PIX_FMT_RGB8) { |
r = ( i >> 5 ) * 36; |
g = ((i >> 2) & 7) * 36; |
b = ( i & 3) * 85; |
} else if (c->srcFormat == AV_PIX_FMT_BGR8) { |
b = ( i >> 6 ) * 85; |
g = ((i >> 3) & 7) * 36; |
r = ( i & 7) * 36; |
} else if (c->srcFormat == AV_PIX_FMT_RGB4_BYTE) { |
r = ( i >> 3 ) * 255; |
g = ((i >> 1) & 3) * 85; |
b = ( i & 1) * 255; |
} else if (c->srcFormat == AV_PIX_FMT_GRAY8 || c->srcFormat == AV_PIX_FMT_GRAY8A) { |
r = g = b = i; |
} else { |
av_assert1(c->srcFormat == AV_PIX_FMT_BGR4_BYTE); |
b = ( i >> 3 ) * 255; |
g = ((i >> 1) & 3) * 85; |
r = ( i & 1) * 255; |
} |
#define RGB2YUV_SHIFT 15 |
#define BY ( (int) (0.114 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) |
#define BV (-(int) (0.081 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) |
#define BU ( (int) (0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) |
#define GY ( (int) (0.587 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) |
#define GV (-(int) (0.419 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) |
#define GU (-(int) (0.331 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) |
#define RY ( (int) (0.299 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) |
#define RV ( (int) (0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) |
#define RU (-(int) (0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) |
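/* BT.601 RGB->YUV coefficients scaled for limited-range output (luma |
* 16..235, chroma 16..240) in Q15 fixed point; the 33 << 14 and 257 << 14 |
* terms fold in the +16/+128 level offsets plus 0.5 for rounding */ |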
y = av_clip_uint8((RY * r + GY * g + BY * b + ( 33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT); |
u = av_clip_uint8((RU * r + GU * g + BU * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT); |
v = av_clip_uint8((RV * r + GV * g + BV * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT); |
c->pal_yuv[i]= y + (u<<8) + (v<<16) + ((unsigned)a<<24); |
switch (c->dstFormat) { |
case AV_PIX_FMT_BGR32: |
#if !HAVE_BIGENDIAN |
case AV_PIX_FMT_RGB24: |
#endif |
c->pal_rgb[i]= r + (g<<8) + (b<<16) + ((unsigned)a<<24); |
break; |
case AV_PIX_FMT_BGR32_1: |
#if HAVE_BIGENDIAN |
case AV_PIX_FMT_BGR24: |
#endif |
c->pal_rgb[i]= a + (r<<8) + (g<<16) + ((unsigned)b<<24); |
break; |
case AV_PIX_FMT_RGB32_1: |
#if HAVE_BIGENDIAN |
case AV_PIX_FMT_RGB24: |
#endif |
c->pal_rgb[i]= a + (b<<8) + (g<<16) + ((unsigned)r<<24); |
break; |
case AV_PIX_FMT_RGB32: |
#if !HAVE_BIGENDIAN |
case AV_PIX_FMT_BGR24: |
#endif |
default: |
c->pal_rgb[i]= b + (g<<8) + (r<<16) + ((unsigned)a<<24); |
} |
} |
} |
if (c->src0Alpha && !c->dst0Alpha && isALPHA(c->dstFormat)) { |
uint8_t *base; |
int x,y; |
rgb0_tmp = av_malloc(FFABS(srcStride[0]) * srcSliceH + 32); |
if (!rgb0_tmp) |
return AVERROR(ENOMEM); |
base = srcStride[0] < 0 ? rgb0_tmp - srcStride[0] * (srcSliceH-1) : rgb0_tmp; |
for (y=0; y<srcSliceH; y++){ |
memcpy(base + srcStride[0]*y, src2[0] + srcStride[0]*y, 4*c->srcW); |
for (x=c->src0Alpha-1; x<4*c->srcW; x+=4) { |
base[ srcStride[0]*y + x] = 0xFF; |
} |
} |
src2[0] = base; |
} |
if (c->srcXYZ && !(c->dstXYZ && c->srcW==c->dstW && c->srcH==c->dstH)) { |
uint8_t *base; |
rgb0_tmp = av_malloc(FFABS(srcStride[0]) * srcSliceH + 32); |
if (!rgb0_tmp) |
return AVERROR(ENOMEM); |
base = srcStride[0] < 0 ? rgb0_tmp - srcStride[0] * (srcSliceH-1) : rgb0_tmp; |
xyz12Torgb48(c, (uint16_t*)base, (const uint16_t*)src2[0], srcStride[0]/2, srcSliceH); |
src2[0] = base; |
} |
if (!srcSliceY && (c->flags & SWS_BITEXACT) && c->dither == SWS_DITHER_ED && c->dither_error[0]) |
for (i = 0; i < 4; i++) |
memset(c->dither_error[i], 0, sizeof(c->dither_error[0][0]) * (c->dstW+2)); |
// copy strides, so they can safely be modified |
if (c->sliceDir == 1) { |
// slices go from top to bottom |
int srcStride2[4] = { srcStride[0], srcStride[1], srcStride[2], |
srcStride[3] }; |
int dstStride2[4] = { dstStride[0], dstStride[1], dstStride[2], |
dstStride[3] }; |
reset_ptr(src2, c->srcFormat); |
reset_ptr((void*)dst2, c->dstFormat); |
/* reset slice direction at end of frame */ |
if (srcSliceY + srcSliceH == c->srcH) |
c->sliceDir = 0; |
ret = c->swscale(c, src2, srcStride2, srcSliceY, srcSliceH, dst2, |
dstStride2); |
} else { |
// slices go from bottom to top => we flip the image internally |
int srcStride2[4] = { -srcStride[0], -srcStride[1], -srcStride[2], |
-srcStride[3] }; |
int dstStride2[4] = { -dstStride[0], -dstStride[1], -dstStride[2], |
-dstStride[3] }; |
src2[0] += (srcSliceH - 1) * srcStride[0]; |
if (!usePal(c->srcFormat)) |
src2[1] += ((srcSliceH >> c->chrSrcVSubSample) - 1) * srcStride[1]; |
src2[2] += ((srcSliceH >> c->chrSrcVSubSample) - 1) * srcStride[2]; |
src2[3] += (srcSliceH - 1) * srcStride[3]; |
dst2[0] += ( c->dstH - 1) * dstStride[0]; |
dst2[1] += ((c->dstH >> c->chrDstVSubSample) - 1) * dstStride[1]; |
dst2[2] += ((c->dstH >> c->chrDstVSubSample) - 1) * dstStride[2]; |
dst2[3] += ( c->dstH - 1) * dstStride[3]; |
reset_ptr(src2, c->srcFormat); |
reset_ptr((void*)dst2, c->dstFormat); |
/* reset slice direction at end of frame */ |
if (!srcSliceY) |
c->sliceDir = 0; |
ret = c->swscale(c, src2, srcStride2, c->srcH-srcSliceY-srcSliceH, |
srcSliceH, dst2, dstStride2); |
} |
if (c->dstXYZ && !(c->srcXYZ && c->srcW==c->dstW && c->srcH==c->dstH)) { |
/* convert in place on the same data */ |
rgb48Toxyz12(c, (uint16_t*)dst2[0], (const uint16_t*)dst2[0], dstStride[0]/2, ret); |
} |
av_free(rgb0_tmp); |
return ret; |
} |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/swscale.h |
---|
0,0 → 1,365 |
/* |
* Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at> |
* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
#ifndef SWSCALE_SWSCALE_H |
#define SWSCALE_SWSCALE_H |
/** |
* @file |
* @ingroup libsws |
* external API header |
*/ |
#include <stdint.h> |
#include "libavutil/avutil.h" |
#include "libavutil/log.h" |
#include "libavutil/pixfmt.h" |
#include "version.h" |
/** |
* @defgroup libsws Color conversion and scaling |
* @{ |
*/ |
/** |
* Return the LIBSWSCALE_VERSION_INT constant. |
*/ |
unsigned swscale_version(void); |
/** |
* Return the libswscale build-time configuration. |
*/ |
const char *swscale_configuration(void); |
/** |
* Return the libswscale license. |
*/ |
const char *swscale_license(void); |
/* values for the flags, the stuff on the command line is different */ |
#define SWS_FAST_BILINEAR 1 |
#define SWS_BILINEAR 2 |
#define SWS_BICUBIC 4 |
#define SWS_X 8 |
#define SWS_POINT 0x10 |
#define SWS_AREA 0x20 |
#define SWS_BICUBLIN 0x40 |
#define SWS_GAUSS 0x80 |
#define SWS_SINC 0x100 |
#define SWS_LANCZOS 0x200 |
#define SWS_SPLINE 0x400 |
#define SWS_SRC_V_CHR_DROP_MASK 0x30000 |
#define SWS_SRC_V_CHR_DROP_SHIFT 16 |
#define SWS_PARAM_DEFAULT 123456 |
#define SWS_PRINT_INFO 0x1000 |
//the following 3 flags are not completely implemented |
//internal chrominance subsampling info |
#define SWS_FULL_CHR_H_INT 0x2000 |
//input subsampling info |
#define SWS_FULL_CHR_H_INP 0x4000 |
#define SWS_DIRECT_BGR 0x8000 |
#define SWS_ACCURATE_RND 0x40000 |
#define SWS_BITEXACT 0x80000 |
#define SWS_ERROR_DIFFUSION 0x800000 |
#if FF_API_SWS_CPU_CAPS |
/** |
* CPU caps are autodetected now, those flags |
* are only provided for API compatibility. |
*/ |
#define SWS_CPU_CAPS_MMX 0x80000000 |
#define SWS_CPU_CAPS_MMXEXT 0x20000000 |
#define SWS_CPU_CAPS_MMX2 0x20000000 |
#define SWS_CPU_CAPS_3DNOW 0x40000000 |
#define SWS_CPU_CAPS_ALTIVEC 0x10000000 |
#if FF_API_ARCH_BFIN |
#define SWS_CPU_CAPS_BFIN 0x01000000 |
#endif |
#define SWS_CPU_CAPS_SSE2 0x02000000 |
#endif |
#define SWS_MAX_REDUCE_CUTOFF 0.002 |
#define SWS_CS_ITU709 1 |
#define SWS_CS_FCC 4 |
#define SWS_CS_ITU601 5 |
#define SWS_CS_ITU624 5 |
#define SWS_CS_SMPTE170M 5 |
#define SWS_CS_SMPTE240M 7 |
#define SWS_CS_DEFAULT 5 |
/** |
* Return a pointer to yuv<->rgb coefficients for the given colorspace |
* suitable for sws_setColorspaceDetails(). |
* |
* @param colorspace One of the SWS_CS_* macros. If invalid, |
* SWS_CS_DEFAULT is used. |
*/ |
const int *sws_getCoefficients(int colorspace); |
// when used for filters they must have an odd number of elements |
// coeffs cannot be shared between vectors |
typedef struct SwsVector { |
double *coeff; ///< pointer to the list of coefficients |
int length; ///< number of coefficients in the vector |
} SwsVector; |
// vectors can be shared |
typedef struct SwsFilter { |
SwsVector *lumH; |
SwsVector *lumV; |
SwsVector *chrH; |
SwsVector *chrV; |
} SwsFilter; |
struct SwsContext; |
/** |
* Return a positive value if pix_fmt is a supported input format, 0 |
* otherwise. |
*/ |
int sws_isSupportedInput(enum AVPixelFormat pix_fmt); |
/** |
* Return a positive value if pix_fmt is a supported output format, 0 |
* otherwise. |
*/ |
int sws_isSupportedOutput(enum AVPixelFormat pix_fmt); |
/** |
* @param[in] pix_fmt the pixel format |
* @return a positive value if an endianness conversion for pix_fmt is |
* supported, 0 otherwise. |
*/ |
int sws_isSupportedEndiannessConversion(enum AVPixelFormat pix_fmt); |
/** |
* Allocate an empty SwsContext. This must be filled and passed to |
* sws_init_context(). For filling see AVOptions, options.c and |
* sws_setColorspaceDetails(). |
*/ |
struct SwsContext *sws_alloc_context(void); |
/** |
* Initialize the swscaler context sws_context. |
* |
* @return zero or positive value on success, a negative value on |
* error |
*/ |
int sws_init_context(struct SwsContext *sws_context, SwsFilter *srcFilter, SwsFilter *dstFilter); |
/** |
* Free the swscaler context swsContext. |
* If swsContext is NULL, this function does nothing. |
*/ |
void sws_freeContext(struct SwsContext *swsContext); |
/** |
* Allocate and return an SwsContext. You need it to perform |
* scaling/conversion operations using sws_scale(). |
* |
* @param srcW the width of the source image |
* @param srcH the height of the source image |
* @param srcFormat the source image format |
* @param dstW the width of the destination image |
* @param dstH the height of the destination image |
* @param dstFormat the destination image format |
* @param flags specify which algorithm and options to use for rescaling |
* @param param extra parameters to tune the used scaler |
* For SWS_BICUBIC param[0] and [1] tune the shape of the basis |
* function, param[0] tunes f(1) and param[1] f'(1) |
* For SWS_GAUSS param[0] tunes the exponent and thus cutoff |
* frequency |
* For SWS_LANCZOS param[0] tunes the width of the window function |
* @return a pointer to an allocated context, or NULL in case of error |
* @note this function is to be removed after a saner alternative is |
* written |
*/ |
struct SwsContext *sws_getContext(int srcW, int srcH, enum AVPixelFormat srcFormat, |
int dstW, int dstH, enum AVPixelFormat dstFormat, |
int flags, SwsFilter *srcFilter, |
SwsFilter *dstFilter, const double *param); |
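/* |
* Example (a minimal sketch, not part of the original header): create a |
* context converting 640x480 RGB24 to 320x240 YUV420P with bicubic |
* scaling. |
* @code |
* struct SwsContext *ctx = sws_getContext(640, 480, AV_PIX_FMT_RGB24, |
*                                         320, 240, AV_PIX_FMT_YUV420P, |
*                                         SWS_BICUBIC, NULL, NULL, NULL); |
* if (!ctx) |
*     return AVERROR(EINVAL); |
* @endcode |
*/ |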
/** |
* Scale the image slice in srcSlice and put the resulting scaled |
* slice in the image in dst. A slice is a sequence of consecutive |
* rows in an image. |
* |
* Slices have to be provided in sequential order, either in |
* top-bottom or bottom-top order. If slices are provided in |
* non-sequential order the behavior of the function is undefined. |
* |
* @param c the scaling context previously created with |
* sws_getContext() |
* @param srcSlice the array containing the pointers to the planes of |
* the source slice |
* @param srcStride the array containing the strides for each plane of |
* the source image |
* @param srcSliceY the position in the source image of the slice to |
* process, that is the number (counted starting from |
* zero) in the image of the first row of the slice |
* @param srcSliceH the height of the source slice, that is the number |
* of rows in the slice |
* @param dst the array containing the pointers to the planes of |
* the destination image |
* @param dstStride the array containing the strides for each plane of |
* the destination image |
* @return the height of the output slice |
*/ |
int sws_scale(struct SwsContext *c, const uint8_t *const srcSlice[], |
const int srcStride[], int srcSliceY, int srcSliceH, |
uint8_t *const dst[], const int dstStride[]); |
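/* |
* Example (a minimal sketch): scale a whole frame as a single slice; the |
* src_data/src_linesize and dst_data/dst_linesize arrays are hypothetical |
* caller-owned buffers, e.g. filled by av_image_alloc(). |
* @code |
* int out_h = sws_scale(ctx, (const uint8_t * const *)src_data, |
*                       src_linesize, 0, src_h, dst_data, dst_linesize); |
* if (out_h <= 0) |
*     av_log(NULL, AV_LOG_ERROR, "scaling failed\n"); |
* @endcode |
*/ |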
/** |
* @param dstRange flag indicating the white-black range of the output (1=jpeg / 0=mpeg) |
* @param srcRange flag indicating the white-black range of the input (1=jpeg / 0=mpeg) |
* @param table the yuv2rgb coefficients describing the output yuv space, normally ff_yuv2rgb_coeffs[x] |
* @param inv_table the yuv2rgb coefficients describing the input yuv space, normally ff_yuv2rgb_coeffs[x] |
* @param brightness 16.16 fixed point brightness correction |
* @param contrast 16.16 fixed point contrast correction |
* @param saturation 16.16 fixed point saturation correction |
* @return -1 if not supported |
*/ |
int sws_setColorspaceDetails(struct SwsContext *c, const int inv_table[4], |
int srcRange, const int table[4], int dstRange, |
int brightness, int contrast, int saturation); |
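/* |
* Example (a minimal sketch): request BT.709 output coefficients and |
* full-range output while keeping brightness, contrast and saturation |
* neutral (0, 1.0 and 1.0 in 16.16 fixed point). |
* @code |
* int ret = sws_setColorspaceDetails(ctx, |
*                                    sws_getCoefficients(SWS_CS_ITU601), 0, |
*                                    sws_getCoefficients(SWS_CS_ITU709), 1, |
*                                    0, 1 << 16, 1 << 16); |
* if (ret < 0) |
*     av_log(NULL, AV_LOG_WARNING, "colorspace details not supported\n"); |
* @endcode |
*/ |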
/** |
* @return -1 if not supported |
*/ |
int sws_getColorspaceDetails(struct SwsContext *c, int **inv_table, |
int *srcRange, int **table, int *dstRange, |
int *brightness, int *contrast, int *saturation); |
/** |
* Allocate and return an uninitialized vector with length coefficients. |
*/ |
SwsVector *sws_allocVec(int length); |
/** |
* Return a normalized Gaussian curve used for filtering. |
* quality = 3 is high quality, lower is lower quality. |
*/ |
SwsVector *sws_getGaussianVec(double variance, double quality); |
/** |
* Allocate and return a vector with length coefficients, all |
* with the same value c. |
*/ |
SwsVector *sws_getConstVec(double c, int length); |
/** |
* Allocate and return a vector with just one coefficient, with |
* value 1.0. |
*/ |
SwsVector *sws_getIdentityVec(void); |
/** |
* Scale all the coefficients of a by the scalar value. |
*/ |
void sws_scaleVec(SwsVector *a, double scalar); |
/** |
* Scale all the coefficients of a so that their sum equals height. |
*/ |
void sws_normalizeVec(SwsVector *a, double height); |
void sws_convVec(SwsVector *a, SwsVector *b);  ///< Convolve a with b; the result is stored in a. |
void sws_addVec(SwsVector *a, SwsVector *b);   ///< Add b to a; the result is stored in a. |
void sws_subVec(SwsVector *a, SwsVector *b);   ///< Subtract b from a; the result is stored in a. |
void sws_shiftVec(SwsVector *a, int shift);    ///< Shift the coefficients of a by shift positions. |
/** |
* Allocate and return a clone of the vector a, that is a vector |
* with the same coefficients as a. |
*/ |
SwsVector *sws_cloneVec(SwsVector *a); |
/** |
* Print with av_log() a textual representation of the vector a |
* if log_level <= av_log_level. |
*/ |
void sws_printVec2(SwsVector *a, AVClass *log_ctx, int log_level); |
void sws_freeVec(SwsVector *a); |
SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur, |
float lumaSharpen, float chromaSharpen, |
float chromaHShift, float chromaVShift, |
int verbose); |
void sws_freeFilter(SwsFilter *filter); |
/** |
* Check if context can be reused, otherwise reallocate a new one. |
* |
* If context is NULL, just calls sws_getContext() to get a new |
* context. Otherwise, checks if the parameters are the ones already |
* saved in context. If that is the case, returns the current |
* context. Otherwise, frees context and gets a new context with |
* the new parameters. |
* |
* Be warned that srcFilter and dstFilter are not checked, they |
* are assumed to remain the same. |
*/ |
struct SwsContext *sws_getCachedContext(struct SwsContext *context, |
int srcW, int srcH, enum AVPixelFormat srcFormat, |
int dstW, int dstH, enum AVPixelFormat dstFormat, |
int flags, SwsFilter *srcFilter, |
SwsFilter *dstFilter, const double *param); |
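/* |
* Example (a minimal sketch): the typical per-frame reuse pattern; ctx may |
* be NULL on the first call and is only reallocated when a parameter |
* changes (in_w, in_h, in_fmt etc. are hypothetical caller variables). |
* @code |
* ctx = sws_getCachedContext(ctx, in_w, in_h, in_fmt, |
*                            out_w, out_h, out_fmt, |
*                            SWS_BILINEAR, NULL, NULL, NULL); |
* if (!ctx) |
*     return AVERROR(ENOMEM); |
* @endcode |
*/ |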
/** |
* Convert an 8-bit paletted frame into a frame with a color depth of 32 bits. |
* |
* The output frame will have the same packed format as the palette. |
* |
* @param src source frame buffer |
* @param dst destination frame buffer |
* @param num_pixels number of pixels to convert |
* @param palette array with [256] entries, which must match color arrangement (RGB or BGR) of src |
*/ |
void sws_convertPalette8ToPacked32(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette); |
/** |
* Convert an 8-bit paletted frame into a frame with a color depth of 24 bits. |
* |
* With the palette format "ABCD", the destination frame ends up with the format "ABC". |
* |
* @param src source frame buffer |
* @param dst destination frame buffer |
* @param num_pixels number of pixels to convert |
* @param palette array with [256] entries, which must match color arrangement (RGB or BGR) of src |
*/ |
void sws_convertPalette8ToPacked24(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette); |
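/* |
* Example (a minimal sketch): expand a PAL8 image to packed 32-bit pixels, |
* assuming the stride equals the width; for AVFrame input the palette is |
* carried in data[1]. |
* @code |
* sws_convertPalette8ToPacked32(frame->data[0], out_buf, |
*                               frame->width * frame->height, |
*                               frame->data[1]); |
* @endcode |
*/ |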
/** |
* Get the AVClass for swsContext. It can be used in combination with |
* AV_OPT_SEARCH_FAKE_OBJ for examining options. |
* |
* @see av_opt_find(). |
*/ |
const AVClass *sws_get_class(void); |
/** |
* @} |
*/ |
#endif /* SWSCALE_SWSCALE_H */ |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/swscale_internal.h |
---|
0,0 → 1,1056 |
/* |
* Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at> |
* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
#ifndef SWSCALE_SWSCALE_INTERNAL_H |
#define SWSCALE_SWSCALE_INTERNAL_H |
#include "config.h" |
#if HAVE_ALTIVEC_H |
#include <altivec.h> |
#endif |
#include "version.h" |
#include "libavutil/avassert.h" |
#include "libavutil/avutil.h" |
#include "libavutil/common.h" |
#include "libavutil/intreadwrite.h" |
#include "libavutil/log.h" |
#include "libavutil/pixfmt.h" |
#include "libavutil/pixdesc.h" |
#define STR(s) AV_TOSTRING(s) // AV_STRINGIFY is too long |
#define YUVRGB_TABLE_HEADROOM 512 |
#define YUVRGB_TABLE_LUMA_HEADROOM 512 |
#define MAX_FILTER_SIZE SWS_MAX_FILTER_SIZE |
#define DITHER1XBPP |
#if HAVE_BIGENDIAN |
#define ALT32_CORR (-1) |
#else |
#define ALT32_CORR 1 |
#endif |
#if ARCH_X86_64 |
# define APCK_PTR2 8 |
# define APCK_COEF 16 |
# define APCK_SIZE 24 |
#else |
# define APCK_PTR2 4 |
# define APCK_COEF 8 |
# define APCK_SIZE 16 |
#endif |
#define RETCODE_USE_CASCADE -12345 |
struct SwsContext; |
typedef enum SwsDither { |
SWS_DITHER_NONE = 0, |
SWS_DITHER_AUTO, |
SWS_DITHER_BAYER, |
SWS_DITHER_ED, |
SWS_DITHER_A_DITHER, |
SWS_DITHER_X_DITHER, |
NB_SWS_DITHER, |
} SwsDither; |
typedef enum SwsAlphaBlend { |
SWS_ALPHA_BLEND_NONE = 0, |
SWS_ALPHA_BLEND_UNIFORM, |
SWS_ALPHA_BLEND_CHECKERBOARD, |
SWS_ALPHA_BLEND_NB, |
} SwsAlphaBlend; |
typedef int (*SwsFunc)(struct SwsContext *context, const uint8_t *src[], |
int srcStride[], int srcSliceY, int srcSliceH, |
uint8_t *dst[], int dstStride[]); |
/** |
* Write one line of horizontally scaled data to planar output |
* without any additional vertical scaling (or point-scaling). |
* |
* @param src scaled source data, 15bit for 8-10bit output, |
* 19-bit for 16bit output (in int32_t) |
* @param dest pointer to the output plane. For >8bit |
* output, this is in uint16_t |
* @param dstW width of destination in pixels |
* @param dither ordered dither array of type int16_t and size 8 |
* @param offset Dither offset |
*/ |
typedef void (*yuv2planar1_fn)(const int16_t *src, uint8_t *dest, int dstW, |
const uint8_t *dither, int offset); |
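/* |
 * For reference, the scalar 8-bit implementation of this signature in |
 * output.c reduces to the following (SIMD versions are equivalent): |
 * @code |
 * for (i = 0; i < dstW; i++) |
 *     dest[i] = av_clip_uint8((src[i] + dither[(i + offset) & 7]) >> 7); |
 * @endcode |
 */ |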
/** |
* Write one line of horizontally scaled data to planar output |
* with multi-point vertical scaling between input pixels. |
* |
* @param filter vertical luma/alpha scaling coefficients, 12bit [0,4096] |
* @param src scaled luma (Y) or alpha (A) source data, 15bit for 8-10bit output, |
* 19-bit for 16bit output (in int32_t) |
* @param filterSize number of vertical input lines to scale |
* @param dest pointer to output plane. For >8bit |
* output, this is in uint16_t |
* @param dstW width of destination in pixels |
* @param offset Dither offset |
*/ |
typedef void (*yuv2planarX_fn)(const int16_t *filter, int filterSize, |
const int16_t **src, uint8_t *dest, int dstW, |
const uint8_t *dither, int offset); |
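/* |
 * Reference behaviour for the 8-bit case (mirrors the scalar C version in |
 * output.c; higher bit depths use wider intermediates): |
 * @code |
 * for (i = 0; i < dstW; i++) { |
 *     int val = dither[(i + offset) & 7] << 12; // rounding + ordered dither |
 *     for (j = 0; j < filterSize; j++) |
 *         val += src[j][i] * filter[j];         // 15-bit * 12-bit products |
 *     dest[i] = av_clip_uint8(val >> 19); |
 * } |
 * @endcode |
 */ |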
/** |
* Write one line of horizontally scaled chroma to interleaved output |
* with multi-point vertical scaling between input pixels. |
* |
* @param c SWS scaling context |
* @param chrFilter vertical chroma scaling coefficients, 12bit [0,4096] |
* @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output, |
* 19-bit for 16bit output (in int32_t) |
* @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output, |
* 19-bit for 16bit output (in int32_t) |
* @param chrFilterSize number of vertical chroma input lines to scale |
* @param dest pointer to the output plane. For >8bit |
* output, this is in uint16_t |
* @param dstW width of chroma planes |
*/ |
typedef void (*yuv2interleavedX_fn)(struct SwsContext *c, |
const int16_t *chrFilter, |
int chrFilterSize, |
const int16_t **chrUSrc, |
const int16_t **chrVSrc, |
uint8_t *dest, int dstW); |
/** |
* Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB |
* output without any additional vertical scaling (or point-scaling). Note |
* that this function may do chroma scaling, see the "uvalpha" argument. |
* |
* @param c SWS scaling context |
* @param lumSrc scaled luma (Y) source data, 15bit for 8-10bit output, |
* 19-bit for 16bit output (in int32_t) |
* @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output, |
* 19-bit for 16bit output (in int32_t) |
* @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output, |
* 19-bit for 16bit output (in int32_t) |
* @param alpSrc scaled alpha (A) source data, 15bit for 8-10bit output, |
* 19-bit for 16bit output (in int32_t) |
* @param dest pointer to the output plane. For 16bit output, this is |
* uint16_t |
* @param dstW width of lumSrc and alpSrc in pixels, number of pixels |
* to write into dest[] |
* @param uvalpha chroma scaling coefficient for the second line of chroma |
* pixels, either 2048 or 0. If 0, one chroma input is used |
* for 2 output pixels (or if the SWS_FLAG_FULL_CHR_INT flag |
* is set, it generates 1 output pixel). If 2048, two chroma |
* input pixels should be averaged for 2 output pixels (this |
* only happens if SWS_FLAG_FULL_CHR_INT is not set) |
* @param y vertical line number for this output. This does not need |
* to be used to calculate the offset in the destination, |
* but can be used to generate comfort noise using dithering |
* for some output formats. |
*/ |
typedef void (*yuv2packed1_fn)(struct SwsContext *c, const int16_t *lumSrc, |
const int16_t *chrUSrc[2], |
const int16_t *chrVSrc[2], |
const int16_t *alpSrc, uint8_t *dest, |
int dstW, int uvalpha, int y); |
/** |
* Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB |
* output by doing bilinear scaling between two input lines. |
* |
* @param c SWS scaling context |
* @param lumSrc scaled luma (Y) source data, 15bit for 8-10bit output, |
* 19-bit for 16bit output (in int32_t) |
* @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output, |
* 19-bit for 16bit output (in int32_t) |
* @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output, |
* 19-bit for 16bit output (in int32_t) |
* @param alpSrc scaled alpha (A) source data, 15bit for 8-10bit output, |
* 19-bit for 16bit output (in int32_t) |
* @param dest pointer to the output plane. For 16bit output, this is |
* uint16_t |
* @param dstW width of lumSrc and alpSrc in pixels, number of pixels |
* to write into dest[] |
* @param yalpha luma/alpha scaling coefficients for the second input line. |
* The first line's coefficients can be calculated by using |
* 4096 - yalpha |
* @param uvalpha chroma scaling coefficient for the second input line. The |
* first line's coefficients can be calculated by using |
* 4096 - uvalpha |
* @param y vertical line number for this output. This does not need |
* to be used to calculate the offset in the destination, |
* but can be used to generate comfort noise using dithering |
* for some output formats. |
*/ |
typedef void (*yuv2packed2_fn)(struct SwsContext *c, const int16_t *lumSrc[2], |
const int16_t *chrUSrc[2], |
const int16_t *chrVSrc[2], |
const int16_t *alpSrc[2], |
uint8_t *dest, |
int dstW, int yalpha, int uvalpha, int y); |
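/* |
 * The two-line blend these functions perform for each luma sample reduces |
 * to the following sketch (matching the scalar 8-bit RGB template in |
 * output.c): 15-bit samples times 12-bit coefficients, shifted back down: |
 * @code |
 * Y = (lumSrc[0][i] * (4096 - yalpha) + lumSrc[1][i] * yalpha) >> 19; |
 * @endcode |
 */ |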
/** |
* Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB |
* output by doing multi-point vertical scaling between input pixels. |
* |
* @param c SWS scaling context |
* @param lumFilter vertical luma/alpha scaling coefficients, 12bit [0,4096] |
* @param lumSrc scaled luma (Y) source data, 15bit for 8-10bit output, |
* 19-bit for 16bit output (in int32_t) |
* @param lumFilterSize number of vertical luma/alpha input lines to scale |
* @param chrFilter vertical chroma scaling coefficients, 12bit [0,4096] |
* @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output, |
* 19-bit for 16bit output (in int32_t) |
* @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output, |
* 19-bit for 16bit output (in int32_t) |
* @param chrFilterSize number of vertical chroma input lines to scale |
* @param alpSrc scaled alpha (A) source data, 15bit for 8-10bit output, |
* 19-bit for 16bit output (in int32_t) |
* @param dest pointer to the output plane. For 16bit output, this is |
* uint16_t |
* @param dstW width of lumSrc and alpSrc in pixels, number of pixels |
* to write into dest[] |
* @param y vertical line number for this output. This does not need |
* to be used to calculate the offset in the destination, |
* but can be used to generate comfort noise using dithering |
* for some output formats. |
*/ |
typedef void (*yuv2packedX_fn)(struct SwsContext *c, const int16_t *lumFilter, |
const int16_t **lumSrc, int lumFilterSize, |
const int16_t *chrFilter, |
const int16_t **chrUSrc, |
const int16_t **chrVSrc, int chrFilterSize, |
const int16_t **alpSrc, uint8_t *dest, |
int dstW, int y); |
/** |
* Write one line of horizontally scaled Y/U/V/A to YUV/RGB |
* output by doing multi-point vertical scaling between input pixels. |
* |
* @param c SWS scaling context |
* @param lumFilter vertical luma/alpha scaling coefficients, 12bit [0,4096] |
* @param lumSrc scaled luma (Y) source data, 15bit for 8-10bit output, |
* 19-bit for 16bit output (in int32_t) |
* @param lumFilterSize number of vertical luma/alpha input lines to scale |
* @param chrFilter vertical chroma scaling coefficients, 12bit [0,4096] |
* @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output, |
* 19-bit for 16bit output (in int32_t) |
* @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output, |
* 19-bit for 16bit output (in int32_t) |
* @param chrFilterSize number of vertical chroma input lines to scale |
* @param alpSrc scaled alpha (A) source data, 15bit for 8-10bit output, |
* 19-bit for 16bit output (in int32_t) |
* @param dest pointer to the output planes. For 16bit output, this is |
* uint16_t |
* @param dstW width of lumSrc and alpSrc in pixels, number of pixels |
* to write into dest[] |
* @param y vertical line number for this output. This does not need |
* to be used to calculate the offset in the destination, |
* but can be used to generate comfort noise using dithering |
* for some output formats. |
*/ |
typedef void (*yuv2anyX_fn)(struct SwsContext *c, const int16_t *lumFilter, |
const int16_t **lumSrc, int lumFilterSize, |
const int16_t *chrFilter, |
const int16_t **chrUSrc, |
const int16_t **chrVSrc, int chrFilterSize, |
const int16_t **alpSrc, uint8_t **dest, |
int dstW, int y); |
struct SwsSlice; |
struct SwsFilterDescriptor; |
/* This struct should be aligned on at least a 32-byte boundary. */ |
typedef struct SwsContext { |
/** |
* info on struct for av_log |
*/ |
const AVClass *av_class; |
/** |
* Note that src, dst, srcStride, dstStride will be copied in the |
* sws_scale() wrapper so they can be freely modified here. |
*/ |
SwsFunc swscale; |
int srcW; ///< Width of source luma/alpha planes. |
int srcH; ///< Height of source luma/alpha planes. |
int dstH; ///< Height of destination luma/alpha planes. |
int chrSrcW; ///< Width of source chroma planes. |
int chrSrcH; ///< Height of source chroma planes. |
int chrDstW; ///< Width of destination chroma planes. |
int chrDstH; ///< Height of destination chroma planes. |
int lumXInc, chrXInc; |
int lumYInc, chrYInc; |
enum AVPixelFormat dstFormat; ///< Destination pixel format. |
enum AVPixelFormat srcFormat; ///< Source pixel format. |
int dstFormatBpp; ///< Number of bits per pixel of the destination pixel format. |
int srcFormatBpp; ///< Number of bits per pixel of the source pixel format. |
int dstBpc, srcBpc; |
int chrSrcHSubSample; ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in source image. |
int chrSrcVSubSample; ///< Binary logarithm of vertical subsampling factor between luma/alpha and chroma planes in source image. |
int chrDstHSubSample; ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in destination image. |
int chrDstVSubSample; ///< Binary logarithm of vertical subsampling factor between luma/alpha and chroma planes in destination image. |
int vChrDrop; ///< Binary logarithm of extra vertical subsampling factor in source image chroma planes specified by user. |
int sliceDir; ///< Direction that slices are fed to the scaler (1 = top-to-bottom, -1 = bottom-to-top). |
double param[2]; ///< Input parameters for scaling algorithms that need them. |
/* The cascaded_* fields allow splitting a scaler task into multiple |
* sequential steps; this is for example used to limit the maximum |
* downscaling factor that needs to be supported in one scaler. |
*/ |
struct SwsContext *cascaded_context[3]; |
int cascaded_tmpStride[4]; |
uint8_t *cascaded_tmp[4]; |
int cascaded1_tmpStride[4]; |
uint8_t *cascaded1_tmp[4]; |
double gamma_value; |
int gamma_flag; |
int is_internal_gamma; |
uint16_t *gamma; |
uint16_t *inv_gamma; |
int numDesc; |
int descIndex[2]; |
int numSlice; |
struct SwsSlice *slice; |
struct SwsFilterDescriptor *desc; |
uint32_t pal_yuv[256]; |
uint32_t pal_rgb[256]; |
/** |
* @name Scaled horizontal lines ring buffer. |
* The horizontal scaler keeps just enough scaled lines in a ring buffer |
* so they may be passed to the vertical scaler. The pointers to the |
* allocated buffers for each line are duplicated in sequence in the ring |
* buffer to simplify indexing and avoid wrapping around between lines |
* inside the vertical scaler code. The wrapping is done before the |
* vertical scaler is called. |
*/ |
//@{ |
int16_t **lumPixBuf; ///< Ring buffer for scaled horizontal luma plane lines to be fed to the vertical scaler. |
int16_t **chrUPixBuf; ///< Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler. |
int16_t **chrVPixBuf; ///< Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler. |
int16_t **alpPixBuf; ///< Ring buffer for scaled horizontal alpha plane lines to be fed to the vertical scaler. |
int vLumBufSize; ///< Number of vertical luma/alpha lines allocated in the ring buffer. |
int vChrBufSize; ///< Number of vertical chroma lines allocated in the ring buffer. |
int lastInLumBuf; ///< Last scaled horizontal luma/alpha line from source in the ring buffer. |
int lastInChrBuf; ///< Last scaled horizontal chroma line from source in the ring buffer. |
int lumBufIndex; ///< Index in ring buffer of the last scaled horizontal luma/alpha line from source. |
int chrBufIndex; ///< Index in ring buffer of the last scaled horizontal chroma line from source. |
//@} |
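/* |
 * Illustrative consequence of the pointer duplication (a sketch, assuming |
 * lumPixBuf[i] == lumPixBuf[i + vLumBufSize]): the vertical filter can |
 * read vLumFilterSize consecutive entries starting anywhere in the first |
 * half of the ring without a modulo: |
 * @code |
 * for (j = 0; j < vLumFilterSize; j++) |
 *     val += lumPixBuf[firstLine + j][x] * |
 *            vLumFilter[dstY * vLumFilterSize + j]; // no wrap check |
 * @endcode |
 */ |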
uint8_t *formatConvBuffer; |
/** |
* @name Horizontal and vertical filters. |
* To better understand the following fields, here is pseudo-code of |
* their usage in filtering a horizontal line: |
* @code |
* for (i = 0; i < width; i++) { |
* dst[i] = 0; |
* for (j = 0; j < filterSize; j++) |
* dst[i] += src[ filterPos[i] + j ] * filter[ filterSize * i + j ]; |
* dst[i] >>= FRAC_BITS; // The actual implementation is fixed-point. |
* } |
* @endcode |
*/ |
//@{ |
int16_t *hLumFilter; ///< Array of horizontal filter coefficients for luma/alpha planes. |
int16_t *hChrFilter; ///< Array of horizontal filter coefficients for chroma planes. |
int16_t *vLumFilter; ///< Array of vertical filter coefficients for luma/alpha planes. |
int16_t *vChrFilter; ///< Array of vertical filter coefficients for chroma planes. |
int32_t *hLumFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for luma/alpha planes. |
int32_t *hChrFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for chroma planes. |
int32_t *vLumFilterPos; ///< Array of vertical filter starting positions for each dst[i] for luma/alpha planes. |
int32_t *vChrFilterPos; ///< Array of vertical filter starting positions for each dst[i] for chroma planes. |
int hLumFilterSize; ///< Horizontal filter size for luma/alpha pixels. |
int hChrFilterSize; ///< Horizontal filter size for chroma pixels. |
int vLumFilterSize; ///< Vertical filter size for luma/alpha pixels. |
int vChrFilterSize; ///< Vertical filter size for chroma pixels. |
//@} |
int lumMmxextFilterCodeSize; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code size for luma/alpha planes. |
int chrMmxextFilterCodeSize; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code size for chroma planes. |
uint8_t *lumMmxextFilterCode; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code for luma/alpha planes. |
uint8_t *chrMmxextFilterCode; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code for chroma planes. |
int canMMXEXTBeUsed; |
int warned_unuseable_bilinear; |
int dstY; ///< Last destination vertical line output from last slice. |
int flags; ///< Flags passed by the user to select scaler algorithm, optimizations, subsampling, etc... |
void *yuvTable; // pointer to the yuv->rgb table start so it can be freed() |
// alignment ensures the offset can be added in a single |
// instruction on e.g. ARM |
DECLARE_ALIGNED(16, int, table_gV)[256 + 2*YUVRGB_TABLE_HEADROOM]; |
uint8_t *table_rV[256 + 2*YUVRGB_TABLE_HEADROOM]; |
uint8_t *table_gU[256 + 2*YUVRGB_TABLE_HEADROOM]; |
uint8_t *table_bU[256 + 2*YUVRGB_TABLE_HEADROOM]; |
DECLARE_ALIGNED(16, int32_t, input_rgb2yuv_table)[16+40*4]; // This table can contain both C and SIMD formatted values; the C values are always at the XY_IDX points |
#define RY_IDX 0 |
#define GY_IDX 1 |
#define BY_IDX 2 |
#define RU_IDX 3 |
#define GU_IDX 4 |
#define BU_IDX 5 |
#define RV_IDX 6 |
#define GV_IDX 7 |
#define BV_IDX 8 |
#define RGB2YUV_SHIFT 15 |
int *dither_error[4]; |
//Colorspace stuff |
int contrast, brightness, saturation; // for sws_getColorspaceDetails |
int srcColorspaceTable[4]; |
int dstColorspaceTable[4]; |
int srcRange; ///< 0 = MPG YUV range, 1 = JPG YUV range (source image). |
int dstRange; ///< 0 = MPG YUV range, 1 = JPG YUV range (destination image). |
int src0Alpha; |
int dst0Alpha; |
int srcXYZ; |
int dstXYZ; |
int src_h_chr_pos; |
int dst_h_chr_pos; |
int src_v_chr_pos; |
int dst_v_chr_pos; |
int yuv2rgb_y_offset; |
int yuv2rgb_y_coeff; |
int yuv2rgb_v2r_coeff; |
int yuv2rgb_v2g_coeff; |
int yuv2rgb_u2g_coeff; |
int yuv2rgb_u2b_coeff; |
#define RED_DITHER "0*8" |
#define GREEN_DITHER "1*8" |
#define BLUE_DITHER "2*8" |
#define Y_COEFF "3*8" |
#define VR_COEFF "4*8" |
#define UB_COEFF "5*8" |
#define VG_COEFF "6*8" |
#define UG_COEFF "7*8" |
#define Y_OFFSET "8*8" |
#define U_OFFSET "9*8" |
#define V_OFFSET "10*8" |
#define LUM_MMX_FILTER_OFFSET "11*8" |
#define CHR_MMX_FILTER_OFFSET "11*8+4*4*"AV_STRINGIFY(MAX_FILTER_SIZE) |
#define DSTW_OFFSET "11*8+4*4*"AV_STRINGIFY(MAX_FILTER_SIZE)"*2" |
#define ESP_OFFSET "11*8+4*4*"AV_STRINGIFY(MAX_FILTER_SIZE)"*2+8" |
#define VROUNDER_OFFSET "11*8+4*4*"AV_STRINGIFY(MAX_FILTER_SIZE)"*2+16" |
#define U_TEMP "11*8+4*4*"AV_STRINGIFY(MAX_FILTER_SIZE)"*2+24" |
#define V_TEMP "11*8+4*4*"AV_STRINGIFY(MAX_FILTER_SIZE)"*2+32" |
#define Y_TEMP "11*8+4*4*"AV_STRINGIFY(MAX_FILTER_SIZE)"*2+40" |
#define ALP_MMX_FILTER_OFFSET "11*8+4*4*"AV_STRINGIFY(MAX_FILTER_SIZE)"*2+48" |
#define UV_OFF_PX "11*8+4*4*"AV_STRINGIFY(MAX_FILTER_SIZE)"*3+48" |
#define UV_OFF_BYTE "11*8+4*4*"AV_STRINGIFY(MAX_FILTER_SIZE)"*3+56" |
#define DITHER16 "11*8+4*4*"AV_STRINGIFY(MAX_FILTER_SIZE)"*3+64" |
#define DITHER32 "11*8+4*4*"AV_STRINGIFY(MAX_FILTER_SIZE)"*3+80" |
#define DITHER32_INT (11*8+4*4*MAX_FILTER_SIZE*3+80) // value equal to above, used for checking that the struct hasn't been changed by mistake |
DECLARE_ALIGNED(8, uint64_t, redDither); |
DECLARE_ALIGNED(8, uint64_t, greenDither); |
DECLARE_ALIGNED(8, uint64_t, blueDither); |
DECLARE_ALIGNED(8, uint64_t, yCoeff); |
DECLARE_ALIGNED(8, uint64_t, vrCoeff); |
DECLARE_ALIGNED(8, uint64_t, ubCoeff); |
DECLARE_ALIGNED(8, uint64_t, vgCoeff); |
DECLARE_ALIGNED(8, uint64_t, ugCoeff); |
DECLARE_ALIGNED(8, uint64_t, yOffset); |
DECLARE_ALIGNED(8, uint64_t, uOffset); |
DECLARE_ALIGNED(8, uint64_t, vOffset); |
int32_t lumMmxFilter[4 * MAX_FILTER_SIZE]; |
int32_t chrMmxFilter[4 * MAX_FILTER_SIZE]; |
int dstW; ///< Width of destination luma/alpha planes. |
DECLARE_ALIGNED(8, uint64_t, esp); |
DECLARE_ALIGNED(8, uint64_t, vRounder); |
DECLARE_ALIGNED(8, uint64_t, u_temp); |
DECLARE_ALIGNED(8, uint64_t, v_temp); |
DECLARE_ALIGNED(8, uint64_t, y_temp); |
int32_t alpMmxFilter[4 * MAX_FILTER_SIZE]; |
// alignment of these values is not necessary, but merely here |
// to maintain the same offset across x86-32 and x86-64. Once we |
// use proper offset macros in the asm, they can be removed. |
DECLARE_ALIGNED(8, ptrdiff_t, uv_off); ///< offset (in pixels) between u and v planes |
DECLARE_ALIGNED(8, ptrdiff_t, uv_offx2); ///< offset (in bytes) between u and v planes |
DECLARE_ALIGNED(8, uint16_t, dither16)[8]; |
DECLARE_ALIGNED(8, uint32_t, dither32)[8]; |
const uint8_t *chrDither8, *lumDither8; |
#if HAVE_ALTIVEC |
vector signed short CY; |
vector signed short CRV; |
vector signed short CBU; |
vector signed short CGU; |
vector signed short CGV; |
vector signed short OY; |
vector unsigned short CSHIFT; |
vector signed short *vYCoeffsBank, *vCCoeffsBank; |
#endif |
int use_mmx_vfilter; |
/* pre defined color-spaces gamma */ |
#define XYZ_GAMMA (2.6f) |
#define RGB_GAMMA (2.2f) |
int16_t *xyzgamma; |
int16_t *rgbgamma; |
int16_t *xyzgammainv; |
int16_t *rgbgammainv; |
int16_t xyz2rgb_matrix[3][4]; |
int16_t rgb2xyz_matrix[3][4]; |
/* function pointers for swscale() */ |
yuv2planar1_fn yuv2plane1; |
yuv2planarX_fn yuv2planeX; |
yuv2interleavedX_fn yuv2nv12cX; |
yuv2packed1_fn yuv2packed1; |
yuv2packed2_fn yuv2packed2; |
yuv2packedX_fn yuv2packedX; |
yuv2anyX_fn yuv2anyX; |
/// Unscaled conversion of luma plane to YV12 for horizontal scaler. |
void (*lumToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3, |
int width, uint32_t *pal); |
/// Unscaled conversion of alpha plane to YV12 for horizontal scaler. |
void (*alpToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3, |
int width, uint32_t *pal); |
/// Unscaled conversion of chroma planes to YV12 for horizontal scaler. |
void (*chrToYV12)(uint8_t *dstU, uint8_t *dstV, |
const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, |
int width, uint32_t *pal); |
/** |
* Functions to read planar input, such as planar RGB, and convert |
* internally to Y/UV/A. |
*/ |
/** @{ */ |
void (*readLumPlanar)(uint8_t *dst, const uint8_t *src[4], int width, int32_t *rgb2yuv); |
void (*readChrPlanar)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4], |
int width, int32_t *rgb2yuv); |
void (*readAlpPlanar)(uint8_t *dst, const uint8_t *src[4], int width, int32_t *rgb2yuv); |
/** @} */ |
/** |
* Scale one horizontal line of input data using a bilinear filter |
* to produce one line of output data. Compared to SwsContext->hScale(), |
* please take note of the following caveats when using these: |
* - Scaling is done using only 7bit instead of 14bit coefficients. |
* - You can use no more than 5 input pixels to produce 4 output |
* pixels. Therefore, this filter should not be used for downscaling |
* by more than ~20% in width (because that equals more than 5/4th |
* downscaling and thus more than 5 pixels input per 4 pixels output). |
* - In general, bilinear filters create artifacts during downscaling |
* (even when <20%), because one output pixel will span more than one |
* input pixel, and thus some pixels will need edges of both neighbor |
* pixels to interpolate the output pixel. Since you can use at most |
* two input pixels per output pixel in bilinear scaling, this is |
* impossible and thus downscaling by any size will create artifacts. |
* To enable this type of scaling, set SWS_FLAG_FAST_BILINEAR |
* in SwsContext->flags. |
*/ |
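/* |
 * Sketch of the width constraint stated above ("no more than 5 input |
 * pixels per 4 output pixels"): |
 * @code |
 * int fast_bilinear_ok = srcW * 4 <= dstW * 5; // at most 5/4 downscale |
 * @endcode |
 */ |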
/** @{ */ |
void (*hyscale_fast)(struct SwsContext *c, |
int16_t *dst, int dstWidth, |
const uint8_t *src, int srcW, int xInc); |
void (*hcscale_fast)(struct SwsContext *c, |
int16_t *dst1, int16_t *dst2, int dstWidth, |
const uint8_t *src1, const uint8_t *src2, |
int srcW, int xInc); |
/** @} */ |
/** |
* Scale one horizontal line of input data using a filter over the input |
* lines, to produce one (differently sized) line of output data. |
* |
* @param dst pointer to destination buffer for horizontally scaled |
* data. If the number of bits per component of one |
* destination pixel (SwsContext->dstBpc) is <= 10, data |
* will be 15bpc in 16bits (int16_t) width. Else (i.e. |
* SwsContext->dstBpc == 16), data will be 19bpc in |
* 32bits (int32_t) width. |
* @param dstW width of destination image |
* @param src pointer to source data to be scaled. If the number of |
* bits per component of a source pixel (SwsContext->srcBpc) |
* is 8, this is 8bpc in 8bits (uint8_t) width. Else |
(i.e. SwsContext->srcBpc > 8), this is native depth |
* in 16bits (uint16_t) width. In other words, for 9-bit |
* YUV input, this is 9bpc, for 10-bit YUV input, this is |
* 10bpc, and for 16-bit RGB or YUV, this is 16bpc. |
* @param filter filter coefficients to be used per output pixel for |
* scaling. This contains 14bpp filtering coefficients. |
* Guaranteed to contain dstW * filterSize entries. |
* @param filterPos position of the first input pixel to be used for |
* each output pixel during scaling. Guaranteed to |
* contain dstW entries. |
* @param filterSize the number of input coefficients to be used (and |
* thus the number of input pixels to be used) for |
* creating a single output pixel. Is aligned to 4 |
* (and input coefficients thus padded with zeroes) |
* to simplify creating SIMD code. |
*/ |
/** @{ */ |
void (*hyScale)(struct SwsContext *c, int16_t *dst, int dstW, |
const uint8_t *src, const int16_t *filter, |
const int32_t *filterPos, int filterSize); |
void (*hcScale)(struct SwsContext *c, int16_t *dst, int dstW, |
const uint8_t *src, const int16_t *filter, |
const int32_t *filterPos, int filterSize); |
/** @} */ |
/// Color range conversion function for luma plane if needed. |
void (*lumConvertRange)(int16_t *dst, int width); |
/// Color range conversion function for chroma planes if needed. |
void (*chrConvertRange)(int16_t *dst1, int16_t *dst2, int width); |
int needs_hcscale; ///< Set if there are chroma planes to be converted. |
SwsDither dither; |
SwsAlphaBlend alphablend; |
} SwsContext; |
//FIXME check init (where 0) |
SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c); |
int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], |
int fullRange, int brightness, |
int contrast, int saturation); |
void ff_yuv2rgb_init_tables_ppc(SwsContext *c, const int inv_table[4], |
int brightness, int contrast, int saturation); |
void ff_updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex, |
int lastInLumBuf, int lastInChrBuf); |
av_cold void ff_sws_init_range_convert(SwsContext *c); |
SwsFunc ff_yuv2rgb_init_x86(SwsContext *c); |
SwsFunc ff_yuv2rgb_init_ppc(SwsContext *c); |
static av_always_inline int is16BPS(enum AVPixelFormat pix_fmt) |
{ |
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); |
av_assert0(desc); |
return desc->comp[0].depth_minus1 == 15; |
} |
static av_always_inline int is9_OR_10BPS(enum AVPixelFormat pix_fmt) |
{ |
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); |
av_assert0(desc); |
return desc->comp[0].depth_minus1 >= 8 && desc->comp[0].depth_minus1 <= 13; |
} |
#define isNBPS(x) is9_OR_10BPS(x) |
static av_always_inline int isBE(enum AVPixelFormat pix_fmt) |
{ |
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); |
av_assert0(desc); |
return desc->flags & AV_PIX_FMT_FLAG_BE; |
} |
static av_always_inline int isYUV(enum AVPixelFormat pix_fmt) |
{ |
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); |
av_assert0(desc); |
return !(desc->flags & AV_PIX_FMT_FLAG_RGB) && desc->nb_components >= 2; |
} |
static av_always_inline int isPlanarYUV(enum AVPixelFormat pix_fmt) |
{ |
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); |
av_assert0(desc); |
return ((desc->flags & AV_PIX_FMT_FLAG_PLANAR) && isYUV(pix_fmt)); |
} |
static av_always_inline int isRGB(enum AVPixelFormat pix_fmt) |
{ |
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); |
av_assert0(desc); |
return (desc->flags & AV_PIX_FMT_FLAG_RGB); |
} |
#if 0 // FIXME |
#define isGray(x) \ |
(!(av_pix_fmt_desc_get(x)->flags & AV_PIX_FMT_FLAG_PAL) && \ |
av_pix_fmt_desc_get(x)->nb_components <= 2) |
#else |
#define isGray(x) \ |
((x) == AV_PIX_FMT_GRAY8 || \ |
(x) == AV_PIX_FMT_YA8 || \ |
(x) == AV_PIX_FMT_GRAY16BE || \ |
(x) == AV_PIX_FMT_GRAY16LE || \ |
(x) == AV_PIX_FMT_YA16BE || \ |
(x) == AV_PIX_FMT_YA16LE) |
#endif |
#define isRGBinInt(x) \ |
( \ |
(x) == AV_PIX_FMT_RGB48BE || \ |
(x) == AV_PIX_FMT_RGB48LE || \ |
(x) == AV_PIX_FMT_RGB32 || \ |
(x) == AV_PIX_FMT_RGB32_1 || \ |
(x) == AV_PIX_FMT_RGB24 || \ |
(x) == AV_PIX_FMT_RGB565BE || \ |
(x) == AV_PIX_FMT_RGB565LE || \ |
(x) == AV_PIX_FMT_RGB555BE || \ |
(x) == AV_PIX_FMT_RGB555LE || \ |
(x) == AV_PIX_FMT_RGB444BE || \ |
(x) == AV_PIX_FMT_RGB444LE || \ |
(x) == AV_PIX_FMT_RGB8 || \ |
(x) == AV_PIX_FMT_RGB4 || \ |
(x) == AV_PIX_FMT_RGB4_BYTE || \ |
(x) == AV_PIX_FMT_RGBA64BE || \ |
(x) == AV_PIX_FMT_RGBA64LE || \ |
(x) == AV_PIX_FMT_MONOBLACK || \ |
(x) == AV_PIX_FMT_MONOWHITE \ |
) |
#define isBGRinInt(x) \ |
( \ |
(x) == AV_PIX_FMT_BGR48BE || \ |
(x) == AV_PIX_FMT_BGR48LE || \ |
(x) == AV_PIX_FMT_BGR32 || \ |
(x) == AV_PIX_FMT_BGR32_1 || \ |
(x) == AV_PIX_FMT_BGR24 || \ |
(x) == AV_PIX_FMT_BGR565BE || \ |
(x) == AV_PIX_FMT_BGR565LE || \ |
(x) == AV_PIX_FMT_BGR555BE || \ |
(x) == AV_PIX_FMT_BGR555LE || \ |
(x) == AV_PIX_FMT_BGR444BE || \ |
(x) == AV_PIX_FMT_BGR444LE || \ |
(x) == AV_PIX_FMT_BGR8 || \ |
(x) == AV_PIX_FMT_BGR4 || \ |
(x) == AV_PIX_FMT_BGR4_BYTE || \ |
(x) == AV_PIX_FMT_BGRA64BE || \ |
(x) == AV_PIX_FMT_BGRA64LE || \ |
(x) == AV_PIX_FMT_MONOBLACK || \ |
(x) == AV_PIX_FMT_MONOWHITE \ |
) |
#define isRGBinBytes(x) ( \ |
(x) == AV_PIX_FMT_RGB48BE \ |
|| (x) == AV_PIX_FMT_RGB48LE \ |
|| (x) == AV_PIX_FMT_RGBA64BE \ |
|| (x) == AV_PIX_FMT_RGBA64LE \ |
|| (x) == AV_PIX_FMT_RGBA \ |
|| (x) == AV_PIX_FMT_ARGB \ |
|| (x) == AV_PIX_FMT_RGB24 \ |
) |
#define isBGRinBytes(x) ( \ |
(x) == AV_PIX_FMT_BGR48BE \ |
|| (x) == AV_PIX_FMT_BGR48LE \ |
|| (x) == AV_PIX_FMT_BGRA64BE \ |
|| (x) == AV_PIX_FMT_BGRA64LE \ |
|| (x) == AV_PIX_FMT_BGRA \ |
|| (x) == AV_PIX_FMT_ABGR \ |
|| (x) == AV_PIX_FMT_BGR24 \ |
) |
#define isBayer(x) ( \ |
(x)==AV_PIX_FMT_BAYER_BGGR8 \ |
|| (x)==AV_PIX_FMT_BAYER_BGGR16LE \ |
|| (x)==AV_PIX_FMT_BAYER_BGGR16BE \ |
|| (x)==AV_PIX_FMT_BAYER_RGGB8 \ |
|| (x)==AV_PIX_FMT_BAYER_RGGB16LE \ |
|| (x)==AV_PIX_FMT_BAYER_RGGB16BE \ |
|| (x)==AV_PIX_FMT_BAYER_GBRG8 \ |
|| (x)==AV_PIX_FMT_BAYER_GBRG16LE \ |
|| (x)==AV_PIX_FMT_BAYER_GBRG16BE \ |
|| (x)==AV_PIX_FMT_BAYER_GRBG8 \ |
|| (x)==AV_PIX_FMT_BAYER_GRBG16LE \ |
|| (x)==AV_PIX_FMT_BAYER_GRBG16BE \ |
) |
#define isAnyRGB(x) \ |
( \ |
isBayer(x) || \ |
isRGBinInt(x) || \ |
isBGRinInt(x) || \ |
isRGB(x) \ |
) |
static av_always_inline int isALPHA(enum AVPixelFormat pix_fmt) |
{ |
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); |
av_assert0(desc); |
if (pix_fmt == AV_PIX_FMT_PAL8) |
return 1; |
return desc->flags & AV_PIX_FMT_FLAG_ALPHA; |
} |
#if 1 |
#define isPacked(x) ( \ |
(x)==AV_PIX_FMT_PAL8 \ |
|| (x)==AV_PIX_FMT_YUYV422 \ |
|| (x)==AV_PIX_FMT_YVYU422 \ |
|| (x)==AV_PIX_FMT_UYVY422 \ |
|| (x)==AV_PIX_FMT_YA8 \ |
|| (x)==AV_PIX_FMT_YA16LE \ |
|| (x)==AV_PIX_FMT_YA16BE \ |
|| (x)==AV_PIX_FMT_AYUV64LE \ |
|| (x)==AV_PIX_FMT_AYUV64BE \ |
|| isRGBinInt(x) \ |
|| isBGRinInt(x) \ |
) |
#else |
static av_always_inline int isPacked(enum AVPixelFormat pix_fmt) |
{ |
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); |
av_assert0(desc); |
return ((desc->nb_components >= 2 && !(desc->flags & AV_PIX_FMT_FLAG_PLANAR)) || |
pix_fmt == AV_PIX_FMT_PAL8); |
} |
#endif |
static av_always_inline int isPlanar(enum AVPixelFormat pix_fmt) |
{ |
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); |
av_assert0(desc); |
return (desc->nb_components >= 2 && (desc->flags & AV_PIX_FMT_FLAG_PLANAR)); |
} |
static av_always_inline int isPackedRGB(enum AVPixelFormat pix_fmt) |
{ |
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); |
av_assert0(desc); |
return ((desc->flags & (AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB)) == AV_PIX_FMT_FLAG_RGB); |
} |
static av_always_inline int isPlanarRGB(enum AVPixelFormat pix_fmt) |
{ |
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); |
av_assert0(desc); |
return ((desc->flags & (AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB)) == |
(AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB)); |
} |
static av_always_inline int usePal(enum AVPixelFormat pix_fmt) |
{ |
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); |
av_assert0(desc); |
return (desc->flags & AV_PIX_FMT_FLAG_PAL) || (desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL); |
} |
extern const uint64_t ff_dither4[2]; |
extern const uint64_t ff_dither8[2]; |
extern const uint8_t ff_dither_2x2_4[3][8]; |
extern const uint8_t ff_dither_2x2_8[3][8]; |
extern const uint8_t ff_dither_4x4_16[5][8]; |
extern const uint8_t ff_dither_8x8_32[9][8]; |
extern const uint8_t ff_dither_8x8_73[9][8]; |
extern const uint8_t ff_dither_8x8_128[9][8]; |
extern const uint8_t ff_dither_8x8_220[9][8]; |
extern const int32_t ff_yuv2rgb_coeffs[8][4]; |
extern const AVClass sws_context_class; |
/** |
* Set c->swscale to an unscaled converter if one exists for the specific |
* source and destination formats, bit depths, flags, etc. |
*/ |
void ff_get_unscaled_swscale(SwsContext *c); |
void ff_get_unscaled_swscale_ppc(SwsContext *c); |
void ff_get_unscaled_swscale_arm(SwsContext *c); |
/** |
* Return function pointer to fastest main scaler path function depending |
* on architecture and available optimizations. |
*/ |
SwsFunc ff_getSwsFunc(SwsContext *c); |
void ff_sws_init_input_funcs(SwsContext *c); |
void ff_sws_init_output_funcs(SwsContext *c, |
yuv2planar1_fn *yuv2plane1, |
yuv2planarX_fn *yuv2planeX, |
yuv2interleavedX_fn *yuv2nv12cX, |
yuv2packed1_fn *yuv2packed1, |
yuv2packed2_fn *yuv2packed2, |
yuv2packedX_fn *yuv2packedX, |
yuv2anyX_fn *yuv2anyX); |
void ff_sws_init_swscale_ppc(SwsContext *c); |
void ff_sws_init_swscale_x86(SwsContext *c); |
void ff_hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth, |
const uint8_t *src, int srcW, int xInc); |
void ff_hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2, |
int dstWidth, const uint8_t *src1, |
const uint8_t *src2, int srcW, int xInc); |
int ff_init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode, |
int16_t *filter, int32_t *filterPos, |
int numSplits); |
void ff_hyscale_fast_mmxext(SwsContext *c, int16_t *dst, |
int dstWidth, const uint8_t *src, |
int srcW, int xInc); |
void ff_hcscale_fast_mmxext(SwsContext *c, int16_t *dst1, int16_t *dst2, |
int dstWidth, const uint8_t *src1, |
const uint8_t *src2, int srcW, int xInc); |
/** |
* Allocate and return an SwsContext. |
* This is like sws_getContext() but does not perform the init step, allowing |
* the user to set additional AVOptions. |
* |
* @see sws_getContext() |
*/ |
struct SwsContext *sws_alloc_set_opts(int srcW, int srcH, enum AVPixelFormat srcFormat, |
int dstW, int dstH, enum AVPixelFormat dstFormat, |
int flags, const double *param); |
int ff_sws_alphablendaway(SwsContext *c, const uint8_t *src[], |
int srcStride[], int srcSliceY, int srcSliceH, |
uint8_t *dst[], int dstStride[]); |
static inline void fillPlane16(uint8_t *plane, int stride, int width, int height, int y, |
int alpha, int bits, const int big_endian) |
{ |
int i, j; |
uint8_t *ptr = plane + stride * y; |
int v = alpha ? 0xFFFF>>(15-bits) : (1<<bits); // all-ones for alpha planes, mid-range (neutral) value otherwise |
for (i = 0; i < height; i++) { |
#define FILL(wfunc) \ |
for (j = 0; j < width; j++) {\ |
wfunc(ptr+2*j, v);\ |
} |
if (big_endian) { |
FILL(AV_WB16); |
} else { |
FILL(AV_WL16); |
} |
ptr += stride; |
} |
} |
#define MAX_SLICE_PLANES 4 |
/// Slice plane |
typedef struct SwsPlane |
{ |
int available_lines; ///< maximum number of lines that can be held by this plane |
int sliceY; ///< index of first line |
int sliceH; ///< number of lines |
uint8_t **line; ///< line buffer |
uint8_t **tmp; ///< Temporary line buffer used by MMX code |
} SwsPlane; |
/** |
* Struct which defines a slice of an image to be scaled or an output for |
* a scaled slice. |
* A slice can also be used as an intermediate ring buffer for scaling steps. |
*/ |
typedef struct SwsSlice |
{ |
int width; ///< Slice line width |
int h_chr_sub_sample; ///< horizontal chroma subsampling factor |
int v_chr_sub_sample; ///< vertical chroma subsampling factor |
int is_ring; ///< flag to identify if this slice is a ring buffer |
int should_free_lines; ///< flag to identify if there are dynamic allocated lines |
enum AVPixelFormat fmt; ///< planes pixel format |
SwsPlane plane[MAX_SLICE_PLANES]; ///< color planes |
} SwsSlice; |
/** |
* Struct which holds all necessary data for processing a slice. |
* A processing step can be a color conversion or horizontal/vertical scaling. |
*/ |
typedef struct SwsFilterDescriptor |
{ |
SwsSlice *src; ///< Source slice |
SwsSlice *dst; ///< Output slice |
int alpha; ///< Flag for processing alpha channel |
void *instance; ///< Filter instance data |
/// Function for processing input slice sliceH lines starting from line sliceY |
int (*process)(SwsContext *c, struct SwsFilterDescriptor *desc, int sliceY, int sliceH); |
} SwsFilterDescriptor; |
/// Color conversion instance data |
typedef struct ColorContext |
{ |
uint32_t *pal; |
} ColorContext; |
/// Scaler instance data |
typedef struct FilterContext |
{ |
uint16_t *filter; |
int *filter_pos; |
int filter_size; |
int xInc; |
} FilterContext; |
// wrap input lines in the form (src + width*i + j) into slice format (line[i][j]) |
// relative=true means the first line is src[x][0]; otherwise the first line is src[x][lum/chr Y] |
int ff_init_slice_from_src(SwsSlice * s, uint8_t *src[4], int stride[4], int srcW, int lumY, int lumH, int chrY, int chrH, int relative); |
// Initialize scaler filter descriptor chain |
int ff_init_filters(SwsContext *c); |
// Free all filter data |
int ff_free_filters(SwsContext *c); |
/* |
Function for applying ring buffer logic to slice s. |
It checks whether the slice can hold @lum more luma lines; if it can, |
nothing is done, otherwise the @lum least recently used lines are |
removed. The same procedure is applied for @chr chroma lines. |
*/ |
int ff_rotate_slice(SwsSlice *s, int lum, int chr); |
/// initializes gamma conversion descriptor |
int ff_init_gamma_convert(SwsFilterDescriptor *desc, SwsSlice * src, uint16_t *table); |
/// initializes lum pixel format conversion descriptor |
int ff_init_desc_fmt_convert(SwsFilterDescriptor *desc, SwsSlice * src, SwsSlice *dst, uint32_t *pal); |
/// initializes lum horizontal scaling descriptor |
int ff_init_desc_hscale(SwsFilterDescriptor *desc, SwsSlice *src, SwsSlice *dst, uint16_t *filter, int * filter_pos, int filter_size, int xInc); |
/// initializes chr pixel format conversion descriptor |
int ff_init_desc_cfmt_convert(SwsFilterDescriptor *desc, SwsSlice * src, SwsSlice *dst, uint32_t *pal); |
/// initializes chr horizontal scaling descriptor |
int ff_init_desc_chscale(SwsFilterDescriptor *desc, SwsSlice *src, SwsSlice *dst, uint16_t *filter, int * filter_pos, int filter_size, int xInc); |
int ff_init_desc_no_chr(SwsFilterDescriptor *desc, SwsSlice * src, SwsSlice *dst); |
/// initializes vertical scaling descriptors |
int ff_init_vscale(SwsContext *c, SwsFilterDescriptor *desc, SwsSlice *src, SwsSlice *dst); |
/// setup vertical scaler functions |
void ff_init_vscale_pfn(SwsContext *c, yuv2planar1_fn yuv2plane1, yuv2planarX_fn yuv2planeX, |
yuv2interleavedX_fn yuv2nv12cX, yuv2packed1_fn yuv2packed1, yuv2packed2_fn yuv2packed2, |
yuv2packedX_fn yuv2packedX, yuv2anyX_fn yuv2anyX, int use_mmx); |
// number of extra lines to process |
#define MAX_LINES_AHEAD 4 |
// enable use of refactored scaler code |
#define NEW_FILTER |
#endif /* SWSCALE_SWSCALE_INTERNAL_H */ |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/swscale_unscaled.c |
---|
0,0 → 1,1792 |
/* |
* Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at> |
* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
#include <inttypes.h> |
#include <string.h> |
#include <math.h> |
#include <stdio.h> |
#include "config.h" |
#include "swscale.h" |
#include "swscale_internal.h" |
#include "rgb2rgb.h" |
#include "libavutil/intreadwrite.h" |
#include "libavutil/cpu.h" |
#include "libavutil/avutil.h" |
#include "libavutil/mathematics.h" |
#include "libavutil/bswap.h" |
#include "libavutil/pixdesc.h" |
#include "libavutil/avassert.h" |
DECLARE_ALIGNED(8, static const uint8_t, dithers)[8][8][8]={ |
{ |
{ 0, 1, 0, 1, 0, 1, 0, 1,}, |
{ 1, 0, 1, 0, 1, 0, 1, 0,}, |
{ 0, 1, 0, 1, 0, 1, 0, 1,}, |
{ 1, 0, 1, 0, 1, 0, 1, 0,}, |
{ 0, 1, 0, 1, 0, 1, 0, 1,}, |
{ 1, 0, 1, 0, 1, 0, 1, 0,}, |
{ 0, 1, 0, 1, 0, 1, 0, 1,}, |
{ 1, 0, 1, 0, 1, 0, 1, 0,}, |
},{ |
{ 1, 2, 1, 2, 1, 2, 1, 2,}, |
{ 3, 0, 3, 0, 3, 0, 3, 0,}, |
{ 1, 2, 1, 2, 1, 2, 1, 2,}, |
{ 3, 0, 3, 0, 3, 0, 3, 0,}, |
{ 1, 2, 1, 2, 1, 2, 1, 2,}, |
{ 3, 0, 3, 0, 3, 0, 3, 0,}, |
{ 1, 2, 1, 2, 1, 2, 1, 2,}, |
{ 3, 0, 3, 0, 3, 0, 3, 0,}, |
},{ |
{ 2, 4, 3, 5, 2, 4, 3, 5,}, |
{ 6, 0, 7, 1, 6, 0, 7, 1,}, |
{ 3, 5, 2, 4, 3, 5, 2, 4,}, |
{ 7, 1, 6, 0, 7, 1, 6, 0,}, |
{ 2, 4, 3, 5, 2, 4, 3, 5,}, |
{ 6, 0, 7, 1, 6, 0, 7, 1,}, |
{ 3, 5, 2, 4, 3, 5, 2, 4,}, |
{ 7, 1, 6, 0, 7, 1, 6, 0,}, |
},{ |
{ 4, 8, 7, 11, 4, 8, 7, 11,}, |
{ 12, 0, 15, 3, 12, 0, 15, 3,}, |
{ 6, 10, 5, 9, 6, 10, 5, 9,}, |
{ 14, 2, 13, 1, 14, 2, 13, 1,}, |
{ 4, 8, 7, 11, 4, 8, 7, 11,}, |
{ 12, 0, 15, 3, 12, 0, 15, 3,}, |
{ 6, 10, 5, 9, 6, 10, 5, 9,}, |
{ 14, 2, 13, 1, 14, 2, 13, 1,}, |
},{ |
{ 9, 17, 15, 23, 8, 16, 14, 22,}, |
{ 25, 1, 31, 7, 24, 0, 30, 6,}, |
{ 13, 21, 11, 19, 12, 20, 10, 18,}, |
{ 29, 5, 27, 3, 28, 4, 26, 2,}, |
{ 8, 16, 14, 22, 9, 17, 15, 23,}, |
{ 24, 0, 30, 6, 25, 1, 31, 7,}, |
{ 12, 20, 10, 18, 13, 21, 11, 19,}, |
{ 28, 4, 26, 2, 29, 5, 27, 3,}, |
},{ |
{ 18, 34, 30, 46, 17, 33, 29, 45,}, |
{ 50, 2, 62, 14, 49, 1, 61, 13,}, |
{ 26, 42, 22, 38, 25, 41, 21, 37,}, |
{ 58, 10, 54, 6, 57, 9, 53, 5,}, |
{ 16, 32, 28, 44, 19, 35, 31, 47,}, |
{ 48, 0, 60, 12, 51, 3, 63, 15,}, |
{ 24, 40, 20, 36, 27, 43, 23, 39,}, |
{ 56, 8, 52, 4, 59, 11, 55, 7,}, |
},{ |
{ 18, 34, 30, 46, 17, 33, 29, 45,}, |
{ 50, 2, 62, 14, 49, 1, 61, 13,}, |
{ 26, 42, 22, 38, 25, 41, 21, 37,}, |
{ 58, 10, 54, 6, 57, 9, 53, 5,}, |
{ 16, 32, 28, 44, 19, 35, 31, 47,}, |
{ 48, 0, 60, 12, 51, 3, 63, 15,}, |
{ 24, 40, 20, 36, 27, 43, 23, 39,}, |
{ 56, 8, 52, 4, 59, 11, 55, 7,}, |
},{ |
{ 36, 68, 60, 92, 34, 66, 58, 90,}, |
{ 100, 4,124, 28, 98, 2,122, 26,}, |
{ 52, 84, 44, 76, 50, 82, 42, 74,}, |
{ 116, 20,108, 12,114, 18,106, 10,}, |
{ 32, 64, 56, 88, 38, 70, 62, 94,}, |
{ 96, 0,120, 24,102, 6,126, 30,}, |
{ 48, 80, 40, 72, 54, 86, 46, 78,}, |
{ 112, 16,104, 8,118, 22,110, 14,}, |
}}; |
static const uint16_t dither_scale[15][16]={ |
{ 2, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,}, |
{ 2, 3, 7, 7, 13, 13, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,}, |
{ 3, 3, 4, 15, 15, 29, 57, 57, 57, 113, 113, 113, 113, 113, 113, 113,}, |
{ 3, 4, 4, 5, 31, 31, 61, 121, 241, 241, 241, 241, 481, 481, 481, 481,}, |
{ 3, 4, 5, 5, 6, 63, 63, 125, 249, 497, 993, 993, 993, 993, 993, 1985,}, |
{ 3, 5, 6, 6, 6, 7, 127, 127, 253, 505, 1009, 2017, 4033, 4033, 4033, 4033,}, |
{ 3, 5, 6, 7, 7, 7, 8, 255, 255, 509, 1017, 2033, 4065, 8129,16257,16257,}, |
{ 3, 5, 6, 8, 8, 8, 8, 9, 511, 511, 1021, 2041, 4081, 8161,16321,32641,}, |
{ 3, 5, 7, 8, 9, 9, 9, 9, 10, 1023, 1023, 2045, 4089, 8177,16353,32705,}, |
{ 3, 5, 7, 8, 10, 10, 10, 10, 10, 11, 2047, 2047, 4093, 8185,16369,32737,}, |
{ 3, 5, 7, 8, 10, 11, 11, 11, 11, 11, 12, 4095, 4095, 8189,16377,32753,}, |
{ 3, 5, 7, 9, 10, 12, 12, 12, 12, 12, 12, 13, 8191, 8191,16381,32761,}, |
{ 3, 5, 7, 9, 10, 12, 13, 13, 13, 13, 13, 13, 14,16383,16383,32765,}, |
{ 3, 5, 7, 9, 10, 12, 14, 14, 14, 14, 14, 14, 14, 15,32767,32767,}, |
{ 3, 5, 7, 9, 11, 12, 14, 15, 15, 15, 15, 15, 15, 15, 16,65535,}, |
}; |
static void fillPlane(uint8_t *plane, int stride, int width, int height, int y, |
uint8_t val) |
{ |
int i; |
uint8_t *ptr = plane + stride * y; |
for (i = 0; i < height; i++) { |
memset(ptr, val, width); |
ptr += stride; |
} |
} |
static void copyPlane(const uint8_t *src, int srcStride, |
int srcSliceY, int srcSliceH, int width, |
uint8_t *dst, int dstStride) |
{ |
dst += dstStride * srcSliceY; |
if (dstStride == srcStride && srcStride > 0) { |
memcpy(dst, src, srcSliceH * dstStride); |
} else { |
int i; |
for (i = 0; i < srcSliceH; i++) { |
memcpy(dst, src, width); |
src += srcStride; |
dst += dstStride; |
} |
} |
} |
static int planarToNv12Wrapper(SwsContext *c, const uint8_t *src[], |
int srcStride[], int srcSliceY, |
int srcSliceH, uint8_t *dstParam[], |
int dstStride[]) |
{ |
uint8_t *dst = dstParam[1] + dstStride[1] * srcSliceY / 2; |
copyPlane(src[0], srcStride[0], srcSliceY, srcSliceH, c->srcW, |
dstParam[0], dstStride[0]); |
if (c->dstFormat == AV_PIX_FMT_NV12) |
interleaveBytes(src[1], src[2], dst, c->srcW / 2, srcSliceH / 2, |
srcStride[1], srcStride[2], dstStride[1]); |
else |
interleaveBytes(src[2], src[1], dst, c->srcW / 2, srcSliceH / 2, |
srcStride[2], srcStride[1], dstStride[1]); |
return srcSliceH; |
} |
static int nv12ToPlanarWrapper(SwsContext *c, const uint8_t *src[], |
int srcStride[], int srcSliceY, |
int srcSliceH, uint8_t *dstParam[], |
int dstStride[]) |
{ |
uint8_t *dst1 = dstParam[1] + dstStride[1] * srcSliceY / 2; |
uint8_t *dst2 = dstParam[2] + dstStride[2] * srcSliceY / 2; |
copyPlane(src[0], srcStride[0], srcSliceY, srcSliceH, c->srcW, |
dstParam[0], dstStride[0]); |
if (c->srcFormat == AV_PIX_FMT_NV12) |
deinterleaveBytes(src[1], dst1, dst2,c->srcW / 2, srcSliceH / 2, |
srcStride[1], dstStride[1], dstStride[2]); |
else |
deinterleaveBytes(src[1], dst2, dst1, c->srcW / 2, srcSliceH / 2, |
srcStride[1], dstStride[2], dstStride[1]); |
return srcSliceH; |
} |
static int planarToYuy2Wrapper(SwsContext *c, const uint8_t *src[], |
int srcStride[], int srcSliceY, int srcSliceH, |
uint8_t *dstParam[], int dstStride[]) |
{ |
uint8_t *dst = dstParam[0] + dstStride[0] * srcSliceY; |
yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], |
srcStride[1], dstStride[0]); |
return srcSliceH; |
} |
static int planarToUyvyWrapper(SwsContext *c, const uint8_t *src[], |
int srcStride[], int srcSliceY, int srcSliceH, |
uint8_t *dstParam[], int dstStride[]) |
{ |
uint8_t *dst = dstParam[0] + dstStride[0] * srcSliceY; |
yv12touyvy(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], |
srcStride[1], dstStride[0]); |
return srcSliceH; |
} |
static int yuv422pToYuy2Wrapper(SwsContext *c, const uint8_t *src[], |
int srcStride[], int srcSliceY, int srcSliceH, |
uint8_t *dstParam[], int dstStride[]) |
{ |
uint8_t *dst = dstParam[0] + dstStride[0] * srcSliceY; |
yuv422ptoyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], |
srcStride[1], dstStride[0]); |
return srcSliceH; |
} |
static int yuv422pToUyvyWrapper(SwsContext *c, const uint8_t *src[], |
int srcStride[], int srcSliceY, int srcSliceH, |
uint8_t *dstParam[], int dstStride[]) |
{ |
uint8_t *dst = dstParam[0] + dstStride[0] * srcSliceY; |
yuv422ptouyvy(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], |
srcStride[1], dstStride[0]); |
return srcSliceH; |
} |
static int yuyvToYuv420Wrapper(SwsContext *c, const uint8_t *src[], |
int srcStride[], int srcSliceY, int srcSliceH, |
uint8_t *dstParam[], int dstStride[]) |
{ |
uint8_t *ydst = dstParam[0] + dstStride[0] * srcSliceY; |
uint8_t *udst = dstParam[1] + dstStride[1] * srcSliceY / 2; |
uint8_t *vdst = dstParam[2] + dstStride[2] * srcSliceY / 2; |
yuyvtoyuv420(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], |
dstStride[1], srcStride[0]); |
if (dstParam[3]) |
fillPlane(dstParam[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); |
return srcSliceH; |
} |
static int yuyvToYuv422Wrapper(SwsContext *c, const uint8_t *src[], |
int srcStride[], int srcSliceY, int srcSliceH, |
uint8_t *dstParam[], int dstStride[]) |
{ |
uint8_t *ydst = dstParam[0] + dstStride[0] * srcSliceY; |
uint8_t *udst = dstParam[1] + dstStride[1] * srcSliceY; |
uint8_t *vdst = dstParam[2] + dstStride[2] * srcSliceY; |
yuyvtoyuv422(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], |
dstStride[1], srcStride[0]); |
return srcSliceH; |
} |
static int uyvyToYuv420Wrapper(SwsContext *c, const uint8_t *src[], |
int srcStride[], int srcSliceY, int srcSliceH, |
uint8_t *dstParam[], int dstStride[]) |
{ |
uint8_t *ydst = dstParam[0] + dstStride[0] * srcSliceY; |
uint8_t *udst = dstParam[1] + dstStride[1] * srcSliceY / 2; |
uint8_t *vdst = dstParam[2] + dstStride[2] * srcSliceY / 2; |
uyvytoyuv420(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], |
dstStride[1], srcStride[0]); |
if (dstParam[3]) |
fillPlane(dstParam[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); |
return srcSliceH; |
} |
static int uyvyToYuv422Wrapper(SwsContext *c, const uint8_t *src[], |
int srcStride[], int srcSliceY, int srcSliceH, |
uint8_t *dstParam[], int dstStride[]) |
{ |
uint8_t *ydst = dstParam[0] + dstStride[0] * srcSliceY; |
uint8_t *udst = dstParam[1] + dstStride[1] * srcSliceY; |
uint8_t *vdst = dstParam[2] + dstStride[2] * srcSliceY; |
uyvytoyuv422(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], |
dstStride[1], srcStride[0]); |
return srcSliceH; |
} |
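/* The gray8a helpers below expand gray+alpha (YA8) input through a |
 * 256-entry palette: the plain 32-bit variant ORs the alpha sample into |
 * the high byte, the _1 variant into the low byte, and the 24-bit variant |
 * drops alpha entirely. */ |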
static void gray8aToPacked32(const uint8_t *src, uint8_t *dst, int num_pixels, |
const uint8_t *palette) |
{ |
int i; |
for (i = 0; i < num_pixels; i++) |
((uint32_t *) dst)[i] = ((const uint32_t *) palette)[src[i << 1]] | (src[(i << 1) + 1] << 24); |
} |
static void gray8aToPacked32_1(const uint8_t *src, uint8_t *dst, int num_pixels, |
const uint8_t *palette) |
{ |
int i; |
for (i = 0; i < num_pixels; i++) |
((uint32_t *) dst)[i] = ((const uint32_t *) palette)[src[i << 1]] | src[(i << 1) + 1]; |
} |
static void gray8aToPacked24(const uint8_t *src, uint8_t *dst, int num_pixels, |
const uint8_t *palette) |
{ |
int i; |
for (i = 0; i < num_pixels; i++) { |
//FIXME slow? |
dst[0] = palette[src[i << 1] * 4 + 0]; |
dst[1] = palette[src[i << 1] * 4 + 1]; |
dst[2] = palette[src[i << 1] * 4 + 2]; |
dst += 3; |
} |
} |
static int packed_16bpc_bswap(SwsContext *c, const uint8_t *src[], |
int srcStride[], int srcSliceY, int srcSliceH, |
uint8_t *dst[], int dstStride[]) |
{ |
int i, j, p; |
for (p = 0; p < 4; p++) { |
int srcstr = srcStride[p] / 2; |
int dststr = dstStride[p] / 2; |
uint16_t *dstPtr = (uint16_t *) dst[p]; |
const uint16_t *srcPtr = (const uint16_t *) src[p]; |
int min_stride = FFMIN(FFABS(srcstr), FFABS(dststr)); |
if(!dstPtr || !srcPtr) |
continue; |
for (i = 0; i < (srcSliceH >> c->chrDstVSubSample); i++) { |
for (j = 0; j < min_stride; j++) { |
dstPtr[j] = av_bswap16(srcPtr[j]); |
} |
srcPtr += srcstr; |
dstPtr += dststr; |
} |
} |
return srcSliceH; |
} |
static int palToRgbWrapper(SwsContext *c, const uint8_t *src[], int srcStride[], |
int srcSliceY, int srcSliceH, uint8_t *dst[], |
int dstStride[]) |
{ |
const enum AVPixelFormat srcFormat = c->srcFormat; |
const enum AVPixelFormat dstFormat = c->dstFormat; |
void (*conv)(const uint8_t *src, uint8_t *dst, int num_pixels, |
const uint8_t *palette) = NULL; |
int i; |
uint8_t *dstPtr = dst[0] + dstStride[0] * srcSliceY; |
const uint8_t *srcPtr = src[0]; |
if (srcFormat == AV_PIX_FMT_YA8) { |
switch (dstFormat) { |
case AV_PIX_FMT_RGB32 : conv = gray8aToPacked32; break; |
case AV_PIX_FMT_BGR32 : conv = gray8aToPacked32; break; |
case AV_PIX_FMT_BGR32_1: conv = gray8aToPacked32_1; break; |
case AV_PIX_FMT_RGB32_1: conv = gray8aToPacked32_1; break; |
case AV_PIX_FMT_RGB24 : conv = gray8aToPacked24; break; |
case AV_PIX_FMT_BGR24 : conv = gray8aToPacked24; break; |
} |
} else if (usePal(srcFormat)) { |
switch (dstFormat) { |
case AV_PIX_FMT_RGB32 : conv = sws_convertPalette8ToPacked32; break; |
case AV_PIX_FMT_BGR32 : conv = sws_convertPalette8ToPacked32; break; |
case AV_PIX_FMT_BGR32_1: conv = sws_convertPalette8ToPacked32; break; |
case AV_PIX_FMT_RGB32_1: conv = sws_convertPalette8ToPacked32; break; |
case AV_PIX_FMT_RGB24 : conv = sws_convertPalette8ToPacked24; break; |
case AV_PIX_FMT_BGR24 : conv = sws_convertPalette8ToPacked24; break; |
} |
} |
if (!conv) |
av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n", |
av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat)); |
else { |
for (i = 0; i < srcSliceH; i++) { |
conv(srcPtr, dstPtr, c->srcW, (uint8_t *) c->pal_rgb); |
srcPtr += srcStride[0]; |
dstPtr += dstStride[0]; |
} |
} |
return srcSliceH; |
} |
static void packed16togbra16(const uint8_t *src, int srcStride, |
uint16_t *dst[], int dstStride[], int srcSliceH, |
int src_alpha, int swap, int shift, int width) |
{ |
int x, h, i; |
int dst_alpha = dst[3] != NULL; |
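/* 'swap' is a 2-bit mask from the caller: bit 0 set means the source |
 * samples need byteswapping, bit 1 set means the destination samples do |
 * (see the computation in Rgb16ToPlanarRgb16Wrapper below). */ |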
for (h = 0; h < srcSliceH; h++) { |
uint16_t *src_line = (uint16_t *)(src + srcStride * h); |
switch (swap) { |
case 3: |
if (src_alpha && dst_alpha) { |
for (x = 0; x < width; x++) { |
dst[0][x] = av_bswap16(av_bswap16(*src_line++) >> shift); |
dst[1][x] = av_bswap16(av_bswap16(*src_line++) >> shift); |
dst[2][x] = av_bswap16(av_bswap16(*src_line++) >> shift); |
dst[3][x] = av_bswap16(av_bswap16(*src_line++) >> shift); |
} |
} else if (dst_alpha) { |
for (x = 0; x < width; x++) { |
dst[0][x] = av_bswap16(av_bswap16(*src_line++) >> shift); |
dst[1][x] = av_bswap16(av_bswap16(*src_line++) >> shift); |
dst[2][x] = av_bswap16(av_bswap16(*src_line++) >> shift); |
dst[3][x] = 0xFFFF; |
} |
} else if (src_alpha) { |
for (x = 0; x < width; x++) { |
dst[0][x] = av_bswap16(av_bswap16(*src_line++) >> shift); |
dst[1][x] = av_bswap16(av_bswap16(*src_line++) >> shift); |
dst[2][x] = av_bswap16(av_bswap16(*src_line++) >> shift); |
src_line++; |
} |
} else { |
for (x = 0; x < width; x++) { |
dst[0][x] = av_bswap16(av_bswap16(*src_line++) >> shift); |
dst[1][x] = av_bswap16(av_bswap16(*src_line++) >> shift); |
dst[2][x] = av_bswap16(av_bswap16(*src_line++) >> shift); |
} |
} |
break; |
case 2: |
if (src_alpha && dst_alpha) { |
for (x = 0; x < width; x++) { |
dst[0][x] = av_bswap16(*src_line++ >> shift); |
dst[1][x] = av_bswap16(*src_line++ >> shift); |
dst[2][x] = av_bswap16(*src_line++ >> shift); |
dst[3][x] = av_bswap16(*src_line++ >> shift); |
} |
} else if (dst_alpha) { |
for (x = 0; x < width; x++) { |
dst[0][x] = av_bswap16(*src_line++ >> shift); |
dst[1][x] = av_bswap16(*src_line++ >> shift); |
dst[2][x] = av_bswap16(*src_line++ >> shift); |
dst[3][x] = 0xFFFF; |
} |
} else if (src_alpha) { |
for (x = 0; x < width; x++) { |
dst[0][x] = av_bswap16(*src_line++ >> shift); |
dst[1][x] = av_bswap16(*src_line++ >> shift); |
dst[2][x] = av_bswap16(*src_line++ >> shift); |
src_line++; |
} |
} else { |
for (x = 0; x < width; x++) { |
dst[0][x] = av_bswap16(*src_line++ >> shift); |
dst[1][x] = av_bswap16(*src_line++ >> shift); |
dst[2][x] = av_bswap16(*src_line++ >> shift); |
} |
} |
break; |
case 1: |
if (src_alpha && dst_alpha) { |
for (x = 0; x < width; x++) { |
dst[0][x] = av_bswap16(*src_line++) >> shift; |
dst[1][x] = av_bswap16(*src_line++) >> shift; |
dst[2][x] = av_bswap16(*src_line++) >> shift; |
dst[3][x] = av_bswap16(*src_line++) >> shift; |
} |
} else if (dst_alpha) { |
for (x = 0; x < width; x++) { |
dst[0][x] = av_bswap16(*src_line++) >> shift; |
dst[1][x] = av_bswap16(*src_line++) >> shift; |
dst[2][x] = av_bswap16(*src_line++) >> shift; |
dst[3][x] = 0xFFFF; |
} |
} else if (src_alpha) { |
for (x = 0; x < width; x++) { |
dst[0][x] = av_bswap16(*src_line++) >> shift; |
dst[1][x] = av_bswap16(*src_line++) >> shift; |
dst[2][x] = av_bswap16(*src_line++) >> shift; |
src_line++; |
} |
} else { |
for (x = 0; x < width; x++) { |
dst[0][x] = av_bswap16(*src_line++) >> shift; |
dst[1][x] = av_bswap16(*src_line++) >> shift; |
dst[2][x] = av_bswap16(*src_line++) >> shift; |
} |
} |
break; |
default: |
if (src_alpha && dst_alpha) { |
for (x = 0; x < width; x++) { |
dst[0][x] = *src_line++ >> shift; |
dst[1][x] = *src_line++ >> shift; |
dst[2][x] = *src_line++ >> shift; |
dst[3][x] = *src_line++ >> shift; |
} |
} else if (dst_alpha) { |
for (x = 0; x < width; x++) { |
dst[0][x] = *src_line++ >> shift; |
dst[1][x] = *src_line++ >> shift; |
dst[2][x] = *src_line++ >> shift; |
dst[3][x] = 0xFFFF; |
} |
} else if (src_alpha) { |
for (x = 0; x < width; x++) { |
dst[0][x] = *src_line++ >> shift; |
dst[1][x] = *src_line++ >> shift; |
dst[2][x] = *src_line++ >> shift; |
src_line++; |
} |
} else { |
for (x = 0; x < width; x++) { |
dst[0][x] = *src_line++ >> shift; |
dst[1][x] = *src_line++ >> shift; |
dst[2][x] = *src_line++ >> shift; |
} |
} |
} |
for (i = 0; i < 4; i++) |
dst[i] += dstStride[i] >> 1; |
} |
} |
static int Rgb16ToPlanarRgb16Wrapper(SwsContext *c, const uint8_t *src[], |
int srcStride[], int srcSliceY, int srcSliceH, |
uint8_t *dst[], int dstStride[]) |
{ |
uint16_t *dst2013[] = { (uint16_t *)dst[2], (uint16_t *)dst[0], (uint16_t *)dst[1], (uint16_t *)dst[3] }; |
uint16_t *dst1023[] = { (uint16_t *)dst[1], (uint16_t *)dst[0], (uint16_t *)dst[2], (uint16_t *)dst[3] }; |
int stride2013[] = { dstStride[2], dstStride[0], dstStride[1], dstStride[3] }; |
int stride1023[] = { dstStride[1], dstStride[0], dstStride[2], dstStride[3] }; |
const AVPixFmtDescriptor *src_format = av_pix_fmt_desc_get(c->srcFormat); |
const AVPixFmtDescriptor *dst_format = av_pix_fmt_desc_get(c->dstFormat); |
int bpc = dst_format->comp[0].depth_minus1 + 1; |
int alpha = src_format->flags & AV_PIX_FMT_FLAG_ALPHA; |
int swap = 0; |
if ( HAVE_BIGENDIAN && !(src_format->flags & AV_PIX_FMT_FLAG_BE) || |
!HAVE_BIGENDIAN && src_format->flags & AV_PIX_FMT_FLAG_BE) |
swap++; |
if ( HAVE_BIGENDIAN && !(dst_format->flags & AV_PIX_FMT_FLAG_BE) || |
!HAVE_BIGENDIAN && dst_format->flags & AV_PIX_FMT_FLAG_BE) |
swap += 2; |
if ((dst_format->flags & (AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB)) != |
(AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB) || bpc < 9) { |
av_log(c, AV_LOG_ERROR, "unsupported conversion to planar RGB %s -> %s\n", |
src_format->name, dst_format->name); |
return srcSliceH; |
} |
switch (c->srcFormat) { |
case AV_PIX_FMT_RGB48LE: |
case AV_PIX_FMT_RGB48BE: |
case AV_PIX_FMT_RGBA64LE: |
case AV_PIX_FMT_RGBA64BE: |
packed16togbra16(src[0] + srcSliceY * srcStride[0], srcStride[0], |
dst2013, stride2013, srcSliceH, alpha, swap, |
16 - bpc, c->srcW); |
break; |
case AV_PIX_FMT_BGR48LE: |
case AV_PIX_FMT_BGR48BE: |
case AV_PIX_FMT_BGRA64LE: |
case AV_PIX_FMT_BGRA64BE: |
packed16togbra16(src[0] + srcSliceY * srcStride[0], srcStride[0], |
dst1023, stride1023, srcSliceH, alpha, swap, |
16 - bpc, c->srcW); |
break; |
default: |
av_log(c, AV_LOG_ERROR, |
"unsupported conversion to planar RGB %s -> %s\n", |
src_format->name, dst_format->name); |
} |
return srcSliceH; |
} |
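/* Pack planar 16-bit GBR(A) into interleaved 48/64 bpp pixels. Components |
 * with bpp significant bits are stretched to the full 16-bit range by bit |
 * replication (v << scale_high | v >> scale_low); bit 0 of swap requests |
 * byte-swapping the input, bit 1 the output. */ |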
static void gbr16ptopacked16(const uint16_t *src[], int srcStride[], |
uint8_t *dst, int dstStride, int srcSliceH, |
int alpha, int swap, int bpp, int width) |
{ |
int x, h, i; |
int src_alpha = src[3] != NULL; |
int scale_high = 16 - bpp, scale_low = (bpp - 8) * 2; |
for (h = 0; h < srcSliceH; h++) { |
uint16_t *dest = (uint16_t *)(dst + dstStride * h); |
uint16_t component; |
switch(swap) { |
case 3: |
if (alpha && !src_alpha) { |
for (x = 0; x < width; x++) { |
component = av_bswap16(src[0][x]); |
*dest++ = av_bswap16(component << scale_high | component >> scale_low); |
component = av_bswap16(src[1][x]); |
*dest++ = av_bswap16(component << scale_high | component >> scale_low); |
component = av_bswap16(src[2][x]); |
*dest++ = av_bswap16(component << scale_high | component >> scale_low); |
*dest++ = 0xffff; |
} |
} else if (alpha && src_alpha) { |
for (x = 0; x < width; x++) { |
component = av_bswap16(src[0][x]); |
*dest++ = av_bswap16(component << scale_high | component >> scale_low); |
component = av_bswap16(src[1][x]); |
*dest++ = av_bswap16(component << scale_high | component >> scale_low); |
component = av_bswap16(src[2][x]); |
*dest++ = av_bswap16(component << scale_high | component >> scale_low); |
component = av_bswap16(src[3][x]); |
*dest++ = av_bswap16(component << scale_high | component >> scale_low); |
} |
} else { |
for (x = 0; x < width; x++) { |
component = av_bswap16(src[0][x]); |
*dest++ = av_bswap16(component << scale_high | component >> scale_low); |
component = av_bswap16(src[1][x]); |
*dest++ = av_bswap16(component << scale_high | component >> scale_low); |
component = av_bswap16(src[2][x]); |
*dest++ = av_bswap16(component << scale_high | component >> scale_low); |
} |
} |
break; |
case 2: |
if (alpha && !src_alpha) { |
for (x = 0; x < width; x++) { |
*dest++ = av_bswap16(src[0][x] << scale_high | src[0][x] >> scale_low); |
*dest++ = av_bswap16(src[1][x] << scale_high | src[1][x] >> scale_low); |
*dest++ = av_bswap16(src[2][x] << scale_high | src[2][x] >> scale_low); |
*dest++ = 0xffff; |
} |
} else if (alpha && src_alpha) { |
for (x = 0; x < width; x++) { |
*dest++ = av_bswap16(src[0][x] << scale_high | src[0][x] >> scale_low); |
*dest++ = av_bswap16(src[1][x] << scale_high | src[1][x] >> scale_low); |
*dest++ = av_bswap16(src[2][x] << scale_high | src[2][x] >> scale_low); |
*dest++ = av_bswap16(src[3][x] << scale_high | src[3][x] >> scale_low); |
} |
} else { |
for (x = 0; x < width; x++) { |
*dest++ = av_bswap16(src[0][x] << scale_high | src[0][x] >> scale_low); |
*dest++ = av_bswap16(src[1][x] << scale_high | src[1][x] >> scale_low); |
*dest++ = av_bswap16(src[2][x] << scale_high | src[2][x] >> scale_low); |
} |
} |
break; |
case 1: |
if (alpha && !src_alpha) { |
for (x = 0; x < width; x++) { |
*dest++ = av_bswap16(src[0][x]) << scale_high | av_bswap16(src[0][x]) >> scale_low; |
*dest++ = av_bswap16(src[1][x]) << scale_high | av_bswap16(src[1][x]) >> scale_low; |
*dest++ = av_bswap16(src[2][x]) << scale_high | av_bswap16(src[2][x]) >> scale_low; |
*dest++ = 0xffff; |
} |
} else if (alpha && src_alpha) { |
for (x = 0; x < width; x++) { |
*dest++ = av_bswap16(src[0][x]) << scale_high | av_bswap16(src[0][x]) >> scale_low; |
*dest++ = av_bswap16(src[1][x]) << scale_high | av_bswap16(src[1][x]) >> scale_low; |
*dest++ = av_bswap16(src[2][x]) << scale_high | av_bswap16(src[2][x]) >> scale_low; |
*dest++ = av_bswap16(src[3][x]) << scale_high | av_bswap16(src[3][x]) >> scale_low; |
} |
} else { |
for (x = 0; x < width; x++) { |
*dest++ = av_bswap16(src[0][x]) << scale_high | av_bswap16(src[0][x]) >> scale_low; |
*dest++ = av_bswap16(src[1][x]) << scale_high | av_bswap16(src[1][x]) >> scale_low; |
*dest++ = av_bswap16(src[2][x]) << scale_high | av_bswap16(src[2][x]) >> scale_low; |
} |
} |
break; |
default: |
if (alpha && !src_alpha) { |
for (x = 0; x < width; x++) { |
*dest++ = src[0][x] << scale_high | src[0][x] >> scale_low; |
*dest++ = src[1][x] << scale_high | src[1][x] >> scale_low; |
*dest++ = src[2][x] << scale_high | src[2][x] >> scale_low; |
*dest++ = 0xffff; |
} |
} else if (alpha && src_alpha) { |
for (x = 0; x < width; x++) { |
*dest++ = src[0][x] << scale_high | src[0][x] >> scale_low; |
*dest++ = src[1][x] << scale_high | src[1][x] >> scale_low; |
*dest++ = src[2][x] << scale_high | src[2][x] >> scale_low; |
*dest++ = src[3][x] << scale_high | src[3][x] >> scale_low; |
} |
} else { |
for (x = 0; x < width; x++) { |
*dest++ = src[0][x] << scale_high | src[0][x] >> scale_low; |
*dest++ = src[1][x] << scale_high | src[1][x] >> scale_low; |
*dest++ = src[2][x] << scale_high | src[2][x] >> scale_low; |
} |
} |
} |
for (i = 0; i < 3 + src_alpha; i++) |
src[i] += srcStride[i] >> 1; |
} |
} |
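/* Unscaled conversion from planar GBR(A) with more than 8 bits per |
 * component to packed 16-bit RGB/BGR(A); src102/src201 present the source |
 * planes in the component order of the packed destination. */ |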
static int planarRgb16ToRgb16Wrapper(SwsContext *c, const uint8_t *src[], |
int srcStride[], int srcSliceY, int srcSliceH, |
uint8_t *dst[], int dstStride[]) |
{ |
const uint16_t *src102[] = { (uint16_t *)src[1], (uint16_t *)src[0], (uint16_t *)src[2], (uint16_t *)src[3] }; |
const uint16_t *src201[] = { (uint16_t *)src[2], (uint16_t *)src[0], (uint16_t *)src[1], (uint16_t *)src[3] }; |
int stride102[] = { srcStride[1], srcStride[0], srcStride[2], srcStride[3] }; |
int stride201[] = { srcStride[2], srcStride[0], srcStride[1], srcStride[3] }; |
const AVPixFmtDescriptor *src_format = av_pix_fmt_desc_get(c->srcFormat); |
const AVPixFmtDescriptor *dst_format = av_pix_fmt_desc_get(c->dstFormat); |
int bits_per_sample = src_format->comp[0].depth_minus1 + 1; |
int swap = 0; |
if ( HAVE_BIGENDIAN && !(src_format->flags & AV_PIX_FMT_FLAG_BE) || |
!HAVE_BIGENDIAN && src_format->flags & AV_PIX_FMT_FLAG_BE) |
swap++; |
if ( HAVE_BIGENDIAN && !(dst_format->flags & AV_PIX_FMT_FLAG_BE) || |
!HAVE_BIGENDIAN && dst_format->flags & AV_PIX_FMT_FLAG_BE) |
swap += 2; |
if ((src_format->flags & (AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB)) != |
(AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB) || |
bits_per_sample <= 8) { |
av_log(c, AV_LOG_ERROR, "unsupported planar RGB conversion %s -> %s\n", |
src_format->name, dst_format->name); |
return srcSliceH; |
} |
switch (c->dstFormat) { |
case AV_PIX_FMT_BGR48LE: |
case AV_PIX_FMT_BGR48BE: |
gbr16ptopacked16(src102, stride102, |
dst[0] + srcSliceY * dstStride[0], dstStride[0], |
srcSliceH, 0, swap, bits_per_sample, c->srcW); |
break; |
case AV_PIX_FMT_RGB48LE: |
case AV_PIX_FMT_RGB48BE: |
gbr16ptopacked16(src201, stride201, |
dst[0] + srcSliceY * dstStride[0], dstStride[0], |
srcSliceH, 0, swap, bits_per_sample, c->srcW); |
break; |
case AV_PIX_FMT_RGBA64LE: |
case AV_PIX_FMT_RGBA64BE: |
gbr16ptopacked16(src201, stride201, |
dst[0] + srcSliceY * dstStride[0], dstStride[0], |
srcSliceH, 1, swap, bits_per_sample, c->srcW); |
break; |
case AV_PIX_FMT_BGRA64LE: |
case AV_PIX_FMT_BGRA64BE: |
gbr16ptopacked16(src102, stride102, |
dst[0] + srcSliceY * dstStride[0], dstStride[0], |
srcSliceH, 1, swap, bits_per_sample, c->srcW); |
break; |
default: |
av_log(c, AV_LOG_ERROR, |
"unsupported planar RGB conversion %s -> %s\n", |
src_format->name, dst_format->name); |
} |
return srcSliceH; |
} |
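/* Interleave three 8-bit planes into packed 24 bpp pixels, in the plane |
 * order supplied by the caller. */ |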
static void gbr24ptopacked24(const uint8_t *src[], int srcStride[], |
uint8_t *dst, int dstStride, int srcSliceH, |
int width) |
{ |
int x, h, i; |
for (h = 0; h < srcSliceH; h++) { |
uint8_t *dest = dst + dstStride * h; |
for (x = 0; x < width; x++) { |
*dest++ = src[0][x]; |
*dest++ = src[1][x]; |
*dest++ = src[2][x]; |
} |
for (i = 0; i < 3; i++) |
src[i] += srcStride[i]; |
} |
} |
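/* Interleave three 8-bit planes into packed 32 bpp pixels, writing an |
 * opaque 0xff alpha byte before (ARGB/ABGR) or after (RGBA/BGRA) the |
 * color components. */ |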
static void gbr24ptopacked32(const uint8_t *src[], int srcStride[], |
uint8_t *dst, int dstStride, int srcSliceH, |
int alpha_first, int width) |
{ |
int x, h, i; |
for (h = 0; h < srcSliceH; h++) { |
uint8_t *dest = dst + dstStride * h; |
if (alpha_first) { |
for (x = 0; x < width; x++) { |
*dest++ = 0xff; |
*dest++ = src[0][x]; |
*dest++ = src[1][x]; |
*dest++ = src[2][x]; |
} |
} else { |
for (x = 0; x < width; x++) { |
*dest++ = src[0][x]; |
*dest++ = src[1][x]; |
*dest++ = src[2][x]; |
*dest++ = 0xff; |
} |
} |
for (i = 0; i < 3; i++) |
src[i] += srcStride[i]; |
} |
} |
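/* Unscaled GBRP -> packed byte RGB/BGR(A) conversion. */ |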
static int planarRgbToRgbWrapper(SwsContext *c, const uint8_t *src[], |
int srcStride[], int srcSliceY, int srcSliceH, |
uint8_t *dst[], int dstStride[]) |
{ |
int alpha_first = 0; |
const uint8_t *src102[] = { src[1], src[0], src[2] }; |
const uint8_t *src201[] = { src[2], src[0], src[1] }; |
int stride102[] = { srcStride[1], srcStride[0], srcStride[2] }; |
int stride201[] = { srcStride[2], srcStride[0], srcStride[1] }; |
if (c->srcFormat != AV_PIX_FMT_GBRP) { |
av_log(c, AV_LOG_ERROR, "unsupported planar RGB conversion %s -> %s\n", |
av_get_pix_fmt_name(c->srcFormat), |
av_get_pix_fmt_name(c->dstFormat)); |
return srcSliceH; |
} |
switch (c->dstFormat) { |
case AV_PIX_FMT_BGR24: |
gbr24ptopacked24(src102, stride102, |
dst[0] + srcSliceY * dstStride[0], dstStride[0], |
srcSliceH, c->srcW); |
break; |
case AV_PIX_FMT_RGB24: |
gbr24ptopacked24(src201, stride201, |
dst[0] + srcSliceY * dstStride[0], dstStride[0], |
srcSliceH, c->srcW); |
break; |
case AV_PIX_FMT_ARGB: |
alpha_first = 1; |
case AV_PIX_FMT_RGBA: |
gbr24ptopacked32(src201, stride201, |
dst[0] + srcSliceY * dstStride[0], dstStride[0], |
srcSliceH, alpha_first, c->srcW); |
break; |
case AV_PIX_FMT_ABGR: |
alpha_first = 1; |
case AV_PIX_FMT_BGRA: |
gbr24ptopacked32(src102, stride102, |
dst[0] + srcSliceY * dstStride[0], dstStride[0], |
srcSliceH, alpha_first, c->srcW); |
break; |
default: |
av_log(c, AV_LOG_ERROR, |
"unsupported planar RGB conversion %s -> %s\n", |
av_get_pix_fmt_name(c->srcFormat), |
av_get_pix_fmt_name(c->dstFormat)); |
} |
return srcSliceH; |
} |
static int planarRgbToplanarRgbWrapper(SwsContext *c, |
const uint8_t *src[], int srcStride[], |
int srcSliceY, int srcSliceH, |
uint8_t *dst[], int dstStride[]) |
{ |
copyPlane(src[0], srcStride[0], srcSliceY, srcSliceH, c->srcW, |
dst[0], dstStride[0]); |
copyPlane(src[1], srcStride[1], srcSliceY, srcSliceH, c->srcW, |
dst[1], dstStride[1]); |
copyPlane(src[2], srcStride[2], srcSliceY, srcSliceH, c->srcW, |
dst[2], dstStride[2]); |
if (dst[3]) |
fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); |
return srcSliceH; |
} |
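/* De-interleave packed RGB/BGR(A) into three 8-bit planes; inc_size is |
 * the input pixel size in bytes (3 or 4) and alpha_first skips a leading |
 * alpha byte. */ |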
static void packedtogbr24p(const uint8_t *src, int srcStride, |
uint8_t *dst[], int dstStride[], int srcSliceH, |
int alpha_first, int inc_size, int width) |
{ |
uint8_t *dest[3]; |
int x, h; |
dest[0] = dst[0]; |
dest[1] = dst[1]; |
dest[2] = dst[2]; |
if (alpha_first) |
src++; |
for (h = 0; h < srcSliceH; h++) { |
for (x = 0; x < width; x++) { |
dest[0][x] = src[0]; |
dest[1][x] = src[1]; |
dest[2][x] = src[2]; |
src += inc_size; |
} |
src += srcStride - width * inc_size; |
dest[0] += dstStride[0]; |
dest[1] += dstStride[1]; |
dest[2] += dstStride[2]; |
} |
} |
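/* Unscaled packed byte RGB/BGR(A) -> GBRP conversion; dst102/dst201 put |
 * the destination planes into packed component order. */ |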
static int rgbToPlanarRgbWrapper(SwsContext *c, const uint8_t *src[], |
int srcStride[], int srcSliceY, int srcSliceH, |
uint8_t *dst[], int dstStride[]) |
{ |
int alpha_first = 0; |
int stride102[] = { dstStride[1], dstStride[0], dstStride[2] }; |
int stride201[] = { dstStride[2], dstStride[0], dstStride[1] }; |
uint8_t *dst102[] = { dst[1] + srcSliceY * dstStride[1], |
dst[0] + srcSliceY * dstStride[0], |
dst[2] + srcSliceY * dstStride[2] }; |
uint8_t *dst201[] = { dst[2] + srcSliceY * dstStride[2], |
dst[0] + srcSliceY * dstStride[0], |
dst[1] + srcSliceY * dstStride[1] }; |
switch (c->srcFormat) { |
case AV_PIX_FMT_RGB24: |
packedtogbr24p((const uint8_t *) src[0], srcStride[0], dst201, |
stride201, srcSliceH, alpha_first, 3, c->srcW); |
break; |
case AV_PIX_FMT_BGR24: |
packedtogbr24p((const uint8_t *) src[0], srcStride[0], dst102, |
stride102, srcSliceH, alpha_first, 3, c->srcW); |
break; |
case AV_PIX_FMT_ARGB: |
alpha_first = 1; |
case AV_PIX_FMT_RGBA: |
packedtogbr24p((const uint8_t *) src[0], srcStride[0], dst201, |
stride201, srcSliceH, alpha_first, 4, c->srcW); |
break; |
case AV_PIX_FMT_ABGR: |
alpha_first = 1; |
case AV_PIX_FMT_BGRA: |
packedtogbr24p((const uint8_t *) src[0], srcStride[0], dst102, |
stride102, srcSliceH, alpha_first, 4, c->srcW); |
break; |
default: |
av_log(c, AV_LOG_ERROR, |
"unsupported planar RGB conversion %s -> %s\n", |
av_get_pix_fmt_name(c->srcFormat), |
av_get_pix_fmt_name(c->dstFormat)); |
} |
return srcSliceH; |
} |
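/* Each inclusion of bayer_template.c below instantiates one pair of |
 * copy/interpolate debayering routines, named via BAYER_RENAME(), for the |
 * Bayer pattern and bit depth selected by the preceding BAYER_* macros. */ |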
#define BAYER_GBRG |
#define BAYER_8 |
#define BAYER_RENAME(x) bayer_gbrg8_to_##x |
#include "bayer_template.c" |
#define BAYER_GBRG |
#define BAYER_16LE |
#define BAYER_RENAME(x) bayer_gbrg16le_to_##x |
#include "bayer_template.c" |
#define BAYER_GBRG |
#define BAYER_16BE |
#define BAYER_RENAME(x) bayer_gbrg16be_to_##x |
#include "bayer_template.c" |
#define BAYER_GRBG |
#define BAYER_8 |
#define BAYER_RENAME(x) bayer_grbg8_to_##x |
#include "bayer_template.c" |
#define BAYER_GRBG |
#define BAYER_16LE |
#define BAYER_RENAME(x) bayer_grbg16le_to_##x |
#include "bayer_template.c" |
#define BAYER_GRBG |
#define BAYER_16BE |
#define BAYER_RENAME(x) bayer_grbg16be_to_##x |
#include "bayer_template.c" |
#define BAYER_BGGR |
#define BAYER_8 |
#define BAYER_RENAME(x) bayer_bggr8_to_##x |
#include "bayer_template.c" |
#define BAYER_BGGR |
#define BAYER_16LE |
#define BAYER_RENAME(x) bayer_bggr16le_to_##x |
#include "bayer_template.c" |
#define BAYER_BGGR |
#define BAYER_16BE |
#define BAYER_RENAME(x) bayer_bggr16be_to_##x |
#include "bayer_template.c" |
#define BAYER_RGGB |
#define BAYER_8 |
#define BAYER_RENAME(x) bayer_rggb8_to_##x |
#include "bayer_template.c" |
#define BAYER_RGGB |
#define BAYER_16LE |
#define BAYER_RENAME(x) bayer_rggb16le_to_##x |
#include "bayer_template.c" |
#define BAYER_RGGB |
#define BAYER_16BE |
#define BAYER_RENAME(x) bayer_rggb16be_to_##x |
#include "bayer_template.c" |
static int bayer_to_rgb24_wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, |
int srcSliceH, uint8_t* dst[], int dstStride[]) |
{ |
uint8_t *dstPtr= dst[0]; |
const uint8_t *srcPtr= src[0]; |
int i; |
void (*copy) (const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int width); |
void (*interpolate)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int width); |
switch(c->srcFormat) { |
#define CASE(pixfmt, prefix) \ |
case pixfmt: copy = bayer_##prefix##_to_rgb24_copy; \ |
interpolate = bayer_##prefix##_to_rgb24_interpolate; \ |
break; |
CASE(AV_PIX_FMT_BAYER_BGGR8, bggr8) |
CASE(AV_PIX_FMT_BAYER_BGGR16LE, bggr16le) |
CASE(AV_PIX_FMT_BAYER_BGGR16BE, bggr16be) |
CASE(AV_PIX_FMT_BAYER_RGGB8, rggb8) |
CASE(AV_PIX_FMT_BAYER_RGGB16LE, rggb16le) |
CASE(AV_PIX_FMT_BAYER_RGGB16BE, rggb16be) |
CASE(AV_PIX_FMT_BAYER_GBRG8, gbrg8) |
CASE(AV_PIX_FMT_BAYER_GBRG16LE, gbrg16le) |
CASE(AV_PIX_FMT_BAYER_GBRG16BE, gbrg16be) |
CASE(AV_PIX_FMT_BAYER_GRBG8, grbg8) |
CASE(AV_PIX_FMT_BAYER_GRBG16LE, grbg16le) |
CASE(AV_PIX_FMT_BAYER_GRBG16BE, grbg16be) |
#undef CASE |
default: return 0; |
} |
copy(srcPtr, srcStride[0], dstPtr, dstStride[0], c->srcW); |
srcPtr += 2 * srcStride[0]; |
dstPtr += 2 * dstStride[0]; |
for (i = 2; i < srcSliceH - 2; i += 2) { |
interpolate(srcPtr, srcStride[0], dstPtr, dstStride[0], c->srcW); |
srcPtr += 2 * srcStride[0]; |
dstPtr += 2 * dstStride[0]; |
} |
copy(srcPtr, srcStride[0], dstPtr, dstStride[0], c->srcW); |
return srcSliceH; |
} |
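/* Demosaic a Bayer source directly to YUV420P through the context's |
 * RGB->YUV table; row handling mirrors bayer_to_rgb24_wrapper(). */ |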
static int bayer_to_yv12_wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, |
int srcSliceH, uint8_t* dst[], int dstStride[]) |
{ |
const uint8_t *srcPtr= src[0]; |
uint8_t *dstY= dst[0]; |
uint8_t *dstU= dst[1]; |
uint8_t *dstV= dst[2]; |
int i; |
void (*copy) (const uint8_t *src, int src_stride, uint8_t *dstY, uint8_t *dstU, uint8_t *dstV, int luma_stride, int width, int32_t *rgb2yuv); |
void (*interpolate)(const uint8_t *src, int src_stride, uint8_t *dstY, uint8_t *dstU, uint8_t *dstV, int luma_stride, int width, int32_t *rgb2yuv); |
switch(c->srcFormat) { |
#define CASE(pixfmt, prefix) \ |
case pixfmt: copy = bayer_##prefix##_to_yv12_copy; \ |
interpolate = bayer_##prefix##_to_yv12_interpolate; \ |
break; |
CASE(AV_PIX_FMT_BAYER_BGGR8, bggr8) |
CASE(AV_PIX_FMT_BAYER_BGGR16LE, bggr16le) |
CASE(AV_PIX_FMT_BAYER_BGGR16BE, bggr16be) |
CASE(AV_PIX_FMT_BAYER_RGGB8, rggb8) |
CASE(AV_PIX_FMT_BAYER_RGGB16LE, rggb16le) |
CASE(AV_PIX_FMT_BAYER_RGGB16BE, rggb16be) |
CASE(AV_PIX_FMT_BAYER_GBRG8, gbrg8) |
CASE(AV_PIX_FMT_BAYER_GBRG16LE, gbrg16le) |
CASE(AV_PIX_FMT_BAYER_GBRG16BE, gbrg16be) |
CASE(AV_PIX_FMT_BAYER_GRBG8, grbg8) |
CASE(AV_PIX_FMT_BAYER_GRBG16LE, grbg16le) |
CASE(AV_PIX_FMT_BAYER_GRBG16BE, grbg16be) |
#undef CASE |
default: return 0; |
} |
copy(srcPtr, srcStride[0], dstY, dstU, dstV, dstStride[0], c->srcW, c->input_rgb2yuv_table); |
srcPtr += 2 * srcStride[0]; |
dstY += 2 * dstStride[0]; |
dstU += dstStride[1]; |
dstV += dstStride[1]; |
for (i = 2; i < srcSliceH - 2; i += 2) { |
interpolate(srcPtr, srcStride[0], dstY, dstU, dstV, dstStride[0], c->srcW, c->input_rgb2yuv_table); |
srcPtr += 2 * srcStride[0]; |
dstY += 2 * dstStride[0]; |
dstU += dstStride[1]; |
dstV += dstStride[1]; |
} |
copy(srcPtr, srcStride[0], dstY, dstU, dstV, dstStride[0], c->srcW, c->input_rgb2yuv_table); |
return srcSliceH; |
} |
#define isRGBA32(x) ( \ |
(x) == AV_PIX_FMT_ARGB \ |
|| (x) == AV_PIX_FMT_RGBA \ |
|| (x) == AV_PIX_FMT_BGRA \ |
|| (x) == AV_PIX_FMT_ABGR \ |
) |
#define isRGBA64(x) ( \ |
(x) == AV_PIX_FMT_RGBA64LE \ |
|| (x) == AV_PIX_FMT_RGBA64BE \ |
|| (x) == AV_PIX_FMT_BGRA64LE \ |
|| (x) == AV_PIX_FMT_BGRA64BE \ |
) |
#define isRGB48(x) ( \ |
(x) == AV_PIX_FMT_RGB48LE \ |
|| (x) == AV_PIX_FMT_RGB48BE \ |
|| (x) == AV_PIX_FMT_BGR48LE \ |
|| (x) == AV_PIX_FMT_BGR48BE \ |
) |
/* {RGB,BGR}{15,16,24,32,32_1} -> {RGB,BGR}{15,16,24,32} */ |
typedef void (* rgbConvFn) (const uint8_t *, uint8_t *, int); |
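/* Select a packed RGB <-> RGB conversion routine for the current context, |
 * or return NULL if none applies; the switch keys combine source and |
 * destination bits per pixel as srcId | (dstId << 16). */ |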
static rgbConvFn findRgbConvFn(SwsContext *c) |
{ |
const enum AVPixelFormat srcFormat = c->srcFormat; |
const enum AVPixelFormat dstFormat = c->dstFormat; |
const int srcId = c->srcFormatBpp; |
const int dstId = c->dstFormatBpp; |
rgbConvFn conv = NULL; |
#define IS_NOT_NE(bpp, desc) \ |
(((bpp + 7) >> 3) == 2 && \ |
(!(desc->flags & AV_PIX_FMT_FLAG_BE) != !HAVE_BIGENDIAN)) |
#define CONV_IS(src, dst) (srcFormat == AV_PIX_FMT_##src && dstFormat == AV_PIX_FMT_##dst) |
if (isRGBA32(srcFormat) && isRGBA32(dstFormat)) { |
if ( CONV_IS(ABGR, RGBA) |
|| CONV_IS(ARGB, BGRA) |
|| CONV_IS(BGRA, ARGB) |
|| CONV_IS(RGBA, ABGR)) conv = shuffle_bytes_3210; |
else if (CONV_IS(ABGR, ARGB) |
|| CONV_IS(ARGB, ABGR)) conv = shuffle_bytes_0321; |
else if (CONV_IS(ABGR, BGRA) |
|| CONV_IS(ARGB, RGBA)) conv = shuffle_bytes_1230; |
else if (CONV_IS(BGRA, RGBA) |
|| CONV_IS(RGBA, BGRA)) conv = shuffle_bytes_2103; |
else if (CONV_IS(BGRA, ABGR) |
|| CONV_IS(RGBA, ARGB)) conv = shuffle_bytes_3012; |
} else if (isRGB48(srcFormat) && isRGB48(dstFormat)) { |
if (CONV_IS(RGB48LE, BGR48LE) |
|| CONV_IS(BGR48LE, RGB48LE) |
|| CONV_IS(RGB48BE, BGR48BE) |
|| CONV_IS(BGR48BE, RGB48BE)) conv = rgb48tobgr48_nobswap; |
else if (CONV_IS(RGB48LE, BGR48BE) |
|| CONV_IS(BGR48LE, RGB48BE) |
|| CONV_IS(RGB48BE, BGR48LE) |
|| CONV_IS(BGR48BE, RGB48LE)) conv = rgb48tobgr48_bswap; |
} else if (isRGBA64(srcFormat) && isRGB48(dstFormat)) { |
if (CONV_IS(RGBA64LE, BGR48LE) |
|| CONV_IS(BGRA64LE, RGB48LE) |
|| CONV_IS(RGBA64BE, BGR48BE) |
|| CONV_IS(BGRA64BE, RGB48BE)) conv = rgb64tobgr48_nobswap; |
else if (CONV_IS(RGBA64LE, BGR48BE) |
|| CONV_IS(BGRA64LE, RGB48BE) |
|| CONV_IS(RGBA64BE, BGR48LE) |
|| CONV_IS(BGRA64BE, RGB48LE)) conv = rgb64tobgr48_bswap; |
else if (CONV_IS(RGBA64LE, RGB48LE) |
|| CONV_IS(BGRA64LE, BGR48LE) |
|| CONV_IS(RGBA64BE, RGB48BE) |
|| CONV_IS(BGRA64BE, BGR48BE)) conv = rgb64to48_nobswap; |
else if (CONV_IS(RGBA64LE, RGB48BE) |
|| CONV_IS(BGRA64LE, BGR48BE) |
|| CONV_IS(RGBA64BE, RGB48LE) |
|| CONV_IS(BGRA64BE, BGR48LE)) conv = rgb64to48_bswap; |
} else |
/* BGR -> BGR */ |
if ((isBGRinInt(srcFormat) && isBGRinInt(dstFormat)) || |
(isRGBinInt(srcFormat) && isRGBinInt(dstFormat))) { |
switch (srcId | (dstId << 16)) { |
case 0x000F000C: conv = rgb12to15; break; |
case 0x000F0010: conv = rgb16to15; break; |
case 0x000F0018: conv = rgb24to15; break; |
case 0x000F0020: conv = rgb32to15; break; |
case 0x0010000F: conv = rgb15to16; break; |
case 0x00100018: conv = rgb24to16; break; |
case 0x00100020: conv = rgb32to16; break; |
case 0x0018000F: conv = rgb15to24; break; |
case 0x00180010: conv = rgb16to24; break; |
case 0x00180020: conv = rgb32to24; break; |
case 0x0020000F: conv = rgb15to32; break; |
case 0x00200010: conv = rgb16to32; break; |
case 0x00200018: conv = rgb24to32; break; |
} |
} else if ((isBGRinInt(srcFormat) && isRGBinInt(dstFormat)) || |
(isRGBinInt(srcFormat) && isBGRinInt(dstFormat))) { |
switch (srcId | (dstId << 16)) { |
case 0x000C000C: conv = rgb12tobgr12; break; |
case 0x000F000F: conv = rgb15tobgr15; break; |
case 0x000F0010: conv = rgb16tobgr15; break; |
case 0x000F0018: conv = rgb24tobgr15; break; |
case 0x000F0020: conv = rgb32tobgr15; break; |
case 0x0010000F: conv = rgb15tobgr16; break; |
case 0x00100010: conv = rgb16tobgr16; break; |
case 0x00100018: conv = rgb24tobgr16; break; |
case 0x00100020: conv = rgb32tobgr16; break; |
case 0x0018000F: conv = rgb15tobgr24; break; |
case 0x00180010: conv = rgb16tobgr24; break; |
case 0x00180018: conv = rgb24tobgr24; break; |
case 0x00180020: conv = rgb32tobgr24; break; |
case 0x0020000F: conv = rgb15tobgr32; break; |
case 0x00200010: conv = rgb16tobgr32; break; |
case 0x00200018: conv = rgb24tobgr32; break; |
} |
} |
if ((dstFormat == AV_PIX_FMT_RGB32_1 || dstFormat == AV_PIX_FMT_BGR32_1) && !isRGBA32(srcFormat) && ALT32_CORR<0) |
return NULL; |
// Maintain symmetry between endianness |
if (c->flags & SWS_BITEXACT) |
if ((dstFormat == AV_PIX_FMT_RGB32 || dstFormat == AV_PIX_FMT_BGR32 ) && !isRGBA32(srcFormat) && ALT32_CORR>0) |
return NULL; |
return conv; |
} |
/* {RGB,BGR}{15,16,24,32,32_1} -> {RGB,BGR}{15,16,24,32} */ |
static int rgbToRgbWrapper(SwsContext *c, const uint8_t *src[], int srcStride[], |
int srcSliceY, int srcSliceH, uint8_t *dst[], |
int dstStride[]) |
{ |
const enum AVPixelFormat srcFormat = c->srcFormat; |
const enum AVPixelFormat dstFormat = c->dstFormat; |
const AVPixFmtDescriptor *desc_src = av_pix_fmt_desc_get(c->srcFormat); |
const AVPixFmtDescriptor *desc_dst = av_pix_fmt_desc_get(c->dstFormat); |
const int srcBpp = (c->srcFormatBpp + 7) >> 3; |
const int dstBpp = (c->dstFormatBpp + 7) >> 3; |
rgbConvFn conv = findRgbConvFn(c); |
if (!conv) { |
av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n", |
av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat)); |
} else { |
const uint8_t *srcPtr = src[0]; |
uint8_t *dstPtr = dst[0]; |
int src_bswap = IS_NOT_NE(c->srcFormatBpp, desc_src); |
int dst_bswap = IS_NOT_NE(c->dstFormatBpp, desc_dst); |
if ((srcFormat == AV_PIX_FMT_RGB32_1 || srcFormat == AV_PIX_FMT_BGR32_1) && |
!isRGBA32(dstFormat)) |
srcPtr += ALT32_CORR; |
if ((dstFormat == AV_PIX_FMT_RGB32_1 || dstFormat == AV_PIX_FMT_BGR32_1) && |
!isRGBA32(srcFormat)) { |
int i; |
av_assert0(ALT32_CORR == 1); |
for (i = 0; i < srcSliceH; i++) |
dstPtr[dstStride[0] * (srcSliceY + i)] = 255; |
dstPtr += ALT32_CORR; |
} |
if (dstStride[0] * srcBpp == srcStride[0] * dstBpp && srcStride[0] > 0 && |
!(srcStride[0] % srcBpp) && !dst_bswap && !src_bswap) |
conv(srcPtr, dstPtr + dstStride[0] * srcSliceY, |
(srcSliceH - 1) * srcStride[0] + c->srcW * srcBpp); |
else { |
int i, j; |
dstPtr += dstStride[0] * srcSliceY; |
for (i = 0; i < srcSliceH; i++) { |
if(src_bswap) { |
for(j=0; j<c->srcW; j++) |
((uint16_t*)c->formatConvBuffer)[j] = av_bswap16(((uint16_t*)srcPtr)[j]); |
conv(c->formatConvBuffer, dstPtr, c->srcW * srcBpp); |
}else |
conv(srcPtr, dstPtr, c->srcW * srcBpp); |
if(dst_bswap) |
for(j=0; j<c->srcW; j++) |
((uint16_t*)dstPtr)[j] = av_bswap16(((uint16_t*)dstPtr)[j]); |
srcPtr += srcStride[0]; |
dstPtr += dstStride[0]; |
} |
} |
} |
return srcSliceH; |
} |
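/* Packed BGR24 -> YUV420P using the context's RGB->YUV coefficient table; |
 * a destination alpha plane, if present, is filled with 255. */ |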
static int bgr24ToYv12Wrapper(SwsContext *c, const uint8_t *src[], |
int srcStride[], int srcSliceY, int srcSliceH, |
uint8_t *dst[], int dstStride[]) |
{ |
ff_rgb24toyv12( |
src[0], |
dst[0] + srcSliceY * dstStride[0], |
dst[1] + (srcSliceY >> 1) * dstStride[1], |
dst[2] + (srcSliceY >> 1) * dstStride[2], |
c->srcW, srcSliceH, |
dstStride[0], dstStride[1], srcStride[0], |
c->input_rgb2yuv_table); |
if (dst[3]) |
fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); |
return srcSliceH; |
} |
static int yvu9ToYv12Wrapper(SwsContext *c, const uint8_t *src[], |
int srcStride[], int srcSliceY, int srcSliceH, |
uint8_t *dst[], int dstStride[]) |
{ |
copyPlane(src[0], srcStride[0], srcSliceY, srcSliceH, c->srcW, |
dst[0], dstStride[0]); |
planar2x(src[1], dst[1] + dstStride[1] * (srcSliceY >> 1), c->chrSrcW, |
srcSliceH >> 2, srcStride[1], dstStride[1]); |
planar2x(src[2], dst[2] + dstStride[2] * (srcSliceY >> 1), c->chrSrcW, |
srcSliceH >> 2, srcStride[2], dstStride[2]); |
if (dst[3]) |
fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); |
return srcSliceH; |
} |
/* Unscaled copy-like wrappers (assume nearly identical formats) */ |
static int packedCopyWrapper(SwsContext *c, const uint8_t *src[], |
int srcStride[], int srcSliceY, int srcSliceH, |
uint8_t *dst[], int dstStride[]) |
{ |
if (dstStride[0] == srcStride[0] && srcStride[0] > 0) |
memcpy(dst[0] + dstStride[0] * srcSliceY, src[0], srcSliceH * dstStride[0]); |
else { |
int i; |
const uint8_t *srcPtr = src[0]; |
uint8_t *dstPtr = dst[0] + dstStride[0] * srcSliceY; |
int length = 0; |
/* universal length finder */ |
while (length + c->srcW <= FFABS(dstStride[0]) && |
length + c->srcW <= FFABS(srcStride[0])) |
length += c->srcW; |
av_assert1(length != 0); |
for (i = 0; i < srcSliceH; i++) { |
memcpy(dstPtr, srcPtr, length); |
srcPtr += srcStride[0]; |
dstPtr += dstStride[0]; |
} |
} |
return srcSliceH; |
} |
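/* Depth-reducing copy with ordered dithering: a per-line dither row is |
 * added before rescaling with a multiply/shift pair taken from the |
 * dither_scale tables; the inner loop is unrolled by 8. */ |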
#define DITHER_COPY(dst, dstStride, src, srcStride, bswap, dbswap)\ |
uint16_t scale= dither_scale[dst_depth-1][src_depth-1];\ |
int shift= src_depth-dst_depth + dither_scale[src_depth-2][dst_depth-1];\ |
for (i = 0; i < height; i++) {\ |
const uint8_t *dither= dithers[src_depth-9][i&7];\ |
for (j = 0; j < length-7; j+=8){\ |
dst[j+0] = dbswap((bswap(src[j+0]) + dither[0])*scale>>shift);\ |
dst[j+1] = dbswap((bswap(src[j+1]) + dither[1])*scale>>shift);\ |
dst[j+2] = dbswap((bswap(src[j+2]) + dither[2])*scale>>shift);\ |
dst[j+3] = dbswap((bswap(src[j+3]) + dither[3])*scale>>shift);\ |
dst[j+4] = dbswap((bswap(src[j+4]) + dither[4])*scale>>shift);\ |
dst[j+5] = dbswap((bswap(src[j+5]) + dither[5])*scale>>shift);\ |
dst[j+6] = dbswap((bswap(src[j+6]) + dither[6])*scale>>shift);\ |
dst[j+7] = dbswap((bswap(src[j+7]) + dither[7])*scale>>shift);\ |
}\ |
for (; j < length; j++)\ |
dst[j] = dbswap((bswap(src[j]) + dither[j&7])*scale>>shift);\ |
dst += dstStride;\ |
src += srcStride;\ |
} |
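/* Generic per-plane copy between nearly identical planar formats: handles |
 * bit-depth changes (dithering when reducing), endianness swaps, and |
 * missing source planes (filled with a default value). */ |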
static int planarCopyWrapper(SwsContext *c, const uint8_t *src[], |
int srcStride[], int srcSliceY, int srcSliceH, |
uint8_t *dst[], int dstStride[]) |
{ |
const AVPixFmtDescriptor *desc_src = av_pix_fmt_desc_get(c->srcFormat); |
const AVPixFmtDescriptor *desc_dst = av_pix_fmt_desc_get(c->dstFormat); |
int plane, i, j; |
for (plane = 0; plane < 4; plane++) { |
int length = (plane == 0 || plane == 3) ? c->srcW : FF_CEIL_RSHIFT(c->srcW, c->chrDstHSubSample); |
int y = (plane == 0 || plane == 3) ? srcSliceY: FF_CEIL_RSHIFT(srcSliceY, c->chrDstVSubSample); |
int height = (plane == 0 || plane == 3) ? srcSliceH: FF_CEIL_RSHIFT(srcSliceH, c->chrDstVSubSample); |
const uint8_t *srcPtr = src[plane]; |
uint8_t *dstPtr = dst[plane] + dstStride[plane] * y; |
int shiftonly = plane == 1 || plane == 2 || (!c->srcRange && plane == 0); |
if (!dst[plane]) |
continue; |
// ignore palette for GRAY8 |
if (plane == 1 && !dst[2]) continue; |
if (!src[plane] || (plane == 1 && !src[2])) { |
if (is16BPS(c->dstFormat) || isNBPS(c->dstFormat)) { |
fillPlane16(dst[plane], dstStride[plane], length, height, y, |
plane == 3, desc_dst->comp[plane].depth_minus1, |
isBE(c->dstFormat)); |
} else { |
fillPlane(dst[plane], dstStride[plane], length, height, y, |
(plane == 3) ? 255 : 128); |
} |
} else { |
if(isNBPS(c->srcFormat) || isNBPS(c->dstFormat) |
|| (is16BPS(c->srcFormat) != is16BPS(c->dstFormat)) |
) { |
const int src_depth = desc_src->comp[plane].depth_minus1 + 1; |
const int dst_depth = desc_dst->comp[plane].depth_minus1 + 1; |
const uint16_t *srcPtr2 = (const uint16_t *) srcPtr; |
uint16_t *dstPtr2 = (uint16_t*)dstPtr; |
if (dst_depth == 8) { |
if(isBE(c->srcFormat) == HAVE_BIGENDIAN){ |
DITHER_COPY(dstPtr, dstStride[plane], srcPtr2, srcStride[plane]/2, , ) |
} else { |
DITHER_COPY(dstPtr, dstStride[plane], srcPtr2, srcStride[plane]/2, av_bswap16, ) |
} |
} else if (src_depth == 8) { |
for (i = 0; i < height; i++) { |
#define COPY816(w)\ |
if (shiftonly) {\ |
for (j = 0; j < length; j++)\ |
w(&dstPtr2[j], srcPtr[j]<<(dst_depth-8));\ |
} else {\ |
for (j = 0; j < length; j++)\ |
w(&dstPtr2[j], (srcPtr[j]<<(dst_depth-8)) |\ |
(srcPtr[j]>>(2*8-dst_depth)));\ |
} |
if(isBE(c->dstFormat)){ |
COPY816(AV_WB16) |
} else { |
COPY816(AV_WL16) |
} |
dstPtr2 += dstStride[plane]/2; |
srcPtr += srcStride[plane]; |
} |
} else if (src_depth <= dst_depth) { |
for (i = 0; i < height; i++) { |
j = 0; |
if(isBE(c->srcFormat) == HAVE_BIGENDIAN && |
isBE(c->dstFormat) == HAVE_BIGENDIAN && |
shiftonly) { |
unsigned shift = dst_depth - src_depth; |
#if HAVE_FAST_64BIT |
#define FAST_COPY_UP(shift) \ |
for (; j < length - 3; j += 4) { \ |
uint64_t v = AV_RN64A(srcPtr2 + j); \ |
AV_WN64A(dstPtr2 + j, v << shift); \ |
} |
#else |
#define FAST_COPY_UP(shift) \ |
for (; j < length - 1; j += 2) { \ |
uint32_t v = AV_RN32A(srcPtr2 + j); \ |
AV_WN32A(dstPtr2 + j, v << shift); \ |
} |
#endif |
switch (shift) |
{ |
case 6: FAST_COPY_UP(6); break; |
case 7: FAST_COPY_UP(7); break; |
} |
} |
#define COPY_UP(r,w) \ |
if(shiftonly){\ |
for (; j < length; j++){ \ |
unsigned int v= r(&srcPtr2[j]);\ |
w(&dstPtr2[j], v<<(dst_depth-src_depth));\ |
}\ |
}else{\ |
for (; j < length; j++){ \ |
unsigned int v= r(&srcPtr2[j]);\ |
w(&dstPtr2[j], (v<<(dst_depth-src_depth)) | \ |
(v>>(2*src_depth-dst_depth)));\ |
}\ |
} |
if(isBE(c->srcFormat)){ |
if(isBE(c->dstFormat)){ |
COPY_UP(AV_RB16, AV_WB16) |
} else { |
COPY_UP(AV_RB16, AV_WL16) |
} |
} else { |
if(isBE(c->dstFormat)){ |
COPY_UP(AV_RL16, AV_WB16) |
} else { |
COPY_UP(AV_RL16, AV_WL16) |
} |
} |
dstPtr2 += dstStride[plane]/2; |
srcPtr2 += srcStride[plane]/2; |
} |
} else { |
if(isBE(c->srcFormat) == HAVE_BIGENDIAN){ |
if(isBE(c->dstFormat) == HAVE_BIGENDIAN){ |
DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , ) |
} else { |
DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , av_bswap16) |
} |
}else{ |
if(isBE(c->dstFormat) == HAVE_BIGENDIAN){ |
DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, ) |
} else { |
DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, av_bswap16) |
} |
} |
} |
} else if (is16BPS(c->srcFormat) && is16BPS(c->dstFormat) && |
isBE(c->srcFormat) != isBE(c->dstFormat)) { |
for (i = 0; i < height; i++) { |
for (j = 0; j < length; j++) |
((uint16_t *) dstPtr)[j] = av_bswap16(((const uint16_t *) srcPtr)[j]); |
srcPtr += srcStride[plane]; |
dstPtr += dstStride[plane]; |
} |
} else if (dstStride[plane] == srcStride[plane] && |
srcStride[plane] > 0 && srcStride[plane] == length) { |
memcpy(dst[plane] + dstStride[plane] * y, src[plane], |
height * dstStride[plane]); |
} else { |
if (is16BPS(c->srcFormat) && is16BPS(c->dstFormat)) |
length *= 2; |
else if (!desc_src->comp[0].depth_minus1) |
length >>= 3; // monowhite/black |
for (i = 0; i < height; i++) { |
memcpy(dstPtr, srcPtr, length); |
srcPtr += srcStride[plane]; |
dstPtr += dstStride[plane]; |
} |
} |
} |
} |
return srcSliceH; |
} |
#define IS_DIFFERENT_ENDIANESS(src_fmt, dst_fmt, pix_fmt) \ |
((src_fmt == pix_fmt ## BE && dst_fmt == pix_fmt ## LE) || \ |
(src_fmt == pix_fmt ## LE && dst_fmt == pix_fmt ## BE)) |
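/* Probe for a special-case unscaled converter matching the context's |
 * formats and flags; later matches override earlier ones, and c->swscale |
 * is left unchanged when nothing applies. */ |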
void ff_get_unscaled_swscale(SwsContext *c) |
{ |
const enum AVPixelFormat srcFormat = c->srcFormat; |
const enum AVPixelFormat dstFormat = c->dstFormat; |
const int flags = c->flags; |
const int dstH = c->dstH; |
int needsDither; |
needsDither = isAnyRGB(dstFormat) && |
c->dstFormatBpp < 24 && |
(c->dstFormatBpp < c->srcFormatBpp || (!isAnyRGB(srcFormat))); |
/* yv12_to_nv12 */ |
if ((srcFormat == AV_PIX_FMT_YUV420P || srcFormat == AV_PIX_FMT_YUVA420P) && |
(dstFormat == AV_PIX_FMT_NV12 || dstFormat == AV_PIX_FMT_NV21)) { |
c->swscale = planarToNv12Wrapper; |
} |
/* nv12_to_yv12 */ |
if (dstFormat == AV_PIX_FMT_YUV420P && |
(srcFormat == AV_PIX_FMT_NV12 || srcFormat == AV_PIX_FMT_NV21)) { |
c->swscale = nv12ToPlanarWrapper; |
} |
/* yuv2bgr */ |
if ((srcFormat == AV_PIX_FMT_YUV420P || srcFormat == AV_PIX_FMT_YUV422P || |
srcFormat == AV_PIX_FMT_YUVA420P) && isAnyRGB(dstFormat) && |
!(flags & SWS_ACCURATE_RND) && (c->dither == SWS_DITHER_BAYER || c->dither == SWS_DITHER_AUTO) && !(dstH & 1)) { |
c->swscale = ff_yuv2rgb_get_func_ptr(c); |
} |
if (srcFormat == AV_PIX_FMT_YUV410P && !(dstH & 3) && |
(dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) && |
!(flags & SWS_BITEXACT)) { |
c->swscale = yvu9ToYv12Wrapper; |
} |
/* bgr24toYV12 */ |
if (srcFormat == AV_PIX_FMT_BGR24 && |
(dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) && |
!(flags & SWS_ACCURATE_RND)) |
c->swscale = bgr24ToYv12Wrapper; |
/* RGB/BGR -> RGB/BGR (no dither needed forms) */ |
if (isAnyRGB(srcFormat) && isAnyRGB(dstFormat) && findRgbConvFn(c) |
&& (!needsDither || (c->flags&(SWS_FAST_BILINEAR|SWS_POINT)))) |
c->swscale = rgbToRgbWrapper; |
/* RGB to planar RGB */ |
if ((srcFormat == AV_PIX_FMT_GBRP && dstFormat == AV_PIX_FMT_GBRAP) || |
(srcFormat == AV_PIX_FMT_GBRAP && dstFormat == AV_PIX_FMT_GBRP)) |
c->swscale = planarRgbToplanarRgbWrapper; |
#define isByteRGB(f) ( \ |
f == AV_PIX_FMT_RGB32 || \ |
f == AV_PIX_FMT_RGB32_1 || \ |
f == AV_PIX_FMT_RGB24 || \ |
f == AV_PIX_FMT_BGR32 || \ |
f == AV_PIX_FMT_BGR32_1 || \ |
f == AV_PIX_FMT_BGR24) |
if (srcFormat == AV_PIX_FMT_GBRP && isPlanar(srcFormat) && isByteRGB(dstFormat)) |
c->swscale = planarRgbToRgbWrapper; |
if ((srcFormat == AV_PIX_FMT_RGB48LE || srcFormat == AV_PIX_FMT_RGB48BE || |
srcFormat == AV_PIX_FMT_BGR48LE || srcFormat == AV_PIX_FMT_BGR48BE || |
srcFormat == AV_PIX_FMT_RGBA64LE || srcFormat == AV_PIX_FMT_RGBA64BE || |
srcFormat == AV_PIX_FMT_BGRA64LE || srcFormat == AV_PIX_FMT_BGRA64BE) && |
(dstFormat == AV_PIX_FMT_GBRP9LE || dstFormat == AV_PIX_FMT_GBRP9BE || |
dstFormat == AV_PIX_FMT_GBRP10LE || dstFormat == AV_PIX_FMT_GBRP10BE || |
dstFormat == AV_PIX_FMT_GBRP12LE || dstFormat == AV_PIX_FMT_GBRP12BE || |
dstFormat == AV_PIX_FMT_GBRP14LE || dstFormat == AV_PIX_FMT_GBRP14BE || |
dstFormat == AV_PIX_FMT_GBRP16LE || dstFormat == AV_PIX_FMT_GBRP16BE || |
dstFormat == AV_PIX_FMT_GBRAP16LE || dstFormat == AV_PIX_FMT_GBRAP16BE )) |
c->swscale = Rgb16ToPlanarRgb16Wrapper; |
if ((srcFormat == AV_PIX_FMT_GBRP9LE || srcFormat == AV_PIX_FMT_GBRP9BE || |
srcFormat == AV_PIX_FMT_GBRP16LE || srcFormat == AV_PIX_FMT_GBRP16BE || |
srcFormat == AV_PIX_FMT_GBRP10LE || srcFormat == AV_PIX_FMT_GBRP10BE || |
srcFormat == AV_PIX_FMT_GBRP12LE || srcFormat == AV_PIX_FMT_GBRP12BE || |
srcFormat == AV_PIX_FMT_GBRP14LE || srcFormat == AV_PIX_FMT_GBRP14BE || |
srcFormat == AV_PIX_FMT_GBRAP16LE || srcFormat == AV_PIX_FMT_GBRAP16BE) && |
(dstFormat == AV_PIX_FMT_RGB48LE || dstFormat == AV_PIX_FMT_RGB48BE || |
dstFormat == AV_PIX_FMT_BGR48LE || dstFormat == AV_PIX_FMT_BGR48BE || |
dstFormat == AV_PIX_FMT_RGBA64LE || dstFormat == AV_PIX_FMT_RGBA64BE || |
dstFormat == AV_PIX_FMT_BGRA64LE || dstFormat == AV_PIX_FMT_BGRA64BE)) |
c->swscale = planarRgb16ToRgb16Wrapper; |
if (av_pix_fmt_desc_get(srcFormat)->comp[0].depth_minus1 == 7 && |
isPackedRGB(srcFormat) && dstFormat == AV_PIX_FMT_GBRP) |
c->swscale = rgbToPlanarRgbWrapper; |
if (isBayer(srcFormat)) { |
if (dstFormat == AV_PIX_FMT_RGB24) |
c->swscale = bayer_to_rgb24_wrapper; |
else if (dstFormat == AV_PIX_FMT_YUV420P) |
c->swscale = bayer_to_yv12_wrapper; |
else if (!isBayer(dstFormat)) { |
av_log(c, AV_LOG_ERROR, "unsupported bayer conversion\n"); |
av_assert0(0); |
} |
} |
/* bswap 16 bits per pixel/component packed formats */ |
if (IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BAYER_BGGR16) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BAYER_RGGB16) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BAYER_GBRG16) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BAYER_GRBG16) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BGR444) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BGR48) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BGRA64) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BGR555) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BGR565) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GRAY16) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YA16) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_AYUV64) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GBRP9) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GBRP10) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GBRP12) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GBRP14) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GBRP16) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GBRAP16) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_RGB444) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_RGB48) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_RGBA64) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_RGB555) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_RGB565) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_XYZ12) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV420P9) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV420P10) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV420P12) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV420P14) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV420P16) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV422P9) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV422P10) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV422P12) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV422P14) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV422P16) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV440P10) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV440P12) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV444P9) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV444P10) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV444P12) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV444P14) || |
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV444P16)) |
c->swscale = packed_16bpc_bswap; |
if (usePal(srcFormat) && isByteRGB(dstFormat)) |
c->swscale = palToRgbWrapper; |
if (srcFormat == AV_PIX_FMT_YUV422P) { |
if (dstFormat == AV_PIX_FMT_YUYV422) |
c->swscale = yuv422pToYuy2Wrapper; |
else if (dstFormat == AV_PIX_FMT_UYVY422) |
c->swscale = yuv422pToUyvyWrapper; |
} |
/* LQ converters if -sws 0 or -sws 4 */ |
if (c->flags&(SWS_FAST_BILINEAR|SWS_POINT)) { |
/* yv12_to_yuy2 */ |
if (srcFormat == AV_PIX_FMT_YUV420P || srcFormat == AV_PIX_FMT_YUVA420P) { |
if (dstFormat == AV_PIX_FMT_YUYV422) |
c->swscale = planarToYuy2Wrapper; |
else if (dstFormat == AV_PIX_FMT_UYVY422) |
c->swscale = planarToUyvyWrapper; |
} |
} |
if (srcFormat == AV_PIX_FMT_YUYV422 && |
(dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P)) |
c->swscale = yuyvToYuv420Wrapper; |
if (srcFormat == AV_PIX_FMT_UYVY422 && |
(dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P)) |
c->swscale = uyvyToYuv420Wrapper; |
if (srcFormat == AV_PIX_FMT_YUYV422 && dstFormat == AV_PIX_FMT_YUV422P) |
c->swscale = yuyvToYuv422Wrapper; |
if (srcFormat == AV_PIX_FMT_UYVY422 && dstFormat == AV_PIX_FMT_YUV422P) |
c->swscale = uyvyToYuv422Wrapper; |
#define isPlanarGray(x) (isGray(x) && (x) != AV_PIX_FMT_YA8 && (x) != AV_PIX_FMT_YA16LE && (x) != AV_PIX_FMT_YA16BE) |
/* simple copy */ |
if ( srcFormat == dstFormat || |
(srcFormat == AV_PIX_FMT_YUVA420P && dstFormat == AV_PIX_FMT_YUV420P) || |
(srcFormat == AV_PIX_FMT_YUV420P && dstFormat == AV_PIX_FMT_YUVA420P) || |
(isPlanarYUV(srcFormat) && isPlanarGray(dstFormat)) || |
(isPlanarYUV(dstFormat) && isPlanarGray(srcFormat)) || |
(isPlanarGray(dstFormat) && isPlanarGray(srcFormat)) || |
(isPlanarYUV(srcFormat) && isPlanarYUV(dstFormat) && |
c->chrDstHSubSample == c->chrSrcHSubSample && |
c->chrDstVSubSample == c->chrSrcVSubSample && |
dstFormat != AV_PIX_FMT_NV12 && dstFormat != AV_PIX_FMT_NV21 && |
srcFormat != AV_PIX_FMT_NV12 && srcFormat != AV_PIX_FMT_NV21)) |
{ |
if (isPacked(c->srcFormat)) |
c->swscale = packedCopyWrapper; |
else /* Planar YUV or gray */ |
c->swscale = planarCopyWrapper; |
} |
if (ARCH_PPC) |
ff_get_unscaled_swscale_ppc(c); |
// if (ARCH_ARM) |
// ff_get_unscaled_swscale_arm(c); |
} |
/* Expand 8-bit paletted pixels to the packed 32-bit format of the palette entries */ |
void sws_convertPalette8ToPacked32(const uint8_t *src, uint8_t *dst, |
int num_pixels, const uint8_t *palette) |
{ |
int i; |
for (i = 0; i < num_pixels; i++) |
((uint32_t *) dst)[i] = ((const uint32_t *) palette)[src[i]]; |
} |
/* Palette format: ABCD -> dst format: ABC */ |
void sws_convertPalette8ToPacked24(const uint8_t *src, uint8_t *dst, |
int num_pixels, const uint8_t *palette) |
{ |
int i; |
for (i = 0; i < num_pixels; i++) { |
//FIXME slow? |
dst[0] = palette[src[i] * 4 + 0]; |
dst[1] = palette[src[i] * 4 + 1]; |
dst[2] = palette[src[i] * 4 + 2]; |
dst += 3; |
} |
} |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/swscaleres.rc |
---|
0,0 → 1,55 |
/* |
* Windows resource file for libswscale |
* |
* Copyright (C) 2012 James Almer |
* Copyright (C) 2013 Tiancheng "Timothy" Gu |
* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
#include <windows.h> |
#include "libswscale/version.h" |
#include "libavutil/ffversion.h" |
#include "config.h" |
1 VERSIONINFO |
FILEVERSION LIBSWSCALE_VERSION_MAJOR, LIBSWSCALE_VERSION_MINOR, LIBSWSCALE_VERSION_MICRO, 0 |
PRODUCTVERSION LIBSWSCALE_VERSION_MAJOR, LIBSWSCALE_VERSION_MINOR, LIBSWSCALE_VERSION_MICRO, 0 |
FILEFLAGSMASK VS_FFI_FILEFLAGSMASK |
FILEOS VOS_NT_WINDOWS32 |
FILETYPE VFT_DLL |
{ |
BLOCK "StringFileInfo" |
{ |
BLOCK "040904B0" |
{ |
VALUE "CompanyName", "FFmpeg Project" |
VALUE "FileDescription", "FFmpeg image rescaling library" |
VALUE "FileVersion", AV_STRINGIFY(LIBSWSCALE_VERSION) |
VALUE "InternalName", "libswscale" |
VALUE "LegalCopyright", "Copyright (C) 2000-" AV_STRINGIFY(CONFIG_THIS_YEAR) " FFmpeg Project" |
VALUE "OriginalFilename", "swscale" BUILDSUF "-" AV_STRINGIFY(LIBSWSCALE_VERSION_MAJOR) SLIBSUF |
VALUE "ProductName", "FFmpeg" |
VALUE "ProductVersion", FFMPEG_VERSION |
} |
} |
BLOCK "VarFileInfo" |
{ |
VALUE "Translation", 0x0409, 0x04B0 |
} |
} |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/utils.c |
---|
0,0 → 1,2386 |
/* |
* Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> |
* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
#include "config.h" |
#define _SVID_SOURCE // needed for MAP_ANONYMOUS |
#define _DARWIN_C_SOURCE // needed for MAP_ANON |
#include <inttypes.h> |
#include <math.h> |
#include <stdio.h> |
#include <string.h> |
#if HAVE_SYS_MMAN_H |
#include <sys/mman.h> |
#if defined(MAP_ANON) && !defined(MAP_ANONYMOUS) |
#define MAP_ANONYMOUS MAP_ANON |
#endif |
#endif |
#if HAVE_VIRTUALALLOC |
#define WIN32_LEAN_AND_MEAN |
#include <windows.h> |
#endif |
#include "libavutil/attributes.h" |
#include "libavutil/avassert.h" |
#include "libavutil/avutil.h" |
#include "libavutil/bswap.h" |
#include "libavutil/cpu.h" |
#include "libavutil/imgutils.h" |
#include "libavutil/intreadwrite.h" |
#include "libavutil/mathematics.h" |
#include "libavutil/opt.h" |
#include "libavutil/pixdesc.h" |
#include "libavutil/ppc/cpu.h" |
#include "libavutil/x86/asm.h" |
#include "libavutil/x86/cpu.h" |
#include "rgb2rgb.h" |
#include "swscale.h" |
#include "swscale_internal.h" |
static void handle_formats(SwsContext *c); |
unsigned swscale_version(void) |
{ |
av_assert0(LIBSWSCALE_VERSION_MICRO >= 100); |
return LIBSWSCALE_VERSION_INT; |
} |
const char *swscale_configuration(void) |
{ |
return FFMPEG_CONFIGURATION; |
} |
const char *swscale_license(void) |
{ |
#define LICENSE_PREFIX "libswscale license: " |
return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1; |
} |
typedef struct FormatEntry { |
uint8_t is_supported_in :1; |
uint8_t is_supported_out :1; |
uint8_t is_supported_endianness :1; |
} FormatEntry; |
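/* Per-format support table: input support, output support, and support |
 * for pure endianness conversion. */ |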
static const FormatEntry format_entries[AV_PIX_FMT_NB] = { |
[AV_PIX_FMT_YUV420P] = { 1, 1 }, |
[AV_PIX_FMT_YUYV422] = { 1, 1 }, |
[AV_PIX_FMT_RGB24] = { 1, 1 }, |
[AV_PIX_FMT_BGR24] = { 1, 1 }, |
[AV_PIX_FMT_YUV422P] = { 1, 1 }, |
[AV_PIX_FMT_YUV444P] = { 1, 1 }, |
[AV_PIX_FMT_YUV410P] = { 1, 1 }, |
[AV_PIX_FMT_YUV411P] = { 1, 1 }, |
[AV_PIX_FMT_GRAY8] = { 1, 1 }, |
[AV_PIX_FMT_MONOWHITE] = { 1, 1 }, |
[AV_PIX_FMT_MONOBLACK] = { 1, 1 }, |
[AV_PIX_FMT_PAL8] = { 1, 0 }, |
[AV_PIX_FMT_YUVJ420P] = { 1, 1 }, |
[AV_PIX_FMT_YUVJ411P] = { 1, 1 }, |
[AV_PIX_FMT_YUVJ422P] = { 1, 1 }, |
[AV_PIX_FMT_YUVJ444P] = { 1, 1 }, |
[AV_PIX_FMT_YVYU422] = { 1, 1 }, |
[AV_PIX_FMT_UYVY422] = { 1, 1 }, |
[AV_PIX_FMT_UYYVYY411] = { 0, 0 }, |
[AV_PIX_FMT_BGR8] = { 1, 1 }, |
[AV_PIX_FMT_BGR4] = { 0, 1 }, |
[AV_PIX_FMT_BGR4_BYTE] = { 1, 1 }, |
[AV_PIX_FMT_RGB8] = { 1, 1 }, |
[AV_PIX_FMT_RGB4] = { 0, 1 }, |
[AV_PIX_FMT_RGB4_BYTE] = { 1, 1 }, |
[AV_PIX_FMT_NV12] = { 1, 1 }, |
[AV_PIX_FMT_NV21] = { 1, 1 }, |
[AV_PIX_FMT_ARGB] = { 1, 1 }, |
[AV_PIX_FMT_RGBA] = { 1, 1 }, |
[AV_PIX_FMT_ABGR] = { 1, 1 }, |
[AV_PIX_FMT_BGRA] = { 1, 1 }, |
[AV_PIX_FMT_0RGB] = { 1, 1 }, |
[AV_PIX_FMT_RGB0] = { 1, 1 }, |
[AV_PIX_FMT_0BGR] = { 1, 1 }, |
[AV_PIX_FMT_BGR0] = { 1, 1 }, |
[AV_PIX_FMT_GRAY16BE] = { 1, 1 }, |
[AV_PIX_FMT_GRAY16LE] = { 1, 1 }, |
[AV_PIX_FMT_YUV440P] = { 1, 1 }, |
[AV_PIX_FMT_YUVJ440P] = { 1, 1 }, |
[AV_PIX_FMT_YUV440P10LE] = { 1, 1 }, |
[AV_PIX_FMT_YUV440P10BE] = { 1, 1 }, |
[AV_PIX_FMT_YUV440P12LE] = { 1, 1 }, |
[AV_PIX_FMT_YUV440P12BE] = { 1, 1 }, |
[AV_PIX_FMT_YUVA420P] = { 1, 1 }, |
[AV_PIX_FMT_YUVA422P] = { 1, 1 }, |
[AV_PIX_FMT_YUVA444P] = { 1, 1 }, |
[AV_PIX_FMT_YUVA420P9BE] = { 1, 1 }, |
[AV_PIX_FMT_YUVA420P9LE] = { 1, 1 }, |
[AV_PIX_FMT_YUVA422P9BE] = { 1, 1 }, |
[AV_PIX_FMT_YUVA422P9LE] = { 1, 1 }, |
[AV_PIX_FMT_YUVA444P9BE] = { 1, 1 }, |
[AV_PIX_FMT_YUVA444P9LE] = { 1, 1 }, |
[AV_PIX_FMT_YUVA420P10BE]= { 1, 1 }, |
[AV_PIX_FMT_YUVA420P10LE]= { 1, 1 }, |
[AV_PIX_FMT_YUVA422P10BE]= { 1, 1 }, |
[AV_PIX_FMT_YUVA422P10LE]= { 1, 1 }, |
[AV_PIX_FMT_YUVA444P10BE]= { 1, 1 }, |
[AV_PIX_FMT_YUVA444P10LE]= { 1, 1 }, |
[AV_PIX_FMT_YUVA420P16BE]= { 1, 1 }, |
[AV_PIX_FMT_YUVA420P16LE]= { 1, 1 }, |
[AV_PIX_FMT_YUVA422P16BE]= { 1, 1 }, |
[AV_PIX_FMT_YUVA422P16LE]= { 1, 1 }, |
[AV_PIX_FMT_YUVA444P16BE]= { 1, 1 }, |
[AV_PIX_FMT_YUVA444P16LE]= { 1, 1 }, |
[AV_PIX_FMT_RGB48BE] = { 1, 1 }, |
[AV_PIX_FMT_RGB48LE] = { 1, 1 }, |
[AV_PIX_FMT_RGBA64BE] = { 1, 1, 1 }, |
[AV_PIX_FMT_RGBA64LE] = { 1, 1, 1 }, |
[AV_PIX_FMT_RGB565BE] = { 1, 1 }, |
[AV_PIX_FMT_RGB565LE] = { 1, 1 }, |
[AV_PIX_FMT_RGB555BE] = { 1, 1 }, |
[AV_PIX_FMT_RGB555LE] = { 1, 1 }, |
[AV_PIX_FMT_BGR565BE] = { 1, 1 }, |
[AV_PIX_FMT_BGR565LE] = { 1, 1 }, |
[AV_PIX_FMT_BGR555BE] = { 1, 1 }, |
[AV_PIX_FMT_BGR555LE] = { 1, 1 }, |
[AV_PIX_FMT_YUV420P16LE] = { 1, 1 }, |
[AV_PIX_FMT_YUV420P16BE] = { 1, 1 }, |
[AV_PIX_FMT_YUV422P16LE] = { 1, 1 }, |
[AV_PIX_FMT_YUV422P16BE] = { 1, 1 }, |
[AV_PIX_FMT_YUV444P16LE] = { 1, 1 }, |
[AV_PIX_FMT_YUV444P16BE] = { 1, 1 }, |
[AV_PIX_FMT_RGB444LE] = { 1, 1 }, |
[AV_PIX_FMT_RGB444BE] = { 1, 1 }, |
[AV_PIX_FMT_BGR444LE] = { 1, 1 }, |
[AV_PIX_FMT_BGR444BE] = { 1, 1 }, |
[AV_PIX_FMT_YA8] = { 1, 1 }, |
[AV_PIX_FMT_YA16BE] = { 1, 0 }, |
[AV_PIX_FMT_YA16LE] = { 1, 0 }, |
[AV_PIX_FMT_BGR48BE] = { 1, 1 }, |
[AV_PIX_FMT_BGR48LE] = { 1, 1 }, |
[AV_PIX_FMT_BGRA64BE] = { 1, 1, 1 }, |
[AV_PIX_FMT_BGRA64LE] = { 1, 1, 1 }, |
[AV_PIX_FMT_YUV420P9BE] = { 1, 1 }, |
[AV_PIX_FMT_YUV420P9LE] = { 1, 1 }, |
[AV_PIX_FMT_YUV420P10BE] = { 1, 1 }, |
[AV_PIX_FMT_YUV420P10LE] = { 1, 1 }, |
[AV_PIX_FMT_YUV420P12BE] = { 1, 1 }, |
[AV_PIX_FMT_YUV420P12LE] = { 1, 1 }, |
[AV_PIX_FMT_YUV420P14BE] = { 1, 1 }, |
[AV_PIX_FMT_YUV420P14LE] = { 1, 1 }, |
[AV_PIX_FMT_YUV422P9BE] = { 1, 1 }, |
[AV_PIX_FMT_YUV422P9LE] = { 1, 1 }, |
[AV_PIX_FMT_YUV422P10BE] = { 1, 1 }, |
[AV_PIX_FMT_YUV422P10LE] = { 1, 1 }, |
[AV_PIX_FMT_YUV422P12BE] = { 1, 1 }, |
[AV_PIX_FMT_YUV422P12LE] = { 1, 1 }, |
[AV_PIX_FMT_YUV422P14BE] = { 1, 1 }, |
[AV_PIX_FMT_YUV422P14LE] = { 1, 1 }, |
[AV_PIX_FMT_YUV444P9BE] = { 1, 1 }, |
[AV_PIX_FMT_YUV444P9LE] = { 1, 1 }, |
[AV_PIX_FMT_YUV444P10BE] = { 1, 1 }, |
[AV_PIX_FMT_YUV444P10LE] = { 1, 1 }, |
[AV_PIX_FMT_YUV444P12BE] = { 1, 1 }, |
[AV_PIX_FMT_YUV444P12LE] = { 1, 1 }, |
[AV_PIX_FMT_YUV444P14BE] = { 1, 1 }, |
[AV_PIX_FMT_YUV444P14LE] = { 1, 1 }, |
[AV_PIX_FMT_GBRP] = { 1, 1 }, |
[AV_PIX_FMT_GBRP9LE] = { 1, 1 }, |
[AV_PIX_FMT_GBRP9BE] = { 1, 1 }, |
[AV_PIX_FMT_GBRP10LE] = { 1, 1 }, |
[AV_PIX_FMT_GBRP10BE] = { 1, 1 }, |
[AV_PIX_FMT_GBRP12LE] = { 1, 1 }, |
[AV_PIX_FMT_GBRP12BE] = { 1, 1 }, |
[AV_PIX_FMT_GBRP14LE] = { 1, 1 }, |
[AV_PIX_FMT_GBRP14BE] = { 1, 1 }, |
[AV_PIX_FMT_GBRP16LE] = { 1, 0 }, |
[AV_PIX_FMT_GBRP16BE] = { 1, 0 }, |
[AV_PIX_FMT_GBRAP] = { 1, 1 }, |
[AV_PIX_FMT_GBRAP16LE] = { 1, 0 }, |
[AV_PIX_FMT_GBRAP16BE] = { 1, 0 }, |
[AV_PIX_FMT_BAYER_BGGR8] = { 1, 0 }, |
[AV_PIX_FMT_BAYER_RGGB8] = { 1, 0 }, |
[AV_PIX_FMT_BAYER_GBRG8] = { 1, 0 }, |
[AV_PIX_FMT_BAYER_GRBG8] = { 1, 0 }, |
[AV_PIX_FMT_BAYER_BGGR16LE] = { 1, 0 }, |
[AV_PIX_FMT_BAYER_BGGR16BE] = { 1, 0 }, |
[AV_PIX_FMT_BAYER_RGGB16LE] = { 1, 0 }, |
[AV_PIX_FMT_BAYER_RGGB16BE] = { 1, 0 }, |
[AV_PIX_FMT_BAYER_GBRG16LE] = { 1, 0 }, |
[AV_PIX_FMT_BAYER_GBRG16BE] = { 1, 0 }, |
[AV_PIX_FMT_BAYER_GRBG16LE] = { 1, 0 }, |
[AV_PIX_FMT_BAYER_GRBG16BE] = { 1, 0 }, |
[AV_PIX_FMT_XYZ12BE] = { 1, 1, 1 }, |
[AV_PIX_FMT_XYZ12LE] = { 1, 1, 1 }, |
[AV_PIX_FMT_AYUV64LE] = { 1, 1 }, |
}; |
int sws_isSupportedInput(enum AVPixelFormat pix_fmt) |
{ |
return (unsigned)pix_fmt < AV_PIX_FMT_NB ? |
format_entries[pix_fmt].is_supported_in : 0; |
} |
int sws_isSupportedOutput(enum AVPixelFormat pix_fmt) |
{ |
return (unsigned)pix_fmt < AV_PIX_FMT_NB ? |
format_entries[pix_fmt].is_supported_out : 0; |
} |
int sws_isSupportedEndiannessConversion(enum AVPixelFormat pix_fmt) |
{ |
return (unsigned)pix_fmt < AV_PIX_FMT_NB ? |
format_entries[pix_fmt].is_supported_endianness : 0; |
} |
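/* Evaluate a cubic spline segment: within the first unit interval the |
 * cubic a + b*dist + c*dist^2 + d*dist^3 is used directly; farther out |
 * the coefficients are remapped recursively onto the next segment. */ |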
static double getSplineCoeff(double a, double b, double c, double d, |
double dist) |
{ |
if (dist <= 1.0) |
return ((d * dist + c) * dist + b) * dist + a; |
else |
return getSplineCoeff(0.0, |
b + 2.0 * c + 3.0 * d, |
c + 3.0 * d, |
-b - 3.0 * c - 6.0 * d, |
dist - 1.0); |
} |
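/* Translate a chroma sample position (in 1/256 luma-pixel units; -1 or |
 * out-of-range values select the centered default) into the coordinate |
 * system of the subsampled chroma plane. */ |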
static av_cold int get_local_pos(SwsContext *s, int chr_subsample, int pos, int dir) |
{ |
if (pos == -1 || pos <= -513) { |
pos = (128 << chr_subsample) - 128; |
} |
pos += 128; // relative to ideal left edge |
return pos >> chr_subsample; |
} |
typedef struct { |
int flag; ///< flag associated to the algorithm |
const char *description; ///< human-readable description |
int size_factor; ///< size factor used when initing the filters |
} ScaleAlgorithm; |
static const ScaleAlgorithm scale_algorithms[] = { |
{ SWS_AREA, "area averaging", 1 /* downscale only, for upscale it is bilinear */ }, |
{ SWS_BICUBIC, "bicubic", 4 }, |
{ SWS_BICUBLIN, "luma bicubic / chroma bilinear", -1 }, |
{ SWS_BILINEAR, "bilinear", 2 }, |
{ SWS_FAST_BILINEAR, "fast bilinear", -1 }, |
{ SWS_GAUSS, "Gaussian", 8 /* infinite ;) */ }, |
{ SWS_LANCZOS, "Lanczos", -1 /* custom */ }, |
{ SWS_POINT, "nearest neighbor / point", -1 }, |
{ SWS_SINC, "sinc", 20 /* infinite ;) */ }, |
{ SWS_SPLINE, "bicubic spline", 20 /* infinite :)*/ }, |
{ SWS_X, "experimental", 8 }, |
}; |
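/* Build the coefficient array and per-output-pixel source positions for |
 * one scaling direction; coefficients are computed against the fixed-point |
 * unity value fone, and filterSize follows the chosen algorithm's |
 * size_factor and the scaling ratio. */ |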
static av_cold int initFilter(int16_t **outFilter, int32_t **filterPos, |
int *outFilterSize, int xInc, int srcW, |
int dstW, int filterAlign, int one, |
int flags, int cpu_flags, |
SwsVector *srcFilter, SwsVector *dstFilter, |
double param[2], int srcPos, int dstPos) |
{ |
int i; |
int filterSize; |
int filter2Size; |
int minFilterSize; |
int64_t *filter = NULL; |
int64_t *filter2 = NULL; |
const int64_t fone = 1LL << (54 - FFMIN(av_log2(srcW/dstW), 8)); |
int ret = -1; |
emms_c(); // FIXME should not be required but IS (even for non-MMX versions) |
// NOTE: the +3 is for the MMX(+1) / SSE(+3) scaler which reads over the end |
FF_ALLOC_ARRAY_OR_GOTO(NULL, *filterPos, (dstW + 3), sizeof(**filterPos), fail); |
if (FFABS(xInc - 0x10000) < 10 && srcPos == dstPos) { // unscaled |
int i; |
filterSize = 1; |
FF_ALLOCZ_ARRAY_OR_GOTO(NULL, filter, |
dstW, sizeof(*filter) * filterSize, fail); |
for (i = 0; i < dstW; i++) { |
filter[i * filterSize] = fone; |
(*filterPos)[i] = i; |
} |
} else if (flags & SWS_POINT) { // lame looking point sampling mode |
int i; |
int64_t xDstInSrc; |
filterSize = 1; |
FF_ALLOC_ARRAY_OR_GOTO(NULL, filter, |
dstW, sizeof(*filter) * filterSize, fail); |
xDstInSrc = ((dstPos*(int64_t)xInc)>>8) - ((srcPos*0x8000LL)>>7); |
for (i = 0; i < dstW; i++) { |
int xx = (xDstInSrc - ((filterSize - 1) << 15) + (1 << 15)) >> 16; |
(*filterPos)[i] = xx; |
filter[i] = fone; |
xDstInSrc += xInc; |
} |
} else if ((xInc <= (1 << 16) && (flags & SWS_AREA)) || |
(flags & SWS_FAST_BILINEAR)) { // bilinear upscale |
int i; |
int64_t xDstInSrc; |
filterSize = 2; |
FF_ALLOC_ARRAY_OR_GOTO(NULL, filter, |
dstW, sizeof(*filter) * filterSize, fail); |
xDstInSrc = ((dstPos*(int64_t)xInc)>>8) - ((srcPos*0x8000LL)>>7); |
for (i = 0; i < dstW; i++) { |
int xx = (xDstInSrc - ((filterSize - 1) << 15) + (1 << 15)) >> 16; |
int j; |
(*filterPos)[i] = xx; |
// bilinear upscale / linear interpolate / area averaging |
for (j = 0; j < filterSize; j++) { |
int64_t coeff= fone - FFABS(((int64_t)xx<<16) - xDstInSrc)*(fone>>16); |
if (coeff < 0) |
coeff = 0; |
filter[i * filterSize + j] = coeff; |
xx++; |
} |
xDstInSrc += xInc; |
} |
} else { |
int64_t xDstInSrc; |
int sizeFactor = -1; |
for (i = 0; i < FF_ARRAY_ELEMS(scale_algorithms); i++) { |
if (flags & scale_algorithms[i].flag && scale_algorithms[i].size_factor > 0) { |
sizeFactor = scale_algorithms[i].size_factor; |
break; |
} |
} |
if (flags & SWS_LANCZOS) |
sizeFactor = param[0] != SWS_PARAM_DEFAULT ? ceil(2 * param[0]) : 6; |
av_assert0(sizeFactor > 0); |
if (xInc <= 1 << 16) |
filterSize = 1 + sizeFactor; // upscale |
else |
filterSize = 1 + (sizeFactor * srcW + dstW - 1) / dstW; |
filterSize = FFMIN(filterSize, srcW - 2); |
filterSize = FFMAX(filterSize, 1); |
FF_ALLOC_ARRAY_OR_GOTO(NULL, filter, |
dstW, sizeof(*filter) * filterSize, fail); |
xDstInSrc = ((dstPos*(int64_t)xInc)>>7) - ((srcPos*0x10000LL)>>7); |
for (i = 0; i < dstW; i++) { |
int xx = (xDstInSrc - (filterSize - 2) * (1LL<<16)) / (1 << 17); |
int j; |
(*filterPos)[i] = xx; |
for (j = 0; j < filterSize; j++) { |
int64_t d = (FFABS(((int64_t)xx * (1 << 17)) - xDstInSrc)) << 13; |
double floatd; |
int64_t coeff; |
if (xInc > 1 << 16) |
d = d * dstW / srcW; |
floatd = d * (1.0 / (1 << 30)); |
if (flags & SWS_BICUBIC) { |
int64_t B = (param[0] != SWS_PARAM_DEFAULT ? param[0] : 0) * (1 << 24); |
int64_t C = (param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6) * (1 << 24); |
if (d >= 1LL << 31) { |
coeff = 0.0; |
} else { |
int64_t dd = (d * d) >> 30; |
int64_t ddd = (dd * d) >> 30; |
if (d < 1LL << 30) |
coeff = (12 * (1 << 24) - 9 * B - 6 * C) * ddd + |
(-18 * (1 << 24) + 12 * B + 6 * C) * dd + |
(6 * (1 << 24) - 2 * B) * (1 << 30); |
else |
coeff = (-B - 6 * C) * ddd + |
(6 * B + 30 * C) * dd + |
(-12 * B - 48 * C) * d + |
(8 * B + 24 * C) * (1 << 30); |
} |
coeff /= (1LL<<54)/fone; |
} |
#if 0 |
/* disabled legacy variant of the SWS_X kernel */ |
else if (flags & SWS_X) { |
double p = param[0] != SWS_PARAM_DEFAULT ? param[0] * 0.01 : 0.3; |
double w = floatd ? sin(floatd * M_PI) / (floatd * M_PI) : 1.0; |
coeff = w * pow(2.0, -p * floatd * floatd) * fone; |
} |
#endif |
else if (flags & SWS_X) { |
double A = param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0; |
double c; |
if (floatd < 1.0) |
c = cos(floatd * M_PI); |
else |
c = -1.0; |
if (c < 0.0) |
c = -pow(-c, A); |
else |
c = pow(c, A); |
coeff = (c * 0.5 + 0.5) * fone; |
} else if (flags & SWS_AREA) { |
int64_t d2 = d - (1 << 29); |
if (d2 * xInc < -(1LL << (29 + 16))) |
coeff = 1.0 * (1LL << (30 + 16)); |
else if (d2 * xInc < (1LL << (29 + 16))) |
coeff = -d2 * xInc + (1LL << (29 + 16)); |
else |
coeff = 0.0; |
coeff *= fone >> (30 + 16); |
} else if (flags & SWS_GAUSS) { |
double p = param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0; |
coeff = (pow(2.0, -p * floatd * floatd)) * fone; |
} else if (flags & SWS_SINC) { |
coeff = (d ? sin(floatd * M_PI) / (floatd * M_PI) : 1.0) * fone; |
} else if (flags & SWS_LANCZOS) { |
double p = param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0; |
coeff = (d ? sin(floatd * M_PI) * sin(floatd * M_PI / p) / |
(floatd * floatd * M_PI * M_PI / p) : 1.0) * fone; |
if (floatd > p) |
coeff = 0; |
} else if (flags & SWS_BILINEAR) { |
coeff = (1 << 30) - d; |
if (coeff < 0) |
coeff = 0; |
coeff *= fone >> 30; |
} else if (flags & SWS_SPLINE) { |
double p = -2.196152422706632; |
coeff = getSplineCoeff(1.0, 0.0, p, -p - 1.0, floatd) * fone; |
} else { |
av_assert0(0); |
} |
filter[i * filterSize + j] = coeff; |
xx++; |
} |
xDstInSrc += 2 * xInc; |
} |
} |
/* apply srcFilter & dstFilter to filter -> filter2 */ |
av_assert0(filterSize > 0); |
filter2Size = filterSize; |
if (srcFilter) |
filter2Size += srcFilter->length - 1; |
if (dstFilter) |
filter2Size += dstFilter->length - 1; |
av_assert0(filter2Size > 0); |
FF_ALLOCZ_ARRAY_OR_GOTO(NULL, filter2, dstW, filter2Size * sizeof(*filter2), fail); |
for (i = 0; i < dstW; i++) { |
int j, k; |
if (srcFilter) { |
for (k = 0; k < srcFilter->length; k++) { |
for (j = 0; j < filterSize; j++) |
filter2[i * filter2Size + k + j] += |
srcFilter->coeff[k] * filter[i * filterSize + j]; |
} |
} else { |
for (j = 0; j < filterSize; j++) |
filter2[i * filter2Size + j] = filter[i * filterSize + j]; |
} |
// FIXME dstFilter |
(*filterPos)[i] += (filterSize - 1) / 2 - (filter2Size - 1) / 2; |
} |
av_freep(&filter); |
/* try to reduce the filter-size (step 1: find size and shift left) */ |
// Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not). |
minFilterSize = 0; |
for (i = dstW - 1; i >= 0; i--) { |
int min = filter2Size; |
int j; |
int64_t cutOff = 0.0; |
/* get rid of near zero elements on the left by shifting left */ |
for (j = 0; j < filter2Size; j++) { |
int k; |
cutOff += FFABS(filter2[i * filter2Size]); |
if (cutOff > SWS_MAX_REDUCE_CUTOFF * fone) |
break; |
/* preserve monotonicity because the core can't handle the |
* filter otherwise */ |
if (i < dstW - 1 && (*filterPos)[i] >= (*filterPos)[i + 1]) |
break; |
// move filter coefficients left |
for (k = 1; k < filter2Size; k++) |
filter2[i * filter2Size + k - 1] = filter2[i * filter2Size + k]; |
filter2[i * filter2Size + k - 1] = 0; |
(*filterPos)[i]++; |
} |
cutOff = 0; |
/* count near zeros on the right */ |
for (j = filter2Size - 1; j > 0; j--) { |
cutOff += FFABS(filter2[i * filter2Size + j]); |
if (cutOff > SWS_MAX_REDUCE_CUTOFF * fone) |
break; |
min--; |
} |
if (min > minFilterSize) |
minFilterSize = min; |
} |
if (PPC_ALTIVEC(cpu_flags)) { |
// we can handle the special case 4, so we don't want to go the full 8 |
if (minFilterSize < 5) |
filterAlign = 4; |
/* We really don't want to waste our time doing useless computation, so |
* fall back on the scalar C code for very small filters. |
* Vectorizing is worth it only if you have a decent-sized vector. */ |
if (minFilterSize < 3) |
filterAlign = 1; |
} |
if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) { |
// special case for unscaled vertical filtering |
if (minFilterSize == 1 && filterAlign == 2) |
filterAlign = 1; |
} |
av_assert0(minFilterSize > 0); |
filterSize = (minFilterSize + (filterAlign - 1)) & (~(filterAlign - 1)); |
av_assert0(filterSize > 0); |
filter = av_malloc_array(dstW, filterSize * sizeof(*filter)); |
if (!filter) |
goto fail; |
if (filterSize >= MAX_FILTER_SIZE * 16 / |
((flags & SWS_ACCURATE_RND) ? APCK_SIZE : 16)) { |
ret = RETCODE_USE_CASCADE; |
goto fail; |
} |
*outFilterSize = filterSize; |
if (flags & SWS_PRINT_INFO) |
av_log(NULL, AV_LOG_VERBOSE, |
"SwScaler: reducing / aligning filtersize %d -> %d\n", |
filter2Size, filterSize); |
/* try to reduce the filter-size (step 2: reduce it) */ |
for (i = 0; i < dstW; i++) { |
int j; |
for (j = 0; j < filterSize; j++) { |
if (j >= filter2Size) |
filter[i * filterSize + j] = 0; |
else |
filter[i * filterSize + j] = filter2[i * filter2Size + j]; |
if ((flags & SWS_BITEXACT) && j >= minFilterSize) |
filter[i * filterSize + j] = 0; |
} |
} |
// FIXME try to align filterPos if possible |
// fix borders |
for (i = 0; i < dstW; i++) { |
int j; |
if ((*filterPos)[i] < 0) { |
// move filter coefficients left to compensate for filterPos |
for (j = 1; j < filterSize; j++) { |
int left = FFMAX(j + (*filterPos)[i], 0); |
filter[i * filterSize + left] += filter[i * filterSize + j]; |
filter[i * filterSize + j] = 0; |
} |
(*filterPos)[i]= 0; |
} |
if ((*filterPos)[i] + filterSize > srcW) { |
int shift = (*filterPos)[i] + FFMIN(filterSize - srcW, 0); |
int64_t acc = 0; |
for (j = filterSize - 1; j >= 0; j--) { |
if ((*filterPos)[i] + j >= srcW) { |
acc += filter[i * filterSize + j]; |
filter[i * filterSize + j] = 0; |
} |
} |
for (j = filterSize - 1; j >= 0; j--) { |
if (j < shift) { |
filter[i * filterSize + j] = 0; |
} else { |
filter[i * filterSize + j] = filter[i * filterSize + j - shift]; |
} |
} |
(*filterPos)[i]-= shift; |
filter[i * filterSize + srcW - 1 - (*filterPos)[i]] += acc; |
} |
av_assert0((*filterPos)[i] >= 0); |
av_assert0((*filterPos)[i] < srcW); |
if ((*filterPos)[i] + filterSize > srcW) { |
for (j = 0; j < filterSize; j++) { |
av_assert0((*filterPos)[i] + j < srcW || !filter[i * filterSize + j]); |
} |
} |
} |
// Note the +3 is for the MMX(+1) / SSE(+3) scaler which reads over the end |
/* align at 16 for AltiVec (needed by hScale_altivec_real) */ |
FF_ALLOCZ_ARRAY_OR_GOTO(NULL, *outFilter, |
(dstW + 3), *outFilterSize * sizeof(int16_t), fail); |
/* normalize & store in outFilter */ |
for (i = 0; i < dstW; i++) { |
int j; |
int64_t error = 0; |
int64_t sum = 0; |
for (j = 0; j < filterSize; j++) { |
sum += filter[i * filterSize + j]; |
} |
sum = (sum + one / 2) / one; |
if (!sum) { |
av_log(NULL, AV_LOG_WARNING, "SwScaler: zero vector in scaling\n"); |
sum = 1; |
} |
for (j = 0; j < *outFilterSize; j++) { |
int64_t v = filter[i * filterSize + j] + error; |
int intV = ROUNDED_DIV(v, sum); |
(*outFilter)[i * (*outFilterSize) + j] = intV; |
error = v - intV * sum; |
} |
} |
(*filterPos)[dstW + 0] = |
(*filterPos)[dstW + 1] = |
(*filterPos)[dstW + 2] = (*filterPos)[dstW - 1]; /* the MMX/SSE scaler will |
* read over the end */ |
for (i = 0; i < *outFilterSize; i++) { |
int k = (dstW - 1) * (*outFilterSize) + i; |
(*outFilter)[k + 1 * (*outFilterSize)] = |
(*outFilter)[k + 2 * (*outFilterSize)] = |
(*outFilter)[k + 3 * (*outFilterSize)] = (*outFilter)[k]; |
} |
ret = 0; |
fail: |
if(ret < 0) |
av_log(NULL, ret == RETCODE_USE_CASCADE ? AV_LOG_DEBUG : AV_LOG_ERROR, "sws: initFilter failed\n"); |
av_free(filter); |
av_free(filter2); |
return ret; |
} |
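/* Derive fixed-point RGB->YUV coefficients from the same 4-entry table |
* (vr, ub, ug, vg in 16.16) that sws_setColorspaceDetails() receives |
* for YUV->RGB, by analytically inverting the matrix, then scatter them |
* into input_rgb2yuv_table in the interleaved layout used by the RGB |
* input routines ('map' entries of -1 become zero filler). For the |
* default ITU-R BT.601 matrix the well-known constants are written |
* directly to avoid rounding drift. */ |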
static void fill_rgb2yuv_table(SwsContext *c, const int table[4], int dstRange) |
{ |
int64_t W, V, Z, Cy, Cu, Cv; |
int64_t vr = table[0]; |
int64_t ub = table[1]; |
int64_t ug = -table[2]; |
int64_t vg = -table[3]; |
int64_t ONE = 65536; |
int64_t cy = ONE; |
uint8_t *p = (uint8_t*)c->input_rgb2yuv_table; |
int i; |
static const int8_t map[] = { |
BY_IDX, GY_IDX, -1 , BY_IDX, BY_IDX, GY_IDX, -1 , BY_IDX, |
RY_IDX, -1 , GY_IDX, RY_IDX, RY_IDX, -1 , GY_IDX, RY_IDX, |
RY_IDX, GY_IDX, -1 , RY_IDX, RY_IDX, GY_IDX, -1 , RY_IDX, |
BY_IDX, -1 , GY_IDX, BY_IDX, BY_IDX, -1 , GY_IDX, BY_IDX, |
BU_IDX, GU_IDX, -1 , BU_IDX, BU_IDX, GU_IDX, -1 , BU_IDX, |
RU_IDX, -1 , GU_IDX, RU_IDX, RU_IDX, -1 , GU_IDX, RU_IDX, |
RU_IDX, GU_IDX, -1 , RU_IDX, RU_IDX, GU_IDX, -1 , RU_IDX, |
BU_IDX, -1 , GU_IDX, BU_IDX, BU_IDX, -1 , GU_IDX, BU_IDX, |
BV_IDX, GV_IDX, -1 , BV_IDX, BV_IDX, GV_IDX, -1 , BV_IDX, |
RV_IDX, -1 , GV_IDX, RV_IDX, RV_IDX, -1 , GV_IDX, RV_IDX, |
RV_IDX, GV_IDX, -1 , RV_IDX, RV_IDX, GV_IDX, -1 , RV_IDX, |
BV_IDX, -1 , GV_IDX, BV_IDX, BV_IDX, -1 , GV_IDX, BV_IDX, |
RY_IDX, BY_IDX, RY_IDX, BY_IDX, RY_IDX, BY_IDX, RY_IDX, BY_IDX, |
BY_IDX, RY_IDX, BY_IDX, RY_IDX, BY_IDX, RY_IDX, BY_IDX, RY_IDX, |
GY_IDX, -1 , GY_IDX, -1 , GY_IDX, -1 , GY_IDX, -1 , |
-1 , GY_IDX, -1 , GY_IDX, -1 , GY_IDX, -1 , GY_IDX, |
RU_IDX, BU_IDX, RU_IDX, BU_IDX, RU_IDX, BU_IDX, RU_IDX, BU_IDX, |
BU_IDX, RU_IDX, BU_IDX, RU_IDX, BU_IDX, RU_IDX, BU_IDX, RU_IDX, |
GU_IDX, -1 , GU_IDX, -1 , GU_IDX, -1 , GU_IDX, -1 , |
-1 , GU_IDX, -1 , GU_IDX, -1 , GU_IDX, -1 , GU_IDX, |
RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX, |
BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX, |
GV_IDX, -1 , GV_IDX, -1 , GV_IDX, -1 , GV_IDX, -1 , |
-1 , GV_IDX, -1 , GV_IDX, -1 , GV_IDX, -1 , GV_IDX, //23 |
-1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //24 |
-1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //25 |
-1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //26 |
-1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //27 |
-1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //28 |
-1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //29 |
-1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //30 |
-1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //31 |
BY_IDX, GY_IDX, RY_IDX, -1 , -1 , -1 , -1 , -1 , //32 |
BU_IDX, GU_IDX, RU_IDX, -1 , -1 , -1 , -1 , -1 , //33 |
BV_IDX, GV_IDX, RV_IDX, -1 , -1 , -1 , -1 , -1 , //34 |
}; |
dstRange = 0; //FIXME range = 1 is handled elsewhere |
if (!dstRange) { |
cy = cy * 255 / 219; |
} else { |
vr = vr * 224 / 255; |
ub = ub * 224 / 255; |
ug = ug * 224 / 255; |
vg = vg * 224 / 255; |
} |
W = ROUNDED_DIV(ONE*ONE*ug, ub); |
V = ROUNDED_DIV(ONE*ONE*vg, vr); |
Z = ONE*ONE-W-V; |
Cy = ROUNDED_DIV(cy*Z, ONE); |
Cu = ROUNDED_DIV(ub*Z, ONE); |
Cv = ROUNDED_DIV(vr*Z, ONE); |
c->input_rgb2yuv_table[RY_IDX] = -ROUNDED_DIV((1 << RGB2YUV_SHIFT)*V , Cy); |
c->input_rgb2yuv_table[GY_IDX] = ROUNDED_DIV((1 << RGB2YUV_SHIFT)*ONE*ONE , Cy); |
c->input_rgb2yuv_table[BY_IDX] = -ROUNDED_DIV((1 << RGB2YUV_SHIFT)*W , Cy); |
c->input_rgb2yuv_table[RU_IDX] = ROUNDED_DIV((1 << RGB2YUV_SHIFT)*V , Cu); |
c->input_rgb2yuv_table[GU_IDX] = -ROUNDED_DIV((1 << RGB2YUV_SHIFT)*ONE*ONE , Cu); |
c->input_rgb2yuv_table[BU_IDX] = ROUNDED_DIV((1 << RGB2YUV_SHIFT)*(Z+W) , Cu); |
c->input_rgb2yuv_table[RV_IDX] = ROUNDED_DIV((1 << RGB2YUV_SHIFT)*(V+Z) , Cv); |
c->input_rgb2yuv_table[GV_IDX] = -ROUNDED_DIV((1 << RGB2YUV_SHIFT)*ONE*ONE , Cv); |
c->input_rgb2yuv_table[BV_IDX] = ROUNDED_DIV((1 << RGB2YUV_SHIFT)*W , Cv); |
if(/*!dstRange && */!memcmp(table, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], sizeof(ff_yuv2rgb_coeffs[SWS_CS_DEFAULT]))) { |
c->input_rgb2yuv_table[BY_IDX] = ((int)(0.114 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); |
c->input_rgb2yuv_table[BV_IDX] = (-(int)(0.081 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); |
c->input_rgb2yuv_table[BU_IDX] = ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); |
c->input_rgb2yuv_table[GY_IDX] = ((int)(0.587 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); |
c->input_rgb2yuv_table[GV_IDX] = (-(int)(0.419 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); |
c->input_rgb2yuv_table[GU_IDX] = (-(int)(0.331 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); |
c->input_rgb2yuv_table[RY_IDX] = ((int)(0.299 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); |
c->input_rgb2yuv_table[RV_IDX] = ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); |
c->input_rgb2yuv_table[RU_IDX] = (-(int)(0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); |
} |
for (i = 0; i < FF_ARRAY_ELEMS(map); i++) |
AV_WL16(p + 16*4 + 2*i, map[i] >= 0 ? c->input_rgb2yuv_table[map[i]] : 0); |
} |
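/* Set up XYZ<->RGB conversion state: two 3x3 fixed-point matrices plus |
* four 4096-entry 12-bit gamma lookup tables (XYZ gamma, RGB gamma and |
* their inverses). The static tables are filled only once and shared by |
* all contexts; the rgbgamma_tab[4095] check makes re-runs a no-op. */ |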
static void fill_xyztables(struct SwsContext *c) |
{ |
int i; |
double xyzgamma = XYZ_GAMMA; |
double rgbgamma = 1.0 / RGB_GAMMA; |
double xyzgammainv = 1.0 / XYZ_GAMMA; |
double rgbgammainv = RGB_GAMMA; |
static const int16_t xyz2rgb_matrix[3][4] = { |
{13270, -6295, -2041}, |
{-3969, 7682, 170}, |
{ 228, -835, 4329} }; |
static const int16_t rgb2xyz_matrix[3][4] = { |
{1689, 1464, 739}, |
{ 871, 2929, 296}, |
{ 79, 488, 3891} }; |
static int16_t xyzgamma_tab[4096], rgbgamma_tab[4096], xyzgammainv_tab[4096], rgbgammainv_tab[4096]; |
memcpy(c->xyz2rgb_matrix, xyz2rgb_matrix, sizeof(c->xyz2rgb_matrix)); |
memcpy(c->rgb2xyz_matrix, rgb2xyz_matrix, sizeof(c->rgb2xyz_matrix)); |
c->xyzgamma = xyzgamma_tab; |
c->rgbgamma = rgbgamma_tab; |
c->xyzgammainv = xyzgammainv_tab; |
c->rgbgammainv = rgbgammainv_tab; |
if (rgbgamma_tab[4095]) |
return; |
/* set gamma vectors */ |
for (i = 0; i < 4096; i++) { |
xyzgamma_tab[i] = lrint(pow(i / 4095.0, xyzgamma) * 4095.0); |
rgbgamma_tab[i] = lrint(pow(i / 4095.0, rgbgamma) * 4095.0); |
xyzgammainv_tab[i] = lrint(pow(i / 4095.0, xyzgammainv) * 4095.0); |
rgbgammainv_tab[i] = lrint(pow(i / 4095.0, rgbgammainv) * 4095.0); |
} |
} |
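/* Typical usage sketch (names are illustrative): force BT.601 |
* full-range input on an existing context, keeping the other settings: |
* |
* int *inv_table, *table; |
* int srcRange, dstRange, brightness, contrast, saturation; |
* sws_getColorspaceDetails(c, &inv_table, &srcRange, &table, &dstRange, |
* &brightness, &contrast, &saturation); |
* sws_setColorspaceDetails(c, sws_getCoefficients(SWS_CS_ITU601), 1, |
* table, dstRange, brightness, contrast, saturation); |
* |
* Defaults are brightness 0, contrast 1 << 16, saturation 1 << 16. */ |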
int sws_setColorspaceDetails(struct SwsContext *c, const int inv_table[4], |
int srcRange, const int table[4], int dstRange, |
int brightness, int contrast, int saturation) |
{ |
const AVPixFmtDescriptor *desc_dst; |
const AVPixFmtDescriptor *desc_src; |
int need_reinit = 0; |
handle_formats(c); |
desc_dst = av_pix_fmt_desc_get(c->dstFormat); |
desc_src = av_pix_fmt_desc_get(c->srcFormat); |
if(!isYUV(c->dstFormat) && !isGray(c->dstFormat)) |
dstRange = 0; |
if(!isYUV(c->srcFormat) && !isGray(c->srcFormat)) |
srcRange = 0; |
if (c->srcRange != srcRange || |
c->dstRange != dstRange || |
c->brightness != brightness || |
c->contrast != contrast || |
c->saturation != saturation || |
memcmp(c->srcColorspaceTable, inv_table, sizeof(int) * 4) || |
memcmp(c->dstColorspaceTable, table, sizeof(int) * 4) |
) |
need_reinit = 1; |
memmove(c->srcColorspaceTable, inv_table, sizeof(int) * 4); |
memmove(c->dstColorspaceTable, table, sizeof(int) * 4); |
c->brightness = brightness; |
c->contrast = contrast; |
c->saturation = saturation; |
c->srcRange = srcRange; |
c->dstRange = dstRange; |
//The srcBpc check is possibly wrong but we seem to lack a definitive reference to test this |
//and what we have in ticket 2939 looks better with this check |
if (need_reinit && (c->srcBpc == 8 || !isYUV(c->srcFormat))) |
ff_sws_init_range_convert(c); |
c->dstFormatBpp = av_get_bits_per_pixel(desc_dst); |
c->srcFormatBpp = av_get_bits_per_pixel(desc_src); |
if (c->cascaded_context[0]) |
return sws_setColorspaceDetails(c->cascaded_context[0],inv_table, srcRange,table, dstRange, brightness, contrast, saturation); |
if (!need_reinit) |
return 0; |
if ((isYUV(c->dstFormat) || isGray(c->dstFormat)) && (isYUV(c->srcFormat) || isGray(c->srcFormat))) { |
if (!c->cascaded_context[0] && |
memcmp(c->dstColorspaceTable, c->srcColorspaceTable, sizeof(int) * 4) && |
c->srcW && c->srcH && c->dstW && c->dstH) { |
enum AVPixelFormat tmp_format; |
int tmp_width, tmp_height; |
int srcW = c->srcW; |
int srcH = c->srcH; |
int dstW = c->dstW; |
int dstH = c->dstH; |
int ret; |
av_log(c, AV_LOG_VERBOSE, "YUV color matrix differs for YUV->YUV, using intermediate RGB to convert\n"); |
if (isNBPS(c->dstFormat) || is16BPS(c->dstFormat)) { |
if (isALPHA(c->srcFormat) && isALPHA(c->dstFormat)) { |
tmp_format = AV_PIX_FMT_BGRA64; |
} else { |
tmp_format = AV_PIX_FMT_BGR48; |
} |
} else { |
if (isALPHA(c->srcFormat) && isALPHA(c->dstFormat)) { |
tmp_format = AV_PIX_FMT_BGRA; |
} else { |
tmp_format = AV_PIX_FMT_BGR24; |
} |
} |
if (srcW*srcH > dstW*dstH) { |
tmp_width = dstW; |
tmp_height = dstH; |
} else { |
tmp_width = srcW; |
tmp_height = srcH; |
} |
ret = av_image_alloc(c->cascaded_tmp, c->cascaded_tmpStride, |
tmp_width, tmp_height, tmp_format, 64); |
if (ret < 0) |
return ret; |
c->cascaded_context[0] = sws_alloc_set_opts(srcW, srcH, c->srcFormat, |
tmp_width, tmp_height, tmp_format, |
c->flags, c->param); |
if (!c->cascaded_context[0]) |
return -1; |
c->cascaded_context[0]->alphablend = c->alphablend; |
ret = sws_init_context(c->cascaded_context[0], NULL , NULL); |
if (ret < 0) |
return ret; |
// we set both the src and dst details since the RGB side will be ignored anyway |
sws_setColorspaceDetails(c->cascaded_context[0], inv_table, |
srcRange, table, dstRange, |
brightness, contrast, saturation); |
c->cascaded_context[1] = sws_getContext(tmp_width, tmp_height, tmp_format, |
dstW, dstH, c->dstFormat, |
c->flags, NULL, NULL, c->param); |
if (!c->cascaded_context[1]) |
return -1; |
sws_setColorspaceDetails(c->cascaded_context[1], inv_table, |
srcRange, table, dstRange, |
0, 1 << 16, 1 << 16); |
return 0; |
} |
return -1; |
} |
if (!isYUV(c->dstFormat) && !isGray(c->dstFormat)) { |
ff_yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness, |
contrast, saturation); |
// FIXME factorize |
if (ARCH_PPC) |
ff_yuv2rgb_init_tables_ppc(c, inv_table, brightness, |
contrast, saturation); |
} |
fill_rgb2yuv_table(c, table, dstRange); |
return 0; |
} |
int sws_getColorspaceDetails(struct SwsContext *c, int **inv_table, |
int *srcRange, int **table, int *dstRange, |
int *brightness, int *contrast, int *saturation) |
{ |
if (!c ) |
return -1; |
*inv_table = c->srcColorspaceTable; |
*table = c->dstColorspaceTable; |
*srcRange = c->srcRange; |
*dstRange = c->dstRange; |
*brightness = c->brightness; |
*contrast = c->contrast; |
*saturation = c->saturation; |
return 0; |
} |
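/* The YUVJ* formats are deprecated aliases implying full (JPEG) range; |
* rewrite them to the plain YUV equivalents and return 1 so the caller |
* can OR the result into srcRange/dstRange. Gray formats likewise |
* default to full range here. */ |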
static int handle_jpeg(enum AVPixelFormat *format) |
{ |
switch (*format) { |
case AV_PIX_FMT_YUVJ420P: |
*format = AV_PIX_FMT_YUV420P; |
return 1; |
case AV_PIX_FMT_YUVJ411P: |
*format = AV_PIX_FMT_YUV411P; |
return 1; |
case AV_PIX_FMT_YUVJ422P: |
*format = AV_PIX_FMT_YUV422P; |
return 1; |
case AV_PIX_FMT_YUVJ444P: |
*format = AV_PIX_FMT_YUV444P; |
return 1; |
case AV_PIX_FMT_YUVJ440P: |
*format = AV_PIX_FMT_YUV440P; |
return 1; |
case AV_PIX_FMT_GRAY8: |
case AV_PIX_FMT_GRAY16LE: |
case AV_PIX_FMT_GRAY16BE: |
return 1; |
default: |
return 0; |
} |
} |
static int handle_0alpha(enum AVPixelFormat *format) |
{ |
switch (*format) { |
case AV_PIX_FMT_0BGR : *format = AV_PIX_FMT_ABGR ; return 1; |
case AV_PIX_FMT_BGR0 : *format = AV_PIX_FMT_BGRA ; return 4; |
case AV_PIX_FMT_0RGB : *format = AV_PIX_FMT_ARGB ; return 1; |
case AV_PIX_FMT_RGB0 : *format = AV_PIX_FMT_RGBA ; return 4; |
default: return 0; |
} |
} |
static int handle_xyz(enum AVPixelFormat *format) |
{ |
switch (*format) { |
case AV_PIX_FMT_XYZ12BE : *format = AV_PIX_FMT_RGB48BE; return 1; |
case AV_PIX_FMT_XYZ12LE : *format = AV_PIX_FMT_RGB48LE; return 1; |
default: return 0; |
} |
} |
static void handle_formats(SwsContext *c) |
{ |
c->src0Alpha |= handle_0alpha(&c->srcFormat); |
c->dst0Alpha |= handle_0alpha(&c->dstFormat); |
c->srcXYZ |= handle_xyz(&c->srcFormat); |
c->dstXYZ |= handle_xyz(&c->dstFormat); |
if (c->srcXYZ || c->dstXYZ) |
fill_xyztables(c); |
} |
SwsContext *sws_alloc_context(void) |
{ |
SwsContext *c = av_mallocz(sizeof(SwsContext)); |
av_assert0(offsetof(SwsContext, redDither) + DITHER32_INT == offsetof(SwsContext, dither32)); |
if (c) { |
c->av_class = &sws_context_class; |
av_opt_set_defaults(c); |
} |
return c; |
} |
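/* Allocate a 65536-entry lookup table mapping 16-bit values through |
* pow(x, e); used by the cascaded gamma-correction path set up in |
* sws_init_context() below. */ |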
static uint16_t * alloc_gamma_tbl(double e) |
{ |
int i = 0; |
uint16_t * tbl; |
tbl = (uint16_t*)av_malloc(sizeof(uint16_t) * 1 << 16); |
if (!tbl) |
return NULL; |
for (i = 0; i < 65536; ++i) { |
tbl[i] = pow(i / 65535.0, e) * 65535.0; |
} |
return tbl; |
} |
static enum AVPixelFormat alphaless_fmt(enum AVPixelFormat fmt) |
{ |
switch(fmt) { |
case AV_PIX_FMT_ARGB: return AV_PIX_FMT_RGB24; |
case AV_PIX_FMT_RGBA: return AV_PIX_FMT_RGB24; |
case AV_PIX_FMT_ABGR: return AV_PIX_FMT_BGR24; |
case AV_PIX_FMT_BGRA: return AV_PIX_FMT_BGR24; |
case AV_PIX_FMT_YA8: return AV_PIX_FMT_GRAY8; |
case AV_PIX_FMT_YUVA420P: return AV_PIX_FMT_YUV420P; |
case AV_PIX_FMT_YUVA422P: return AV_PIX_FMT_YUV422P; |
case AV_PIX_FMT_YUVA444P: return AV_PIX_FMT_YUV444P; |
case AV_PIX_FMT_GBRAP: return AV_PIX_FMT_GBRP; |
case AV_PIX_FMT_GBRAP16LE: return AV_PIX_FMT_GBRP16; |
case AV_PIX_FMT_GBRAP16BE: return AV_PIX_FMT_GBRP16; |
case AV_PIX_FMT_RGBA64LE: return AV_PIX_FMT_RGB48; |
case AV_PIX_FMT_RGBA64BE: return AV_PIX_FMT_RGB48; |
case AV_PIX_FMT_BGRA64LE: return AV_PIX_FMT_BGR48; |
case AV_PIX_FMT_BGRA64BE: return AV_PIX_FMT_BGR48; |
case AV_PIX_FMT_YA16BE: return AV_PIX_FMT_GRAY16; |
case AV_PIX_FMT_YA16LE: return AV_PIX_FMT_GRAY16; |
case AV_PIX_FMT_YUVA420P9BE: return AV_PIX_FMT_YUV420P9; |
case AV_PIX_FMT_YUVA422P9BE: return AV_PIX_FMT_YUV422P9; |
case AV_PIX_FMT_YUVA444P9BE: return AV_PIX_FMT_YUV444P9; |
case AV_PIX_FMT_YUVA420P9LE: return AV_PIX_FMT_YUV420P9; |
case AV_PIX_FMT_YUVA422P9LE: return AV_PIX_FMT_YUV422P9; |
case AV_PIX_FMT_YUVA444P9LE: return AV_PIX_FMT_YUV444P9; |
case AV_PIX_FMT_YUVA420P10BE: return AV_PIX_FMT_YUV420P10; |
case AV_PIX_FMT_YUVA422P10BE: return AV_PIX_FMT_YUV422P10; |
case AV_PIX_FMT_YUVA444P10BE: return AV_PIX_FMT_YUV444P10; |
case AV_PIX_FMT_YUVA420P10LE: return AV_PIX_FMT_YUV420P10; |
case AV_PIX_FMT_YUVA422P10LE: return AV_PIX_FMT_YUV422P10; |
case AV_PIX_FMT_YUVA444P10LE: return AV_PIX_FMT_YUV444P10; |
case AV_PIX_FMT_YUVA420P16BE: return AV_PIX_FMT_YUV420P16; |
case AV_PIX_FMT_YUVA422P16BE: return AV_PIX_FMT_YUV422P16; |
case AV_PIX_FMT_YUVA444P16BE: return AV_PIX_FMT_YUV444P16; |
case AV_PIX_FMT_YUVA420P16LE: return AV_PIX_FMT_YUV420P16; |
case AV_PIX_FMT_YUVA422P16LE: return AV_PIX_FMT_YUV422P16; |
case AV_PIX_FMT_YUVA444P16LE: return AV_PIX_FMT_YUV444P16; |
// case AV_PIX_FMT_AYUV64LE: |
// case AV_PIX_FMT_AYUV64BE: |
// case AV_PIX_FMT_PAL8: |
default: return AV_PIX_FMT_NONE; |
} |
} |
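/* Initialize the scaler: resolve alias pixel formats, choose a default |
* algorithm, derive the chroma subsampling parameters, build the |
* horizontal and vertical filters via initFilter(), allocate the |
* intermediate line buffers, and finally pick either a special-case |
* unscaled converter or the generic ff_getSwsFunc() path. Gamma |
* correction, Bayer input, alpha blending and oversized filters are |
* instead handled by chaining two or three cascaded sub-contexts. */ |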
av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, |
SwsFilter *dstFilter) |
{ |
int i, j; |
int usesVFilter, usesHFilter; |
int unscaled; |
SwsFilter dummyFilter = { NULL, NULL, NULL, NULL }; |
int srcW = c->srcW; |
int srcH = c->srcH; |
int dstW = c->dstW; |
int dstH = c->dstH; |
int dst_stride = FFALIGN(dstW * sizeof(int16_t) + 66, 16); |
int flags, cpu_flags; |
enum AVPixelFormat srcFormat = c->srcFormat; |
enum AVPixelFormat dstFormat = c->dstFormat; |
const AVPixFmtDescriptor *desc_src; |
const AVPixFmtDescriptor *desc_dst; |
int ret = 0; |
enum AVPixelFormat tmpFmt; |
cpu_flags = av_get_cpu_flags(); |
flags = c->flags; |
emms_c(); |
if (!rgb15to16) |
sws_rgb2rgb_init(); |
unscaled = (srcW == dstW && srcH == dstH); |
c->srcRange |= handle_jpeg(&c->srcFormat); |
c->dstRange |= handle_jpeg(&c->dstFormat); |
if(srcFormat!=c->srcFormat || dstFormat!=c->dstFormat) |
av_log(c, AV_LOG_WARNING, "deprecated pixel format used, make sure you did set range correctly\n"); |
if (!c->contrast && !c->saturation && !c->dstFormatBpp) |
sws_setColorspaceDetails(c, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], c->srcRange, |
ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], |
c->dstRange, 0, 1 << 16, 1 << 16); |
handle_formats(c); |
srcFormat = c->srcFormat; |
dstFormat = c->dstFormat; |
desc_src = av_pix_fmt_desc_get(srcFormat); |
desc_dst = av_pix_fmt_desc_get(dstFormat); |
if (!(unscaled && sws_isSupportedEndiannessConversion(srcFormat) && |
av_pix_fmt_swap_endianness(srcFormat) == dstFormat)) { |
if (!sws_isSupportedInput(srcFormat)) { |
av_log(c, AV_LOG_ERROR, "%s is not supported as input pixel format\n", |
av_get_pix_fmt_name(srcFormat)); |
return AVERROR(EINVAL); |
} |
if (!sws_isSupportedOutput(dstFormat)) { |
av_log(c, AV_LOG_ERROR, "%s is not supported as output pixel format\n", |
av_get_pix_fmt_name(dstFormat)); |
return AVERROR(EINVAL); |
} |
} |
av_assert2(desc_src && desc_dst); |
i = flags & (SWS_POINT | |
SWS_AREA | |
SWS_BILINEAR | |
SWS_FAST_BILINEAR | |
SWS_BICUBIC | |
SWS_X | |
SWS_GAUSS | |
SWS_LANCZOS | |
SWS_SINC | |
SWS_SPLINE | |
SWS_BICUBLIN); |
/* provide a default scaler if not set by caller */ |
if (!i) { |
flags |= SWS_BICUBIC; // bicubic is the default regardless of scaling direction |
c->flags = flags; |
} else if (i & (i - 1)) { |
av_log(c, AV_LOG_ERROR, |
"Exactly one scaler algorithm must be chosen, got %X\n", i); |
return AVERROR(EINVAL); |
} |
/* sanity check */ |
if (srcW < 1 || srcH < 1 || dstW < 1 || dstH < 1) { |
/* FIXME check if these are enough and try to lower them after |
* fixing the relevant parts of the code */ |
av_log(c, AV_LOG_ERROR, "%dx%d -> %dx%d is invalid scaling dimension\n", |
srcW, srcH, dstW, dstH); |
return AVERROR(EINVAL); |
} |
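/* fast bilinear cannot handle widths below 8; the XOR clears |
* SWS_FAST_BILINEAR and sets SWS_BILINEAR (exactly one algorithm flag |
* is set at this point, so this is a plain swap) */ |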
if (flags & SWS_FAST_BILINEAR) { |
if (srcW < 8 || dstW < 8) { |
flags ^= SWS_FAST_BILINEAR | SWS_BILINEAR; |
c->flags = flags; |
} |
} |
if (!dstFilter) |
dstFilter = &dummyFilter; |
if (!srcFilter) |
srcFilter = &dummyFilter; |
c->lumXInc = (((int64_t)srcW << 16) + (dstW >> 1)) / dstW; |
c->lumYInc = (((int64_t)srcH << 16) + (dstH >> 1)) / dstH; |
c->dstFormatBpp = av_get_bits_per_pixel(desc_dst); |
c->srcFormatBpp = av_get_bits_per_pixel(desc_src); |
c->vRounder = 4 * 0x0001000100010001ULL; |
usesVFilter = (srcFilter->lumV && srcFilter->lumV->length > 1) || |
(srcFilter->chrV && srcFilter->chrV->length > 1) || |
(dstFilter->lumV && dstFilter->lumV->length > 1) || |
(dstFilter->chrV && dstFilter->chrV->length > 1); |
usesHFilter = (srcFilter->lumH && srcFilter->lumH->length > 1) || |
(srcFilter->chrH && srcFilter->chrH->length > 1) || |
(dstFilter->lumH && dstFilter->lumH->length > 1) || |
(dstFilter->chrH && dstFilter->chrH->length > 1); |
av_pix_fmt_get_chroma_sub_sample(srcFormat, &c->chrSrcHSubSample, &c->chrSrcVSubSample); |
av_pix_fmt_get_chroma_sub_sample(dstFormat, &c->chrDstHSubSample, &c->chrDstVSubSample); |
if (isAnyRGB(dstFormat) && !(flags&SWS_FULL_CHR_H_INT)) { |
if (dstW&1) { |
av_log(c, AV_LOG_DEBUG, "Forcing full internal H chroma due to odd output size\n"); |
flags |= SWS_FULL_CHR_H_INT; |
c->flags = flags; |
} |
if ( c->chrSrcHSubSample == 0 |
&& c->chrSrcVSubSample == 0 |
&& c->dither != SWS_DITHER_BAYER //SWS_FULL_CHR_H_INT is currently not supported with SWS_DITHER_BAYER |
&& !(c->flags & SWS_FAST_BILINEAR) |
) { |
av_log(c, AV_LOG_DEBUG, "Forcing full internal H chroma due to input having non subsampled chroma\n"); |
flags |= SWS_FULL_CHR_H_INT; |
c->flags = flags; |
} |
} |
if (c->dither == SWS_DITHER_AUTO) { |
if (flags & SWS_ERROR_DIFFUSION) |
c->dither = SWS_DITHER_ED; |
} |
if(dstFormat == AV_PIX_FMT_BGR4_BYTE || |
dstFormat == AV_PIX_FMT_RGB4_BYTE || |
dstFormat == AV_PIX_FMT_BGR8 || |
dstFormat == AV_PIX_FMT_RGB8) { |
if (c->dither == SWS_DITHER_AUTO) |
c->dither = (flags & SWS_FULL_CHR_H_INT) ? SWS_DITHER_ED : SWS_DITHER_BAYER; |
if (!(flags & SWS_FULL_CHR_H_INT)) { |
if (c->dither == SWS_DITHER_ED || c->dither == SWS_DITHER_A_DITHER || c->dither == SWS_DITHER_X_DITHER) { |
av_log(c, AV_LOG_DEBUG, |
"Desired dithering only supported in full chroma interpolation for destination format '%s'\n", |
av_get_pix_fmt_name(dstFormat)); |
flags |= SWS_FULL_CHR_H_INT; |
c->flags = flags; |
} |
} |
if (flags & SWS_FULL_CHR_H_INT) { |
if (c->dither == SWS_DITHER_BAYER) { |
av_log(c, AV_LOG_DEBUG, |
"Ordered dither is not supported in full chroma interpolation for destination format '%s'\n", |
av_get_pix_fmt_name(dstFormat)); |
c->dither = SWS_DITHER_ED; |
} |
} |
} |
if (isPlanarRGB(dstFormat)) { |
if (!(flags & SWS_FULL_CHR_H_INT)) { |
av_log(c, AV_LOG_DEBUG, |
"%s output is not supported with half chroma resolution, switching to full\n", |
av_get_pix_fmt_name(dstFormat)); |
flags |= SWS_FULL_CHR_H_INT; |
c->flags = flags; |
} |
} |
/* reuse chroma for 2 pixels RGB/BGR unless user wants full |
* chroma interpolation */ |
if (flags & SWS_FULL_CHR_H_INT && |
isAnyRGB(dstFormat) && |
!isPlanarRGB(dstFormat) && |
dstFormat != AV_PIX_FMT_RGBA64LE && |
dstFormat != AV_PIX_FMT_RGBA64BE && |
dstFormat != AV_PIX_FMT_BGRA64LE && |
dstFormat != AV_PIX_FMT_BGRA64BE && |
dstFormat != AV_PIX_FMT_RGB48LE && |
dstFormat != AV_PIX_FMT_RGB48BE && |
dstFormat != AV_PIX_FMT_BGR48LE && |
dstFormat != AV_PIX_FMT_BGR48BE && |
dstFormat != AV_PIX_FMT_RGBA && |
dstFormat != AV_PIX_FMT_ARGB && |
dstFormat != AV_PIX_FMT_BGRA && |
dstFormat != AV_PIX_FMT_ABGR && |
dstFormat != AV_PIX_FMT_RGB24 && |
dstFormat != AV_PIX_FMT_BGR24 && |
dstFormat != AV_PIX_FMT_BGR4_BYTE && |
dstFormat != AV_PIX_FMT_RGB4_BYTE && |
dstFormat != AV_PIX_FMT_BGR8 && |
dstFormat != AV_PIX_FMT_RGB8 |
) { |
av_log(c, AV_LOG_WARNING, |
"full chroma interpolation for destination format '%s' not yet implemented\n", |
av_get_pix_fmt_name(dstFormat)); |
flags &= ~SWS_FULL_CHR_H_INT; |
c->flags = flags; |
} |
if (isAnyRGB(dstFormat) && !(flags & SWS_FULL_CHR_H_INT)) |
c->chrDstHSubSample = 1; |
// drop some chroma lines if the user wants it |
c->vChrDrop = (flags & SWS_SRC_V_CHR_DROP_MASK) >> |
SWS_SRC_V_CHR_DROP_SHIFT; |
c->chrSrcVSubSample += c->vChrDrop; |
/* drop every other pixel for chroma calculation unless user |
* wants full chroma */ |
if (isAnyRGB(srcFormat) && !(flags & SWS_FULL_CHR_H_INP) && |
srcFormat != AV_PIX_FMT_RGB8 && srcFormat != AV_PIX_FMT_BGR8 && |
srcFormat != AV_PIX_FMT_RGB4 && srcFormat != AV_PIX_FMT_BGR4 && |
srcFormat != AV_PIX_FMT_RGB4_BYTE && srcFormat != AV_PIX_FMT_BGR4_BYTE && |
srcFormat != AV_PIX_FMT_GBRP9BE && srcFormat != AV_PIX_FMT_GBRP9LE && |
srcFormat != AV_PIX_FMT_GBRP10BE && srcFormat != AV_PIX_FMT_GBRP10LE && |
srcFormat != AV_PIX_FMT_GBRP12BE && srcFormat != AV_PIX_FMT_GBRP12LE && |
srcFormat != AV_PIX_FMT_GBRP14BE && srcFormat != AV_PIX_FMT_GBRP14LE && |
srcFormat != AV_PIX_FMT_GBRP16BE && srcFormat != AV_PIX_FMT_GBRP16LE && |
((dstW >> c->chrDstHSubSample) <= (srcW >> 1) || |
(flags & SWS_FAST_BILINEAR))) |
c->chrSrcHSubSample = 1; |
// Note the FF_CEIL_RSHIFT is so that we always round toward +inf. |
c->chrSrcW = FF_CEIL_RSHIFT(srcW, c->chrSrcHSubSample); |
c->chrSrcH = FF_CEIL_RSHIFT(srcH, c->chrSrcVSubSample); |
c->chrDstW = FF_CEIL_RSHIFT(dstW, c->chrDstHSubSample); |
c->chrDstH = FF_CEIL_RSHIFT(dstH, c->chrDstVSubSample); |
FF_ALLOCZ_OR_GOTO(c, c->formatConvBuffer, FFALIGN(srcW*2+78, 16) * 2, fail); |
c->srcBpc = 1 + desc_src->comp[0].depth_minus1; |
if (c->srcBpc < 8) |
c->srcBpc = 8; |
c->dstBpc = 1 + desc_dst->comp[0].depth_minus1; |
if (c->dstBpc < 8) |
c->dstBpc = 8; |
if (isAnyRGB(srcFormat) || srcFormat == AV_PIX_FMT_PAL8) |
c->srcBpc = 16; |
if (c->dstBpc == 16) |
dst_stride <<= 1; |
if (INLINE_MMXEXT(cpu_flags) && c->srcBpc == 8 && c->dstBpc <= 14) { |
c->canMMXEXTBeUsed = dstW >= srcW && (dstW & 31) == 0 && |
c->chrDstW >= c->chrSrcW && |
(srcW & 15) == 0; |
if (!c->canMMXEXTBeUsed && dstW >= srcW && c->chrDstW >= c->chrSrcW && (srcW & 15) == 0 |
&& (flags & SWS_FAST_BILINEAR)) { |
if (flags & SWS_PRINT_INFO) |
av_log(c, AV_LOG_INFO, |
"output width is not a multiple of 32 -> no MMXEXT scaler\n"); |
} |
if (usesHFilter || isNBPS(c->srcFormat) || is16BPS(c->srcFormat) || isAnyRGB(c->srcFormat)) |
c->canMMXEXTBeUsed = 0; |
} else |
c->canMMXEXTBeUsed = 0; |
c->chrXInc = (((int64_t)c->chrSrcW << 16) + (c->chrDstW >> 1)) / c->chrDstW; |
c->chrYInc = (((int64_t)c->chrSrcH << 16) + (c->chrDstH >> 1)) / c->chrDstH; |
/* Match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src |
* to pixel n-2 of dst, but only for the FAST_BILINEAR mode otherwise do |
* correct scaling. |
* n-2 is the last chrominance sample available. |
* This is not perfect, but no one should notice the difference, the more |
* correct variant would be like the vertical one, but that would require |
* some special code for the first and last pixel */ |
if (flags & SWS_FAST_BILINEAR) { |
if (c->canMMXEXTBeUsed) { |
c->lumXInc += 20; |
c->chrXInc += 20; |
} |
// we don't use the x86 asm scaler if MMX is available |
else if (INLINE_MMX(cpu_flags) && c->dstBpc <= 14) { |
c->lumXInc = ((int64_t)(srcW - 2) << 16) / (dstW - 2) - 20; |
c->chrXInc = ((int64_t)(c->chrSrcW - 2) << 16) / (c->chrDstW - 2) - 20; |
} |
} |
// hardcoded for now |
c->gamma_value = 2.2; |
tmpFmt = AV_PIX_FMT_RGBA64LE; |
if (!unscaled && c->gamma_flag && (srcFormat != tmpFmt || dstFormat != tmpFmt)) { |
SwsContext *c2; |
c->cascaded_context[0] = NULL; |
ret = av_image_alloc(c->cascaded_tmp, c->cascaded_tmpStride, |
srcW, srcH, tmpFmt, 64); |
if (ret < 0) |
return ret; |
c->cascaded_context[0] = sws_getContext(srcW, srcH, srcFormat, |
srcW, srcH, tmpFmt, |
flags, NULL, NULL, c->param); |
if (!c->cascaded_context[0]) { |
return -1; |
} |
c->cascaded_context[1] = sws_getContext(srcW, srcH, tmpFmt, |
dstW, dstH, tmpFmt, |
flags, srcFilter, dstFilter, c->param); |
if (!c->cascaded_context[1]) |
return -1; |
c2 = c->cascaded_context[1]; |
c2->is_internal_gamma = 1; |
c2->gamma = alloc_gamma_tbl( c->gamma_value); |
c2->inv_gamma = alloc_gamma_tbl(1.f/c->gamma_value); |
if (!c2->gamma || !c2->inv_gamma) |
return AVERROR(ENOMEM); |
// is_internal_gamma is set after creating the context, so the filter |
// chain must be re-initialized to properly create the gamma-convert |
// FilterDescriptor |
ff_free_filters(c2); |
if (ff_init_filters(c2) < 0) { |
sws_freeContext(c2); |
return -1; |
} |
c->cascaded_context[2] = NULL; |
if (dstFormat != tmpFmt) { |
ret = av_image_alloc(c->cascaded1_tmp, c->cascaded1_tmpStride, |
dstW, dstH, tmpFmt, 64); |
if (ret < 0) |
return ret; |
c->cascaded_context[2] = sws_getContext(dstW, dstH, tmpFmt, |
dstW, dstH, dstFormat, |
flags, NULL, NULL, c->param); |
if (!c->cascaded_context[2]) |
return -1; |
} |
return 0; |
} |
if (isBayer(srcFormat)) { |
if (!unscaled || |
(dstFormat != AV_PIX_FMT_RGB24 && dstFormat != AV_PIX_FMT_YUV420P)) { |
enum AVPixelFormat tmpFormat = AV_PIX_FMT_RGB24; |
ret = av_image_alloc(c->cascaded_tmp, c->cascaded_tmpStride, |
srcW, srcH, tmpFormat, 64); |
if (ret < 0) |
return ret; |
c->cascaded_context[0] = sws_getContext(srcW, srcH, srcFormat, |
srcW, srcH, tmpFormat, |
flags, srcFilter, NULL, c->param); |
if (!c->cascaded_context[0]) |
return -1; |
c->cascaded_context[1] = sws_getContext(srcW, srcH, tmpFormat, |
dstW, dstH, dstFormat, |
flags, NULL, dstFilter, c->param); |
if (!c->cascaded_context[1]) |
return -1; |
return 0; |
} |
} |
if (CONFIG_SWSCALE_ALPHA && isALPHA(srcFormat) && !isALPHA(dstFormat)) { |
enum AVPixelFormat tmpFormat = alphaless_fmt(srcFormat); |
if (tmpFormat != AV_PIX_FMT_NONE && c->alphablend != SWS_ALPHA_BLEND_NONE) |
if (!unscaled || |
dstFormat != tmpFormat || |
usesHFilter || usesVFilter || |
c->srcRange != c->dstRange |
) { |
ret = av_image_alloc(c->cascaded_tmp, c->cascaded_tmpStride, |
srcW, srcH, tmpFormat, 64); |
if (ret < 0) |
return ret; |
c->cascaded_context[0] = sws_alloc_set_opts(srcW, srcH, srcFormat, |
srcW, srcH, tmpFormat, |
flags, c->param); |
if (!c->cascaded_context[0]) |
return -1; |
c->cascaded_context[0]->alphablend = c->alphablend; |
ret = sws_init_context(c->cascaded_context[0], NULL , NULL); |
if (ret < 0) |
return ret; |
c->cascaded_context[1] = sws_getContext(srcW, srcH, tmpFormat, |
dstW, dstH, dstFormat, |
flags, srcFilter, dstFilter, c->param); |
if (!c->cascaded_context[1]) |
return -1; |
return 0; |
} |
} |
#define USE_MMAP (HAVE_MMAP && HAVE_MPROTECT && defined MAP_ANONYMOUS) |
/* precalculate horizontal scaler filter coefficients */ |
{ |
#if HAVE_MMXEXT_INLINE |
// the MMXEXT fast-bilinear code cannot downscale |
if (c->canMMXEXTBeUsed && (flags & SWS_FAST_BILINEAR)) { |
c->lumMmxextFilterCodeSize = ff_init_hscaler_mmxext(dstW, c->lumXInc, NULL, |
NULL, NULL, 8); |
c->chrMmxextFilterCodeSize = ff_init_hscaler_mmxext(c->chrDstW, c->chrXInc, |
NULL, NULL, NULL, 4); |
#if USE_MMAP |
c->lumMmxextFilterCode = mmap(NULL, c->lumMmxextFilterCodeSize, |
PROT_READ | PROT_WRITE, |
MAP_PRIVATE | MAP_ANONYMOUS, |
-1, 0); |
c->chrMmxextFilterCode = mmap(NULL, c->chrMmxextFilterCodeSize, |
PROT_READ | PROT_WRITE, |
MAP_PRIVATE | MAP_ANONYMOUS, |
-1, 0); |
#elif HAVE_VIRTUALALLOC |
c->lumMmxextFilterCode = VirtualAlloc(NULL, |
c->lumMmxextFilterCodeSize, |
MEM_COMMIT, |
PAGE_EXECUTE_READWRITE); |
c->chrMmxextFilterCode = VirtualAlloc(NULL, |
c->chrMmxextFilterCodeSize, |
MEM_COMMIT, |
PAGE_EXECUTE_READWRITE); |
#else |
c->lumMmxextFilterCode = av_malloc(c->lumMmxextFilterCodeSize); |
c->chrMmxextFilterCode = av_malloc(c->chrMmxextFilterCodeSize); |
#endif |
#ifdef MAP_ANONYMOUS |
if (c->lumMmxextFilterCode == MAP_FAILED || c->chrMmxextFilterCode == MAP_FAILED) |
#else |
if (!c->lumMmxextFilterCode || !c->chrMmxextFilterCode) |
#endif |
{ |
av_log(c, AV_LOG_ERROR, "Failed to allocate MMX2FilterCode\n"); |
return AVERROR(ENOMEM); |
} |
FF_ALLOCZ_OR_GOTO(c, c->hLumFilter, (dstW / 8 + 8) * sizeof(int16_t), fail); |
FF_ALLOCZ_OR_GOTO(c, c->hChrFilter, (c->chrDstW / 4 + 8) * sizeof(int16_t), fail); |
FF_ALLOCZ_OR_GOTO(c, c->hLumFilterPos, (dstW / 2 / 8 + 8) * sizeof(int32_t), fail); |
FF_ALLOCZ_OR_GOTO(c, c->hChrFilterPos, (c->chrDstW / 2 / 4 + 8) * sizeof(int32_t), fail); |
ff_init_hscaler_mmxext( dstW, c->lumXInc, c->lumMmxextFilterCode, |
c->hLumFilter, (uint32_t*)c->hLumFilterPos, 8); |
ff_init_hscaler_mmxext(c->chrDstW, c->chrXInc, c->chrMmxextFilterCode, |
c->hChrFilter, (uint32_t*)c->hChrFilterPos, 4); |
#if USE_MMAP |
if ( mprotect(c->lumMmxextFilterCode, c->lumMmxextFilterCodeSize, PROT_EXEC | PROT_READ) == -1 |
|| mprotect(c->chrMmxextFilterCode, c->chrMmxextFilterCodeSize, PROT_EXEC | PROT_READ) == -1) { |
av_log(c, AV_LOG_ERROR, "mprotect failed, cannot use fast bilinear scaler\n"); |
goto fail; |
} |
#endif |
} else |
#endif /* HAVE_MMXEXT_INLINE */ |
{ |
const int filterAlign = X86_MMX(cpu_flags) ? 4 : |
PPC_ALTIVEC(cpu_flags) ? 8 : 1; |
if ((ret = initFilter(&c->hLumFilter, &c->hLumFilterPos, |
&c->hLumFilterSize, c->lumXInc, |
srcW, dstW, filterAlign, 1 << 14, |
(flags & SWS_BICUBLIN) ? (flags | SWS_BICUBIC) : flags, |
cpu_flags, srcFilter->lumH, dstFilter->lumH, |
c->param, |
get_local_pos(c, 0, 0, 0), |
get_local_pos(c, 0, 0, 0))) < 0) |
goto fail; |
if ((ret = initFilter(&c->hChrFilter, &c->hChrFilterPos, |
&c->hChrFilterSize, c->chrXInc, |
c->chrSrcW, c->chrDstW, filterAlign, 1 << 14, |
(flags & SWS_BICUBLIN) ? (flags | SWS_BILINEAR) : flags, |
cpu_flags, srcFilter->chrH, dstFilter->chrH, |
c->param, |
get_local_pos(c, c->chrSrcHSubSample, c->src_h_chr_pos, 0), |
get_local_pos(c, c->chrDstHSubSample, c->dst_h_chr_pos, 0))) < 0) |
goto fail; |
} |
} // initialize horizontal stuff |
/* precalculate vertical scaler filter coefficients */ |
{ |
const int filterAlign = X86_MMX(cpu_flags) ? 2 : |
PPC_ALTIVEC(cpu_flags) ? 8 : 1; |
if ((ret = initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, |
c->lumYInc, srcH, dstH, filterAlign, (1 << 12), |
(flags & SWS_BICUBLIN) ? (flags | SWS_BICUBIC) : flags, |
cpu_flags, srcFilter->lumV, dstFilter->lumV, |
c->param, |
get_local_pos(c, 0, 0, 1), |
get_local_pos(c, 0, 0, 1))) < 0) |
goto fail; |
if ((ret = initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, |
c->chrYInc, c->chrSrcH, c->chrDstH, |
filterAlign, (1 << 12), |
(flags & SWS_BICUBLIN) ? (flags | SWS_BILINEAR) : flags, |
cpu_flags, srcFilter->chrV, dstFilter->chrV, |
c->param, |
get_local_pos(c, c->chrSrcVSubSample, c->src_v_chr_pos, 1), |
get_local_pos(c, c->chrDstVSubSample, c->dst_v_chr_pos, 1))) < 0) |
goto fail; |
#if HAVE_ALTIVEC |
FF_ALLOC_OR_GOTO(c, c->vYCoeffsBank, sizeof(vector signed short) * c->vLumFilterSize * c->dstH, fail); |
FF_ALLOC_OR_GOTO(c, c->vCCoeffsBank, sizeof(vector signed short) * c->vChrFilterSize * c->chrDstH, fail); |
for (i = 0; i < c->vLumFilterSize * c->dstH; i++) { |
int j; |
short *p = (short *)&c->vYCoeffsBank[i]; |
for (j = 0; j < 8; j++) |
p[j] = c->vLumFilter[i]; |
} |
for (i = 0; i < c->vChrFilterSize * c->chrDstH; i++) { |
int j; |
short *p = (short *)&c->vCCoeffsBank[i]; |
for (j = 0; j < 8; j++) |
p[j] = c->vChrFilter[i]; |
} |
#endif |
} |
// calculate buffer sizes so that they won't run out while handling slices |
c->vLumBufSize = c->vLumFilterSize; |
c->vChrBufSize = c->vChrFilterSize; |
for (i = 0; i < dstH; i++) { |
int chrI = (int64_t)i * c->chrDstH / dstH; |
int nextSlice = FFMAX(c->vLumFilterPos[i] + c->vLumFilterSize - 1, |
((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1) |
<< c->chrSrcVSubSample)); |
nextSlice >>= c->chrSrcVSubSample; |
nextSlice <<= c->chrSrcVSubSample; |
if (c->vLumFilterPos[i] + c->vLumBufSize < nextSlice) |
c->vLumBufSize = nextSlice - c->vLumFilterPos[i]; |
if (c->vChrFilterPos[chrI] + c->vChrBufSize < |
(nextSlice >> c->chrSrcVSubSample)) |
c->vChrBufSize = (nextSlice >> c->chrSrcVSubSample) - |
c->vChrFilterPos[chrI]; |
} |
for (i = 0; i < 4; i++) |
FF_ALLOCZ_OR_GOTO(c, c->dither_error[i], (c->dstW+2) * sizeof(int), fail); |
/* Allocate pixbufs (we use dynamic allocation because otherwise we would |
* need to allocate several megabytes to handle all possible cases) */ |
FF_ALLOCZ_OR_GOTO(c, c->lumPixBuf, c->vLumBufSize * 3 * sizeof(int16_t *), fail); |
FF_ALLOCZ_OR_GOTO(c, c->chrUPixBuf, c->vChrBufSize * 3 * sizeof(int16_t *), fail); |
FF_ALLOCZ_OR_GOTO(c, c->chrVPixBuf, c->vChrBufSize * 3 * sizeof(int16_t *), fail); |
if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat) && isALPHA(c->dstFormat)) |
FF_ALLOCZ_OR_GOTO(c, c->alpPixBuf, c->vLumBufSize * 3 * sizeof(int16_t *), fail); |
/* Note we need at least one pixel more at the end because of the MMX code |
* (just in case someone wants to replace the 4000/8000). */ |
/* align at 16 bytes for AltiVec */ |
for (i = 0; i < c->vLumBufSize; i++) { |
FF_ALLOCZ_OR_GOTO(c, c->lumPixBuf[i + c->vLumBufSize], |
dst_stride + 16, fail); |
c->lumPixBuf[i] = c->lumPixBuf[i + c->vLumBufSize]; |
} |
// 64 / c->scalingBpp is the same as 16 / sizeof(scaling_intermediate) |
c->uv_off = (dst_stride>>1) + 64 / (c->dstBpc &~ 7); |
c->uv_offx2 = dst_stride + 16; |
for (i = 0; i < c->vChrBufSize; i++) { |
FF_ALLOC_OR_GOTO(c, c->chrUPixBuf[i + c->vChrBufSize], |
dst_stride * 2 + 32, fail); |
c->chrUPixBuf[i] = c->chrUPixBuf[i + c->vChrBufSize]; |
c->chrVPixBuf[i] = c->chrVPixBuf[i + c->vChrBufSize] |
= c->chrUPixBuf[i] + (dst_stride >> 1) + 8; |
} |
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) |
for (i = 0; i < c->vLumBufSize; i++) { |
FF_ALLOCZ_OR_GOTO(c, c->alpPixBuf[i + c->vLumBufSize], |
dst_stride + 16, fail); |
c->alpPixBuf[i] = c->alpPixBuf[i + c->vLumBufSize]; |
} |
// initialize the chroma buffers to neutral values to avoid green artifacts between the right edge and the stride end |
for (i = 0; i < c->vChrBufSize; i++) |
if (desc_dst->comp[0].depth_minus1 == 15) { |
av_assert0(c->dstBpc > 14); |
for (j = 0; j < dst_stride / 2 + 1; j++) |
((int32_t*)(c->chrUPixBuf[i]))[j] = 1 << 18; |
} else |
for (j = 0; j < dst_stride + 1; j++) |
((int16_t*)(c->chrUPixBuf[i]))[j] = 1 << 14; |
av_assert0(c->chrDstH <= dstH); |
if (flags & SWS_PRINT_INFO) { |
const char *scaler = NULL, *cpucaps; |
for (i = 0; i < FF_ARRAY_ELEMS(scale_algorithms); i++) { |
if (flags & scale_algorithms[i].flag) { |
scaler = scale_algorithms[i].description; |
break; |
} |
} |
if (!scaler) |
scaler = "ehh flags invalid?!"; |
av_log(c, AV_LOG_INFO, "%s scaler, from %s to %s%s ", |
scaler, |
av_get_pix_fmt_name(srcFormat), |
#ifdef DITHER1XBPP |
dstFormat == AV_PIX_FMT_BGR555 || dstFormat == AV_PIX_FMT_BGR565 || |
dstFormat == AV_PIX_FMT_RGB444BE || dstFormat == AV_PIX_FMT_RGB444LE || |
dstFormat == AV_PIX_FMT_BGR444BE || dstFormat == AV_PIX_FMT_BGR444LE ? |
"dithered " : "", |
#else |
"", |
#endif |
av_get_pix_fmt_name(dstFormat)); |
if (INLINE_MMXEXT(cpu_flags)) |
cpucaps = "MMXEXT"; |
else if (INLINE_AMD3DNOW(cpu_flags)) |
cpucaps = "3DNOW"; |
else if (INLINE_MMX(cpu_flags)) |
cpucaps = "MMX"; |
else if (PPC_ALTIVEC(cpu_flags)) |
cpucaps = "AltiVec"; |
else |
cpucaps = "C"; |
av_log(c, AV_LOG_INFO, "using %s\n", cpucaps); |
av_log(c, AV_LOG_VERBOSE, "%dx%d -> %dx%d\n", srcW, srcH, dstW, dstH); |
av_log(c, AV_LOG_DEBUG, |
"lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n", |
c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc); |
av_log(c, AV_LOG_DEBUG, |
"chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n", |
c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH, |
c->chrXInc, c->chrYInc); |
} |
/* alpha-blend special case; note this has been split via cascaded contexts if it is scaled */ |
if (unscaled && !usesHFilter && !usesVFilter && |
c->alphablend != SWS_ALPHA_BLEND_NONE && |
isALPHA(srcFormat) && |
(c->srcRange == c->dstRange || isAnyRGB(dstFormat)) && |
alphaless_fmt(srcFormat) == dstFormat |
) { |
c->swscale = ff_sws_alphablendaway; |
if (flags & SWS_PRINT_INFO) |
av_log(c, AV_LOG_INFO, |
"using alpha blendaway %s -> %s special converter\n", |
av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat)); |
return 0; |
} |
/* unscaled special cases */ |
if (unscaled && !usesHFilter && !usesVFilter && |
(c->srcRange == c->dstRange || isAnyRGB(dstFormat))) { |
ff_get_unscaled_swscale(c); |
if (c->swscale) { |
if (flags & SWS_PRINT_INFO) |
av_log(c, AV_LOG_INFO, |
"using unscaled %s -> %s special converter\n", |
av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat)); |
return 0; |
} |
} |
c->swscale = ff_getSwsFunc(c); |
return ff_init_filters(c); |
fail: // FIXME replace these with appropriate error codes |
if (ret == RETCODE_USE_CASCADE) { |
int tmpW = sqrt(srcW * (int64_t)dstW); |
int tmpH = sqrt(srcH * (int64_t)dstH); |
enum AVPixelFormat tmpFormat = AV_PIX_FMT_YUV420P; |
if (isALPHA(srcFormat)) |
tmpFormat = AV_PIX_FMT_YUVA420P; |
if (srcW*(int64_t)srcH <= 4LL*dstW*dstH) |
return AVERROR(EINVAL); |
ret = av_image_alloc(c->cascaded_tmp, c->cascaded_tmpStride, |
tmpW, tmpH, tmpFormat, 64); |
if (ret < 0) |
return ret; |
c->cascaded_context[0] = sws_getContext(srcW, srcH, srcFormat, |
tmpW, tmpH, tmpFormat, |
flags, srcFilter, NULL, c->param); |
if (!c->cascaded_context[0]) |
return -1; |
c->cascaded_context[1] = sws_getContext(tmpW, tmpH, tmpFormat, |
dstW, dstH, dstFormat, |
flags, NULL, dstFilter, c->param); |
if (!c->cascaded_context[1]) |
return -1; |
return 0; |
} |
return -1; |
} |
SwsContext *sws_alloc_set_opts(int srcW, int srcH, enum AVPixelFormat srcFormat, |
int dstW, int dstH, enum AVPixelFormat dstFormat, |
int flags, const double *param) |
{ |
SwsContext *c; |
if (!(c = sws_alloc_context())) |
return NULL; |
c->flags = flags; |
c->srcW = srcW; |
c->srcH = srcH; |
c->dstW = dstW; |
c->dstH = dstH; |
c->srcFormat = srcFormat; |
c->dstFormat = dstFormat; |
if (param) { |
c->param[0] = param[0]; |
c->param[1] = param[1]; |
} |
return c; |
} |
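/* Minimal usage sketch (error handling elided; buffers assumed to be |
* allocated elsewhere, e.g. with av_image_alloc()): |
* |
* struct SwsContext *ctx = |
* sws_getContext(in_w, in_h, AV_PIX_FMT_YUV420P, |
* out_w, out_h, AV_PIX_FMT_RGB24, |
* SWS_BILINEAR, NULL, NULL, NULL); |
* sws_scale(ctx, (const uint8_t * const *)src_data, src_linesize, |
* 0, in_h, dst_data, dst_linesize); |
* sws_freeContext(ctx); |
*/ |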
SwsContext *sws_getContext(int srcW, int srcH, enum AVPixelFormat srcFormat, |
int dstW, int dstH, enum AVPixelFormat dstFormat, |
int flags, SwsFilter *srcFilter, |
SwsFilter *dstFilter, const double *param) |
{ |
SwsContext *c; |
c = sws_alloc_set_opts(srcW, srcH, srcFormat, |
dstW, dstH, dstFormat, |
flags, param); |
if (!c) |
return NULL; |
if (sws_init_context(c, srcFilter, dstFilter) < 0) { |
sws_freeContext(c); |
return NULL; |
} |
return c; |
} |
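/* The in-place vector operations below (sws_shiftVec, sws_addVec, |
* sws_subVec, sws_convVec) have no way to report allocation failure, so |
* on failure they poison the vector with NaN; sws_getDefaultFilter() |
* checks for this afterwards via isnan_vec(). */ |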
static int isnan_vec(SwsVector *a) |
{ |
int i; |
for (i=0; i<a->length; i++) |
if (isnan(a->coeff[i])) |
return 1; |
return 0; |
} |
static void makenan_vec(SwsVector *a) |
{ |
int i; |
for (i=0; i<a->length; i++) |
a->coeff[i] = NAN; |
} |
SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur, |
float lumaSharpen, float chromaSharpen, |
float chromaHShift, float chromaVShift, |
int verbose) |
{ |
SwsFilter *filter = av_malloc(sizeof(SwsFilter)); |
if (!filter) |
return NULL; |
if (lumaGBlur != 0.0) { |
filter->lumH = sws_getGaussianVec(lumaGBlur, 3.0); |
filter->lumV = sws_getGaussianVec(lumaGBlur, 3.0); |
} else { |
filter->lumH = sws_getIdentityVec(); |
filter->lumV = sws_getIdentityVec(); |
} |
if (chromaGBlur != 0.0) { |
filter->chrH = sws_getGaussianVec(chromaGBlur, 3.0); |
filter->chrV = sws_getGaussianVec(chromaGBlur, 3.0); |
} else { |
filter->chrH = sws_getIdentityVec(); |
filter->chrV = sws_getIdentityVec(); |
} |
if (!filter->lumH || !filter->lumV || !filter->chrH || !filter->chrV) |
goto fail; |
if (chromaSharpen != 0.0) { |
SwsVector *id = sws_getIdentityVec(); |
if (!id) |
goto fail; |
sws_scaleVec(filter->chrH, -chromaSharpen); |
sws_scaleVec(filter->chrV, -chromaSharpen); |
sws_addVec(filter->chrH, id); |
sws_addVec(filter->chrV, id); |
sws_freeVec(id); |
} |
if (lumaSharpen != 0.0) { |
SwsVector *id = sws_getIdentityVec(); |
if (!id) |
goto fail; |
sws_scaleVec(filter->lumH, -lumaSharpen); |
sws_scaleVec(filter->lumV, -lumaSharpen); |
sws_addVec(filter->lumH, id); |
sws_addVec(filter->lumV, id); |
sws_freeVec(id); |
} |
if (chromaHShift != 0.0) |
sws_shiftVec(filter->chrH, (int)(chromaHShift + 0.5)); |
if (chromaVShift != 0.0) |
sws_shiftVec(filter->chrV, (int)(chromaVShift + 0.5)); |
sws_normalizeVec(filter->chrH, 1.0); |
sws_normalizeVec(filter->chrV, 1.0); |
sws_normalizeVec(filter->lumH, 1.0); |
sws_normalizeVec(filter->lumV, 1.0); |
if (isnan_vec(filter->chrH) || |
isnan_vec(filter->chrV) || |
isnan_vec(filter->lumH) || |
isnan_vec(filter->lumV)) |
goto fail; |
if (verbose) |
sws_printVec2(filter->chrH, NULL, AV_LOG_DEBUG); |
if (verbose) |
sws_printVec2(filter->lumH, NULL, AV_LOG_DEBUG); |
return filter; |
fail: |
sws_freeVec(filter->lumH); |
sws_freeVec(filter->lumV); |
sws_freeVec(filter->chrH); |
sws_freeVec(filter->chrV); |
av_freep(&filter); |
return NULL; |
} |
SwsVector *sws_allocVec(int length) |
{ |
SwsVector *vec; |
if (length <= 0 || length > INT_MAX / sizeof(double)) |
return NULL; |
vec = av_malloc(sizeof(SwsVector)); |
if (!vec) |
return NULL; |
vec->length = length; |
vec->coeff = av_malloc(sizeof(double) * length); |
if (!vec->coeff) |
av_freep(&vec); |
return vec; |
} |
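/* Build a sampled Gaussian of odd length (variance * quality, rounded, |
* forced odd). The exact leading constant is irrelevant because the |
* vector is normalized to a DC gain of 1.0 before returning. */ |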
SwsVector *sws_getGaussianVec(double variance, double quality) |
{ |
const int length = (int)(variance * quality + 0.5) | 1; |
int i; |
double middle = (length - 1) * 0.5; |
SwsVector *vec; |
if(variance < 0 || quality < 0) |
return NULL; |
vec = sws_allocVec(length); |
if (!vec) |
return NULL; |
for (i = 0; i < length; i++) { |
double dist = i - middle; |
vec->coeff[i] = exp(-dist * dist / (2 * variance * variance)) / |
sqrt(2 * variance * M_PI); |
} |
sws_normalizeVec(vec, 1.0); |
return vec; |
} |
SwsVector *sws_getConstVec(double c, int length) |
{ |
int i; |
SwsVector *vec = sws_allocVec(length); |
if (!vec) |
return NULL; |
for (i = 0; i < length; i++) |
vec->coeff[i] = c; |
return vec; |
} |
SwsVector *sws_getIdentityVec(void) |
{ |
return sws_getConstVec(1.0, 1); |
} |
static double sws_dcVec(SwsVector *a) |
{ |
int i; |
double sum = 0; |
for (i = 0; i < a->length; i++) |
sum += a->coeff[i]; |
return sum; |
} |
void sws_scaleVec(SwsVector *a, double scalar) |
{ |
int i; |
for (i = 0; i < a->length; i++) |
a->coeff[i] *= scalar; |
} |
void sws_normalizeVec(SwsVector *a, double height) |
{ |
sws_scaleVec(a, height / sws_dcVec(a)); |
} |
static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b) |
{ |
int length = a->length + b->length - 1; |
int i, j; |
SwsVector *vec = sws_getConstVec(0.0, length); |
if (!vec) |
return NULL; |
for (i = 0; i < a->length; i++) { |
for (j = 0; j < b->length; j++) { |
vec->coeff[i + j] += a->coeff[i] * b->coeff[j]; |
} |
} |
return vec; |
} |
static SwsVector *sws_sumVec(SwsVector *a, SwsVector *b) |
{ |
int length = FFMAX(a->length, b->length); |
int i; |
SwsVector *vec = sws_getConstVec(0.0, length); |
if (!vec) |
return NULL; |
for (i = 0; i < a->length; i++) |
vec->coeff[i + (length - 1) / 2 - (a->length - 1) / 2] += a->coeff[i]; |
for (i = 0; i < b->length; i++) |
vec->coeff[i + (length - 1) / 2 - (b->length - 1) / 2] += b->coeff[i]; |
return vec; |
} |
static SwsVector *sws_diffVec(SwsVector *a, SwsVector *b) |
{ |
int length = FFMAX(a->length, b->length); |
int i; |
SwsVector *vec = sws_getConstVec(0.0, length); |
if (!vec) |
return NULL; |
for (i = 0; i < a->length; i++) |
vec->coeff[i + (length - 1) / 2 - (a->length - 1) / 2] += a->coeff[i]; |
for (i = 0; i < b->length; i++) |
vec->coeff[i + (length - 1) / 2 - (b->length - 1) / 2] -= b->coeff[i]; |
return vec; |
} |
/* shift left / or right if "shift" is negative */ |
static SwsVector *sws_getShiftedVec(SwsVector *a, int shift) |
{ |
int length = a->length + FFABS(shift) * 2; |
int i; |
SwsVector *vec = sws_getConstVec(0.0, length); |
if (!vec) |
return NULL; |
for (i = 0; i < a->length; i++) { |
vec->coeff[i + (length - 1) / 2 - |
(a->length - 1) / 2 - shift] = a->coeff[i]; |
} |
return vec; |
} |
void sws_shiftVec(SwsVector *a, int shift) |
{ |
SwsVector *shifted = sws_getShiftedVec(a, shift); |
if (!shifted) { |
makenan_vec(a); |
return; |
} |
av_free(a->coeff); |
a->coeff = shifted->coeff; |
a->length = shifted->length; |
av_free(shifted); |
} |
void sws_addVec(SwsVector *a, SwsVector *b) |
{ |
SwsVector *sum = sws_sumVec(a, b); |
if (!sum) { |
makenan_vec(a); |
return; |
} |
av_free(a->coeff); |
a->coeff = sum->coeff; |
a->length = sum->length; |
av_free(sum); |
} |
void sws_subVec(SwsVector *a, SwsVector *b) |
{ |
SwsVector *diff = sws_diffVec(a, b); |
if (!diff) { |
makenan_vec(a); |
return; |
} |
av_free(a->coeff); |
a->coeff = diff->coeff; |
a->length = diff->length; |
av_free(diff); |
} |
void sws_convVec(SwsVector *a, SwsVector *b) |
{ |
SwsVector *conv = sws_getConvVec(a, b); |
if (!conv) { |
makenan_vec(a); |
return; |
} |
av_free(a->coeff); |
a->coeff = conv->coeff; |
a->length = conv->length; |
av_free(conv); |
} |
SwsVector *sws_cloneVec(SwsVector *a) |
{ |
SwsVector *vec = sws_allocVec(a->length); |
if (!vec) |
return NULL; |
memcpy(vec->coeff, a->coeff, a->length * sizeof(*a->coeff)); |
return vec; |
} |
void sws_printVec2(SwsVector *a, AVClass *log_ctx, int log_level) |
{ |
int i; |
double max = 0; |
double min = 0; |
double range; |
for (i = 0; i < a->length; i++) |
if (a->coeff[i] > max) |
max = a->coeff[i]; |
for (i = 0; i < a->length; i++) |
if (a->coeff[i] < min) |
min = a->coeff[i]; |
range = max - min; |
if (range <= 0.0) // guard: an all-zero vector would otherwise divide by zero below |
range = 1.0; |
for (i = 0; i < a->length; i++) { |
int x = (int)((a->coeff[i] - min) * 60.0 / range + 0.5); |
av_log(log_ctx, log_level, "%1.3f ", a->coeff[i]); |
for (; x > 0; x--) |
av_log(log_ctx, log_level, " "); |
av_log(log_ctx, log_level, "|\n"); |
} |
} |
void sws_freeVec(SwsVector *a) |
{ |
if (!a) |
return; |
av_freep(&a->coeff); |
a->length = 0; |
av_free(a); |
} |
void sws_freeFilter(SwsFilter *filter) |
{ |
if (!filter) |
return; |
sws_freeVec(filter->lumH); |
sws_freeVec(filter->lumV); |
sws_freeVec(filter->chrH); |
sws_freeVec(filter->chrV); |
av_free(filter); |
} |
void sws_freeContext(SwsContext *c) |
{ |
int i; |
if (!c) |
return; |
if (c->lumPixBuf) { |
for (i = 0; i < c->vLumBufSize; i++) |
av_freep(&c->lumPixBuf[i]); |
av_freep(&c->lumPixBuf); |
} |
if (c->chrUPixBuf) { |
for (i = 0; i < c->vChrBufSize; i++) |
av_freep(&c->chrUPixBuf[i]); |
av_freep(&c->chrUPixBuf); |
av_freep(&c->chrVPixBuf); |
} |
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { |
for (i = 0; i < c->vLumBufSize; i++) |
av_freep(&c->alpPixBuf[i]); |
av_freep(&c->alpPixBuf); |
} |
for (i = 0; i < 4; i++) |
av_freep(&c->dither_error[i]); |
av_freep(&c->vLumFilter); |
av_freep(&c->vChrFilter); |
av_freep(&c->hLumFilter); |
av_freep(&c->hChrFilter); |
#if HAVE_ALTIVEC |
av_freep(&c->vYCoeffsBank); |
av_freep(&c->vCCoeffsBank); |
#endif |
av_freep(&c->vLumFilterPos); |
av_freep(&c->vChrFilterPos); |
av_freep(&c->hLumFilterPos); |
av_freep(&c->hChrFilterPos); |
#if HAVE_MMX_INLINE |
#if USE_MMAP |
if (c->lumMmxextFilterCode) |
munmap(c->lumMmxextFilterCode, c->lumMmxextFilterCodeSize); |
if (c->chrMmxextFilterCode) |
munmap(c->chrMmxextFilterCode, c->chrMmxextFilterCodeSize); |
#elif HAVE_VIRTUALALLOC |
if (c->lumMmxextFilterCode) |
VirtualFree(c->lumMmxextFilterCode, 0, MEM_RELEASE); |
if (c->chrMmxextFilterCode) |
VirtualFree(c->chrMmxextFilterCode, 0, MEM_RELEASE); |
#else |
av_free(c->lumMmxextFilterCode); |
av_free(c->chrMmxextFilterCode); |
#endif |
c->lumMmxextFilterCode = NULL; |
c->chrMmxextFilterCode = NULL; |
#endif /* HAVE_MMX_INLINE */ |
av_freep(&c->yuvTable); |
av_freep(&c->formatConvBuffer); |
sws_freeContext(c->cascaded_context[0]); |
sws_freeContext(c->cascaded_context[1]); |
sws_freeContext(c->cascaded_context[2]); |
memset(c->cascaded_context, 0, sizeof(c->cascaded_context)); |
av_freep(&c->cascaded_tmp[0]); |
av_freep(&c->cascaded1_tmp[0]); |
av_freep(&c->gamma); |
av_freep(&c->inv_gamma); |
ff_free_filters(c); |
av_free(c); |
} |
struct SwsContext *sws_getCachedContext(struct SwsContext *context, int srcW, |
int srcH, enum AVPixelFormat srcFormat, |
int dstW, int dstH, |
enum AVPixelFormat dstFormat, int flags, |
SwsFilter *srcFilter, |
SwsFilter *dstFilter, |
const double *param) |
{ |
static const double default_param[2] = { SWS_PARAM_DEFAULT, |
SWS_PARAM_DEFAULT }; |
int64_t src_h_chr_pos = -513, dst_h_chr_pos = -513, |
src_v_chr_pos = -513, dst_v_chr_pos = -513; |
if (!param) |
param = default_param; |
if (context && |
(context->srcW != srcW || |
context->srcH != srcH || |
context->srcFormat != srcFormat || |
context->dstW != dstW || |
context->dstH != dstH || |
context->dstFormat != dstFormat || |
context->flags != flags || |
context->param[0] != param[0] || |
context->param[1] != param[1])) { |
av_opt_get_int(context, "src_h_chr_pos", 0, &src_h_chr_pos); |
av_opt_get_int(context, "src_v_chr_pos", 0, &src_v_chr_pos); |
av_opt_get_int(context, "dst_h_chr_pos", 0, &dst_h_chr_pos); |
av_opt_get_int(context, "dst_v_chr_pos", 0, &dst_v_chr_pos); |
sws_freeContext(context); |
context = NULL; |
} |
if (!context) { |
if (!(context = sws_alloc_context())) |
return NULL; |
context->srcW = srcW; |
context->srcH = srcH; |
context->srcFormat = srcFormat; |
context->dstW = dstW; |
context->dstH = dstH; |
context->dstFormat = dstFormat; |
context->flags = flags; |
context->param[0] = param[0]; |
context->param[1] = param[1]; |
av_opt_set_int(context, "src_h_chr_pos", src_h_chr_pos, 0); |
av_opt_set_int(context, "src_v_chr_pos", src_v_chr_pos, 0); |
av_opt_set_int(context, "dst_h_chr_pos", dst_h_chr_pos, 0); |
av_opt_set_int(context, "dst_v_chr_pos", dst_v_chr_pos, 0); |
if (sws_init_context(context, srcFilter, dstFilter) < 0) { |
sws_freeContext(context); |
return NULL; |
} |
} |
return context; |
} |
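A typical decode-loop use of sws_getCachedContext(): the context persists while the input geometry and formats match and is transparently rebuilt when they change. A sketch (AVFrame, sws_scale() and AV_PIX_FMT_RGB24 are standard libav* API; the wrapper name is hypothetical):
#include <libavutil/error.h>
#include <libavutil/frame.h>
#include <libswscale/swscale.h>
static struct SwsContext *sws; /* persists across frames */
static int convert_to_rgb24(const AVFrame *in, uint8_t *dst[4],
                            int dst_linesize[4], int out_w, int out_h)
{
    sws = sws_getCachedContext(sws, in->width, in->height, in->format,
                               out_w, out_h, AV_PIX_FMT_RGB24,
                               SWS_BILINEAR, NULL, NULL, NULL);
    if (!sws)
        return AVERROR(ENOMEM);
    return sws_scale(sws, (const uint8_t * const *)in->data, in->linesize,
                     0, in->height, dst, dst_linesize);
}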
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/version.h |
---|
0,0 → 1,56 |
/* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
#ifndef SWSCALE_VERSION_H |
#define SWSCALE_VERSION_H |
/** |
* @file |
* swscale version macros |
*/ |
#include "libavutil/version.h" |
#define LIBSWSCALE_VERSION_MAJOR 3 |
#define LIBSWSCALE_VERSION_MINOR 1 |
#define LIBSWSCALE_VERSION_MICRO 101 |
#define LIBSWSCALE_VERSION_INT AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \ |
LIBSWSCALE_VERSION_MINOR, \ |
LIBSWSCALE_VERSION_MICRO) |
#define LIBSWSCALE_VERSION AV_VERSION(LIBSWSCALE_VERSION_MAJOR, \ |
LIBSWSCALE_VERSION_MINOR, \ |
LIBSWSCALE_VERSION_MICRO) |
#define LIBSWSCALE_BUILD LIBSWSCALE_VERSION_INT |
#define LIBSWSCALE_IDENT "SwS" AV_STRINGIFY(LIBSWSCALE_VERSION) |
/** |
* FF_API_* defines may be placed below to indicate public API that will be |
* dropped at a future version bump. The defines themselves are not part of |
* the public API and may change, break or disappear at any time. |
*/ |
#ifndef FF_API_SWS_CPU_CAPS |
#define FF_API_SWS_CPU_CAPS (LIBSWSCALE_VERSION_MAJOR < 4) |
#endif |
#ifndef FF_API_ARCH_BFIN |
#define FF_API_ARCH_BFIN (LIBSWSCALE_VERSION_MAJOR < 4) |
#endif |
#endif /* SWSCALE_VERSION_H */ |
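Client code can gate on these macros at compile time; for example (illustrative version floor):
#include <libswscale/version.h>
#if LIBSWSCALE_VERSION_INT < AV_VERSION_INT(3, 1, 0)
#error "this code expects the libswscale 3.1 API or newer"
#endif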
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/vscale.c |
---|
0,0 → 1,315 |
/* |
* Copyright (C) 2015 Pedro Arthur <bygrandao@gmail.com> |
* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
#include "swscale_internal.h" |
typedef struct VScalerContext |
{ |
uint16_t *filter[2]; |
int32_t *filter_pos; |
int filter_size; |
int isMMX; |
void *pfn; |
yuv2packedX_fn yuv2packedX; |
} VScalerContext; |
static int lum_planar_vscale(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int sliceH) |
{ |
VScalerContext *inst = desc->instance; |
int dstW = desc->dst->width; |
int first = FFMAX(1-inst->filter_size, inst->filter_pos[sliceY]); |
int sp = first - desc->src->plane[0].sliceY; |
int dp = sliceY - desc->dst->plane[0].sliceY; |
uint8_t **src = desc->src->plane[0].line + sp; |
uint8_t **dst = desc->dst->plane[0].line + dp; |
uint16_t *filter = inst->filter[0] + (inst->isMMX ? 0 : sliceY * inst->filter_size); |
if (inst->filter_size == 1) |
((yuv2planar1_fn)inst->pfn)((const int16_t*)src[0], dst[0], dstW, c->lumDither8, 0); |
else |
((yuv2planarX_fn)inst->pfn)(filter, inst->filter_size, (const int16_t**)src, dst[0], dstW, c->lumDither8, 0); |
if (desc->alpha) { |
int sp = first - desc->src->plane[3].sliceY; |
int dp = sliceY - desc->dst->plane[3].sliceY; |
uint8_t **src = desc->src->plane[3].line + sp; |
uint8_t **dst = desc->dst->plane[3].line + dp; |
uint16_t *filter = inst->filter[1] + (inst->isMMX ? 0 : sliceY * inst->filter_size); |
if (inst->filter_size == 1) |
((yuv2planar1_fn)inst->pfn)((const int16_t*)src[0], dst[0], dstW, c->lumDither8, 0); |
else |
((yuv2planarX_fn)inst->pfn)(filter, inst->filter_size, (const int16_t**)src, dst[0], dstW, c->lumDither8, 0); |
} |
return 1; |
} |
static int chr_planar_vscale(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int sliceH) |
{ |
const int chrSkipMask = (1 << desc->dst->v_chr_sub_sample) - 1; |
if (sliceY & chrSkipMask) |
return 0; |
else { |
VScalerContext *inst = desc->instance; |
int dstW = FF_CEIL_RSHIFT(desc->dst->width, desc->dst->h_chr_sub_sample); |
int chrSliceY = sliceY >> desc->dst->v_chr_sub_sample; |
int first = FFMAX(1-inst->filter_size, inst->filter_pos[chrSliceY]); |
int sp1 = first - desc->src->plane[1].sliceY; |
int sp2 = first - desc->src->plane[2].sliceY; |
int dp1 = chrSliceY - desc->dst->plane[1].sliceY; |
int dp2 = chrSliceY - desc->dst->plane[2].sliceY; |
uint8_t **src1 = desc->src->plane[1].line + sp1; |
uint8_t **src2 = desc->src->plane[2].line + sp2; |
uint8_t **dst1 = desc->dst->plane[1].line + dp1; |
uint8_t **dst2 = desc->dst->plane[2].line + dp2; |
uint16_t *filter = inst->filter[0] + (inst->isMMX ? 0 : chrSliceY * inst->filter_size); |
if (c->yuv2nv12cX) { |
((yuv2interleavedX_fn)inst->pfn)(c, filter, inst->filter_size, (const int16_t**)src1, (const int16_t**)src2, dst1[0], dstW); |
} else if (inst->filter_size == 1) { |
((yuv2planar1_fn)inst->pfn)((const int16_t*)src1[0], dst1[0], dstW, c->chrDither8, 0); |
((yuv2planar1_fn)inst->pfn)((const int16_t*)src2[0], dst2[0], dstW, c->chrDither8, 3); |
} else { |
((yuv2planarX_fn)inst->pfn)(filter, inst->filter_size, (const int16_t**)src1, dst1[0], dstW, c->chrDither8, 0); |
((yuv2planarX_fn)inst->pfn)(filter, inst->filter_size, (const int16_t**)src2, dst2[0], dstW, c->chrDither8, inst->isMMX ? (c->uv_offx2 >> 1) : 3); |
} |
} |
return 1; |
} |
static int packed_vscale(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int sliceH) |
{ |
VScalerContext *inst = desc->instance; |
int dstW = desc->dst->width; |
int chrSliceY = sliceY >> desc->dst->v_chr_sub_sample; |
int lum_fsize = inst[0].filter_size; |
int chr_fsize = inst[1].filter_size; |
uint16_t *lum_filter = inst[0].filter[0]; |
uint16_t *chr_filter = inst[1].filter[0]; |
int firstLum = FFMAX(1-lum_fsize, inst[0].filter_pos[chrSliceY]); |
int firstChr = FFMAX(1-chr_fsize, inst[1].filter_pos[chrSliceY]); |
int sp0 = firstLum - desc->src->plane[0].sliceY; |
int sp1 = firstChr - desc->src->plane[1].sliceY; |
int sp2 = firstChr - desc->src->plane[2].sliceY; |
int sp3 = firstLum - desc->src->plane[3].sliceY; |
int dp = sliceY - desc->dst->plane[0].sliceY; |
uint8_t **src0 = desc->src->plane[0].line + sp0; |
uint8_t **src1 = desc->src->plane[1].line + sp1; |
uint8_t **src2 = desc->src->plane[2].line + sp2; |
uint8_t **src3 = desc->alpha ? desc->src->plane[3].line + sp3 : NULL; |
uint8_t **dst = desc->dst->plane[0].line + dp; |
if (c->yuv2packed1 && lum_fsize == 1 && chr_fsize == 1) { // unscaled RGB |
((yuv2packed1_fn)inst->pfn)(c, (const int16_t*)*src0, (const int16_t**)src1, (const int16_t**)src2, |
(const int16_t*)(desc->alpha ? *src3 : NULL), *dst, dstW, 0, sliceY); |
} else if (c->yuv2packed1 && lum_fsize == 1 && chr_fsize == 2 && |
chr_filter[2 * chrSliceY + 1] + chr_filter[2 * chrSliceY] == 4096 && |
chr_filter[2 * chrSliceY + 1] <= 4096U) { // unscaled RGB |
int chrAlpha = chr_filter[2 * chrSliceY + 1]; |
((yuv2packed1_fn)inst->pfn)(c, (const int16_t*)*src0, (const int16_t**)src1, (const int16_t**)src2, |
(const int16_t*)(desc->alpha ? *src3 : NULL), *dst, dstW, chrAlpha, sliceY); |
} else if (c->yuv2packed2 && lum_fsize == 2 && chr_fsize == 2 && |
lum_filter[2 * sliceY + 1] + lum_filter[2 * sliceY] == 4096 && |
lum_filter[2 * sliceY + 1] <= 4096U && |
chr_filter[2 * chrSliceY + 1] + chr_filter[2 * chrSliceY] == 4096 && |
chr_filter[2 * chrSliceY + 1] <= 4096U |
) { // bilinear upscale RGB |
int lumAlpha = lum_filter[2 * sliceY + 1]; |
int chrAlpha = chr_filter[2 * chrSliceY + 1]; |
c->lumMmxFilter[2] = |
c->lumMmxFilter[3] = lum_filter[2 * sliceY] * 0x10001; |
c->chrMmxFilter[2] = |
c->chrMmxFilter[3] = chr_filter[2 * chrSliceY] * 0x10001; |
((yuv2packed2_fn)inst->pfn)(c, (const int16_t**)src0, (const int16_t**)src1, (const int16_t**)src2, (const int16_t**)src3, |
*dst, dstW, lumAlpha, chrAlpha, sliceY); |
} else { // general RGB |
if ((c->yuv2packed1 && lum_fsize == 1 && chr_fsize == 2) || |
(c->yuv2packed2 && lum_fsize == 2 && chr_fsize == 2)) { |
if (!c->warned_unuseable_bilinear) |
av_log(c, AV_LOG_INFO, "Optimized 2 tap filter code cannot be used\n"); |
c->warned_unuseable_bilinear = 1; |
} |
inst->yuv2packedX(c, lum_filter + sliceY * lum_fsize, |
(const int16_t**)src0, lum_fsize, chr_filter + sliceY * chr_fsize, |
(const int16_t**)src1, (const int16_t**)src2, chr_fsize, (const int16_t**)src3, *dst, dstW, sliceY); |
} |
return 1; |
} |
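The "== 4096" tests above verify that the two vertical taps are 12-bit fixed-point weights summing to exactly 1.0 (4096 == 1 << 12), i.e. a plain bilinear blend of two source lines. A scalar sketch of that blend (illustrative only; the real yuv2packed1/2 routines also perform the packed-RGB conversion):
#include <stdint.h>
/* blend two 15-bit lines with Q12 weights w0 + w1 == 4096, the
 * precondition packed_vscale checks before taking the fast path */
static void blend_lines_q12(const int16_t *line0, const int16_t *line1,
                            int16_t *dst, int w, int w0, int w1)
{
    for (int x = 0; x < w; x++)
        dst[x] = (line0[x] * w0 + line1[x] * w1) >> 12;
}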
static int any_vscale(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int sliceH) |
{ |
VScalerContext *inst = desc->instance; |
int dstW = desc->dst->width; |
int chrSliceY = sliceY >> desc->dst->v_chr_sub_sample; |
int lum_fsize = inst[0].filter_size; |
int chr_fsize = inst[1].filter_size; |
uint16_t *lum_filter = inst[0].filter[0]; |
uint16_t *chr_filter = inst[1].filter[0]; |
int firstLum = FFMAX(1-lum_fsize, inst[0].filter_pos[chrSliceY]); |
int firstChr = FFMAX(1-chr_fsize, inst[1].filter_pos[chrSliceY]); |
int sp0 = firstLum - desc->src->plane[0].sliceY; |
int sp1 = firstChr - desc->src->plane[1].sliceY; |
int sp2 = firstChr - desc->src->plane[2].sliceY; |
int sp3 = firstLum - desc->src->plane[3].sliceY; |
int dp0 = sliceY - desc->dst->plane[0].sliceY; |
int dp1 = chrSliceY - desc->dst->plane[1].sliceY; |
int dp2 = chrSliceY - desc->dst->plane[2].sliceY; |
int dp3 = sliceY - desc->dst->plane[3].sliceY; |
uint8_t **src0 = desc->src->plane[0].line + sp0; |
uint8_t **src1 = desc->src->plane[1].line + sp1; |
uint8_t **src2 = desc->src->plane[2].line + sp2; |
uint8_t **src3 = desc->alpha ? desc->src->plane[3].line + sp3 : NULL; |
uint8_t *dst[4] = { desc->dst->plane[0].line[dp0], |
desc->dst->plane[1].line[dp1], |
desc->dst->plane[2].line[dp2], |
desc->alpha ? desc->dst->plane[3].line[dp3] : NULL }; |
av_assert1(!c->yuv2packed1 && !c->yuv2packed2); |
((yuv2anyX_fn)inst->pfn)(c, lum_filter + sliceY * lum_fsize, |
(const int16_t**)src0, lum_fsize, chr_filter + sliceY * chr_fsize, |
(const int16_t**)src1, (const int16_t**)src2, chr_fsize, (const int16_t**)src3, dst, dstW, sliceY); |
return 1; |
} |
int ff_init_vscale(SwsContext *c, SwsFilterDescriptor *desc, SwsSlice *src, SwsSlice *dst) |
{ |
VScalerContext *lumCtx = NULL; |
VScalerContext *chrCtx = NULL; |
if (isPlanarYUV(c->dstFormat) || (isGray(c->dstFormat) && !isALPHA(c->dstFormat))) { |
lumCtx = av_mallocz(sizeof(VScalerContext)); |
if (!lumCtx) |
return AVERROR(ENOMEM); |
desc[0].process = lum_planar_vscale; |
desc[0].instance = lumCtx; |
desc[0].src = src; |
desc[0].dst = dst; |
desc[0].alpha = c->alpPixBuf != 0; |
if (!isGray(c->dstFormat)) { |
chrCtx = av_mallocz(sizeof(VScalerContext)); |
if (!chrCtx) |
return AVERROR(ENOMEM); |
desc[1].process = chr_planar_vscale; |
desc[1].instance = chrCtx; |
desc[1].src = src; |
desc[1].dst = dst; |
} |
} else { |
lumCtx = av_mallocz_array(2, sizeof(VScalerContext)); // (nmemb, size) argument order |
if (!lumCtx) |
return AVERROR(ENOMEM); |
chrCtx = &lumCtx[1]; |
desc[0].process = c->yuv2packedX ? packed_vscale : any_vscale; |
desc[0].instance = lumCtx; |
desc[0].src = src; |
desc[0].dst = dst; |
desc[0].alpha = c->alpPixBuf != 0; |
} |
ff_init_vscale_pfn(c, c->yuv2plane1, c->yuv2planeX, c->yuv2nv12cX, |
c->yuv2packed1, c->yuv2packed2, c->yuv2packedX, c->yuv2anyX, c->use_mmx_vfilter); |
return 0; |
} |
void ff_init_vscale_pfn(SwsContext *c, |
yuv2planar1_fn yuv2plane1, |
yuv2planarX_fn yuv2planeX, |
yuv2interleavedX_fn yuv2nv12cX, |
yuv2packed1_fn yuv2packed1, |
yuv2packed2_fn yuv2packed2, |
yuv2packedX_fn yuv2packedX, |
yuv2anyX_fn yuv2anyX, int use_mmx) |
{ |
VScalerContext *lumCtx = NULL; |
VScalerContext *chrCtx = NULL; |
int idx = c->numDesc - (c->is_internal_gamma ? 2 : 1); |
if (isPlanarYUV(c->dstFormat) || (isGray(c->dstFormat) && !isALPHA(c->dstFormat))) { |
if (!isGray(c->dstFormat)) { |
chrCtx = c->desc[idx].instance; |
chrCtx->filter[0] = use_mmx ? (int16_t*)c->chrMmxFilter : c->vChrFilter; |
chrCtx->filter_size = c->vChrFilterSize; |
chrCtx->filter_pos = c->vChrFilterPos; |
chrCtx->isMMX = use_mmx; |
--idx; |
if (yuv2nv12cX) chrCtx->pfn = yuv2nv12cX; |
else if (c->vChrFilterSize == 1) chrCtx->pfn = yuv2plane1; |
else chrCtx->pfn = yuv2planeX; |
} |
lumCtx = c->desc[idx].instance; |
lumCtx->filter[0] = use_mmx ? (int16_t*)c->lumMmxFilter : c->vLumFilter; |
lumCtx->filter[1] = use_mmx ? (int16_t*)c->alpMmxFilter : c->vLumFilter; |
lumCtx->filter_size = c->vLumFilterSize; |
lumCtx->filter_pos = c->vLumFilterPos; |
lumCtx->isMMX = use_mmx; |
if (c->vLumFilterSize == 1) lumCtx->pfn = yuv2plane1; |
else lumCtx->pfn = yuv2planeX; |
} else { |
lumCtx = c->desc[idx].instance; |
chrCtx = &lumCtx[1]; |
lumCtx->filter[0] = c->vLumFilter; |
lumCtx->filter_size = c->vLumFilterSize; |
lumCtx->filter_pos = c->vLumFilterPos; |
chrCtx->filter[0] = c->vChrFilter; |
chrCtx->filter_size = c->vChrFilterSize; |
chrCtx->filter_pos = c->vChrFilterPos; |
lumCtx->isMMX = use_mmx; |
chrCtx->isMMX = use_mmx; |
if (yuv2packedX) { |
if (c->yuv2packed1 && c->vLumFilterSize == 1 && c->vChrFilterSize <= 2) |
lumCtx->pfn = yuv2packed1; |
else if (c->yuv2packed2 && c->vLumFilterSize == 2 && c->vChrFilterSize == 2) |
lumCtx->pfn = yuv2packed2; |
lumCtx->yuv2packedX = yuv2packedX; |
} else |
lumCtx->pfn = yuv2anyX; |
} |
} |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/x86/Makefile |
---|
0,0 → 1,13 |
$(SUBDIR)x86/swscale_mmx.o: CFLAGS += $(NOREDZONE_FLAGS) |
OBJS += x86/rgb2rgb.o \ |
x86/swscale.o \ |
x86/yuv2rgb.o \ |
MMX-OBJS += x86/hscale_fast_bilinear_simd.o \ |
OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o |
YASM-OBJS += x86/input.o \ |
x86/output.o \ |
x86/scale.o \ |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/x86/hscale_fast_bilinear_simd.c |
---|
0,0 → 1,359 |
/* |
* Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> |
* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
#include "../swscale_internal.h" |
#include "libavutil/x86/asm.h" |
#include "libavutil/x86/cpu.h" |
#define RET 0xC3 // near return opcode for x86 |
#define PREFETCH "prefetchnta" |
#if HAVE_INLINE_ASM |
av_cold int ff_init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode, |
int16_t *filter, int32_t *filterPos, |
int numSplits) |
{ |
uint8_t *fragmentA; |
x86_reg imm8OfPShufW1A; |
x86_reg imm8OfPShufW2A; |
x86_reg fragmentLengthA; |
uint8_t *fragmentB; |
x86_reg imm8OfPShufW1B; |
x86_reg imm8OfPShufW2B; |
x86_reg fragmentLengthB; |
int fragmentPos; |
int xpos, i; |
// create an optimized horizontal scaling routine |
/* This scaler is made of runtime-generated MMXEXT code using specially tuned |
* pshufw instructions. For every four output pixels, if four input pixels |
* are enough for the fast bilinear scaling, then a chunk of fragmentB is |
* used. If five input pixels are needed, then a chunk of fragmentA is used. |
*/ |
// code fragment |
__asm__ volatile ( |
"jmp 9f \n\t" |
// Begin |
"0: \n\t" |
"movq (%%"REG_d", %%"REG_a"), %%mm3 \n\t" |
"movd (%%"REG_c", %%"REG_S"), %%mm0 \n\t" |
"movd 1(%%"REG_c", %%"REG_S"), %%mm1 \n\t" |
"punpcklbw %%mm7, %%mm1 \n\t" |
"punpcklbw %%mm7, %%mm0 \n\t" |
"pshufw $0xFF, %%mm1, %%mm1 \n\t" |
"1: \n\t" |
"pshufw $0xFF, %%mm0, %%mm0 \n\t" |
"2: \n\t" |
"psubw %%mm1, %%mm0 \n\t" |
"movl 8(%%"REG_b", %%"REG_a"), %%esi \n\t" |
"pmullw %%mm3, %%mm0 \n\t" |
"psllw $7, %%mm1 \n\t" |
"paddw %%mm1, %%mm0 \n\t" |
"movq %%mm0, (%%"REG_D", %%"REG_a") \n\t" |
"add $8, %%"REG_a" \n\t" |
// End |
"9: \n\t" |
"lea " LOCAL_MANGLE(0b) ", %0 \n\t" |
"lea " LOCAL_MANGLE(1b) ", %1 \n\t" |
"lea " LOCAL_MANGLE(2b) ", %2 \n\t" |
"dec %1 \n\t" |
"dec %2 \n\t" |
"sub %0, %1 \n\t" |
"sub %0, %2 \n\t" |
"lea " LOCAL_MANGLE(9b) ", %3 \n\t" |
"sub %0, %3 \n\t" |
: "=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A), |
"=r" (fragmentLengthA) |
); |
__asm__ volatile ( |
"jmp 9f \n\t" |
// Begin |
"0: \n\t" |
"movq (%%"REG_d", %%"REG_a"), %%mm3 \n\t" |
"movd (%%"REG_c", %%"REG_S"), %%mm0 \n\t" |
"punpcklbw %%mm7, %%mm0 \n\t" |
"pshufw $0xFF, %%mm0, %%mm1 \n\t" |
"1: \n\t" |
"pshufw $0xFF, %%mm0, %%mm0 \n\t" |
"2: \n\t" |
"psubw %%mm1, %%mm0 \n\t" |
"movl 8(%%"REG_b", %%"REG_a"), %%esi \n\t" |
"pmullw %%mm3, %%mm0 \n\t" |
"psllw $7, %%mm1 \n\t" |
"paddw %%mm1, %%mm0 \n\t" |
"movq %%mm0, (%%"REG_D", %%"REG_a") \n\t" |
"add $8, %%"REG_a" \n\t" |
// End |
"9: \n\t" |
"lea " LOCAL_MANGLE(0b) ", %0 \n\t" |
"lea " LOCAL_MANGLE(1b) ", %1 \n\t" |
"lea " LOCAL_MANGLE(2b) ", %2 \n\t" |
"dec %1 \n\t" |
"dec %2 \n\t" |
"sub %0, %1 \n\t" |
"sub %0, %2 \n\t" |
"lea " LOCAL_MANGLE(9b) ", %3 \n\t" |
"sub %0, %3 \n\t" |
: "=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B), |
"=r" (fragmentLengthB) |
); |
xpos = 0; // lumXInc/2 - 0x8000; // difference between pixel centers |
fragmentPos = 0; |
for (i = 0; i < dstW / numSplits; i++) { |
int xx = xpos >> 16; |
if ((i & 3) == 0) { |
int a = 0; |
int b = ((xpos + xInc) >> 16) - xx; |
int c = ((xpos + xInc * 2) >> 16) - xx; |
int d = ((xpos + xInc * 3) >> 16) - xx; |
int inc = (d + 1 < 4); |
uint8_t *fragment = inc ? fragmentB : fragmentA; |
x86_reg imm8OfPShufW1 = inc ? imm8OfPShufW1B : imm8OfPShufW1A; |
x86_reg imm8OfPShufW2 = inc ? imm8OfPShufW2B : imm8OfPShufW2A; |
x86_reg fragmentLength = inc ? fragmentLengthB : fragmentLengthA; |
int maxShift = 3 - (d + inc); |
int shift = 0; |
if (filterCode) { |
filter[i] = ((xpos & 0xFFFF) ^ 0xFFFF) >> 9; |
filter[i + 1] = (((xpos + xInc) & 0xFFFF) ^ 0xFFFF) >> 9; |
filter[i + 2] = (((xpos + xInc * 2) & 0xFFFF) ^ 0xFFFF) >> 9; |
filter[i + 3] = (((xpos + xInc * 3) & 0xFFFF) ^ 0xFFFF) >> 9; |
filterPos[i / 2] = xx; |
memcpy(filterCode + fragmentPos, fragment, fragmentLength); |
filterCode[fragmentPos + imm8OfPShufW1] = (a + inc) | |
((b + inc) << 2) | |
((c + inc) << 4) | |
((d + inc) << 6); |
filterCode[fragmentPos + imm8OfPShufW2] = a | (b << 2) | |
(c << 4) | |
(d << 6); |
if (i + 4 - inc >= dstW) |
shift = maxShift; // avoid overread |
else if ((filterPos[i / 2] & 3) <= maxShift) |
shift = filterPos[i / 2] & 3; // align |
if (shift && i >= shift) { |
filterCode[fragmentPos + imm8OfPShufW1] += 0x55 * shift; |
filterCode[fragmentPos + imm8OfPShufW2] += 0x55 * shift; |
filterPos[i / 2] -= shift; |
} |
} |
fragmentPos += fragmentLength; |
if (filterCode) |
filterCode[fragmentPos] = RET; |
} |
xpos += xInc; |
} |
if (filterCode) |
filterPos[((i / 2) + 1) & (~1)] = xpos >> 16; // needed to jump to the next part |
return fragmentPos + 1; |
} |
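The arithmetic the generated fragments implement is a 16.16 fixed-point DDA: the accumulator's high bits select the source pixel pair and its fraction, truncated to 7 bits, becomes the blend weight. A scalar sketch (essentially the C fallback in hscale_fast_bilinear.c):
#include <stdint.h>
static void hyscale_fast_ref(int16_t *dst, int dstW,
                             const uint8_t *src, int srcW, int xInc)
{
    unsigned xpos = 0;                          /* 16.16 source position */
    for (int i = 0; i < dstW; i++) {
        unsigned xx     = xpos >> 16;           /* integer source index  */
        unsigned xalpha = (xpos & 0xFFFF) >> 9; /* 7-bit blend weight    */
        dst[i] = (src[xx] << 7) + (src[xx + 1] - src[xx]) * xalpha;
        xpos  += xInc;
    }
}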
void ff_hyscale_fast_mmxext(SwsContext *c, int16_t *dst, |
int dstWidth, const uint8_t *src, |
int srcW, int xInc) |
{ |
int32_t *filterPos = c->hLumFilterPos; |
int16_t *filter = c->hLumFilter; |
void *mmxextFilterCode = c->lumMmxextFilterCode; |
int i; |
#if ARCH_X86_64 |
uint64_t retsave; |
#else |
#if defined(PIC) |
uint64_t ebxsave; |
#endif |
#endif |
__asm__ volatile( |
#if ARCH_X86_64 |
"mov -8(%%rsp), %%"REG_a" \n\t" |
"mov %%"REG_a", %5 \n\t" // retsave |
#else |
#if defined(PIC) |
"mov %%"REG_b", %5 \n\t" // ebxsave |
#endif |
#endif |
"pxor %%mm7, %%mm7 \n\t" |
"mov %0, %%"REG_c" \n\t" |
"mov %1, %%"REG_D" \n\t" |
"mov %2, %%"REG_d" \n\t" |
"mov %3, %%"REG_b" \n\t" |
"xor %%"REG_a", %%"REG_a" \n\t" // i |
PREFETCH" (%%"REG_c") \n\t" |
PREFETCH" 32(%%"REG_c") \n\t" |
PREFETCH" 64(%%"REG_c") \n\t" |
#if ARCH_X86_64 |
#define CALL_MMXEXT_FILTER_CODE \ |
"movl (%%"REG_b"), %%esi \n\t"\ |
"call *%4 \n\t"\ |
"movl (%%"REG_b", %%"REG_a"), %%esi \n\t"\ |
"add %%"REG_S", %%"REG_c" \n\t"\ |
"add %%"REG_a", %%"REG_D" \n\t"\ |
"xor %%"REG_a", %%"REG_a" \n\t"\ |
#else |
#define CALL_MMXEXT_FILTER_CODE \ |
"movl (%%"REG_b"), %%esi \n\t"\ |
"call *%4 \n\t"\ |
"addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\ |
"add %%"REG_a", %%"REG_D" \n\t"\ |
"xor %%"REG_a", %%"REG_a" \n\t"\ |
#endif /* ARCH_X86_64 */ |
CALL_MMXEXT_FILTER_CODE |
CALL_MMXEXT_FILTER_CODE |
CALL_MMXEXT_FILTER_CODE |
CALL_MMXEXT_FILTER_CODE |
CALL_MMXEXT_FILTER_CODE |
CALL_MMXEXT_FILTER_CODE |
CALL_MMXEXT_FILTER_CODE |
CALL_MMXEXT_FILTER_CODE |
#if ARCH_X86_64 |
"mov %5, %%"REG_a" \n\t" |
"mov %%"REG_a", -8(%%rsp) \n\t" |
#else |
#if defined(PIC) |
"mov %5, %%"REG_b" \n\t" |
#endif |
#endif |
:: "m" (src), "m" (dst), "m" (filter), "m" (filterPos), |
"m" (mmxextFilterCode) |
#if ARCH_X86_64 |
,"m"(retsave) |
#else |
#if defined(PIC) |
,"m" (ebxsave) |
#endif |
#endif |
: "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D |
#if ARCH_X86_64 || !defined(PIC) |
,"%"REG_b |
#endif |
); |
for (i = dstWidth - 1; (i * xInc) >> 16 >= srcW - 1; i--) |
dst[i] = src[srcW - 1] * 128; |
} |
void ff_hcscale_fast_mmxext(SwsContext *c, int16_t *dst1, int16_t *dst2, |
int dstWidth, const uint8_t *src1, |
const uint8_t *src2, int srcW, int xInc) |
{ |
int32_t *filterPos = c->hChrFilterPos; |
int16_t *filter = c->hChrFilter; |
void *mmxextFilterCode = c->chrMmxextFilterCode; |
int i; |
#if ARCH_X86_64 |
DECLARE_ALIGNED(8, uint64_t, retsave); |
#else |
#if defined(PIC) |
DECLARE_ALIGNED(8, uint64_t, ebxsave); |
#endif |
#endif |
__asm__ volatile( |
#if ARCH_X86_64 |
"mov -8(%%rsp), %%"REG_a" \n\t" |
"mov %%"REG_a", %7 \n\t" // retsave |
#else |
#if defined(PIC) |
"mov %%"REG_b", %7 \n\t" // ebxsave |
#endif |
#endif |
"pxor %%mm7, %%mm7 \n\t" |
"mov %0, %%"REG_c" \n\t" |
"mov %1, %%"REG_D" \n\t" |
"mov %2, %%"REG_d" \n\t" |
"mov %3, %%"REG_b" \n\t" |
"xor %%"REG_a", %%"REG_a" \n\t" // i |
PREFETCH" (%%"REG_c") \n\t" |
PREFETCH" 32(%%"REG_c") \n\t" |
PREFETCH" 64(%%"REG_c") \n\t" |
CALL_MMXEXT_FILTER_CODE |
CALL_MMXEXT_FILTER_CODE |
CALL_MMXEXT_FILTER_CODE |
CALL_MMXEXT_FILTER_CODE |
"xor %%"REG_a", %%"REG_a" \n\t" // i |
"mov %5, %%"REG_c" \n\t" // src2 |
"mov %6, %%"REG_D" \n\t" // dst2 |
PREFETCH" (%%"REG_c") \n\t" |
PREFETCH" 32(%%"REG_c") \n\t" |
PREFETCH" 64(%%"REG_c") \n\t" |
CALL_MMXEXT_FILTER_CODE |
CALL_MMXEXT_FILTER_CODE |
CALL_MMXEXT_FILTER_CODE |
CALL_MMXEXT_FILTER_CODE |
#if ARCH_X86_64 |
"mov %7, %%"REG_a" \n\t" |
"mov %%"REG_a", -8(%%rsp) \n\t" |
#else |
#if defined(PIC) |
"mov %7, %%"REG_b" \n\t" |
#endif |
#endif |
:: "m" (src1), "m" (dst1), "m" (filter), "m" (filterPos), |
"m" (mmxextFilterCode), "m" (src2), "m"(dst2) |
#if ARCH_X86_64 |
,"m"(retsave) |
#else |
#if defined(PIC) |
,"m" (ebxsave) |
#endif |
#endif |
: "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D |
#if ARCH_X86_64 || !defined(PIC) |
,"%"REG_b |
#endif |
); |
for (i = dstWidth - 1; (i * xInc) >> 16 >= srcW - 1; i--) { |
dst1[i] = src1[srcW - 1] * 128; |
dst2[i] = src2[srcW - 1] * 128; |
} |
} |
#endif //HAVE_INLINE_ASM |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/x86/input.asm |
---|
0,0 → 1,740 |
;****************************************************************************** |
;* x86-optimized input routines; does shuffling of packed |
;* YUV formats into individual planes, and converts RGB |
;* into YUV planes also. |
;* Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com> |
;* |
;* This file is part of FFmpeg. |
;* |
;* FFmpeg is free software; you can redistribute it and/or |
;* modify it under the terms of the GNU Lesser General Public |
;* License as published by the Free Software Foundation; either |
;* version 2.1 of the License, or (at your option) any later version. |
;* |
;* FFmpeg is distributed in the hope that it will be useful, |
;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
;* Lesser General Public License for more details. |
;* |
;* You should have received a copy of the GNU Lesser General Public |
;* License along with FFmpeg; if not, write to the Free Software |
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
;****************************************************************************** |
%include "libavutil/x86/x86util.asm" |
SECTION_RODATA |
%define RY 0x20DE |
%define GY 0x4087 |
%define BY 0x0C88 |
%define RU 0xECFF |
%define GU 0xDAC8 |
%define BU 0x3838 |
%define RV 0x3838 |
%define GV 0xD0E3 |
%define BV 0xF6E4 |
rgb_Yrnd: times 4 dd 0x80100 ; (16 << 15) + (0.5 << 9) |
rgb_UVrnd: times 4 dd 0x400100 ; (128 << 15) + (0.5 << 9) |
%define bgr_Ycoeff_12x4 16*4 + 16* 0 + tableq |
%define bgr_Ycoeff_3x56 16*4 + 16* 1 + tableq |
%define rgb_Ycoeff_12x4 16*4 + 16* 2 + tableq |
%define rgb_Ycoeff_3x56 16*4 + 16* 3 + tableq |
%define bgr_Ucoeff_12x4 16*4 + 16* 4 + tableq |
%define bgr_Ucoeff_3x56 16*4 + 16* 5 + tableq |
%define rgb_Ucoeff_12x4 16*4 + 16* 6 + tableq |
%define rgb_Ucoeff_3x56 16*4 + 16* 7 + tableq |
%define bgr_Vcoeff_12x4 16*4 + 16* 8 + tableq |
%define bgr_Vcoeff_3x56 16*4 + 16* 9 + tableq |
%define rgb_Vcoeff_12x4 16*4 + 16*10 + tableq |
%define rgb_Vcoeff_3x56 16*4 + 16*11 + tableq |
%define rgba_Ycoeff_rb 16*4 + 16*12 + tableq |
%define rgba_Ycoeff_br 16*4 + 16*13 + tableq |
%define rgba_Ycoeff_ga 16*4 + 16*14 + tableq |
%define rgba_Ycoeff_ag 16*4 + 16*15 + tableq |
%define rgba_Ucoeff_rb 16*4 + 16*16 + tableq |
%define rgba_Ucoeff_br 16*4 + 16*17 + tableq |
%define rgba_Ucoeff_ga 16*4 + 16*18 + tableq |
%define rgba_Ucoeff_ag 16*4 + 16*19 + tableq |
%define rgba_Vcoeff_rb 16*4 + 16*20 + tableq |
%define rgba_Vcoeff_br 16*4 + 16*21 + tableq |
%define rgba_Vcoeff_ga 16*4 + 16*22 + tableq |
%define rgba_Vcoeff_ag 16*4 + 16*23 + tableq |
; bgr_Ycoeff_12x4: times 2 dw BY, GY, 0, BY |
; bgr_Ycoeff_3x56: times 2 dw RY, 0, GY, RY |
; rgb_Ycoeff_12x4: times 2 dw RY, GY, 0, RY |
; rgb_Ycoeff_3x56: times 2 dw BY, 0, GY, BY |
; bgr_Ucoeff_12x4: times 2 dw BU, GU, 0, BU |
; bgr_Ucoeff_3x56: times 2 dw RU, 0, GU, RU |
; rgb_Ucoeff_12x4: times 2 dw RU, GU, 0, RU |
; rgb_Ucoeff_3x56: times 2 dw BU, 0, GU, BU |
; bgr_Vcoeff_12x4: times 2 dw BV, GV, 0, BV |
; bgr_Vcoeff_3x56: times 2 dw RV, 0, GV, RV |
; rgb_Vcoeff_12x4: times 2 dw RV, GV, 0, RV |
; rgb_Vcoeff_3x56: times 2 dw BV, 0, GV, BV |
; rgba_Ycoeff_rb: times 4 dw RY, BY |
; rgba_Ycoeff_br: times 4 dw BY, RY |
; rgba_Ycoeff_ga: times 4 dw GY, 0 |
; rgba_Ycoeff_ag: times 4 dw 0, GY |
; rgba_Ucoeff_rb: times 4 dw RU, BU |
; rgba_Ucoeff_br: times 4 dw BU, RU |
; rgba_Ucoeff_ga: times 4 dw GU, 0 |
; rgba_Ucoeff_ag: times 4 dw 0, GU |
; rgba_Vcoeff_rb: times 4 dw RV, BV |
; rgba_Vcoeff_br: times 4 dw BV, RV |
; rgba_Vcoeff_ga: times 4 dw GV, 0 |
; rgba_Vcoeff_ag: times 4 dw 0, GV |
shuf_rgb_12x4: db 0, 0x80, 1, 0x80, 2, 0x80, 3, 0x80, \ |
6, 0x80, 7, 0x80, 8, 0x80, 9, 0x80 |
shuf_rgb_3x56: db 2, 0x80, 3, 0x80, 4, 0x80, 5, 0x80, \ |
8, 0x80, 9, 0x80, 10, 0x80, 11, 0x80 |
SECTION .text |
;----------------------------------------------------------------------------- |
; RGB to Y/UV. |
; |
; void <fmt>ToY_<opt>(uint8_t *dst, const uint8_t *src, int w); |
; and |
; void <fmt>toUV_<opt>(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, |
; const uint8_t *unused, int w); |
;----------------------------------------------------------------------------- |
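For reference, the scalar computation being vectorized, using the coefficients and rounding constant defined above: Y = (RY*R + GY*G + BY*B + rgb_Yrnd) >> 9, which maps 8-bit RGB onto limited-range luma in [16 << 6, 235 << 6] of swscale's 15-bit intermediate format (the U/V sums work the same way against rgb_UVrnd). A sketch in C:
#include <stdint.h>
enum { kRY = 0x20DE, kGY = 0x4087, kBY = 0x0C88, kYrnd = 0x80100 };
static inline int16_t rgb_to_y15(uint8_t r, uint8_t g, uint8_t b)
{
    /* (0,0,0) -> 1024 (16 << 6); (255,255,255) -> 15040 (235 << 6) */
    return (int16_t)((kRY * r + kGY * g + kBY * b + kYrnd) >> 9);
}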
; %1 = nr. of XMM registers |
; %2 = rgb or bgr |
%macro RGB24_TO_Y_FN 2-3 |
cglobal %2 %+ 24ToY, 6, 6, %1, dst, src, u1, u2, w, table |
%if mmsize == 8 |
mova m5, [%2_Ycoeff_12x4] |
mova m6, [%2_Ycoeff_3x56] |
%define coeff1 m5 |
%define coeff2 m6 |
%elif ARCH_X86_64 |
mova m8, [%2_Ycoeff_12x4] |
mova m9, [%2_Ycoeff_3x56] |
%define coeff1 m8 |
%define coeff2 m9 |
%else ; x86-32 && mmsize == 16 |
%define coeff1 [%2_Ycoeff_12x4] |
%define coeff2 [%2_Ycoeff_3x56] |
%endif ; x86-32/64 && mmsize == 8/16 |
%if (ARCH_X86_64 || mmsize == 8) && %0 == 3 |
jmp mangle(private_prefix %+ _ %+ %3 %+ 24ToY %+ SUFFIX).body |
%else ; (ARCH_X86_64 || mmsize == 8) && %0 == 3 |
.body: |
%if cpuflag(ssse3) |
mova m7, [shuf_rgb_12x4] |
%define shuf_rgb1 m7 |
%if ARCH_X86_64 |
mova m10, [shuf_rgb_3x56] |
%define shuf_rgb2 m10 |
%else ; x86-32 |
%define shuf_rgb2 [shuf_rgb_3x56] |
%endif ; x86-32/64 |
%endif ; cpuflag(ssse3) |
%if ARCH_X86_64 |
movsxd wq, wd |
%endif |
add wq, wq |
add dstq, wq |
neg wq |
%if notcpuflag(ssse3) |
pxor m7, m7 |
%endif ; !cpuflag(ssse3) |
mova m4, [rgb_Yrnd] |
.loop: |
%if cpuflag(ssse3) |
movu m0, [srcq+0] ; (byte) { Bx, Gx, Rx }[0-3] |
movu m2, [srcq+12] ; (byte) { Bx, Gx, Rx }[4-7] |
pshufb m1, m0, shuf_rgb2 ; (word) { R0, B1, G1, R1, R2, B3, G3, R3 } |
pshufb m0, shuf_rgb1 ; (word) { B0, G0, R0, B1, B2, G2, R2, B3 } |
pshufb m3, m2, shuf_rgb2 ; (word) { R4, B5, G5, R5, R6, B7, G7, R7 } |
pshufb m2, shuf_rgb1 ; (word) { B4, G4, R4, B5, B6, G6, R6, B7 } |
%else ; !cpuflag(ssse3) |
movd m0, [srcq+0] ; (byte) { B0, G0, R0, B1 } |
movd m1, [srcq+2] ; (byte) { R0, B1, G1, R1 } |
movd m2, [srcq+6] ; (byte) { B2, G2, R2, B3 } |
movd m3, [srcq+8] ; (byte) { R2, B3, G3, R3 } |
%if mmsize == 16 ; i.e. sse2 |
punpckldq m0, m2 ; (byte) { B0, G0, R0, B1, B2, G2, R2, B3 } |
punpckldq m1, m3 ; (byte) { R0, B1, G1, R1, R2, B3, G3, R3 } |
movd m2, [srcq+12] ; (byte) { B4, G4, R4, B5 } |
movd m3, [srcq+14] ; (byte) { R4, B5, G5, R5 } |
movd m5, [srcq+18] ; (byte) { B6, G6, R6, B7 } |
movd m6, [srcq+20] ; (byte) { R6, B7, G7, R7 } |
punpckldq m2, m5 ; (byte) { B4, G4, R4, B5, B6, G6, R6, B7 } |
punpckldq m3, m6 ; (byte) { R4, B5, G5, R5, R6, B7, G7, R7 } |
%endif ; mmsize == 16 |
punpcklbw m0, m7 ; (word) { B0, G0, R0, B1, B2, G2, R2, B3 } |
punpcklbw m1, m7 ; (word) { R0, B1, G1, R1, R2, B3, G3, R3 } |
punpcklbw m2, m7 ; (word) { B4, G4, R4, B5, B6, G6, R6, B7 } |
punpcklbw m3, m7 ; (word) { R4, B5, G5, R5, R6, B7, G7, R7 } |
%endif ; cpuflag(ssse3) |
add srcq, 3 * mmsize / 2 |
pmaddwd m0, coeff1 ; (dword) { B0*BY + G0*GY, B1*BY, B2*BY + G2*GY, B3*BY } |
pmaddwd m1, coeff2 ; (dword) { R0*RY, G1*GY + R1*RY, R2*RY, G3*GY + R3*RY } |
pmaddwd m2, coeff1 ; (dword) { B4*BY + G4*GY, B5*BY, B6*BY + G6*GY, B7*BY } |
pmaddwd m3, coeff2 ; (dword) { R4*RY, G5*GY + R5*RY, R6*RY, G7*GY + R7*RY } |
paddd m0, m1 ; (dword) { Bx*BY + Gx*GY + Rx*RY }[0-3] |
paddd m2, m3 ; (dword) { Bx*BY + Gx*GY + Rx*RY }[4-7] |
paddd m0, m4 ; += rgb_Yrnd, i.e. (dword) { Y[0-3] } |
paddd m2, m4 ; += rgb_Yrnd, i.e. (dword) { Y[4-7] } |
psrad m0, 9 |
psrad m2, 9 |
packssdw m0, m2 ; (word) { Y[0-7] } |
mova [dstq+wq], m0 |
add wq, mmsize |
jl .loop |
REP_RET |
%endif ; (ARCH_X86_64 || mmsize == 8) && %0 == 3 |
%endmacro |
; %1 = nr. of XMM registers |
; %2 = rgb or bgr |
%macro RGB24_TO_UV_FN 2-3 |
cglobal %2 %+ 24ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, table |
%if ARCH_X86_64 |
mova m8, [%2_Ucoeff_12x4] |
mova m9, [%2_Ucoeff_3x56] |
mova m10, [%2_Vcoeff_12x4] |
mova m11, [%2_Vcoeff_3x56] |
%define coeffU1 m8 |
%define coeffU2 m9 |
%define coeffV1 m10 |
%define coeffV2 m11 |
%else ; x86-32 |
%define coeffU1 [%2_Ucoeff_12x4] |
%define coeffU2 [%2_Ucoeff_3x56] |
%define coeffV1 [%2_Vcoeff_12x4] |
%define coeffV2 [%2_Vcoeff_3x56] |
%endif ; x86-32/64 |
%if ARCH_X86_64 && %0 == 3 |
jmp mangle(private_prefix %+ _ %+ %3 %+ 24ToUV %+ SUFFIX).body |
%else ; ARCH_X86_64 && %0 == 3 |
.body: |
%if cpuflag(ssse3) |
mova m7, [shuf_rgb_12x4] |
%define shuf_rgb1 m7 |
%if ARCH_X86_64 |
mova m12, [shuf_rgb_3x56] |
%define shuf_rgb2 m12 |
%else ; x86-32 |
%define shuf_rgb2 [shuf_rgb_3x56] |
%endif ; x86-32/64 |
%endif ; cpuflag(ssse3) |
%if ARCH_X86_64 |
movsxd wq, dword r5m |
%else ; x86-32 |
mov wq, r5m |
%endif |
add wq, wq |
add dstUq, wq |
add dstVq, wq |
neg wq |
mova m6, [rgb_UVrnd] |
%if notcpuflag(ssse3) |
pxor m7, m7 |
%endif |
.loop: |
%if cpuflag(ssse3) |
movu m0, [srcq+0] ; (byte) { Bx, Gx, Rx }[0-3] |
movu m4, [srcq+12] ; (byte) { Bx, Gx, Rx }[4-7] |
pshufb m1, m0, shuf_rgb2 ; (word) { R0, B1, G1, R1, R2, B3, G3, R3 } |
pshufb m0, shuf_rgb1 ; (word) { B0, G0, R0, B1, B2, G2, R2, B3 } |
%else ; !cpuflag(ssse3) |
movd m0, [srcq+0] ; (byte) { B0, G0, R0, B1 } |
movd m1, [srcq+2] ; (byte) { R0, B1, G1, R1 } |
movd m4, [srcq+6] ; (byte) { B2, G2, R2, B3 } |
movd m5, [srcq+8] ; (byte) { R2, B3, G3, R3 } |
%if mmsize == 16 |
punpckldq m0, m4 ; (byte) { B0, G0, R0, B1, B2, G2, R2, B3 } |
punpckldq m1, m5 ; (byte) { R0, B1, G1, R1, R2, B3, G3, R3 } |
movd m4, [srcq+12] ; (byte) { B4, G4, R4, B5 } |
movd m5, [srcq+14] ; (byte) { R4, B5, G5, R5 } |
%endif ; mmsize == 16 |
punpcklbw m0, m7 ; (word) { B0, G0, R0, B1, B2, G2, R2, B3 } |
punpcklbw m1, m7 ; (word) { R0, B1, G1, R1, R2, B3, G3, R3 } |
%endif ; cpuflag(ssse3) |
pmaddwd m2, m0, coeffV1 ; (dword) { B0*BV + G0*GV, B1*BV, B2*BV + G2*GV, B3*BV } |
pmaddwd m3, m1, coeffV2 ; (dword) { R0*RV, G1*GV + R1*RV, R2*RV, G3*GV + R3*RV } |
pmaddwd m0, coeffU1 ; (dword) { B0*BU + G0*GU, B1*BU, B2*BU + G2*GU, B3*BU } |
pmaddwd m1, coeffU2 ; (dword) { R0*RU, G1*GU + R1*RU, R2*RU, G3*GU + R3*RU } |
paddd m0, m1 ; (dword) { Bx*BU + Gx*GU + Rx*RU }[0-3] |
paddd m2, m3 ; (dword) { Bx*BV + Gx*GV + Rx*RV }[0-3] |
%if cpuflag(ssse3) |
pshufb m5, m4, shuf_rgb2 ; (word) { R4, B5, G5, R5, R6, B7, G7, R7 } |
pshufb m4, shuf_rgb1 ; (word) { B4, G4, R4, B5, B6, G6, R6, B7 } |
%else ; !cpuflag(ssse3) |
%if mmsize == 16 |
movd m1, [srcq+18] ; (byte) { B6, G6, R6, B7 } |
movd m3, [srcq+20] ; (byte) { R6, B7, G7, R7 } |
punpckldq m4, m1 ; (byte) { B4, G4, R4, B5, B6, G6, R6, B7 } |
punpckldq m5, m3 ; (byte) { R4, B5, G5, R5, R6, B7, G7, R7 } |
%endif ; mmsize == 16 && !cpuflag(ssse3) |
punpcklbw m4, m7 ; (word) { B4, G4, R4, B5, B6, G6, R6, B7 } |
punpcklbw m5, m7 ; (word) { R4, B5, G5, R5, R6, B7, G7, R7 } |
%endif ; cpuflag(ssse3) |
add srcq, 3 * mmsize / 2 |
pmaddwd m1, m4, coeffU1 ; (dword) { B4*BU + G4*GU, B5*BU, B6*BU + G6*GU, B7*BU } |
pmaddwd m3, m5, coeffU2 ; (dword) { R4*RU, G5*GU + R5*RU, R6*RU, G7*GU + R7*RU } |
pmaddwd m4, coeffV1 ; (dword) { B4*BV + G4*GV, B5*BV, B6*BV + G6*GV, B7*BV } |
pmaddwd m5, coeffV2 ; (dword) { R4*RV, G5*GV + R5*RV, R6*RV, G7*GV + R7*RV } |
paddd m1, m3 ; (dword) { Bx*BU + Gx*GU + Rx*RU }[4-7] |
paddd m4, m5 ; (dword) { Bx*BV + Gx*GV + Rx*RV }[4-7] |
paddd m0, m6 ; += rgb_UVrnd, i.e. (dword) { U[0-3] } |
paddd m2, m6 ; += rgb_UVrnd, i.e. (dword) { V[0-3] } |
paddd m1, m6 ; += rgb_UVrnd, i.e. (dword) { U[4-7] } |
paddd m4, m6 ; += rgb_UVrnd, i.e. (dword) { V[4-7] } |
psrad m0, 9 |
psrad m2, 9 |
psrad m1, 9 |
psrad m4, 9 |
packssdw m0, m1 ; (word) { U[0-7] } |
packssdw m2, m4 ; (word) { V[0-7] } |
mova [dstUq+wq], m0 ; identical store for mmsize == 8 and 16 |
mova [dstVq+wq], m2 |
add wq, mmsize |
jl .loop |
REP_RET |
%endif ; ARCH_X86_64 && %0 == 3 |
%endmacro |
; %1 = nr. of XMM registers for rgb-to-Y func |
; %2 = nr. of XMM registers for rgb-to-UV func |
%macro RGB24_FUNCS 2 |
RGB24_TO_Y_FN %1, rgb |
RGB24_TO_Y_FN %1, bgr, rgb |
RGB24_TO_UV_FN %2, rgb |
RGB24_TO_UV_FN %2, bgr, rgb |
%endmacro |
%if ARCH_X86_32 |
INIT_MMX mmx |
RGB24_FUNCS 0, 0 |
%endif |
INIT_XMM sse2 |
RGB24_FUNCS 10, 12 |
INIT_XMM ssse3 |
RGB24_FUNCS 11, 13 |
%if HAVE_AVX_EXTERNAL |
INIT_XMM avx |
RGB24_FUNCS 11, 13 |
%endif |
; %1 = nr. of XMM registers |
; %2-5 = rgba, bgra, argb or abgr (in individual characters) |
%macro RGB32_TO_Y_FN 5-6 |
cglobal %2%3%4%5 %+ ToY, 6, 6, %1, dst, src, u1, u2, w, table |
mova m5, [rgba_Ycoeff_%2%4] |
mova m6, [rgba_Ycoeff_%3%5] |
%if %0 == 6 |
jmp mangle(private_prefix %+ _ %+ %6 %+ ToY %+ SUFFIX).body |
%else ; %0 == 6 |
.body: |
%if ARCH_X86_64 |
movsxd wq, wd |
%endif |
add wq, wq |
sub wq, mmsize - 1 |
lea srcq, [srcq+wq*2] |
add dstq, wq |
neg wq |
mova m4, [rgb_Yrnd] |
pcmpeqb m7, m7 |
psrlw m7, 8 ; (word) { 0x00ff } x4 |
.loop: |
; FIXME check alignment and use mova |
movu m0, [srcq+wq*2+0] ; (byte) { Bx, Gx, Rx, xx }[0-3] |
movu m2, [srcq+wq*2+mmsize] ; (byte) { Bx, Gx, Rx, xx }[4-7] |
DEINTB 1, 0, 3, 2, 7 ; (word) { Gx, xx (m0/m2) or Bx, Rx (m1/m3) }[0-3]/[4-7] |
pmaddwd m1, m5 ; (dword) { Bx*BY + Rx*RY }[0-3] |
pmaddwd m0, m6 ; (dword) { Gx*GY }[0-3] |
pmaddwd m3, m5 ; (dword) { Bx*BY + Rx*RY }[4-7] |
pmaddwd m2, m6 ; (dword) { Gx*GY }[4-7] |
paddd m0, m4 ; += rgb_Yrnd |
paddd m2, m4 ; += rgb_Yrnd |
paddd m0, m1 ; (dword) { Y[0-3] } |
paddd m2, m3 ; (dword) { Y[4-7] } |
psrad m0, 9 |
psrad m2, 9 |
packssdw m0, m2 ; (word) { Y[0-7] } |
mova [dstq+wq], m0 |
add wq, mmsize |
jl .loop |
sub wq, mmsize - 1 |
jz .end |
add srcq, 2*mmsize - 2 |
add dstq, mmsize - 1 |
.loop2: |
movd m0, [srcq+wq*2+0] ; (byte) { Bx, Gx, Rx, xx }[0-3] |
DEINTB 1, 0, 3, 2, 7 ; (word) { Gx, xx (m0/m2) or Bx, Rx (m1/m3) }[0-3]/[4-7] |
pmaddwd m1, m5 ; (dword) { Bx*BY + Rx*RY }[0-3] |
pmaddwd m0, m6 ; (dword) { Gx*GY }[0-3] |
paddd m0, m4 ; += rgb_Yrnd |
paddd m0, m1 ; (dword) { Y[0-3] } |
psrad m0, 9 |
packssdw m0, m0 ; (word) { Y[0-7] } |
movd [dstq+wq], m0 |
add wq, 2 |
jl .loop2 |
.end: |
REP_RET |
%endif ; %0 == 6 |
%endmacro |
; %1 = nr. of XMM registers |
; %2-5 = rgba, bgra, argb or abgr (in individual characters) |
%macro RGB32_TO_UV_FN 5-6 |
cglobal %2%3%4%5 %+ ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, table |
%if ARCH_X86_64 |
mova m8, [rgba_Ucoeff_%2%4] |
mova m9, [rgba_Ucoeff_%3%5] |
mova m10, [rgba_Vcoeff_%2%4] |
mova m11, [rgba_Vcoeff_%3%5] |
%define coeffU1 m8 |
%define coeffU2 m9 |
%define coeffV1 m10 |
%define coeffV2 m11 |
%else ; x86-32 |
%define coeffU1 [rgba_Ucoeff_%2%4] |
%define coeffU2 [rgba_Ucoeff_%3%5] |
%define coeffV1 [rgba_Vcoeff_%2%4] |
%define coeffV2 [rgba_Vcoeff_%3%5] |
%endif ; x86-64/32 |
%if ARCH_X86_64 && %0 == 6 |
jmp mangle(private_prefix %+ _ %+ %6 %+ ToUV %+ SUFFIX).body |
%else ; ARCH_X86_64 && %0 == 6 |
.body: |
%if ARCH_X86_64 |
movsxd wq, dword r5m |
%else ; x86-32 |
mov wq, r5m |
%endif |
add wq, wq |
sub wq, mmsize - 1 |
add dstUq, wq |
add dstVq, wq |
lea srcq, [srcq+wq*2] |
neg wq |
pcmpeqb m7, m7 |
psrlw m7, 8 ; (word) { 0x00ff } x4 |
mova m6, [rgb_UVrnd] |
.loop: |
; FIXME check alignment and use mova |
movu m0, [srcq+wq*2+0] ; (byte) { Bx, Gx, Rx, xx }[0-3] |
movu m4, [srcq+wq*2+mmsize] ; (byte) { Bx, Gx, Rx, xx }[4-7] |
DEINTB 1, 0, 5, 4, 7 ; (word) { Gx, xx (m0/m4) or Bx, Rx (m1/m5) }[0-3]/[4-7] |
pmaddwd m3, m1, coeffV1 ; (dword) { Bx*BV + Rx*RV }[0-3] |
pmaddwd m2, m0, coeffV2 ; (dword) { Gx*GV }[0-3] |
pmaddwd m1, coeffU1 ; (dword) { Bx*BU + Rx*RU }[0-3] |
pmaddwd m0, coeffU2 ; (dword) { Gx*GU }[0-3] |
paddd m3, m6 ; += rgb_UVrnd |
paddd m1, m6 ; += rgb_UVrnd |
paddd m2, m3 ; (dword) { V[0-3] } |
paddd m0, m1 ; (dword) { U[0-3] } |
pmaddwd m3, m5, coeffV1 ; (dword) { Bx*BV + Rx*RV }[4-7] |
pmaddwd m1, m4, coeffV2 ; (dword) { Gx*GV }[4-7] |
pmaddwd m5, coeffU1 ; (dword) { Bx*BU + Rx*RU }[4-7] |
pmaddwd m4, coeffU2 ; (dword) { Gx*GU }[4-7] |
paddd m3, m6 ; += rgb_UVrnd |
paddd m5, m6 ; += rgb_UVrnd |
psrad m0, 9 |
paddd m1, m3 ; (dword) { V[4-7] } |
paddd m4, m5 ; (dword) { U[4-7] } |
psrad m2, 9 |
psrad m4, 9 |
psrad m1, 9 |
packssdw m0, m4 ; (word) { U[0-7] } |
packssdw m2, m1 ; (word) { V[0-7] } |
mova [dstUq+wq], m0 ; identical store for mmsize == 8 and 16 |
mova [dstVq+wq], m2 |
add wq, mmsize |
jl .loop |
sub wq, mmsize - 1 |
jz .end |
add srcq , 2*mmsize - 2 |
add dstUq, mmsize - 1 |
add dstVq, mmsize - 1 |
.loop2: |
movd m0, [srcq+wq*2] ; (byte) { Bx, Gx, Rx, xx }[0-3] |
DEINTB 1, 0, 5, 4, 7 ; (word) { Gx, xx (m0/m4) or Bx, Rx (m1/m5) }[0-3]/[4-7] |
pmaddwd m3, m1, coeffV1 ; (dword) { Bx*BV + Rx*RV }[0-3] |
pmaddwd m2, m0, coeffV2 ; (dword) { Gx*GV }[0-3] |
pmaddwd m1, coeffU1 ; (dword) { Bx*BU + Rx*RU }[0-3] |
pmaddwd m0, coeffU2 ; (dword) { Gx*GU }[0-3] |
paddd m3, m6 ; += rgb_UVrnd |
paddd m1, m6 ; += rgb_UVrnd |
paddd m2, m3 ; (dword) { V[0-3] } |
paddd m0, m1 ; (dword) { U[0-3] } |
psrad m0, 9 |
psrad m2, 9 |
packssdw m0, m0 ; (word) { U[0-7] } |
packssdw m2, m2 ; (word) { V[0-7] } |
movd [dstUq+wq], m0 |
movd [dstVq+wq], m2 |
add wq, 2 |
jl .loop2 |
.end: |
REP_RET |
%endif ; ARCH_X86_64 && %0 == 6 |
%endmacro |
; %1 = nr. of XMM registers for rgb-to-Y func |
; %2 = nr. of XMM registers for rgb-to-UV func |
%macro RGB32_FUNCS 2 |
RGB32_TO_Y_FN %1, r, g, b, a |
RGB32_TO_Y_FN %1, b, g, r, a, rgba |
RGB32_TO_Y_FN %1, a, r, g, b, rgba |
RGB32_TO_Y_FN %1, a, b, g, r, rgba |
RGB32_TO_UV_FN %2, r, g, b, a |
RGB32_TO_UV_FN %2, b, g, r, a, rgba |
RGB32_TO_UV_FN %2, a, r, g, b, rgba |
RGB32_TO_UV_FN %2, a, b, g, r, rgba |
%endmacro |
%if ARCH_X86_32 |
INIT_MMX mmx |
RGB32_FUNCS 0, 0 |
%endif |
INIT_XMM sse2 |
RGB32_FUNCS 8, 12 |
%if HAVE_AVX_EXTERNAL |
INIT_XMM avx |
RGB32_FUNCS 8, 12 |
%endif |
;----------------------------------------------------------------------------- |
; YUYV/UYVY/NV12/NV21 packed pixel shuffling. |
; |
; void <fmt>ToY_<opt>(uint8_t *dst, const uint8_t *src, int w); |
; and |
; void <fmt>toUV_<opt>(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, |
; const uint8_t *unused, int w); |
;----------------------------------------------------------------------------- |
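The byte layouts being shuffled: YUYV keeps luma in the even bytes and UYVY in the odd ones, with U/V alternating every other chroma byte. Scalar equivalents of the extraction (cf. the plain-C input routines in input.c):
#include <stdint.h>
static void yuyv_to_y_ref(uint8_t *dst, const uint8_t *src, int w)
{
    for (int i = 0; i < w; i++)
        dst[i] = src[2 * i];            /* uyvy: src[2 * i + 1] */
}
static void yuyv_to_uv_ref(uint8_t *dstU, uint8_t *dstV,
                           const uint8_t *src, int w)
{
    for (int i = 0; i < w; i++) {
        dstU[i] = src[4 * i + 1];       /* uyvy: src[4 * i]     */
        dstV[i] = src[4 * i + 3];       /* uyvy: src[4 * i + 2] */
    }
}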
; %1 = a (aligned) or u (unaligned) |
; %2 = yuyv or uyvy |
%macro LOOP_YUYV_TO_Y 2 |
.loop_%1: |
mov%1 m0, [srcq+wq*2] ; (byte) { Y0, U0, Y1, V0, ... } |
mov%1 m1, [srcq+wq*2+mmsize] ; (byte) { Y8, U4, Y9, V4, ... } |
%ifidn %2, yuyv |
pand m0, m2 ; (word) { Y0, Y1, ..., Y7 } |
pand m1, m2 ; (word) { Y8, Y9, ..., Y15 } |
%else ; uyvy |
psrlw m0, 8 ; (word) { Y0, Y1, ..., Y7 } |
psrlw m1, 8 ; (word) { Y8, Y9, ..., Y15 } |
%endif ; yuyv/uyvy |
packuswb m0, m1 ; (byte) { Y0, ..., Y15 } |
mova [dstq+wq], m0 |
add wq, mmsize |
jl .loop_%1 |
REP_RET |
%endmacro |
; %1 = nr. of XMM registers |
; %2 = yuyv or uyvy |
; %3 = if specified, it means that unaligned and aligned code in loop |
; will be the same (i.e. YUYV+AVX), and thus we don't need to |
; split the loop in an aligned and unaligned case |
%macro YUYV_TO_Y_FN 2-3 |
cglobal %2ToY, 5, 5, %1, dst, unused0, unused1, src, w |
%if ARCH_X86_64 |
movsxd wq, wd |
%endif |
add dstq, wq |
%if mmsize == 16 |
test srcq, 15 |
%endif |
lea srcq, [srcq+wq*2] |
%ifidn %2, yuyv |
pcmpeqb m2, m2 ; (byte) { 0xff } x 16 |
psrlw m2, 8 ; (word) { 0x00ff } x 8 |
%endif ; yuyv |
%if mmsize == 16 |
jnz .loop_u_start |
neg wq |
LOOP_YUYV_TO_Y a, %2 |
.loop_u_start: |
neg wq |
LOOP_YUYV_TO_Y u, %2 |
%else ; mmsize == 8 |
neg wq |
LOOP_YUYV_TO_Y a, %2 |
%endif ; mmsize == 8/16 |
%endmacro |
; %1 = a (aligned) or u (unaligned) |
; %2 = yuyv or uyvy |
%macro LOOP_YUYV_TO_UV 2 |
.loop_%1: |
%ifidn %2, yuyv |
mov%1 m0, [srcq+wq*4] ; (byte) { Y0, U0, Y1, V0, ... } |
mov%1 m1, [srcq+wq*4+mmsize] ; (byte) { Y8, U4, Y9, V4, ... } |
psrlw m0, 8 ; (word) { U0, V0, ..., U3, V3 } |
psrlw m1, 8 ; (word) { U4, V4, ..., U7, V7 } |
%else ; uyvy |
%if cpuflag(avx) |
vpand m0, m2, [srcq+wq*4] ; (word) { U0, V0, ..., U3, V3 } |
vpand m1, m2, [srcq+wq*4+mmsize] ; (word) { U4, V4, ..., U7, V7 } |
%else |
mov%1 m0, [srcq+wq*4] ; (byte) { Y0, U0, Y1, V0, ... } |
mov%1 m1, [srcq+wq*4+mmsize] ; (byte) { Y8, U4, Y9, V4, ... } |
pand m0, m2 ; (word) { U0, V0, ..., U3, V3 } |
pand m1, m2 ; (word) { U4, V4, ..., U7, V7 } |
%endif |
%endif ; yuyv/uyvy |
packuswb m0, m1 ; (byte) { U0, V0, ..., U7, V7 } |
pand m1, m0, m2 ; (word) { U0, U1, ..., U7 } |
psrlw m0, 8 ; (word) { V0, V1, ..., V7 } |
%if mmsize == 16 |
packuswb m1, m0 ; (byte) { U0, ... U7, V1, ... V7 } |
movh [dstUq+wq], m1 |
movhps [dstVq+wq], m1 |
%else ; mmsize == 8 |
packuswb m1, m1 ; (byte) { U0, ... U3 } |
packuswb m0, m0 ; (byte) { V0, ... V3 } |
movh [dstUq+wq], m1 |
movh [dstVq+wq], m0 |
%endif ; mmsize == 8/16 |
add wq, mmsize / 2 |
jl .loop_%1 |
REP_RET |
%endmacro |
; %1 = nr. of XMM registers |
; %2 = yuyv or uyvy |
; %3 = if specified, it means that unaligned and aligned code in loop |
; will be the same (i.e. UYVY+AVX), and thus we don't need to |
; split the loop in an aligned and unaligned case |
%macro YUYV_TO_UV_FN 2-3 |
cglobal %2ToUV, 4, 5, %1, dstU, dstV, unused, src, w |
%if ARCH_X86_64 |
movsxd wq, dword r5m |
%else ; x86-32 |
mov wq, r5m |
%endif |
add dstUq, wq |
add dstVq, wq |
%if mmsize == 16 && %0 == 2 |
test srcq, 15 |
%endif |
lea srcq, [srcq+wq*4] |
pcmpeqb m2, m2 ; (byte) { 0xff } x 16 |
psrlw m2, 8 ; (word) { 0x00ff } x 8 |
; NOTE: if uyvy+avx, u/a are identical |
%if mmsize == 16 && %0 == 2 |
jnz .loop_u_start |
neg wq |
LOOP_YUYV_TO_UV a, %2 |
.loop_u_start: |
neg wq |
LOOP_YUYV_TO_UV u, %2 |
%else ; mmsize == 8 |
neg wq |
LOOP_YUYV_TO_UV a, %2 |
%endif ; mmsize == 8/16 |
%endmacro |
; %1 = a (aligned) or u (unaligned) |
; %2 = nv12 or nv21 |
%macro LOOP_NVXX_TO_UV 2 |
.loop_%1: |
mov%1 m0, [srcq+wq*2] ; (byte) { U0, V0, U1, V1, ... } |
mov%1 m1, [srcq+wq*2+mmsize] ; (byte) { U8, V8, U9, V9, ... } |
pand m2, m0, m5 ; (word) { U0, U1, ..., U7 } |
pand m3, m1, m5 ; (word) { U8, U9, ..., U15 } |
psrlw m0, 8 ; (word) { V0, V1, ..., V7 } |
psrlw m1, 8 ; (word) { V8, V9, ..., V15 } |
packuswb m2, m3 ; (byte) { U0, ..., U15 } |
packuswb m0, m1 ; (byte) { V0, ..., V15 } |
%ifidn %2, nv12 |
mova [dstUq+wq], m2 |
mova [dstVq+wq], m0 |
%else ; nv21 |
mova [dstVq+wq], m2 |
mova [dstUq+wq], m0 |
%endif ; nv12/21 |
add wq, mmsize |
jl .loop_%1 |
REP_RET |
%endmacro |
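Scalar equivalent of the NV12/NV21 deinterleave the loop above implements; the two formats differ only in which byte of each pair is U (illustrative):
#include <stdint.h>
static void nv12_to_uv_ref(uint8_t *dstU, uint8_t *dstV,
                           const uint8_t *src, int w)
{
    for (int i = 0; i < w; i++) {
        dstU[i] = src[2 * i];           /* nv21: swap the two stores */
        dstV[i] = src[2 * i + 1];
    }
}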
; %1 = nr. of XMM registers |
; %2 = nv12 or nv21 |
%macro NVXX_TO_UV_FN 2 |
cglobal %2ToUV, 4, 5, %1, dstU, dstV, unused, src, w |
%if ARCH_X86_64 |
movsxd wq, dword r5m |
%else ; x86-32 |
mov wq, r5m |
%endif |
add dstUq, wq |
add dstVq, wq |
%if mmsize == 16 |
test srcq, 15 |
%endif |
lea srcq, [srcq+wq*2] |
pcmpeqb m5, m5 ; (byte) { 0xff } x 16 |
psrlw m5, 8 ; (word) { 0x00ff } x 8 |
%if mmsize == 16 |
jnz .loop_u_start |
neg wq |
LOOP_NVXX_TO_UV a, %2 |
.loop_u_start: |
neg wq |
LOOP_NVXX_TO_UV u, %2 |
%else ; mmsize == 8 |
neg wq |
LOOP_NVXX_TO_UV a, %2 |
%endif ; mmsize == 8/16 |
%endmacro |
%if ARCH_X86_32 |
INIT_MMX mmx |
YUYV_TO_Y_FN 0, yuyv |
YUYV_TO_Y_FN 0, uyvy |
YUYV_TO_UV_FN 0, yuyv |
YUYV_TO_UV_FN 0, uyvy |
NVXX_TO_UV_FN 0, nv12 |
NVXX_TO_UV_FN 0, nv21 |
%endif |
INIT_XMM sse2 |
YUYV_TO_Y_FN 3, yuyv |
YUYV_TO_Y_FN 2, uyvy |
YUYV_TO_UV_FN 3, yuyv |
YUYV_TO_UV_FN 3, uyvy |
NVXX_TO_UV_FN 5, nv12 |
NVXX_TO_UV_FN 5, nv21 |
%if HAVE_AVX_EXTERNAL |
INIT_XMM avx |
; in theory, we could write a yuy2-to-y using vpand (i.e. AVX), but |
; that's not faster in practice |
YUYV_TO_UV_FN 3, yuyv |
YUYV_TO_UV_FN 3, uyvy, 1 |
NVXX_TO_UV_FN 5, nv12 |
NVXX_TO_UV_FN 5, nv21 |
%endif |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/x86/output.asm |
---|
0,0 → 1,413 |
;****************************************************************************** |
;* x86-optimized vertical line scaling functions |
;* Copyright (c) 2011 Ronald S. Bultje <rsbultje@gmail.com> |
;* Kieran Kunhya <kieran@kunhya.com> |
;* |
;* This file is part of FFmpeg. |
;* |
;* FFmpeg is free software; you can redistribute it and/or |
;* modify it under the terms of the GNU Lesser General Public |
;* License as published by the Free Software Foundation; either |
;* version 2.1 of the License, or (at your option) any later version. |
;* |
;* FFmpeg is distributed in the hope that it will be useful, |
;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
;* Lesser General Public License for more details. |
;* |
;* You should have received a copy of the GNU Lesser General Public |
;* License along with FFmpeg; if not, write to the Free Software |
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
;****************************************************************************** |
%include "libavutil/x86/x86util.asm" |
SECTION_RODATA |
minshort: times 8 dw 0x8000 |
yuv2yuvX_16_start: times 4 dd 0x4000 - 0x40000000 |
yuv2yuvX_10_start: times 4 dd 0x10000 |
yuv2yuvX_9_start: times 4 dd 0x20000 |
yuv2yuvX_10_upper: times 8 dw 0x3ff |
yuv2yuvX_9_upper: times 8 dw 0x1ff |
pd_4: times 4 dd 4 |
pd_4min0x40000: times 4 dd 4 - (0x40000) |
pw_16: times 8 dw 16 |
pw_32: times 8 dw 32 |
pw_512: times 8 dw 512 |
pw_1024: times 8 dw 1024 |
SECTION .text |
;----------------------------------------------------------------------------- |
; vertical line scaling |
; |
; void yuv2plane1_<output_size>_<opt>(const int16_t *src, uint8_t *dst, int dstW, |
; const uint8_t *dither, int offset) |
; and |
; void yuv2planeX_<output_size>_<opt>(const int16_t *filter, int filterSize, |
; const int16_t **src, uint8_t *dst, int dstW, |
; const uint8_t *dither, int offset) |
; |
; Scale one or $filterSize lines of source data to generate one line of output |
; data. The input is 15-bit in int16_t if $output_size is [8,10] and 19-bit in |
; int32_t if $output_size is 16. $filter is 12-bit. $filterSize is a multiple |
; of 2. $offset is either 0 or 3. $dither holds 8 values. |
;----------------------------------------------------------------------------- |
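A scalar model of the 8-bit yuv2planeX case, closely following libswscale's C fallback: accumulate filterSize 15-bit source lines against 12-bit coefficients, fold in the dither value pre-scaled by 1 << 12, then drop the 19 fraction bits and clip:
#include <stdint.h>
static void yuv2planeX_8_ref(const int16_t *filter, int filterSize,
                             const int16_t **src, uint8_t *dst, int dstW,
                             const uint8_t *dither, int offset)
{
    for (int i = 0; i < dstW; i++) {
        int val = dither[(i + offset) & 7] << 12;
        for (int j = 0; j < filterSize; j++)
            val += src[j][i] * filter[j];
        val >>= 19;
        dst[i] = val < 0 ? 0 : val > 255 ? 255 : val;
    }
}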
%macro yuv2planeX_fn 3 |
%if ARCH_X86_32 |
%define cntr_reg fltsizeq |
%define movsx mov |
%else |
%define cntr_reg r7 |
%define movsx movsxd |
%endif |
cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset |
%if %1 == 8 || %1 == 9 || %1 == 10 |
pxor m6, m6 |
%endif ; %1 == 8/9/10 |
%if %1 == 8 |
%if ARCH_X86_32 |
%assign pad 0x2c - (stack_offset & 15) |
SUB rsp, pad |
%define m_dith m7 |
%else ; x86-64 |
%define m_dith m9 |
%endif ; x86-32 |
; create registers holding dither |
movq m_dith, [ditherq] ; dither |
test offsetd, offsetd |
jz .no_rot |
%if mmsize == 16 |
punpcklqdq m_dith, m_dith |
%endif ; mmsize == 16 |
PALIGNR m_dith, m_dith, 3, m0 |
.no_rot: |
%if mmsize == 16 |
punpcklbw m_dith, m6 |
%if ARCH_X86_64 |
punpcklwd m8, m_dith, m6 |
pslld m8, 12 |
%else ; x86-32 |
punpcklwd m5, m_dith, m6 |
pslld m5, 12 |
%endif ; x86-32/64 |
punpckhwd m_dith, m6 |
pslld m_dith, 12 |
%if ARCH_X86_32 |
mova [rsp+ 0], m5 |
mova [rsp+16], m_dith |
%endif |
%else ; mmsize == 8 |
punpcklbw m5, m_dith, m6 |
punpckhbw m_dith, m6 |
punpcklwd m4, m5, m6 |
punpckhwd m5, m6 |
punpcklwd m3, m_dith, m6 |
punpckhwd m_dith, m6 |
pslld m4, 12 |
pslld m5, 12 |
pslld m3, 12 |
pslld m_dith, 12 |
mova [rsp+ 0], m4 |
mova [rsp+ 8], m5 |
mova [rsp+16], m3 |
mova [rsp+24], m_dith |
%endif ; mmsize == 8/16 |
%endif ; %1 == 8 |
xor r5, r5 |
.pixelloop: |
%assign %%i 0 |
; the rep here is for the 8bit output mmx case, where dither covers |
; 8 pixels but we can only handle 2 pixels per register, and thus 4 |
; pixels per iteration. In order to not have to keep track of where |
; we are w.r.t. dithering, we unroll the mmx/8bit loop x2. |
%if %1 == 8 |
%assign %%repcnt 16/mmsize |
%else |
%assign %%repcnt 1 |
%endif |
%rep %%repcnt |
%if %1 == 8 |
%if ARCH_X86_32 |
mova m2, [rsp+mmsize*(0+%%i)] |
mova m1, [rsp+mmsize*(1+%%i)] |
%else ; x86-64 |
mova m2, m8 |
mova m1, m_dith |
%endif ; x86-32/64 |
%else ; %1 == 9/10/16 |
mova m1, [yuv2yuvX_%1_start] |
mova m2, m1 |
%endif ; %1 == 8/9/10/16 |
movsx cntr_reg, fltsizem |
.filterloop_ %+ %%i: |
; input pixels |
mov r6, [srcq+gprsize*cntr_reg-2*gprsize] |
%if %1 == 16 |
mova m3, [r6+r5*4] |
mova m5, [r6+r5*4+mmsize] |
%else ; %1 == 8/9/10 |
mova m3, [r6+r5*2] |
%endif ; %1 == 8/9/10/16 |
mov r6, [srcq+gprsize*cntr_reg-gprsize] |
%if %1 == 16 |
mova m4, [r6+r5*4] |
mova m6, [r6+r5*4+mmsize] |
%else ; %1 == 8/9/10 |
mova m4, [r6+r5*2] |
%endif ; %1 == 8/9/10/16 |
; coefficients |
movd m0, [filterq+2*cntr_reg-4] ; coeff[0], coeff[1] |
%if %1 == 16 |
pshuflw m7, m0, 0 ; coeff[0] |
pshuflw m0, m0, 0x55 ; coeff[1] |
pmovsxwd m7, m7 ; word -> dword |
pmovsxwd m0, m0 ; word -> dword |
pmulld m3, m7 |
pmulld m5, m7 |
pmulld m4, m0 |
pmulld m6, m0 |
paddd m2, m3 |
paddd m1, m5 |
paddd m2, m4 |
paddd m1, m6 |
%else ; %1 == 10/9/8 |
punpcklwd m5, m3, m4 |
punpckhwd m3, m4 |
SPLATD m0 |
pmaddwd m5, m0 |
pmaddwd m3, m0 |
paddd m2, m5 |
paddd m1, m3 |
%endif ; %1 == 8/9/10/16 |
sub cntr_reg, 2 |
jg .filterloop_ %+ %%i |
%if %1 == 16 |
psrad m2, 31 - %1 |
psrad m1, 31 - %1 |
%else ; %1 == 10/9/8 |
psrad m2, 27 - %1 |
psrad m1, 27 - %1 |
%endif ; %1 == 8/9/10/16 |
%if %1 == 8 |
packssdw m2, m1 |
packuswb m2, m2 |
movh [dstq+r5*1], m2 |
%else ; %1 == 9/10/16 |
%if %1 == 16 |
packssdw m2, m1 |
paddw m2, [minshort] |
%else ; %1 == 9/10 |
%if cpuflag(sse4) |
packusdw m2, m1 |
%else ; mmxext/sse2 |
packssdw m2, m1 |
pmaxsw m2, m6 |
%endif ; mmxext/sse2/sse4/avx |
pminsw m2, [yuv2yuvX_%1_upper] |
%endif ; %1 == 9/10/16 |
mova [dstq+r5*2], m2 |
%endif ; %1 == 8/9/10/16 |
add r5, mmsize/2 |
sub wd, mmsize/2 |
%assign %%i %%i+2 |
%endrep |
jg .pixelloop |
%if %1 == 8 |
%if ARCH_X86_32 |
ADD rsp, pad |
RET |
%else ; x86-64 |
REP_RET |
%endif ; x86-32/64 |
%else ; %1 == 9/10/16 |
REP_RET |
%endif ; %1 == 8/9/10/16 |
%endmacro |
%if ARCH_X86_32 |
INIT_MMX mmxext |
yuv2planeX_fn 8, 0, 7 |
yuv2planeX_fn 9, 0, 5 |
yuv2planeX_fn 10, 0, 5 |
%endif |
INIT_XMM sse2 |
yuv2planeX_fn 8, 10, 7 |
yuv2planeX_fn 9, 7, 5 |
yuv2planeX_fn 10, 7, 5 |
INIT_XMM sse4 |
yuv2planeX_fn 8, 10, 7 |
yuv2planeX_fn 9, 7, 5 |
yuv2planeX_fn 10, 7, 5 |
yuv2planeX_fn 16, 8, 5 |
%if HAVE_AVX_EXTERNAL |
INIT_XMM avx |
yuv2planeX_fn 8, 10, 7 |
yuv2planeX_fn 9, 7, 5 |
yuv2planeX_fn 10, 7, 5 |
%endif |
; %1=output bpc, %2=alignment (u/a) |
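; Each iteration loads one vector of intermediate samples, adds the |
; dither/rounding constants prepared in yuv2plane1_fn, shifts down to |
; the target depth (psraw 7 for 8 bit, 15-%1 for 9/10 bit, psrad 3 for |
; 16 bit) and packs the result with the appropriate saturation/clipping. |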
%macro yuv2plane1_mainloop 2 |
.loop_%2: |
%if %1 == 8 |
paddsw m0, m2, [srcq+wq*2+mmsize*0] |
paddsw m1, m3, [srcq+wq*2+mmsize*1] |
psraw m0, 7 |
psraw m1, 7 |
packuswb m0, m1 |
mov%2 [dstq+wq], m0 |
%elif %1 == 16 |
paddd m0, m4, [srcq+wq*4+mmsize*0] |
paddd m1, m4, [srcq+wq*4+mmsize*1] |
paddd m2, m4, [srcq+wq*4+mmsize*2] |
paddd m3, m4, [srcq+wq*4+mmsize*3] |
psrad m0, 3 |
psrad m1, 3 |
psrad m2, 3 |
psrad m3, 3 |
%if cpuflag(sse4) ; avx/sse4 |
packusdw m0, m1 |
packusdw m2, m3 |
%else ; mmx/sse2 |
packssdw m0, m1 |
packssdw m2, m3 |
paddw m0, m5 |
paddw m2, m5 |
%endif ; mmx/sse2/sse4/avx |
mov%2 [dstq+wq*2+mmsize*0], m0 |
mov%2 [dstq+wq*2+mmsize*1], m2 |
%else ; %1 == 9/10 |
paddsw m0, m2, [srcq+wq*2+mmsize*0] |
paddsw m1, m2, [srcq+wq*2+mmsize*1] |
psraw m0, 15 - %1 |
psraw m1, 15 - %1 |
pmaxsw m0, m4 |
pmaxsw m1, m4 |
pminsw m0, m3 |
pminsw m1, m3 |
mov%2 [dstq+wq*2+mmsize*0], m0 |
mov%2 [dstq+wq*2+mmsize*1], m1 |
%endif |
add wq, mmsize |
jl .loop_%2 |
%endmacro |
%macro yuv2plane1_fn 3 |
cglobal yuv2plane1_%1, %3, %3, %2, src, dst, w, dither, offset |
movsxdifnidn wq, wd |
add wq, mmsize - 1 |
and wq, ~(mmsize - 1) |
%if %1 == 8 |
add dstq, wq |
%else ; %1 != 8 |
lea dstq, [dstq+wq*2] |
%endif ; %1 == 8 |
%if %1 == 16 |
lea srcq, [srcq+wq*4] |
%else ; %1 != 16 |
lea srcq, [srcq+wq*2] |
%endif ; %1 == 16 |
neg wq |
%if %1 == 8 |
pxor m4, m4 ; zero |
; create registers holding dither |
movq m3, [ditherq] ; dither |
test offsetd, offsetd |
jz .no_rot |
%if mmsize == 16 |
punpcklqdq m3, m3 |
%endif ; mmsize == 16 |
PALIGNR m3, m3, 3, m2 |
.no_rot: |
%if mmsize == 8 |
mova m2, m3 |
punpckhbw m3, m4 ; byte->word |
punpcklbw m2, m4 ; byte->word |
%else |
punpcklbw m3, m4 |
mova m2, m3 |
%endif |
%elif %1 == 9 |
pxor m4, m4 |
mova m3, [pw_512] |
mova m2, [pw_32] |
%elif %1 == 10 |
pxor m4, m4 |
mova m3, [pw_1024] |
mova m2, [pw_16] |
%else ; %1 == 16 |
%if cpuflag(sse4) ; sse4/avx |
mova m4, [pd_4] |
%else ; mmx/sse2 |
mova m4, [pd_4min0x40000] |
mova m5, [minshort] |
%endif ; mmx/sse2/sse4/avx |
%endif ; %1 == .. |
; actual pixel scaling |
%if mmsize == 8 |
yuv2plane1_mainloop %1, a |
%else ; mmsize == 16 |
test dstq, 15 |
jnz .unaligned |
yuv2plane1_mainloop %1, a |
REP_RET |
.unaligned: |
yuv2plane1_mainloop %1, u |
%endif ; mmsize == 8/16 |
REP_RET |
%endmacro |
%if ARCH_X86_32 |
INIT_MMX mmx |
yuv2plane1_fn 8, 0, 5 |
yuv2plane1_fn 16, 0, 3 |
INIT_MMX mmxext |
yuv2plane1_fn 9, 0, 3 |
yuv2plane1_fn 10, 0, 3 |
%endif |
INIT_XMM sse2 |
yuv2plane1_fn 8, 5, 5 |
yuv2plane1_fn 9, 5, 3 |
yuv2plane1_fn 10, 5, 3 |
yuv2plane1_fn 16, 6, 3 |
INIT_XMM sse4 |
yuv2plane1_fn 16, 5, 3 |
%if HAVE_AVX_EXTERNAL |
INIT_XMM avx |
yuv2plane1_fn 8, 5, 5 |
yuv2plane1_fn 9, 5, 3 |
yuv2plane1_fn 10, 5, 3 |
yuv2plane1_fn 16, 5, 3 |
%endif |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/x86/rgb2rgb.c |
---|
0,0 → 1,164 |
/* |
* software RGB to RGB converter |
* plus software PAL8 to RGB converter |
* software YUV to YUV converter |
* software YUV to RGB converter |
* Written by Nick Kurshev. |
* palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at) |
* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
#include <stdint.h> |
#include "config.h" |
#include "libavutil/attributes.h" |
#include "libavutil/x86/asm.h" |
#include "libavutil/x86/cpu.h" |
#include "libavutil/cpu.h" |
#include "libavutil/bswap.h" |
#include "libswscale/rgb2rgb.h" |
#include "libswscale/swscale.h" |
#include "libswscale/swscale_internal.h" |
#if HAVE_INLINE_ASM |
DECLARE_ASM_CONST(8, uint64_t, mmx_ff) = 0x00000000000000FFULL; |
DECLARE_ASM_CONST(8, uint64_t, mmx_null) = 0x0000000000000000ULL; |
DECLARE_ASM_CONST(8, uint64_t, mmx_one) = 0xFFFFFFFFFFFFFFFFULL; |
DECLARE_ASM_CONST(8, uint64_t, mask32b) = 0x000000FF000000FFULL; |
DECLARE_ASM_CONST(8, uint64_t, mask32g) = 0x0000FF000000FF00ULL; |
DECLARE_ASM_CONST(8, uint64_t, mask32r) = 0x00FF000000FF0000ULL; |
DECLARE_ASM_CONST(8, uint64_t, mask32a) = 0xFF000000FF000000ULL; |
DECLARE_ASM_CONST(8, uint64_t, mask32) = 0x00FFFFFF00FFFFFFULL; |
DECLARE_ASM_CONST(8, uint64_t, mask3216br) = 0x00F800F800F800F8ULL; |
DECLARE_ASM_CONST(8, uint64_t, mask3216g) = 0x0000FC000000FC00ULL; |
DECLARE_ASM_CONST(8, uint64_t, mask3215g) = 0x0000F8000000F800ULL; |
DECLARE_ASM_CONST(8, uint64_t, mul3216) = 0x2000000420000004ULL; |
DECLARE_ASM_CONST(8, uint64_t, mul3215) = 0x2000000820000008ULL; |
DECLARE_ASM_CONST(8, uint64_t, mask24b) = 0x00FF0000FF0000FFULL; |
DECLARE_ASM_CONST(8, uint64_t, mask24g) = 0xFF0000FF0000FF00ULL; |
DECLARE_ASM_CONST(8, uint64_t, mask24r) = 0x0000FF0000FF0000ULL; |
DECLARE_ASM_CONST(8, uint64_t, mask24l) = 0x0000000000FFFFFFULL; |
DECLARE_ASM_CONST(8, uint64_t, mask24h) = 0x0000FFFFFF000000ULL; |
DECLARE_ASM_CONST(8, uint64_t, mask24hh) = 0xffff000000000000ULL; |
DECLARE_ASM_CONST(8, uint64_t, mask24hhh) = 0xffffffff00000000ULL; |
DECLARE_ASM_CONST(8, uint64_t, mask24hhhh) = 0xffffffffffff0000ULL; |
DECLARE_ASM_CONST(8, uint64_t, mask15b) = 0x001F001F001F001FULL; /* 00000000 00011111 xxB */ |
DECLARE_ASM_CONST(8, uint64_t, mask15rg) = 0x7FE07FE07FE07FE0ULL; /* 01111111 11100000 RGx */ |
DECLARE_ASM_CONST(8, uint64_t, mask15s) = 0xFFE0FFE0FFE0FFE0ULL; |
DECLARE_ASM_CONST(8, uint64_t, mask15g) = 0x03E003E003E003E0ULL; |
DECLARE_ASM_CONST(8, uint64_t, mask15r) = 0x7C007C007C007C00ULL; |
#define mask16b mask15b |
DECLARE_ASM_CONST(8, uint64_t, mask16g) = 0x07E007E007E007E0ULL; |
DECLARE_ASM_CONST(8, uint64_t, mask16r) = 0xF800F800F800F800ULL; |
DECLARE_ASM_CONST(8, uint64_t, red_16mask) = 0x0000f8000000f800ULL; |
DECLARE_ASM_CONST(8, uint64_t, green_16mask) = 0x000007e0000007e0ULL; |
DECLARE_ASM_CONST(8, uint64_t, blue_16mask) = 0x0000001f0000001fULL; |
DECLARE_ASM_CONST(8, uint64_t, red_15mask) = 0x00007c0000007c00ULL; |
DECLARE_ASM_CONST(8, uint64_t, green_15mask) = 0x000003e0000003e0ULL; |
DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL; |
DECLARE_ASM_CONST(8, uint64_t, mul15_mid) = 0x4200420042004200ULL; |
DECLARE_ASM_CONST(8, uint64_t, mul15_hi) = 0x0210021002100210ULL; |
DECLARE_ASM_CONST(8, uint64_t, mul16_mid) = 0x2080208020802080ULL; |
DECLARE_ALIGNED(8, extern const uint64_t, ff_bgr2YOffset); |
DECLARE_ALIGNED(8, extern const uint64_t, ff_w1111); |
DECLARE_ALIGNED(8, extern const uint64_t, ff_bgr2UVOffset); |
#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5)) |
#define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5)) |
#define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5)) |
#define GY ((int)( 0.504*(1<<RGB2YUV_SHIFT)+0.5)) |
#define GV ((int)(-0.368*(1<<RGB2YUV_SHIFT)+0.5)) |
#define GU ((int)(-0.291*(1<<RGB2YUV_SHIFT)+0.5)) |
#define RY ((int)( 0.257*(1<<RGB2YUV_SHIFT)+0.5)) |
#define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5)) |
#define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5)) |
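/* The constants above match the studio-swing BT.601 RGB->YUV matrix |
 * (e.g. Y = 0.257*R + 0.504*G + 0.098*B), scaled to fixed point by |
 * RGB2YUV_SHIFT and rounded to nearest. */ |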
// Note: we have C, MMX, MMXEXT and 3DNOW versions (SSE2 and AVX variants are compiled below as well); there is no 3DNOW + MMXEXT one. |
#define COMPILE_TEMPLATE_MMXEXT 0 |
#define COMPILE_TEMPLATE_AMD3DNOW 0 |
#define COMPILE_TEMPLATE_SSE2 0 |
#define COMPILE_TEMPLATE_AVX 0 |
//MMX versions |
#undef RENAME |
#define RENAME(a) a ## _mmx |
#include "rgb2rgb_template.c" |
// MMXEXT versions |
#undef RENAME |
#undef COMPILE_TEMPLATE_MMXEXT |
#define COMPILE_TEMPLATE_MMXEXT 1 |
#define RENAME(a) a ## _mmxext |
#include "rgb2rgb_template.c" |
//SSE2 versions |
#undef RENAME |
#undef COMPILE_TEMPLATE_SSE2 |
#define COMPILE_TEMPLATE_SSE2 1 |
#define RENAME(a) a ## _sse2 |
#include "rgb2rgb_template.c" |
//AVX versions |
#undef RENAME |
#undef COMPILE_TEMPLATE_AVX |
#define COMPILE_TEMPLATE_AVX 1 |
#define RENAME(a) a ## _avx |
#include "rgb2rgb_template.c" |
//3DNOW versions |
#undef RENAME |
#undef COMPILE_TEMPLATE_MMXEXT |
#undef COMPILE_TEMPLATE_SSE2 |
#undef COMPILE_TEMPLATE_AVX |
#undef COMPILE_TEMPLATE_AMD3DNOW |
#define COMPILE_TEMPLATE_MMXEXT 0 |
#define COMPILE_TEMPLATE_SSE2 0 |
#define COMPILE_TEMPLATE_AVX 0 |
#define COMPILE_TEMPLATE_AMD3DNOW 1 |
#define RENAME(a) a ## _3dnow |
#include "rgb2rgb_template.c" |
/* |
 * RGB15->RGB16 original by Strepto/Astral; |
 * ported to gcc & bugfixed by A'rpi; |
 * MMXEXT, 3DNOW optimization by Nick Kurshev; |
 * 32-bit C version and the and&add trick by Michael Niedermayer. |
 */ |
#endif /* HAVE_INLINE_ASM */ |
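/* Runtime dispatch: each rgb2rgb_init_* call installs its variants into |
 * the shared rgb2rgb function pointers, and later calls overwrite |
 * earlier ones, so the most capable instruction set the CPU reports |
 * ends up providing the final implementations. */ |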
av_cold void rgb2rgb_init_x86(void) |
{ |
#if HAVE_INLINE_ASM |
int cpu_flags = av_get_cpu_flags(); |
if (INLINE_MMX(cpu_flags)) |
rgb2rgb_init_mmx(); |
if (INLINE_AMD3DNOW(cpu_flags)) |
rgb2rgb_init_3dnow(); |
if (INLINE_MMXEXT(cpu_flags)) |
rgb2rgb_init_mmxext(); |
if (INLINE_SSE2(cpu_flags)) |
rgb2rgb_init_sse2(); |
if (INLINE_AVX(cpu_flags)) |
rgb2rgb_init_avx(); |
#endif /* HAVE_INLINE_ASM */ |
} |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/x86/rgb2rgb_template.c |
---|
0,0 → 1,2601 |
/* |
* software RGB to RGB converter |
* plus software PAL8 to RGB converter |
* software YUV to YUV converter |
* software YUV to RGB converter |
* Written by Nick Kurshev. |
* palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at) |
* lot of big-endian byte order fixes by Alex Beregszaszi |
* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
#include <stddef.h> |
#include <stdint.h> |
#include "libavutil/attributes.h" |
#include "libavutil/x86/asm.h" |
#undef PREFETCH |
#undef MOVNTQ |
#undef EMMS |
#undef SFENCE |
#undef PAVGB |
#if COMPILE_TEMPLATE_AMD3DNOW |
#define PREFETCH "prefetch" |
#define PAVGB "pavgusb" |
#elif COMPILE_TEMPLATE_MMXEXT |
#define PREFETCH "prefetchnta" |
#define PAVGB "pavgb" |
#else |
#define PREFETCH " # nop" |
#endif |
#if COMPILE_TEMPLATE_AMD3DNOW |
/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */ |
#define EMMS "femms" |
#else |
#define EMMS "emms" |
#endif |
#if COMPILE_TEMPLATE_MMXEXT |
#define MOVNTQ "movntq" |
#define SFENCE "sfence" |
#else |
#define MOVNTQ "movq" |
#define SFENCE " # nop" |
#endif |
#if !COMPILE_TEMPLATE_SSE2 |
#if !COMPILE_TEMPLATE_AMD3DNOW |
static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
uint8_t *dest = dst; |
const uint8_t *s = src; |
const uint8_t *end; |
const uint8_t *mm_end; |
end = s + src_size; |
__asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); |
mm_end = end - 23; |
__asm__ volatile("movq %0, %%mm7"::"m"(mask32a):"memory"); |
while (s < mm_end) { |
__asm__ volatile( |
PREFETCH" 32(%1) \n\t" |
"movd (%1), %%mm0 \n\t" |
"punpckldq 3(%1), %%mm0 \n\t" |
"movd 6(%1), %%mm1 \n\t" |
"punpckldq 9(%1), %%mm1 \n\t" |
"movd 12(%1), %%mm2 \n\t" |
"punpckldq 15(%1), %%mm2 \n\t" |
"movd 18(%1), %%mm3 \n\t" |
"punpckldq 21(%1), %%mm3 \n\t" |
"por %%mm7, %%mm0 \n\t" |
"por %%mm7, %%mm1 \n\t" |
"por %%mm7, %%mm2 \n\t" |
"por %%mm7, %%mm3 \n\t" |
MOVNTQ" %%mm0, (%0) \n\t" |
MOVNTQ" %%mm1, 8(%0) \n\t" |
MOVNTQ" %%mm2, 16(%0) \n\t" |
MOVNTQ" %%mm3, 24(%0)" |
:: "r"(dest), "r"(s) |
:"memory"); |
dest += 32; |
s += 24; |
} |
__asm__ volatile(SFENCE:::"memory"); |
__asm__ volatile(EMMS:::"memory"); |
while (s < end) { |
*dest++ = *s++; |
*dest++ = *s++; |
*dest++ = *s++; |
*dest++ = 255; |
} |
} |
#define STORE_BGR24_MMX \ |
"psrlq $8, %%mm2 \n\t" \ |
"psrlq $8, %%mm3 \n\t" \ |
"psrlq $8, %%mm6 \n\t" \ |
"psrlq $8, %%mm7 \n\t" \ |
"pand "MANGLE(mask24l)", %%mm0\n\t" \ |
"pand "MANGLE(mask24l)", %%mm1\n\t" \ |
"pand "MANGLE(mask24l)", %%mm4\n\t" \ |
"pand "MANGLE(mask24l)", %%mm5\n\t" \ |
"pand "MANGLE(mask24h)", %%mm2\n\t" \ |
"pand "MANGLE(mask24h)", %%mm3\n\t" \ |
"pand "MANGLE(mask24h)", %%mm6\n\t" \ |
"pand "MANGLE(mask24h)", %%mm7\n\t" \ |
"por %%mm2, %%mm0 \n\t" \ |
"por %%mm3, %%mm1 \n\t" \ |
"por %%mm6, %%mm4 \n\t" \ |
"por %%mm7, %%mm5 \n\t" \ |
\ |
"movq %%mm1, %%mm2 \n\t" \ |
"movq %%mm4, %%mm3 \n\t" \ |
"psllq $48, %%mm2 \n\t" \ |
"psllq $32, %%mm3 \n\t" \ |
"por %%mm2, %%mm0 \n\t" \ |
"psrlq $16, %%mm1 \n\t" \ |
"psrlq $32, %%mm4 \n\t" \ |
"psllq $16, %%mm5 \n\t" \ |
"por %%mm3, %%mm1 \n\t" \ |
"por %%mm5, %%mm4 \n\t" \ |
\ |
MOVNTQ" %%mm0, (%0) \n\t" \ |
MOVNTQ" %%mm1, 8(%0) \n\t" \ |
MOVNTQ" %%mm4, 16(%0)" |
static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
uint8_t *dest = dst; |
const uint8_t *s = src; |
const uint8_t *end; |
const uint8_t *mm_end; |
end = s + src_size; |
__asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); |
mm_end = end - 31; |
while (s < mm_end) { |
__asm__ volatile( |
PREFETCH" 32(%1) \n\t" |
"movq (%1), %%mm0 \n\t" |
"movq 8(%1), %%mm1 \n\t" |
"movq 16(%1), %%mm4 \n\t" |
"movq 24(%1), %%mm5 \n\t" |
"movq %%mm0, %%mm2 \n\t" |
"movq %%mm1, %%mm3 \n\t" |
"movq %%mm4, %%mm6 \n\t" |
"movq %%mm5, %%mm7 \n\t" |
STORE_BGR24_MMX |
:: "r"(dest), "r"(s) |
NAMED_CONSTRAINTS_ADD(mask24l,mask24h) |
:"memory"); |
dest += 24; |
s += 32; |
} |
__asm__ volatile(SFENCE:::"memory"); |
__asm__ volatile(EMMS:::"memory"); |
while (s < end) { |
*dest++ = *s++; |
*dest++ = *s++; |
*dest++ = *s++; |
s++; |
} |
} |
/* |
 * original by Strepto/Astral; |
 * ported to gcc & bugfixed by A'rpi; |
 * MMXEXT, 3DNOW optimization by Nick Kurshev; |
 * 32-bit C version and the and&add trick by Michael Niedermayer. |
 */ |
static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
register const uint8_t* s=src; |
register uint8_t* d=dst; |
register const uint8_t *end; |
const uint8_t *mm_end; |
end = s + src_size; |
__asm__ volatile(PREFETCH" %0"::"m"(*s)); |
__asm__ volatile("movq %0, %%mm4"::"m"(mask15s)); |
mm_end = end - 15; |
while (s<mm_end) { |
__asm__ volatile( |
PREFETCH" 32(%1) \n\t" |
"movq (%1), %%mm0 \n\t" |
"movq 8(%1), %%mm2 \n\t" |
"movq %%mm0, %%mm1 \n\t" |
"movq %%mm2, %%mm3 \n\t" |
"pand %%mm4, %%mm0 \n\t" |
"pand %%mm4, %%mm2 \n\t" |
"paddw %%mm1, %%mm0 \n\t" |
"paddw %%mm3, %%mm2 \n\t" |
MOVNTQ" %%mm0, (%0) \n\t" |
MOVNTQ" %%mm2, 8(%0)" |
:: "r"(d), "r"(s) |
); |
d+=16; |
s+=16; |
} |
__asm__ volatile(SFENCE:::"memory"); |
__asm__ volatile(EMMS:::"memory"); |
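    /* Scalar tail, using the "and&add trick" credited above: adding |
     * (x & 0x7FE07FE0) to x doubles the R/G fields, i.e. shifts them up |
     * one bit (0RRRRRGGGGGBBBBB -> RRRRRGGGGGGBBBBB), while the blue |
     * bits stay in place and the new green LSB is zero. */ |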
mm_end = end - 3; |
while (s < mm_end) { |
register unsigned x= *((const uint32_t *)s); |
*((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0); |
d+=4; |
s+=4; |
} |
if (s < end) { |
register unsigned short x= *((const uint16_t *)s); |
*((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0); |
} |
} |
static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
register const uint8_t* s=src; |
register uint8_t* d=dst; |
register const uint8_t *end; |
const uint8_t *mm_end; |
end = s + src_size; |
__asm__ volatile(PREFETCH" %0"::"m"(*s)); |
__asm__ volatile("movq %0, %%mm7"::"m"(mask15rg)); |
__asm__ volatile("movq %0, %%mm6"::"m"(mask15b)); |
mm_end = end - 15; |
while (s<mm_end) { |
__asm__ volatile( |
PREFETCH" 32(%1) \n\t" |
"movq (%1), %%mm0 \n\t" |
"movq 8(%1), %%mm2 \n\t" |
"movq %%mm0, %%mm1 \n\t" |
"movq %%mm2, %%mm3 \n\t" |
"psrlq $1, %%mm0 \n\t" |
"psrlq $1, %%mm2 \n\t" |
"pand %%mm7, %%mm0 \n\t" |
"pand %%mm7, %%mm2 \n\t" |
"pand %%mm6, %%mm1 \n\t" |
"pand %%mm6, %%mm3 \n\t" |
"por %%mm1, %%mm0 \n\t" |
"por %%mm3, %%mm2 \n\t" |
MOVNTQ" %%mm0, (%0) \n\t" |
MOVNTQ" %%mm2, 8(%0)" |
:: "r"(d), "r"(s) |
); |
d+=16; |
s+=16; |
} |
__asm__ volatile(SFENCE:::"memory"); |
__asm__ volatile(EMMS:::"memory"); |
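    /* Scalar tail: the inverse mapping; ((x>>1) & 0x7FE07FE0) shifts the |
     * R/G fields down one bit, dropping the green LSB, while |
     * (x & 0x001F001F) keeps blue unchanged. */ |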
mm_end = end - 3; |
while (s < mm_end) { |
register uint32_t x= *((const uint32_t*)s); |
*((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F); |
s+=4; |
d+=4; |
} |
if (s < end) { |
register uint16_t x= *((const uint16_t*)s); |
*((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F); |
} |
} |
static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
const uint8_t *s = src; |
const uint8_t *end; |
const uint8_t *mm_end; |
uint16_t *d = (uint16_t *)dst; |
end = s + src_size; |
mm_end = end - 15; |
__asm__ volatile( |
"movq %3, %%mm5 \n\t" |
"movq %4, %%mm6 \n\t" |
"movq %5, %%mm7 \n\t" |
"jmp 2f \n\t" |
".p2align 4 \n\t" |
"1: \n\t" |
PREFETCH" 32(%1) \n\t" |
"movd (%1), %%mm0 \n\t" |
"movd 4(%1), %%mm3 \n\t" |
"punpckldq 8(%1), %%mm0 \n\t" |
"punpckldq 12(%1), %%mm3 \n\t" |
"movq %%mm0, %%mm1 \n\t" |
"movq %%mm3, %%mm4 \n\t" |
"pand %%mm6, %%mm0 \n\t" |
"pand %%mm6, %%mm3 \n\t" |
"pmaddwd %%mm7, %%mm0 \n\t" |
"pmaddwd %%mm7, %%mm3 \n\t" |
"pand %%mm5, %%mm1 \n\t" |
"pand %%mm5, %%mm4 \n\t" |
"por %%mm1, %%mm0 \n\t" |
"por %%mm4, %%mm3 \n\t" |
"psrld $5, %%mm0 \n\t" |
"pslld $11, %%mm3 \n\t" |
"por %%mm3, %%mm0 \n\t" |
MOVNTQ" %%mm0, (%0) \n\t" |
"add $16, %1 \n\t" |
"add $8, %0 \n\t" |
"2: \n\t" |
"cmp %2, %1 \n\t" |
" jb 1b \n\t" |
: "+r" (d), "+r"(s) |
: "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216) |
); |
__asm__ volatile(SFENCE:::"memory"); |
__asm__ volatile(EMMS:::"memory"); |
while (s < end) { |
register int rgb = *(const uint32_t*)s; s += 4; |
*d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8); |
} |
} |
static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
const uint8_t *s = src; |
const uint8_t *end; |
const uint8_t *mm_end; |
uint16_t *d = (uint16_t *)dst; |
end = s + src_size; |
__asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); |
__asm__ volatile( |
"movq %0, %%mm7 \n\t" |
"movq %1, %%mm6 \n\t" |
::"m"(red_16mask),"m"(green_16mask)); |
mm_end = end - 15; |
while (s < mm_end) { |
__asm__ volatile( |
PREFETCH" 32(%1) \n\t" |
"movd (%1), %%mm0 \n\t" |
"movd 4(%1), %%mm3 \n\t" |
"punpckldq 8(%1), %%mm0 \n\t" |
"punpckldq 12(%1), %%mm3 \n\t" |
"movq %%mm0, %%mm1 \n\t" |
"movq %%mm0, %%mm2 \n\t" |
"movq %%mm3, %%mm4 \n\t" |
"movq %%mm3, %%mm5 \n\t" |
"psllq $8, %%mm0 \n\t" |
"psllq $8, %%mm3 \n\t" |
"pand %%mm7, %%mm0 \n\t" |
"pand %%mm7, %%mm3 \n\t" |
"psrlq $5, %%mm1 \n\t" |
"psrlq $5, %%mm4 \n\t" |
"pand %%mm6, %%mm1 \n\t" |
"pand %%mm6, %%mm4 \n\t" |
"psrlq $19, %%mm2 \n\t" |
"psrlq $19, %%mm5 \n\t" |
"pand %2, %%mm2 \n\t" |
"pand %2, %%mm5 \n\t" |
"por %%mm1, %%mm0 \n\t" |
"por %%mm4, %%mm3 \n\t" |
"por %%mm2, %%mm0 \n\t" |
"por %%mm5, %%mm3 \n\t" |
"psllq $16, %%mm3 \n\t" |
"por %%mm3, %%mm0 \n\t" |
MOVNTQ" %%mm0, (%0) \n\t" |
:: "r"(d),"r"(s),"m"(blue_16mask):"memory"); |
d += 4; |
s += 16; |
} |
__asm__ volatile(SFENCE:::"memory"); |
__asm__ volatile(EMMS:::"memory"); |
while (s < end) { |
register int rgb = *(const uint32_t*)s; s += 4; |
*d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19); |
} |
} |
static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
const uint8_t *s = src; |
const uint8_t *end; |
const uint8_t *mm_end; |
uint16_t *d = (uint16_t *)dst; |
end = s + src_size; |
mm_end = end - 15; |
__asm__ volatile( |
"movq %3, %%mm5 \n\t" |
"movq %4, %%mm6 \n\t" |
"movq %5, %%mm7 \n\t" |
"jmp 2f \n\t" |
".p2align 4 \n\t" |
"1: \n\t" |
PREFETCH" 32(%1) \n\t" |
"movd (%1), %%mm0 \n\t" |
"movd 4(%1), %%mm3 \n\t" |
"punpckldq 8(%1), %%mm0 \n\t" |
"punpckldq 12(%1), %%mm3 \n\t" |
"movq %%mm0, %%mm1 \n\t" |
"movq %%mm3, %%mm4 \n\t" |
"pand %%mm6, %%mm0 \n\t" |
"pand %%mm6, %%mm3 \n\t" |
"pmaddwd %%mm7, %%mm0 \n\t" |
"pmaddwd %%mm7, %%mm3 \n\t" |
"pand %%mm5, %%mm1 \n\t" |
"pand %%mm5, %%mm4 \n\t" |
"por %%mm1, %%mm0 \n\t" |
"por %%mm4, %%mm3 \n\t" |
"psrld $6, %%mm0 \n\t" |
"pslld $10, %%mm3 \n\t" |
"por %%mm3, %%mm0 \n\t" |
MOVNTQ" %%mm0, (%0) \n\t" |
"add $16, %1 \n\t" |
"add $8, %0 \n\t" |
"2: \n\t" |
"cmp %2, %1 \n\t" |
" jb 1b \n\t" |
: "+r" (d), "+r"(s) |
: "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215) |
); |
__asm__ volatile(SFENCE:::"memory"); |
__asm__ volatile(EMMS:::"memory"); |
while (s < end) { |
register int rgb = *(const uint32_t*)s; s += 4; |
*d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9); |
} |
} |
static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
const uint8_t *s = src; |
const uint8_t *end; |
const uint8_t *mm_end; |
uint16_t *d = (uint16_t *)dst; |
end = s + src_size; |
__asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); |
__asm__ volatile( |
"movq %0, %%mm7 \n\t" |
"movq %1, %%mm6 \n\t" |
::"m"(red_15mask),"m"(green_15mask)); |
mm_end = end - 15; |
while (s < mm_end) { |
__asm__ volatile( |
PREFETCH" 32(%1) \n\t" |
"movd (%1), %%mm0 \n\t" |
"movd 4(%1), %%mm3 \n\t" |
"punpckldq 8(%1), %%mm0 \n\t" |
"punpckldq 12(%1), %%mm3 \n\t" |
"movq %%mm0, %%mm1 \n\t" |
"movq %%mm0, %%mm2 \n\t" |
"movq %%mm3, %%mm4 \n\t" |
"movq %%mm3, %%mm5 \n\t" |
"psllq $7, %%mm0 \n\t" |
"psllq $7, %%mm3 \n\t" |
"pand %%mm7, %%mm0 \n\t" |
"pand %%mm7, %%mm3 \n\t" |
"psrlq $6, %%mm1 \n\t" |
"psrlq $6, %%mm4 \n\t" |
"pand %%mm6, %%mm1 \n\t" |
"pand %%mm6, %%mm4 \n\t" |
"psrlq $19, %%mm2 \n\t" |
"psrlq $19, %%mm5 \n\t" |
"pand %2, %%mm2 \n\t" |
"pand %2, %%mm5 \n\t" |
"por %%mm1, %%mm0 \n\t" |
"por %%mm4, %%mm3 \n\t" |
"por %%mm2, %%mm0 \n\t" |
"por %%mm5, %%mm3 \n\t" |
"psllq $16, %%mm3 \n\t" |
"por %%mm3, %%mm0 \n\t" |
MOVNTQ" %%mm0, (%0) \n\t" |
::"r"(d),"r"(s),"m"(blue_15mask):"memory"); |
d += 4; |
s += 16; |
} |
__asm__ volatile(SFENCE:::"memory"); |
__asm__ volatile(EMMS:::"memory"); |
while (s < end) { |
register int rgb = *(const uint32_t*)s; s += 4; |
*d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19); |
} |
} |
static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
const uint8_t *s = src; |
const uint8_t *end; |
const uint8_t *mm_end; |
uint16_t *d = (uint16_t *)dst; |
end = s + src_size; |
__asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); |
__asm__ volatile( |
"movq %0, %%mm7 \n\t" |
"movq %1, %%mm6 \n\t" |
::"m"(red_16mask),"m"(green_16mask)); |
mm_end = end - 11; |
while (s < mm_end) { |
__asm__ volatile( |
PREFETCH" 32(%1) \n\t" |
"movd (%1), %%mm0 \n\t" |
"movd 3(%1), %%mm3 \n\t" |
"punpckldq 6(%1), %%mm0 \n\t" |
"punpckldq 9(%1), %%mm3 \n\t" |
"movq %%mm0, %%mm1 \n\t" |
"movq %%mm0, %%mm2 \n\t" |
"movq %%mm3, %%mm4 \n\t" |
"movq %%mm3, %%mm5 \n\t" |
"psrlq $3, %%mm0 \n\t" |
"psrlq $3, %%mm3 \n\t" |
"pand %2, %%mm0 \n\t" |
"pand %2, %%mm3 \n\t" |
"psrlq $5, %%mm1 \n\t" |
"psrlq $5, %%mm4 \n\t" |
"pand %%mm6, %%mm1 \n\t" |
"pand %%mm6, %%mm4 \n\t" |
"psrlq $8, %%mm2 \n\t" |
"psrlq $8, %%mm5 \n\t" |
"pand %%mm7, %%mm2 \n\t" |
"pand %%mm7, %%mm5 \n\t" |
"por %%mm1, %%mm0 \n\t" |
"por %%mm4, %%mm3 \n\t" |
"por %%mm2, %%mm0 \n\t" |
"por %%mm5, %%mm3 \n\t" |
"psllq $16, %%mm3 \n\t" |
"por %%mm3, %%mm0 \n\t" |
MOVNTQ" %%mm0, (%0) \n\t" |
::"r"(d),"r"(s),"m"(blue_16mask):"memory"); |
d += 4; |
s += 12; |
} |
__asm__ volatile(SFENCE:::"memory"); |
__asm__ volatile(EMMS:::"memory"); |
while (s < end) { |
const int b = *s++; |
const int g = *s++; |
const int r = *s++; |
*d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); |
} |
} |
static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
const uint8_t *s = src; |
const uint8_t *end; |
const uint8_t *mm_end; |
uint16_t *d = (uint16_t *)dst; |
end = s + src_size; |
__asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); |
__asm__ volatile( |
"movq %0, %%mm7 \n\t" |
"movq %1, %%mm6 \n\t" |
::"m"(red_16mask),"m"(green_16mask)); |
mm_end = end - 15; |
while (s < mm_end) { |
__asm__ volatile( |
PREFETCH" 32(%1) \n\t" |
"movd (%1), %%mm0 \n\t" |
"movd 3(%1), %%mm3 \n\t" |
"punpckldq 6(%1), %%mm0 \n\t" |
"punpckldq 9(%1), %%mm3 \n\t" |
"movq %%mm0, %%mm1 \n\t" |
"movq %%mm0, %%mm2 \n\t" |
"movq %%mm3, %%mm4 \n\t" |
"movq %%mm3, %%mm5 \n\t" |
"psllq $8, %%mm0 \n\t" |
"psllq $8, %%mm3 \n\t" |
"pand %%mm7, %%mm0 \n\t" |
"pand %%mm7, %%mm3 \n\t" |
"psrlq $5, %%mm1 \n\t" |
"psrlq $5, %%mm4 \n\t" |
"pand %%mm6, %%mm1 \n\t" |
"pand %%mm6, %%mm4 \n\t" |
"psrlq $19, %%mm2 \n\t" |
"psrlq $19, %%mm5 \n\t" |
"pand %2, %%mm2 \n\t" |
"pand %2, %%mm5 \n\t" |
"por %%mm1, %%mm0 \n\t" |
"por %%mm4, %%mm3 \n\t" |
"por %%mm2, %%mm0 \n\t" |
"por %%mm5, %%mm3 \n\t" |
"psllq $16, %%mm3 \n\t" |
"por %%mm3, %%mm0 \n\t" |
MOVNTQ" %%mm0, (%0) \n\t" |
::"r"(d),"r"(s),"m"(blue_16mask):"memory"); |
d += 4; |
s += 12; |
} |
__asm__ volatile(SFENCE:::"memory"); |
__asm__ volatile(EMMS:::"memory"); |
while (s < end) { |
const int r = *s++; |
const int g = *s++; |
const int b = *s++; |
*d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); |
} |
} |
static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
const uint8_t *s = src; |
const uint8_t *end; |
const uint8_t *mm_end; |
uint16_t *d = (uint16_t *)dst; |
end = s + src_size; |
__asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); |
__asm__ volatile( |
"movq %0, %%mm7 \n\t" |
"movq %1, %%mm6 \n\t" |
::"m"(red_15mask),"m"(green_15mask)); |
mm_end = end - 11; |
while (s < mm_end) { |
__asm__ volatile( |
PREFETCH" 32(%1) \n\t" |
"movd (%1), %%mm0 \n\t" |
"movd 3(%1), %%mm3 \n\t" |
"punpckldq 6(%1), %%mm0 \n\t" |
"punpckldq 9(%1), %%mm3 \n\t" |
"movq %%mm0, %%mm1 \n\t" |
"movq %%mm0, %%mm2 \n\t" |
"movq %%mm3, %%mm4 \n\t" |
"movq %%mm3, %%mm5 \n\t" |
"psrlq $3, %%mm0 \n\t" |
"psrlq $3, %%mm3 \n\t" |
"pand %2, %%mm0 \n\t" |
"pand %2, %%mm3 \n\t" |
"psrlq $6, %%mm1 \n\t" |
"psrlq $6, %%mm4 \n\t" |
"pand %%mm6, %%mm1 \n\t" |
"pand %%mm6, %%mm4 \n\t" |
"psrlq $9, %%mm2 \n\t" |
"psrlq $9, %%mm5 \n\t" |
"pand %%mm7, %%mm2 \n\t" |
"pand %%mm7, %%mm5 \n\t" |
"por %%mm1, %%mm0 \n\t" |
"por %%mm4, %%mm3 \n\t" |
"por %%mm2, %%mm0 \n\t" |
"por %%mm5, %%mm3 \n\t" |
"psllq $16, %%mm3 \n\t" |
"por %%mm3, %%mm0 \n\t" |
MOVNTQ" %%mm0, (%0) \n\t" |
::"r"(d),"r"(s),"m"(blue_15mask):"memory"); |
d += 4; |
s += 12; |
} |
__asm__ volatile(SFENCE:::"memory"); |
__asm__ volatile(EMMS:::"memory"); |
while (s < end) { |
const int b = *s++; |
const int g = *s++; |
const int r = *s++; |
*d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); |
} |
} |
static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
const uint8_t *s = src; |
const uint8_t *end; |
const uint8_t *mm_end; |
uint16_t *d = (uint16_t *)dst; |
end = s + src_size; |
__asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); |
__asm__ volatile( |
"movq %0, %%mm7 \n\t" |
"movq %1, %%mm6 \n\t" |
::"m"(red_15mask),"m"(green_15mask)); |
mm_end = end - 15; |
while (s < mm_end) { |
__asm__ volatile( |
PREFETCH" 32(%1) \n\t" |
"movd (%1), %%mm0 \n\t" |
"movd 3(%1), %%mm3 \n\t" |
"punpckldq 6(%1), %%mm0 \n\t" |
"punpckldq 9(%1), %%mm3 \n\t" |
"movq %%mm0, %%mm1 \n\t" |
"movq %%mm0, %%mm2 \n\t" |
"movq %%mm3, %%mm4 \n\t" |
"movq %%mm3, %%mm5 \n\t" |
"psllq $7, %%mm0 \n\t" |
"psllq $7, %%mm3 \n\t" |
"pand %%mm7, %%mm0 \n\t" |
"pand %%mm7, %%mm3 \n\t" |
"psrlq $6, %%mm1 \n\t" |
"psrlq $6, %%mm4 \n\t" |
"pand %%mm6, %%mm1 \n\t" |
"pand %%mm6, %%mm4 \n\t" |
"psrlq $19, %%mm2 \n\t" |
"psrlq $19, %%mm5 \n\t" |
"pand %2, %%mm2 \n\t" |
"pand %2, %%mm5 \n\t" |
"por %%mm1, %%mm0 \n\t" |
"por %%mm4, %%mm3 \n\t" |
"por %%mm2, %%mm0 \n\t" |
"por %%mm5, %%mm3 \n\t" |
"psllq $16, %%mm3 \n\t" |
"por %%mm3, %%mm0 \n\t" |
MOVNTQ" %%mm0, (%0) \n\t" |
::"r"(d),"r"(s),"m"(blue_15mask):"memory"); |
d += 4; |
s += 12; |
} |
__asm__ volatile(SFENCE:::"memory"); |
__asm__ volatile(EMMS:::"memory"); |
while (s < end) { |
const int r = *s++; |
const int g = *s++; |
const int b = *s++; |
*d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); |
} |
} |
static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
const uint16_t *end; |
const uint16_t *mm_end; |
uint8_t *d = dst; |
const uint16_t *s = (const uint16_t*)src; |
end = s + src_size/2; |
__asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); |
mm_end = end - 7; |
while (s < mm_end) { |
__asm__ volatile( |
PREFETCH" 32(%1) \n\t" |
"movq (%1), %%mm0 \n\t" |
"movq (%1), %%mm1 \n\t" |
"movq (%1), %%mm2 \n\t" |
"pand %2, %%mm0 \n\t" |
"pand %3, %%mm1 \n\t" |
"pand %4, %%mm2 \n\t" |
"psllq $5, %%mm0 \n\t" |
"pmulhw "MANGLE(mul15_mid)", %%mm0 \n\t" |
"pmulhw "MANGLE(mul15_mid)", %%mm1 \n\t" |
"pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t" |
"movq %%mm0, %%mm3 \n\t" |
"movq %%mm1, %%mm4 \n\t" |
"movq %%mm2, %%mm5 \n\t" |
"punpcklwd %5, %%mm0 \n\t" |
"punpcklwd %5, %%mm1 \n\t" |
"punpcklwd %5, %%mm2 \n\t" |
"punpckhwd %5, %%mm3 \n\t" |
"punpckhwd %5, %%mm4 \n\t" |
"punpckhwd %5, %%mm5 \n\t" |
"psllq $8, %%mm1 \n\t" |
"psllq $16, %%mm2 \n\t" |
"por %%mm1, %%mm0 \n\t" |
"por %%mm2, %%mm0 \n\t" |
"psllq $8, %%mm4 \n\t" |
"psllq $16, %%mm5 \n\t" |
"por %%mm4, %%mm3 \n\t" |
"por %%mm5, %%mm3 \n\t" |
"movq %%mm0, %%mm6 \n\t" |
"movq %%mm3, %%mm7 \n\t" |
"movq 8(%1), %%mm0 \n\t" |
"movq 8(%1), %%mm1 \n\t" |
"movq 8(%1), %%mm2 \n\t" |
"pand %2, %%mm0 \n\t" |
"pand %3, %%mm1 \n\t" |
"pand %4, %%mm2 \n\t" |
"psllq $5, %%mm0 \n\t" |
"pmulhw "MANGLE(mul15_mid)", %%mm0 \n\t" |
"pmulhw "MANGLE(mul15_mid)", %%mm1 \n\t" |
"pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t" |
"movq %%mm0, %%mm3 \n\t" |
"movq %%mm1, %%mm4 \n\t" |
"movq %%mm2, %%mm5 \n\t" |
"punpcklwd %5, %%mm0 \n\t" |
"punpcklwd %5, %%mm1 \n\t" |
"punpcklwd %5, %%mm2 \n\t" |
"punpckhwd %5, %%mm3 \n\t" |
"punpckhwd %5, %%mm4 \n\t" |
"punpckhwd %5, %%mm5 \n\t" |
"psllq $8, %%mm1 \n\t" |
"psllq $16, %%mm2 \n\t" |
"por %%mm1, %%mm0 \n\t" |
"por %%mm2, %%mm0 \n\t" |
"psllq $8, %%mm4 \n\t" |
"psllq $16, %%mm5 \n\t" |
"por %%mm4, %%mm3 \n\t" |
"por %%mm5, %%mm3 \n\t" |
:"=m"(*d) |
:"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null) |
NAMED_CONSTRAINTS_ADD(mul15_mid,mul15_hi) |
:"memory"); |
/* borrowed 32 to 24 */ |
__asm__ volatile( |
"movq %%mm0, %%mm4 \n\t" |
"movq %%mm3, %%mm5 \n\t" |
"movq %%mm6, %%mm0 \n\t" |
"movq %%mm7, %%mm1 \n\t" |
"movq %%mm4, %%mm6 \n\t" |
"movq %%mm5, %%mm7 \n\t" |
"movq %%mm0, %%mm2 \n\t" |
"movq %%mm1, %%mm3 \n\t" |
STORE_BGR24_MMX |
:: "r"(d), "m"(*s) |
NAMED_CONSTRAINTS_ADD(mask24l,mask24h) |
:"memory"); |
d += 24; |
s += 8; |
} |
__asm__ volatile(SFENCE:::"memory"); |
__asm__ volatile(EMMS:::"memory"); |
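    /* Scalar tail: each 5-bit field is widened to 8 bits by replicating |
     * its top bits into the low bits (v<<3 | v>>2), mapping 0 -> 0 and |
     * 31 -> 255 exactly. */ |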
while (s < end) { |
register uint16_t bgr; |
bgr = *s++; |
*d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); |
*d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7); |
*d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); |
} |
} |
static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
const uint16_t *end; |
const uint16_t *mm_end; |
uint8_t *d = (uint8_t *)dst; |
const uint16_t *s = (const uint16_t *)src; |
end = s + src_size/2; |
__asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); |
mm_end = end - 7; |
while (s < mm_end) { |
__asm__ volatile( |
PREFETCH" 32(%1) \n\t" |
"movq (%1), %%mm0 \n\t" |
"movq (%1), %%mm1 \n\t" |
"movq (%1), %%mm2 \n\t" |
"pand %2, %%mm0 \n\t" |
"pand %3, %%mm1 \n\t" |
"pand %4, %%mm2 \n\t" |
"psllq $5, %%mm0 \n\t" |
"psrlq $1, %%mm2 \n\t" |
"pmulhw "MANGLE(mul15_mid)", %%mm0 \n\t" |
"pmulhw "MANGLE(mul16_mid)", %%mm1 \n\t" |
"pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t" |
"movq %%mm0, %%mm3 \n\t" |
"movq %%mm1, %%mm4 \n\t" |
"movq %%mm2, %%mm5 \n\t" |
"punpcklwd %5, %%mm0 \n\t" |
"punpcklwd %5, %%mm1 \n\t" |
"punpcklwd %5, %%mm2 \n\t" |
"punpckhwd %5, %%mm3 \n\t" |
"punpckhwd %5, %%mm4 \n\t" |
"punpckhwd %5, %%mm5 \n\t" |
"psllq $8, %%mm1 \n\t" |
"psllq $16, %%mm2 \n\t" |
"por %%mm1, %%mm0 \n\t" |
"por %%mm2, %%mm0 \n\t" |
"psllq $8, %%mm4 \n\t" |
"psllq $16, %%mm5 \n\t" |
"por %%mm4, %%mm3 \n\t" |
"por %%mm5, %%mm3 \n\t" |
"movq %%mm0, %%mm6 \n\t" |
"movq %%mm3, %%mm7 \n\t" |
"movq 8(%1), %%mm0 \n\t" |
"movq 8(%1), %%mm1 \n\t" |
"movq 8(%1), %%mm2 \n\t" |
"pand %2, %%mm0 \n\t" |
"pand %3, %%mm1 \n\t" |
"pand %4, %%mm2 \n\t" |
"psllq $5, %%mm0 \n\t" |
"psrlq $1, %%mm2 \n\t" |
"pmulhw "MANGLE(mul15_mid)", %%mm0 \n\t" |
"pmulhw "MANGLE(mul16_mid)", %%mm1 \n\t" |
"pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t" |
"movq %%mm0, %%mm3 \n\t" |
"movq %%mm1, %%mm4 \n\t" |
"movq %%mm2, %%mm5 \n\t" |
"punpcklwd %5, %%mm0 \n\t" |
"punpcklwd %5, %%mm1 \n\t" |
"punpcklwd %5, %%mm2 \n\t" |
"punpckhwd %5, %%mm3 \n\t" |
"punpckhwd %5, %%mm4 \n\t" |
"punpckhwd %5, %%mm5 \n\t" |
"psllq $8, %%mm1 \n\t" |
"psllq $16, %%mm2 \n\t" |
"por %%mm1, %%mm0 \n\t" |
"por %%mm2, %%mm0 \n\t" |
"psllq $8, %%mm4 \n\t" |
"psllq $16, %%mm5 \n\t" |
"por %%mm4, %%mm3 \n\t" |
"por %%mm5, %%mm3 \n\t" |
:"=m"(*d) |
:"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null) |
NAMED_CONSTRAINTS_ADD(mul15_mid,mul16_mid,mul15_hi) |
:"memory"); |
/* borrowed 32 to 24 */ |
__asm__ volatile( |
"movq %%mm0, %%mm4 \n\t" |
"movq %%mm3, %%mm5 \n\t" |
"movq %%mm6, %%mm0 \n\t" |
"movq %%mm7, %%mm1 \n\t" |
"movq %%mm4, %%mm6 \n\t" |
"movq %%mm5, %%mm7 \n\t" |
"movq %%mm0, %%mm2 \n\t" |
"movq %%mm1, %%mm3 \n\t" |
STORE_BGR24_MMX |
:: "r"(d), "m"(*s) |
NAMED_CONSTRAINTS_ADD(mask24l,mask24h) |
:"memory"); |
d += 24; |
s += 8; |
} |
__asm__ volatile(SFENCE:::"memory"); |
__asm__ volatile(EMMS:::"memory"); |
while (s < end) { |
register uint16_t bgr; |
bgr = *s++; |
*d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); |
*d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9); |
*d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); |
} |
} |
/* |
* mm0 = 00 B3 00 B2 00 B1 00 B0 |
* mm1 = 00 G3 00 G2 00 G1 00 G0 |
* mm2 = 00 R3 00 R2 00 R1 00 R0 |
* mm6 = FF FF FF FF FF FF FF FF |
* mm7 = 00 00 00 00 00 00 00 00 |
*/ |
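/* PACK_RGB32 interleaves these into four 32-bit pixels stored as |
 * B, G, R, 0xFF in memory (alpha forced to 0xFF via mm6) and streams |
 * them out with MOVNTQ. */ |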
#define PACK_RGB32 \ |
"packuswb %%mm7, %%mm0 \n\t" /* 00 00 00 00 B3 B2 B1 B0 */ \ |
"packuswb %%mm7, %%mm1 \n\t" /* 00 00 00 00 G3 G2 G1 G0 */ \ |
"packuswb %%mm7, %%mm2 \n\t" /* 00 00 00 00 R3 R2 R1 R0 */ \ |
"punpcklbw %%mm1, %%mm0 \n\t" /* G3 B3 G2 B2 G1 B1 G0 B0 */ \ |
"punpcklbw %%mm6, %%mm2 \n\t" /* FF R3 FF R2 FF R1 FF R0 */ \ |
"movq %%mm0, %%mm3 \n\t" \ |
"punpcklwd %%mm2, %%mm0 \n\t" /* FF R1 G1 B1 FF R0 G0 B0 */ \ |
"punpckhwd %%mm2, %%mm3 \n\t" /* FF R3 G3 B3 FF R2 G2 B2 */ \ |
MOVNTQ" %%mm0, (%0) \n\t" \ |
MOVNTQ" %%mm3, 8(%0) \n\t" \ |
static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
const uint16_t *end; |
const uint16_t *mm_end; |
uint8_t *d = dst; |
const uint16_t *s = (const uint16_t *)src; |
end = s + src_size/2; |
__asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); |
__asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory"); |
__asm__ volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory"); |
mm_end = end - 3; |
while (s < mm_end) { |
__asm__ volatile( |
PREFETCH" 32(%1) \n\t" |
"movq (%1), %%mm0 \n\t" |
"movq (%1), %%mm1 \n\t" |
"movq (%1), %%mm2 \n\t" |
"pand %2, %%mm0 \n\t" |
"pand %3, %%mm1 \n\t" |
"pand %4, %%mm2 \n\t" |
"psllq $5, %%mm0 \n\t" |
"pmulhw %5, %%mm0 \n\t" |
"pmulhw %5, %%mm1 \n\t" |
"pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t" |
PACK_RGB32 |
::"r"(d),"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r) ,"m"(mul15_mid) |
NAMED_CONSTRAINTS_ADD(mul15_hi) |
:"memory"); |
d += 16; |
s += 4; |
} |
__asm__ volatile(SFENCE:::"memory"); |
__asm__ volatile(EMMS:::"memory"); |
while (s < end) { |
register uint16_t bgr; |
bgr = *s++; |
*d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); |
*d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7); |
*d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); |
*d++ = 255; |
} |
} |
static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
const uint16_t *end; |
const uint16_t *mm_end; |
uint8_t *d = dst; |
const uint16_t *s = (const uint16_t*)src; |
end = s + src_size/2; |
__asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); |
__asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory"); |
__asm__ volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory"); |
mm_end = end - 3; |
while (s < mm_end) { |
__asm__ volatile( |
PREFETCH" 32(%1) \n\t" |
"movq (%1), %%mm0 \n\t" |
"movq (%1), %%mm1 \n\t" |
"movq (%1), %%mm2 \n\t" |
"pand %2, %%mm0 \n\t" |
"pand %3, %%mm1 \n\t" |
"pand %4, %%mm2 \n\t" |
"psllq $5, %%mm0 \n\t" |
"psrlq $1, %%mm2 \n\t" |
"pmulhw %5, %%mm0 \n\t" |
"pmulhw "MANGLE(mul16_mid)", %%mm1 \n\t" |
"pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t" |
PACK_RGB32 |
::"r"(d),"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mul15_mid) |
NAMED_CONSTRAINTS_ADD(mul16_mid,mul15_hi) |
:"memory"); |
d += 16; |
s += 4; |
} |
__asm__ volatile(SFENCE:::"memory"); |
__asm__ volatile(EMMS:::"memory"); |
while (s < end) { |
register uint16_t bgr; |
bgr = *s++; |
*d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); |
*d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9); |
*d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); |
*d++ = 255; |
} |
} |
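/* Per-dword byte shuffle 0123 -> 2103: swaps the bytes at offsets 0 and |
 * 2 of every 32-bit pixel (i.e. exchanges R and B) while bytes 1 and 3 |
 * (G and A) stay in place. */ |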
static inline void RENAME(shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
x86_reg idx = 15 - src_size; |
const uint8_t *s = src-idx; |
uint8_t *d = dst-idx; |
__asm__ volatile( |
"test %0, %0 \n\t" |
"jns 2f \n\t" |
PREFETCH" (%1, %0) \n\t" |
"movq %3, %%mm7 \n\t" |
"pxor %4, %%mm7 \n\t" |
"movq %%mm7, %%mm6 \n\t" |
"pxor %5, %%mm7 \n\t" |
".p2align 4 \n\t" |
"1: \n\t" |
PREFETCH" 32(%1, %0) \n\t" |
"movq (%1, %0), %%mm0 \n\t" |
"movq 8(%1, %0), %%mm1 \n\t" |
# if COMPILE_TEMPLATE_MMXEXT |
"pshufw $177, %%mm0, %%mm3 \n\t" |
"pshufw $177, %%mm1, %%mm5 \n\t" |
"pand %%mm7, %%mm0 \n\t" |
"pand %%mm6, %%mm3 \n\t" |
"pand %%mm7, %%mm1 \n\t" |
"pand %%mm6, %%mm5 \n\t" |
"por %%mm3, %%mm0 \n\t" |
"por %%mm5, %%mm1 \n\t" |
# else |
"movq %%mm0, %%mm2 \n\t" |
"movq %%mm1, %%mm4 \n\t" |
"pand %%mm7, %%mm0 \n\t" |
"pand %%mm6, %%mm2 \n\t" |
"pand %%mm7, %%mm1 \n\t" |
"pand %%mm6, %%mm4 \n\t" |
"movq %%mm2, %%mm3 \n\t" |
"movq %%mm4, %%mm5 \n\t" |
"pslld $16, %%mm2 \n\t" |
"psrld $16, %%mm3 \n\t" |
"pslld $16, %%mm4 \n\t" |
"psrld $16, %%mm5 \n\t" |
"por %%mm2, %%mm0 \n\t" |
"por %%mm4, %%mm1 \n\t" |
"por %%mm3, %%mm0 \n\t" |
"por %%mm5, %%mm1 \n\t" |
# endif |
MOVNTQ" %%mm0, (%2, %0) \n\t" |
MOVNTQ" %%mm1, 8(%2, %0) \n\t" |
"add $16, %0 \n\t" |
"js 1b \n\t" |
SFENCE" \n\t" |
EMMS" \n\t" |
"2: \n\t" |
: "+&r"(idx) |
: "r" (s), "r" (d), "m" (mask32b), "m" (mask32r), "m" (mmx_one) |
: "memory"); |
for (; idx<15; idx+=4) { |
register unsigned v = *(const uint32_t *)&s[idx], g = v & 0xff00ff00; |
v &= 0xff00ff; |
*(uint32_t *)&d[idx] = (v>>16) + g + (v<<16); |
} |
} |
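/* Swap the R and B channels of packed 24-bit pixels (BGR24 <-> RGB24). |
 * The masks pick one channel from each of three overlapping qword |
 * loads, so every pass converts 8 pixels (24 bytes). */ |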
static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int src_size) |
{ |
unsigned i; |
x86_reg mmx_size= 23 - src_size; |
__asm__ volatile ( |
"test %%"REG_a", %%"REG_a" \n\t" |
"jns 2f \n\t" |
"movq "MANGLE(mask24r)", %%mm5 \n\t" |
"movq "MANGLE(mask24g)", %%mm6 \n\t" |
"movq "MANGLE(mask24b)", %%mm7 \n\t" |
".p2align 4 \n\t" |
"1: \n\t" |
PREFETCH" 32(%1, %%"REG_a") \n\t" |
"movq (%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG |
"movq (%1, %%"REG_a"), %%mm1 \n\t" // BGR BGR BG |
"movq 2(%1, %%"REG_a"), %%mm2 \n\t" // R BGR BGR B |
"psllq $16, %%mm0 \n\t" // 00 BGR BGR |
"pand %%mm5, %%mm0 \n\t" |
"pand %%mm6, %%mm1 \n\t" |
"pand %%mm7, %%mm2 \n\t" |
"por %%mm0, %%mm1 \n\t" |
"por %%mm2, %%mm1 \n\t" |
"movq 6(%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG |
MOVNTQ" %%mm1, (%2, %%"REG_a") \n\t" // RGB RGB RG |
"movq 8(%1, %%"REG_a"), %%mm1 \n\t" // R BGR BGR B |
"movq 10(%1, %%"REG_a"), %%mm2 \n\t" // GR BGR BGR |
"pand %%mm7, %%mm0 \n\t" |
"pand %%mm5, %%mm1 \n\t" |
"pand %%mm6, %%mm2 \n\t" |
"por %%mm0, %%mm1 \n\t" |
"por %%mm2, %%mm1 \n\t" |
"movq 14(%1, %%"REG_a"), %%mm0 \n\t" // R BGR BGR B |
MOVNTQ" %%mm1, 8(%2, %%"REG_a") \n\t" // B RGB RGB R |
"movq 16(%1, %%"REG_a"), %%mm1 \n\t" // GR BGR BGR |
"movq 18(%1, %%"REG_a"), %%mm2 \n\t" // BGR BGR BG |
"pand %%mm6, %%mm0 \n\t" |
"pand %%mm7, %%mm1 \n\t" |
"pand %%mm5, %%mm2 \n\t" |
"por %%mm0, %%mm1 \n\t" |
"por %%mm2, %%mm1 \n\t" |
MOVNTQ" %%mm1, 16(%2, %%"REG_a") \n\t" |
"add $24, %%"REG_a" \n\t" |
" js 1b \n\t" |
"2: \n\t" |
: "+a" (mmx_size) |
: "r" (src-mmx_size), "r"(dst-mmx_size) |
NAMED_CONSTRAINTS_ADD(mask24r,mask24g,mask24b) |
); |
__asm__ volatile(SFENCE:::"memory"); |
__asm__ volatile(EMMS:::"memory"); |
if (mmx_size==23) return; // finished: src_size was a multiple of 8 pixels (24 bytes) |
src+= src_size; |
dst+= src_size; |
src_size= 23-mmx_size; |
src-= src_size; |
dst-= src_size; |
for (i=0; i<src_size; i+=3) { |
register uint8_t x; |
x = src[i + 2]; |
dst[i + 1] = src[i + 1]; |
dst[i + 2] = src[i + 0]; |
dst[i + 0] = x; |
} |
} |
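/* Interleave planar Y/U/V into packed YUY2. vertLumPerChroma is the |
 * number of luma lines per chroma line (2 for 4:2:0 input, 1 for 4:2:2) |
 * and controls how often the chroma pointers advance. */ |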
static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, |
int width, int height, |
int lumStride, int chromStride, int dstStride, int vertLumPerChroma) |
{ |
int y; |
const x86_reg chromWidth= width>>1; |
for (y=0; y<height; y++) { |
//FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway) |
__asm__ volatile( |
"xor %%"REG_a", %%"REG_a" \n\t" |
".p2align 4 \n\t" |
"1: \n\t" |
PREFETCH" 32(%1, %%"REG_a", 2) \n\t" |
PREFETCH" 32(%2, %%"REG_a") \n\t" |
PREFETCH" 32(%3, %%"REG_a") \n\t" |
"movq (%2, %%"REG_a"), %%mm0 \n\t" // U(0) |
"movq %%mm0, %%mm2 \n\t" // U(0) |
"movq (%3, %%"REG_a"), %%mm1 \n\t" // V(0) |
"punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0) |
"punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8) |
"movq (%1, %%"REG_a",2), %%mm3 \n\t" // Y(0) |
"movq 8(%1, %%"REG_a",2), %%mm5 \n\t" // Y(8) |
"movq %%mm3, %%mm4 \n\t" // Y(0) |
"movq %%mm5, %%mm6 \n\t" // Y(8) |
"punpcklbw %%mm0, %%mm3 \n\t" // YUYV YUYV(0) |
"punpckhbw %%mm0, %%mm4 \n\t" // YUYV YUYV(4) |
"punpcklbw %%mm2, %%mm5 \n\t" // YUYV YUYV(8) |
"punpckhbw %%mm2, %%mm6 \n\t" // YUYV YUYV(12) |
MOVNTQ" %%mm3, (%0, %%"REG_a", 4) \n\t" |
MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4) \n\t" |
MOVNTQ" %%mm5, 16(%0, %%"REG_a", 4) \n\t" |
MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4) \n\t" |
"add $8, %%"REG_a" \n\t" |
"cmp %4, %%"REG_a" \n\t" |
" jb 1b \n\t" |
::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth) |
: "%"REG_a |
); |
if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) { |
usrc += chromStride; |
vsrc += chromStride; |
} |
ysrc += lumStride; |
dst += dstStride; |
} |
__asm__(EMMS" \n\t" |
SFENCE" \n\t" |
:::"memory"); |
} |
/** |
* Height should be a multiple of 2 and width should be a multiple of 16. |
* (If this is a problem for anyone then tell me, and I will fix it.) |
*/ |
static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, |
int width, int height, |
int lumStride, int chromStride, int dstStride) |
{ |
//FIXME interpolate chroma |
RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2); |
} |
static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, |
int width, int height, |
int lumStride, int chromStride, int dstStride, int vertLumPerChroma) |
{ |
int y; |
const x86_reg chromWidth= width>>1; |
for (y=0; y<height; y++) { |
//FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway) |
__asm__ volatile( |
"xor %%"REG_a", %%"REG_a" \n\t" |
".p2align 4 \n\t" |
"1: \n\t" |
PREFETCH" 32(%1, %%"REG_a", 2) \n\t" |
PREFETCH" 32(%2, %%"REG_a") \n\t" |
PREFETCH" 32(%3, %%"REG_a") \n\t" |
"movq (%2, %%"REG_a"), %%mm0 \n\t" // U(0) |
"movq %%mm0, %%mm2 \n\t" // U(0) |
"movq (%3, %%"REG_a"), %%mm1 \n\t" // V(0) |
"punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0) |
"punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8) |
"movq (%1, %%"REG_a",2), %%mm3 \n\t" // Y(0) |
"movq 8(%1, %%"REG_a",2), %%mm5 \n\t" // Y(8) |
"movq %%mm0, %%mm4 \n\t" // Y(0) |
"movq %%mm2, %%mm6 \n\t" // Y(8) |
"punpcklbw %%mm3, %%mm0 \n\t" // YUYV YUYV(0) |
"punpckhbw %%mm3, %%mm4 \n\t" // YUYV YUYV(4) |
"punpcklbw %%mm5, %%mm2 \n\t" // YUYV YUYV(8) |
"punpckhbw %%mm5, %%mm6 \n\t" // YUYV YUYV(12) |
MOVNTQ" %%mm0, (%0, %%"REG_a", 4) \n\t" |
MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4) \n\t" |
MOVNTQ" %%mm2, 16(%0, %%"REG_a", 4) \n\t" |
MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4) \n\t" |
"add $8, %%"REG_a" \n\t" |
"cmp %4, %%"REG_a" \n\t" |
" jb 1b \n\t" |
::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth) |
: "%"REG_a |
); |
if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) { |
usrc += chromStride; |
vsrc += chromStride; |
} |
ysrc += lumStride; |
dst += dstStride; |
} |
__asm__(EMMS" \n\t" |
SFENCE" \n\t" |
:::"memory"); |
} |
/** |
* Height should be a multiple of 2 and width should be a multiple of 16 |
* (If this is a problem for anyone then tell me, and I will fix it.) |
*/ |
static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, |
int width, int height, |
int lumStride, int chromStride, int dstStride) |
{ |
//FIXME interpolate chroma |
RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2); |
} |
/** |
* Width should be a multiple of 16. |
*/ |
static inline void RENAME(yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, |
int width, int height, |
int lumStride, int chromStride, int dstStride) |
{ |
RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1); |
} |
/** |
* Width should be a multiple of 16. |
*/ |
static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, |
int width, int height, |
int lumStride, int chromStride, int dstStride) |
{ |
RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1); |
} |
/** |
* Height should be a multiple of 2 and width should be a multiple of 16. |
* (If this is a problem for anyone then tell me, and I will fix it.) |
*/ |
static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, |
int width, int height, |
int lumStride, int chromStride, int srcStride) |
{ |
int y; |
const x86_reg chromWidth= width>>1; |
for (y=0; y<height; y+=2) { |
__asm__ volatile( |
"xor %%"REG_a", %%"REG_a" \n\t" |
"pcmpeqw %%mm7, %%mm7 \n\t" |
"psrlw $8, %%mm7 \n\t" // FF,00,FF,00... |
".p2align 4 \n\t" |
"1: \n\t" |
PREFETCH" 64(%0, %%"REG_a", 4) \n\t" |
"movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) |
"movq 8(%0, %%"REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4) |
"movq %%mm0, %%mm2 \n\t" // YUYV YUYV(0) |
"movq %%mm1, %%mm3 \n\t" // YUYV YUYV(4) |
"psrlw $8, %%mm0 \n\t" // U0V0 U0V0(0) |
"psrlw $8, %%mm1 \n\t" // U0V0 U0V0(4) |
"pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(0) |
"pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(4) |
"packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0) |
"packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0) |
MOVNTQ" %%mm2, (%1, %%"REG_a", 2) \n\t" |
"movq 16(%0, %%"REG_a", 4), %%mm1 \n\t" // YUYV YUYV(8) |
"movq 24(%0, %%"REG_a", 4), %%mm2 \n\t" // YUYV YUYV(12) |
"movq %%mm1, %%mm3 \n\t" // YUYV YUYV(8) |
"movq %%mm2, %%mm4 \n\t" // YUYV YUYV(12) |
"psrlw $8, %%mm1 \n\t" // U0V0 U0V0(8) |
"psrlw $8, %%mm2 \n\t" // U0V0 U0V0(12) |
"pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(8) |
"pand %%mm7, %%mm4 \n\t" // Y0Y0 Y0Y0(12) |
"packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8) |
"packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8) |
MOVNTQ" %%mm3, 8(%1, %%"REG_a", 2) \n\t" |
"movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0) |
"movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8) |
"psrlw $8, %%mm0 \n\t" // V0V0 V0V0(0) |
"psrlw $8, %%mm1 \n\t" // V0V0 V0V0(8) |
"pand %%mm7, %%mm2 \n\t" // U0U0 U0U0(0) |
"pand %%mm7, %%mm3 \n\t" // U0U0 U0U0(8) |
"packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0) |
"packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0) |
MOVNTQ" %%mm0, (%3, %%"REG_a") \n\t" |
MOVNTQ" %%mm2, (%2, %%"REG_a") \n\t" |
"add $8, %%"REG_a" \n\t" |
"cmp %4, %%"REG_a" \n\t" |
" jb 1b \n\t" |
::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) |
: "memory", "%"REG_a |
); |
ydst += lumStride; |
src += srcStride; |
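        /* second source line: extract luma only; the chroma for this |
         * line pair was already taken from the first line above */ |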
__asm__ volatile( |
"xor %%"REG_a", %%"REG_a" \n\t" |
".p2align 4 \n\t" |
"1: \n\t" |
PREFETCH" 64(%0, %%"REG_a", 4) \n\t" |
"movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) |
"movq 8(%0, %%"REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4) |
"movq 16(%0, %%"REG_a", 4), %%mm2 \n\t" // YUYV YUYV(8) |
"movq 24(%0, %%"REG_a", 4), %%mm3 \n\t" // YUYV YUYV(12) |
"pand %%mm7, %%mm0 \n\t" // Y0Y0 Y0Y0(0) |
"pand %%mm7, %%mm1 \n\t" // Y0Y0 Y0Y0(4) |
"pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(8) |
"pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(12) |
"packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0) |
"packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8) |
MOVNTQ" %%mm0, (%1, %%"REG_a", 2) \n\t" |
MOVNTQ" %%mm2, 8(%1, %%"REG_a", 2) \n\t" |
"add $8, %%"REG_a" \n\t" |
"cmp %4, %%"REG_a" \n\t" |
" jb 1b \n\t" |
::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) |
: "memory", "%"REG_a |
); |
udst += chromStride; |
vdst += chromStride; |
ydst += lumStride; |
src += srcStride; |
} |
__asm__ volatile(EMMS" \n\t" |
SFENCE" \n\t" |
:::"memory"); |
} |
#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ |
#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW |
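/* Upscale one plane 2x in both directions. Border pixels are copied; |
 * interior output pixels blend the two nearest source pixels with 3:1 |
 * weights, as in the (3*src[x] + src[x+1]) >> 2 of the C fallback. */ |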
static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWidth, int srcHeight, int srcStride, int dstStride) |
{ |
int x,y; |
dst[0]= src[0]; |
// first line |
for (x=0; x<srcWidth-1; x++) { |
dst[2*x+1]= (3*src[x] + src[x+1])>>2; |
dst[2*x+2]= ( src[x] + 3*src[x+1])>>2; |
} |
dst[2*srcWidth-1]= src[srcWidth-1]; |
dst+= dstStride; |
for (y=1; y<srcHeight; y++) { |
const x86_reg mmxSize= srcWidth&~15; |
__asm__ volatile( |
"mov %4, %%"REG_a" \n\t" |
"movq "MANGLE(mmx_ff)", %%mm0 \n\t" |
"movq (%0, %%"REG_a"), %%mm4 \n\t" |
"movq %%mm4, %%mm2 \n\t" |
"psllq $8, %%mm4 \n\t" |
"pand %%mm0, %%mm2 \n\t" |
"por %%mm2, %%mm4 \n\t" |
"movq (%1, %%"REG_a"), %%mm5 \n\t" |
"movq %%mm5, %%mm3 \n\t" |
"psllq $8, %%mm5 \n\t" |
"pand %%mm0, %%mm3 \n\t" |
"por %%mm3, %%mm5 \n\t" |
"1: \n\t" |
"movq (%0, %%"REG_a"), %%mm0 \n\t" |
"movq (%1, %%"REG_a"), %%mm1 \n\t" |
"movq 1(%0, %%"REG_a"), %%mm2 \n\t" |
"movq 1(%1, %%"REG_a"), %%mm3 \n\t" |
PAVGB" %%mm0, %%mm5 \n\t" |
PAVGB" %%mm0, %%mm3 \n\t" |
PAVGB" %%mm0, %%mm5 \n\t" |
PAVGB" %%mm0, %%mm3 \n\t" |
PAVGB" %%mm1, %%mm4 \n\t" |
PAVGB" %%mm1, %%mm2 \n\t" |
PAVGB" %%mm1, %%mm4 \n\t" |
PAVGB" %%mm1, %%mm2 \n\t" |
"movq %%mm5, %%mm7 \n\t" |
"movq %%mm4, %%mm6 \n\t" |
"punpcklbw %%mm3, %%mm5 \n\t" |
"punpckhbw %%mm3, %%mm7 \n\t" |
"punpcklbw %%mm2, %%mm4 \n\t" |
"punpckhbw %%mm2, %%mm6 \n\t" |
MOVNTQ" %%mm5, (%2, %%"REG_a", 2) \n\t" |
MOVNTQ" %%mm7, 8(%2, %%"REG_a", 2) \n\t" |
MOVNTQ" %%mm4, (%3, %%"REG_a", 2) \n\t" |
MOVNTQ" %%mm6, 8(%3, %%"REG_a", 2) \n\t" |
"add $8, %%"REG_a" \n\t" |
"movq -1(%0, %%"REG_a"), %%mm4 \n\t" |
"movq -1(%1, %%"REG_a"), %%mm5 \n\t" |
" js 1b \n\t" |
:: "r" (src + mmxSize ), "r" (src + srcStride + mmxSize ), |
"r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2), |
"g" (-mmxSize) |
NAMED_CONSTRAINTS_ADD(mmx_ff) |
: "%"REG_a |
); |
for (x=mmxSize-1; x<srcWidth-1; x++) { |
dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2; |
dst[2*x+dstStride+2]= ( src[x+0] + 3*src[x+srcStride+1])>>2; |
dst[2*x+dstStride+1]= ( src[x+1] + 3*src[x+srcStride ])>>2; |
dst[2*x +2]= (3*src[x+1] + src[x+srcStride ])>>2; |
} |
dst[srcWidth*2 -1 ]= (3*src[srcWidth-1] + src[srcWidth-1 + srcStride])>>2; |
dst[srcWidth*2 -1 + dstStride]= ( src[srcWidth-1] + 3*src[srcWidth-1 + srcStride])>>2; |
dst+=dstStride*2; |
src+=srcStride; |
} |
// last line |
dst[0]= src[0]; |
for (x=0; x<srcWidth-1; x++) { |
dst[2*x+1]= (3*src[x] + src[x+1])>>2; |
dst[2*x+2]= ( src[x] + 3*src[x+1])>>2; |
} |
dst[2*srcWidth-1]= src[srcWidth-1]; |
__asm__ volatile(EMMS" \n\t" |
SFENCE" \n\t" |
:::"memory"); |
} |
#endif /* COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW */ |
#if !COMPILE_TEMPLATE_AMD3DNOW |
/** |
* Height should be a multiple of 2 and width should be a multiple of 16. |
* (If this is a problem for anyone then tell me, and I will fix it.) |
* Chrominance data is only taken from every second line, others are ignored. |
* FIXME: Write HQ version. |
*/ |
static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, |
int width, int height, |
int lumStride, int chromStride, int srcStride) |
{ |
int y; |
const x86_reg chromWidth= width>>1; |
for (y=0; y<height; y+=2) { |
__asm__ volatile( |
"xor %%"REG_a", %%"REG_a" \n\t" |
"pcmpeqw %%mm7, %%mm7 \n\t" |
"psrlw $8, %%mm7 \n\t" // FF,00,FF,00... |
".p2align 4 \n\t" |
"1: \n\t" |
PREFETCH" 64(%0, %%"REG_a", 4) \n\t" |
"movq (%0, %%"REG_a", 4), %%mm0 \n\t" // UYVY UYVY(0) |
"movq 8(%0, %%"REG_a", 4), %%mm1 \n\t" // UYVY UYVY(4) |
"movq %%mm0, %%mm2 \n\t" // UYVY UYVY(0) |
"movq %%mm1, %%mm3 \n\t" // UYVY UYVY(4) |
"pand %%mm7, %%mm0 \n\t" // U0V0 U0V0(0) |
"pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(4) |
"psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(0) |
"psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(4) |
"packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0) |
"packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0) |
MOVNTQ" %%mm2, (%1, %%"REG_a", 2) \n\t" |
"movq 16(%0, %%"REG_a", 4), %%mm1 \n\t" // UYVY UYVY(8) |
"movq 24(%0, %%"REG_a", 4), %%mm2 \n\t" // UYVY UYVY(12) |
"movq %%mm1, %%mm3 \n\t" // UYVY UYVY(8) |
"movq %%mm2, %%mm4 \n\t" // UYVY UYVY(12) |
"pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(8) |
"pand %%mm7, %%mm2 \n\t" // U0V0 U0V0(12) |
"psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(8) |
"psrlw $8, %%mm4 \n\t" // Y0Y0 Y0Y0(12) |
"packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8) |
"packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8) |
MOVNTQ" %%mm3, 8(%1, %%"REG_a", 2) \n\t" |
"movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0) |
"movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8) |
"psrlw $8, %%mm0 \n\t" // V0V0 V0V0(0) |
"psrlw $8, %%mm1 \n\t" // V0V0 V0V0(8) |
"pand %%mm7, %%mm2 \n\t" // U0U0 U0U0(0) |
"pand %%mm7, %%mm3 \n\t" // U0U0 U0U0(8) |
"packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0) |
"packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0) |
MOVNTQ" %%mm0, (%3, %%"REG_a") \n\t" |
MOVNTQ" %%mm2, (%2, %%"REG_a") \n\t" |
"add $8, %%"REG_a" \n\t" |
"cmp %4, %%"REG_a" \n\t" |
" jb 1b \n\t" |
::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) |
: "memory", "%"REG_a |
); |
ydst += lumStride; |
src += srcStride; |
__asm__ volatile( |
"xor %%"REG_a", %%"REG_a" \n\t" |
".p2align 4 \n\t" |
"1: \n\t" |
PREFETCH" 64(%0, %%"REG_a", 4) \n\t" |
"movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) |
"movq 8(%0, %%"REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4) |
"movq 16(%0, %%"REG_a", 4), %%mm2 \n\t" // YUYV YUYV(8) |
"movq 24(%0, %%"REG_a", 4), %%mm3 \n\t" // YUYV YUYV(12) |
"psrlw $8, %%mm0 \n\t" // Y0Y0 Y0Y0(0) |
"psrlw $8, %%mm1 \n\t" // Y0Y0 Y0Y0(4) |
"psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(8) |
"psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(12) |
"packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0) |
"packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8) |
MOVNTQ" %%mm0, (%1, %%"REG_a", 2) \n\t" |
MOVNTQ" %%mm2, 8(%1, %%"REG_a", 2) \n\t" |
"add $8, %%"REG_a" \n\t" |
"cmp %4, %%"REG_a" \n\t" |
" jb 1b \n\t" |
::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) |
: "memory", "%"REG_a |
); |
udst += chromStride; |
vdst += chromStride; |
ydst += lumStride; |
src += srcStride; |
} |
__asm__ volatile(EMMS" \n\t" |
SFENCE" \n\t" |
:::"memory"); |
} |
#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ |
/** |
* Height should be a multiple of 2 and width should be a multiple of 2. |
* (If this is a problem for anyone then tell me, and I will fix it.) |
* Chrominance data is only taken from every second line, |
* others are ignored in the C version. |
* FIXME: Write HQ version. |
*/ |
#if HAVE_7REGS |
static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, |
int width, int height, |
int lumStride, int chromStride, int srcStride, |
int32_t *rgb2yuv) |
{ |
#define BGR2Y_IDX "16*4+16*32" |
#define BGR2U_IDX "16*4+16*33" |
#define BGR2V_IDX "16*4+16*34" |
int y; |
const x86_reg chromWidth= width>>1; |
if (height > 2) { |
ff_rgb24toyv12_c(src, ydst, udst, vdst, width, 2, lumStride, chromStride, srcStride, rgb2yuv); |
src += 2*srcStride; |
ydst += 2*lumStride; |
udst += chromStride; |
vdst += chromStride; |
height -= 2; |
} |
for (y=0; y<height-2; y+=2) { |
int i; |
for (i=0; i<2; i++) { |
__asm__ volatile( |
"mov %2, %%"REG_a" \n\t" |
"movq "BGR2Y_IDX"(%3), %%mm6 \n\t" |
"movq "MANGLE(ff_w1111)", %%mm5 \n\t" |
"pxor %%mm7, %%mm7 \n\t" |
"lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" |
".p2align 4 \n\t" |
"1: \n\t" |
PREFETCH" 64(%0, %%"REG_d") \n\t" |
"movd (%0, %%"REG_d"), %%mm0 \n\t" |
"movd 3(%0, %%"REG_d"), %%mm1 \n\t" |
"punpcklbw %%mm7, %%mm0 \n\t" |
"punpcklbw %%mm7, %%mm1 \n\t" |
"movd 6(%0, %%"REG_d"), %%mm2 \n\t" |
"movd 9(%0, %%"REG_d"), %%mm3 \n\t" |
"punpcklbw %%mm7, %%mm2 \n\t" |
"punpcklbw %%mm7, %%mm3 \n\t" |
"pmaddwd %%mm6, %%mm0 \n\t" |
"pmaddwd %%mm6, %%mm1 \n\t" |
"pmaddwd %%mm6, %%mm2 \n\t" |
"pmaddwd %%mm6, %%mm3 \n\t" |
"psrad $8, %%mm0 \n\t" |
"psrad $8, %%mm1 \n\t" |
"psrad $8, %%mm2 \n\t" |
"psrad $8, %%mm3 \n\t" |
"packssdw %%mm1, %%mm0 \n\t" |
"packssdw %%mm3, %%mm2 \n\t" |
"pmaddwd %%mm5, %%mm0 \n\t" |
"pmaddwd %%mm5, %%mm2 \n\t" |
"packssdw %%mm2, %%mm0 \n\t" |
"psraw $7, %%mm0 \n\t" |
"movd 12(%0, %%"REG_d"), %%mm4 \n\t" |
"movd 15(%0, %%"REG_d"), %%mm1 \n\t" |
"punpcklbw %%mm7, %%mm4 \n\t" |
"punpcklbw %%mm7, %%mm1 \n\t" |
"movd 18(%0, %%"REG_d"), %%mm2 \n\t" |
"movd 21(%0, %%"REG_d"), %%mm3 \n\t" |
"punpcklbw %%mm7, %%mm2 \n\t" |
"punpcklbw %%mm7, %%mm3 \n\t" |
"pmaddwd %%mm6, %%mm4 \n\t" |
"pmaddwd %%mm6, %%mm1 \n\t" |
"pmaddwd %%mm6, %%mm2 \n\t" |
"pmaddwd %%mm6, %%mm3 \n\t" |
"psrad $8, %%mm4 \n\t" |
"psrad $8, %%mm1 \n\t" |
"psrad $8, %%mm2 \n\t" |
"psrad $8, %%mm3 \n\t" |
"packssdw %%mm1, %%mm4 \n\t" |
"packssdw %%mm3, %%mm2 \n\t" |
"pmaddwd %%mm5, %%mm4 \n\t" |
"pmaddwd %%mm5, %%mm2 \n\t" |
"add $24, %%"REG_d" \n\t" |
"packssdw %%mm2, %%mm4 \n\t" |
"psraw $7, %%mm4 \n\t" |
"packuswb %%mm4, %%mm0 \n\t" |
"paddusb "MANGLE(ff_bgr2YOffset)", %%mm0 \n\t" |
MOVNTQ" %%mm0, (%1, %%"REG_a") \n\t" |
"add $8, %%"REG_a" \n\t" |
" js 1b \n\t" |
: : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width), "r"(rgb2yuv) |
NAMED_CONSTRAINTS_ADD(ff_w1111,ff_bgr2YOffset) |
: "%"REG_a, "%"REG_d |
); |
ydst += lumStride; |
src += srcStride; |
} |
src -= srcStride*2; |
__asm__ volatile( |
"mov %4, %%"REG_a" \n\t" |
"movq "MANGLE(ff_w1111)", %%mm5 \n\t" |
"movq "BGR2U_IDX"(%5), %%mm6 \n\t" |
"pxor %%mm7, %%mm7 \n\t" |
"lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" |
"add %%"REG_d", %%"REG_d" \n\t" |
".p2align 4 \n\t" |
"1: \n\t" |
PREFETCH" 64(%0, %%"REG_d") \n\t" |
PREFETCH" 64(%1, %%"REG_d") \n\t" |
#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW |
"movq (%0, %%"REG_d"), %%mm0 \n\t" |
"movq (%1, %%"REG_d"), %%mm1 \n\t" |
"movq 6(%0, %%"REG_d"), %%mm2 \n\t" |
"movq 6(%1, %%"REG_d"), %%mm3 \n\t" |
PAVGB" %%mm1, %%mm0 \n\t" |
PAVGB" %%mm3, %%mm2 \n\t" |
"movq %%mm0, %%mm1 \n\t" |
"movq %%mm2, %%mm3 \n\t" |
"psrlq $24, %%mm0 \n\t" |
"psrlq $24, %%mm2 \n\t" |
PAVGB" %%mm1, %%mm0 \n\t" |
PAVGB" %%mm3, %%mm2 \n\t" |
"punpcklbw %%mm7, %%mm0 \n\t" |
"punpcklbw %%mm7, %%mm2 \n\t" |
#else |
"movd (%0, %%"REG_d"), %%mm0 \n\t" |
"movd (%1, %%"REG_d"), %%mm1 \n\t" |
"movd 3(%0, %%"REG_d"), %%mm2 \n\t" |
"movd 3(%1, %%"REG_d"), %%mm3 \n\t" |
"punpcklbw %%mm7, %%mm0 \n\t" |
"punpcklbw %%mm7, %%mm1 \n\t" |
"punpcklbw %%mm7, %%mm2 \n\t" |
"punpcklbw %%mm7, %%mm3 \n\t" |
"paddw %%mm1, %%mm0 \n\t" |
"paddw %%mm3, %%mm2 \n\t" |
"paddw %%mm2, %%mm0 \n\t" |
"movd 6(%0, %%"REG_d"), %%mm4 \n\t" |
"movd 6(%1, %%"REG_d"), %%mm1 \n\t" |
"movd 9(%0, %%"REG_d"), %%mm2 \n\t" |
"movd 9(%1, %%"REG_d"), %%mm3 \n\t" |
"punpcklbw %%mm7, %%mm4 \n\t" |
"punpcklbw %%mm7, %%mm1 \n\t" |
"punpcklbw %%mm7, %%mm2 \n\t" |
"punpcklbw %%mm7, %%mm3 \n\t" |
"paddw %%mm1, %%mm4 \n\t" |
"paddw %%mm3, %%mm2 \n\t" |
"paddw %%mm4, %%mm2 \n\t" |
"psrlw $2, %%mm0 \n\t" |
"psrlw $2, %%mm2 \n\t" |
#endif |
"movq "BGR2V_IDX"(%5), %%mm1 \n\t" |
"movq "BGR2V_IDX"(%5), %%mm3 \n\t" |
"pmaddwd %%mm0, %%mm1 \n\t" |
"pmaddwd %%mm2, %%mm3 \n\t" |
"pmaddwd %%mm6, %%mm0 \n\t" |
"pmaddwd %%mm6, %%mm2 \n\t" |
"psrad $8, %%mm0 \n\t" |
"psrad $8, %%mm1 \n\t" |
"psrad $8, %%mm2 \n\t" |
"psrad $8, %%mm3 \n\t" |
"packssdw %%mm2, %%mm0 \n\t" |
"packssdw %%mm3, %%mm1 \n\t" |
"pmaddwd %%mm5, %%mm0 \n\t" |
"pmaddwd %%mm5, %%mm1 \n\t" |
"packssdw %%mm1, %%mm0 \n\t" // V1 V0 U1 U0 |
"psraw $7, %%mm0 \n\t" |
#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW |
"movq 12(%0, %%"REG_d"), %%mm4 \n\t" |
"movq 12(%1, %%"REG_d"), %%mm1 \n\t" |
"movq 18(%0, %%"REG_d"), %%mm2 \n\t" |
"movq 18(%1, %%"REG_d"), %%mm3 \n\t" |
PAVGB" %%mm1, %%mm4 \n\t" |
PAVGB" %%mm3, %%mm2 \n\t" |
"movq %%mm4, %%mm1 \n\t" |
"movq %%mm2, %%mm3 \n\t" |
"psrlq $24, %%mm4 \n\t" |
"psrlq $24, %%mm2 \n\t" |
PAVGB" %%mm1, %%mm4 \n\t" |
PAVGB" %%mm3, %%mm2 \n\t" |
"punpcklbw %%mm7, %%mm4 \n\t" |
"punpcklbw %%mm7, %%mm2 \n\t" |
#else |
"movd 12(%0, %%"REG_d"), %%mm4 \n\t" |
"movd 12(%1, %%"REG_d"), %%mm1 \n\t" |
"movd 15(%0, %%"REG_d"), %%mm2 \n\t" |
"movd 15(%1, %%"REG_d"), %%mm3 \n\t" |
"punpcklbw %%mm7, %%mm4 \n\t" |
"punpcklbw %%mm7, %%mm1 \n\t" |
"punpcklbw %%mm7, %%mm2 \n\t" |
"punpcklbw %%mm7, %%mm3 \n\t" |
"paddw %%mm1, %%mm4 \n\t" |
"paddw %%mm3, %%mm2 \n\t" |
"paddw %%mm2, %%mm4 \n\t" |
"movd 18(%0, %%"REG_d"), %%mm5 \n\t" |
"movd 18(%1, %%"REG_d"), %%mm1 \n\t" |
"movd 21(%0, %%"REG_d"), %%mm2 \n\t" |
"movd 21(%1, %%"REG_d"), %%mm3 \n\t" |
"punpcklbw %%mm7, %%mm5 \n\t" |
"punpcklbw %%mm7, %%mm1 \n\t" |
"punpcklbw %%mm7, %%mm2 \n\t" |
"punpcklbw %%mm7, %%mm3 \n\t" |
"paddw %%mm1, %%mm5 \n\t" |
"paddw %%mm3, %%mm2 \n\t" |
"paddw %%mm5, %%mm2 \n\t" |
"movq "MANGLE(ff_w1111)", %%mm5 \n\t" |
"psrlw $2, %%mm4 \n\t" |
"psrlw $2, %%mm2 \n\t" |
#endif |
"movq "BGR2V_IDX"(%5), %%mm1 \n\t" |
"movq "BGR2V_IDX"(%5), %%mm3 \n\t" |
"pmaddwd %%mm4, %%mm1 \n\t" |
"pmaddwd %%mm2, %%mm3 \n\t" |
"pmaddwd %%mm6, %%mm4 \n\t" |
"pmaddwd %%mm6, %%mm2 \n\t" |
"psrad $8, %%mm4 \n\t" |
"psrad $8, %%mm1 \n\t" |
"psrad $8, %%mm2 \n\t" |
"psrad $8, %%mm3 \n\t" |
"packssdw %%mm2, %%mm4 \n\t" |
"packssdw %%mm3, %%mm1 \n\t" |
"pmaddwd %%mm5, %%mm4 \n\t" |
"pmaddwd %%mm5, %%mm1 \n\t" |
"add $24, %%"REG_d" \n\t" |
"packssdw %%mm1, %%mm4 \n\t" // V3 V2 U3 U2 |
"psraw $7, %%mm4 \n\t" |
"movq %%mm0, %%mm1 \n\t" |
"punpckldq %%mm4, %%mm0 \n\t" |
"punpckhdq %%mm4, %%mm1 \n\t" |
"packsswb %%mm1, %%mm0 \n\t" |
"paddb "MANGLE(ff_bgr2UVOffset)", %%mm0 \n\t" |
"movd %%mm0, (%2, %%"REG_a") \n\t" |
"punpckhdq %%mm0, %%mm0 \n\t" |
"movd %%mm0, (%3, %%"REG_a") \n\t" |
"add $4, %%"REG_a" \n\t" |
" js 1b \n\t" |
: : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth), "r"(rgb2yuv) |
NAMED_CONSTRAINTS_ADD(ff_w1111,ff_bgr2UVOffset) |
: "%"REG_a, "%"REG_d |
); |
udst += chromStride; |
vdst += chromStride; |
src += srcStride*2; |
} |
__asm__ volatile(EMMS" \n\t" |
SFENCE" \n\t" |
:::"memory"); |
ff_rgb24toyv12_c(src, ydst, udst, vdst, width, height-y, lumStride, chromStride, srcStride, rgb2yuv); |
} |
#endif /* HAVE_7REGS */ |
#endif /* !COMPILE_TEMPLATE_SSE2 */ |
#if !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX |
static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dest, |
int width, int height, int src1Stride, |
int src2Stride, int dstStride) |
{ |
int h; |
for (h=0; h < height; h++) { |
int w; |
if (width >= 16) { |
#if COMPILE_TEMPLATE_SSE2 |
if (!((((intptr_t)src1) | ((intptr_t)src2) | ((intptr_t)dest))&15)) { |
__asm__( |
"xor %%"REG_a", %%"REG_a" \n\t" |
"1: \n\t" |
PREFETCH" 64(%1, %%"REG_a") \n\t" |
PREFETCH" 64(%2, %%"REG_a") \n\t" |
"movdqa (%1, %%"REG_a"), %%xmm0 \n\t" |
"movdqa (%1, %%"REG_a"), %%xmm1 \n\t" |
"movdqa (%2, %%"REG_a"), %%xmm2 \n\t" |
"punpcklbw %%xmm2, %%xmm0 \n\t" |
"punpckhbw %%xmm2, %%xmm1 \n\t" |
"movntdq %%xmm0, (%0, %%"REG_a", 2) \n\t" |
"movntdq %%xmm1, 16(%0, %%"REG_a", 2) \n\t" |
"add $16, %%"REG_a" \n\t" |
"cmp %3, %%"REG_a" \n\t" |
" jb 1b \n\t" |
::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15) |
: "memory", XMM_CLOBBERS("xmm0", "xmm1", "xmm2",) "%"REG_a |
); |
} else |
#endif |
__asm__( |
"xor %%"REG_a", %%"REG_a" \n\t" |
"1: \n\t" |
PREFETCH" 64(%1, %%"REG_a") \n\t" |
PREFETCH" 64(%2, %%"REG_a") \n\t" |
"movq (%1, %%"REG_a"), %%mm0 \n\t" |
"movq 8(%1, %%"REG_a"), %%mm2 \n\t" |
"movq %%mm0, %%mm1 \n\t" |
"movq %%mm2, %%mm3 \n\t" |
"movq (%2, %%"REG_a"), %%mm4 \n\t" |
"movq 8(%2, %%"REG_a"), %%mm5 \n\t" |
"punpcklbw %%mm4, %%mm0 \n\t" |
"punpckhbw %%mm4, %%mm1 \n\t" |
"punpcklbw %%mm5, %%mm2 \n\t" |
"punpckhbw %%mm5, %%mm3 \n\t" |
MOVNTQ" %%mm0, (%0, %%"REG_a", 2) \n\t" |
MOVNTQ" %%mm1, 8(%0, %%"REG_a", 2) \n\t" |
MOVNTQ" %%mm2, 16(%0, %%"REG_a", 2) \n\t" |
MOVNTQ" %%mm3, 24(%0, %%"REG_a", 2) \n\t" |
"add $16, %%"REG_a" \n\t" |
"cmp %3, %%"REG_a" \n\t" |
" jb 1b \n\t" |
::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15) |
: "memory", "%"REG_a |
); |
} |
for (w= (width&(~15)); w < width; w++) { |
dest[2*w+0] = src1[w]; |
dest[2*w+1] = src2[w]; |
} |
dest += dstStride; |
src1 += src1Stride; |
src2 += src2Stride; |
} |
__asm__( |
EMMS" \n\t" |
SFENCE" \n\t" |
::: "memory" |
); |
} |
#endif /* !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX */ |
#if !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL |
#if !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_YASM |
void RENAME(ff_nv12ToUV)(uint8_t *dstU, uint8_t *dstV, |
const uint8_t *unused, |
const uint8_t *src1, |
const uint8_t *src2, |
int w, |
uint32_t *unused2); |
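/* |
 * NV12's chroma plane is exactly a byte-interleaved U/V stream, so the |
 * external ff_nv12ToUV assembly declared above doubles as a generic |
 * byte de-interleaver for this helper. |
 */ |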
static void RENAME(deinterleaveBytes)(const uint8_t *src, uint8_t *dst1, uint8_t *dst2, |
int width, int height, int srcStride, |
int dst1Stride, int dst2Stride) |
{ |
int h; |
for (h = 0; h < height; h++) { |
RENAME(ff_nv12ToUV)(dst1, dst2, NULL, src, NULL, width, NULL); |
src += srcStride; |
dst1 += dst1Stride; |
dst2 += dst2Stride; |
} |
__asm__( |
#if !COMPILE_TEMPLATE_SSE2 |
EMMS" \n\t" |
#endif |
SFENCE" \n\t" |
::: "memory" |
); |
} |
#endif /* !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_YASM */ |
#endif /* !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL */ |
#if !COMPILE_TEMPLATE_SSE2 |
#if !COMPILE_TEMPLATE_AMD3DNOW |
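/* |
 * Upsamples both chroma planes to twice their width and height by plain |
 * pixel doubling: punpcklbw/punpckhbw of a register with itself duplicates |
 * each byte horizontally, and the (y>>1) source indexing below writes each |
 * input line to two output lines. |
 */ |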
static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, |
uint8_t *dst1, uint8_t *dst2, |
int width, int height, |
int srcStride1, int srcStride2, |
int dstStride1, int dstStride2) |
{ |
x86_reg x, y; |
int w,h; |
w=width/2; h=height/2; |
__asm__ volatile( |
PREFETCH" %0 \n\t" |
PREFETCH" %1 \n\t" |
::"m"(*(src1+srcStride1)),"m"(*(src2+srcStride2)):"memory"); |
for (y=0;y<h;y++) { |
const uint8_t* s1=src1+srcStride1*(y>>1); |
uint8_t* d=dst1+dstStride1*y; |
x=0; |
for (;x<w-31;x+=32) { |
__asm__ volatile( |
PREFETCH" 32(%1,%2) \n\t" |
"movq (%1,%2), %%mm0 \n\t" |
"movq 8(%1,%2), %%mm2 \n\t" |
"movq 16(%1,%2), %%mm4 \n\t" |
"movq 24(%1,%2), %%mm6 \n\t" |
"movq %%mm0, %%mm1 \n\t" |
"movq %%mm2, %%mm3 \n\t" |
"movq %%mm4, %%mm5 \n\t" |
"movq %%mm6, %%mm7 \n\t" |
"punpcklbw %%mm0, %%mm0 \n\t" |
"punpckhbw %%mm1, %%mm1 \n\t" |
"punpcklbw %%mm2, %%mm2 \n\t" |
"punpckhbw %%mm3, %%mm3 \n\t" |
"punpcklbw %%mm4, %%mm4 \n\t" |
"punpckhbw %%mm5, %%mm5 \n\t" |
"punpcklbw %%mm6, %%mm6 \n\t" |
"punpckhbw %%mm7, %%mm7 \n\t" |
MOVNTQ" %%mm0, (%0,%2,2) \n\t" |
MOVNTQ" %%mm1, 8(%0,%2,2) \n\t" |
MOVNTQ" %%mm2, 16(%0,%2,2) \n\t" |
MOVNTQ" %%mm3, 24(%0,%2,2) \n\t" |
MOVNTQ" %%mm4, 32(%0,%2,2) \n\t" |
MOVNTQ" %%mm5, 40(%0,%2,2) \n\t" |
MOVNTQ" %%mm6, 48(%0,%2,2) \n\t" |
MOVNTQ" %%mm7, 56(%0,%2,2)" |
:: "r"(d), "r"(s1), "r"(x) |
:"memory"); |
} |
for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x]; |
} |
for (y=0;y<h;y++) { |
const uint8_t* s2=src2+srcStride2*(y>>1); |
uint8_t* d=dst2+dstStride2*y; |
x=0; |
for (;x<w-31;x+=32) { |
__asm__ volatile( |
PREFETCH" 32(%1,%2) \n\t" |
"movq (%1,%2), %%mm0 \n\t" |
"movq 8(%1,%2), %%mm2 \n\t" |
"movq 16(%1,%2), %%mm4 \n\t" |
"movq 24(%1,%2), %%mm6 \n\t" |
"movq %%mm0, %%mm1 \n\t" |
"movq %%mm2, %%mm3 \n\t" |
"movq %%mm4, %%mm5 \n\t" |
"movq %%mm6, %%mm7 \n\t" |
"punpcklbw %%mm0, %%mm0 \n\t" |
"punpckhbw %%mm1, %%mm1 \n\t" |
"punpcklbw %%mm2, %%mm2 \n\t" |
"punpckhbw %%mm3, %%mm3 \n\t" |
"punpcklbw %%mm4, %%mm4 \n\t" |
"punpckhbw %%mm5, %%mm5 \n\t" |
"punpcklbw %%mm6, %%mm6 \n\t" |
"punpckhbw %%mm7, %%mm7 \n\t" |
MOVNTQ" %%mm0, (%0,%2,2) \n\t" |
MOVNTQ" %%mm1, 8(%0,%2,2) \n\t" |
MOVNTQ" %%mm2, 16(%0,%2,2) \n\t" |
MOVNTQ" %%mm3, 24(%0,%2,2) \n\t" |
MOVNTQ" %%mm4, 32(%0,%2,2) \n\t" |
MOVNTQ" %%mm5, 40(%0,%2,2) \n\t" |
MOVNTQ" %%mm6, 48(%0,%2,2) \n\t" |
MOVNTQ" %%mm7, 56(%0,%2,2)" |
:: "r"(d), "r"(s2), "r"(x) |
:"memory"); |
} |
for (;x<w;x++) d[2*x]=d[2*x+1]=s2[x]; |
} |
__asm__( |
EMMS" \n\t" |
SFENCE" \n\t" |
::: "memory" |
); |
} |
static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, |
uint8_t *dst, |
int width, int height, |
int srcStride1, int srcStride2, |
int srcStride3, int dstStride) |
{ |
x86_reg x; |
int y,w,h; |
w=width/2; h=height; |
for (y=0;y<h;y++) { |
const uint8_t* yp=src1+srcStride1*y; |
const uint8_t* up=src2+srcStride2*(y>>2); |
const uint8_t* vp=src3+srcStride3*(y>>2); |
uint8_t* d=dst+dstStride*y; |
x=0; |
for (;x<w-7;x+=8) { |
__asm__ volatile( |
PREFETCH" 32(%1, %0) \n\t" |
PREFETCH" 32(%2, %0) \n\t" |
PREFETCH" 32(%3, %0) \n\t" |
"movq (%1, %0, 4), %%mm0 \n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */ |
"movq (%2, %0), %%mm1 \n\t" /* U0U1U2U3U4U5U6U7 */ |
"movq (%3, %0), %%mm2 \n\t" /* V0V1V2V3V4V5V6V7 */ |
"movq %%mm0, %%mm3 \n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */ |
"movq %%mm1, %%mm4 \n\t" /* U0U1U2U3U4U5U6U7 */ |
"movq %%mm2, %%mm5 \n\t" /* V0V1V2V3V4V5V6V7 */ |
"punpcklbw %%mm1, %%mm1 \n\t" /* U0U0 U1U1 U2U2 U3U3 */ |
"punpcklbw %%mm2, %%mm2 \n\t" /* V0V0 V1V1 V2V2 V3V3 */ |
"punpckhbw %%mm4, %%mm4 \n\t" /* U4U4 U5U5 U6U6 U7U7 */ |
"punpckhbw %%mm5, %%mm5 \n\t" /* V4V4 V5V5 V6V6 V7V7 */ |
"movq %%mm1, %%mm6 \n\t" |
"punpcklbw %%mm2, %%mm1 \n\t" /* U0V0 U0V0 U1V1 U1V1*/ |
"punpcklbw %%mm1, %%mm0 \n\t" /* Y0U0 Y1V0 Y2U0 Y3V0*/ |
"punpckhbw %%mm1, %%mm3 \n\t" /* Y4U1 Y5V1 Y6U1 Y7V1*/ |
MOVNTQ" %%mm0, (%4, %0, 8) \n\t" |
MOVNTQ" %%mm3, 8(%4, %0, 8) \n\t" |
"punpckhbw %%mm2, %%mm6 \n\t" /* U2V2 U2V2 U3V3 U3V3*/ |
"movq 8(%1, %0, 4), %%mm0 \n\t" |
"movq %%mm0, %%mm3 \n\t" |
"punpcklbw %%mm6, %%mm0 \n\t" /* Y U2 Y V2 Y U2 Y V2*/ |
"punpckhbw %%mm6, %%mm3 \n\t" /* Y U3 Y V3 Y U3 Y V3*/ |
MOVNTQ" %%mm0, 16(%4, %0, 8) \n\t" |
MOVNTQ" %%mm3, 24(%4, %0, 8) \n\t" |
"movq %%mm4, %%mm6 \n\t" |
"movq 16(%1, %0, 4), %%mm0 \n\t" |
"movq %%mm0, %%mm3 \n\t" |
"punpcklbw %%mm5, %%mm4 \n\t" |
"punpcklbw %%mm4, %%mm0 \n\t" /* Y U4 Y V4 Y U4 Y V4*/ |
"punpckhbw %%mm4, %%mm3 \n\t" /* Y U5 Y V5 Y U5 Y V5*/ |
MOVNTQ" %%mm0, 32(%4, %0, 8) \n\t" |
MOVNTQ" %%mm3, 40(%4, %0, 8) \n\t" |
"punpckhbw %%mm5, %%mm6 \n\t" |
"movq 24(%1, %0, 4), %%mm0 \n\t" |
"movq %%mm0, %%mm3 \n\t" |
"punpcklbw %%mm6, %%mm0 \n\t" /* Y U6 Y V6 Y U6 Y V6*/ |
"punpckhbw %%mm6, %%mm3 \n\t" /* Y U7 Y V7 Y U7 Y V7*/ |
MOVNTQ" %%mm0, 48(%4, %0, 8) \n\t" |
MOVNTQ" %%mm3, 56(%4, %0, 8) \n\t" |
: "+r" (x) |
: "r"(yp), "r" (up), "r"(vp), "r"(d) |
:"memory"); |
} |
for (; x<w; x++) { |
const int x2 = x<<2; |
d[8*x+0] = yp[x2]; |
d[8*x+1] = up[x]; |
d[8*x+2] = yp[x2+1]; |
d[8*x+3] = vp[x]; |
d[8*x+4] = yp[x2+2]; |
d[8*x+5] = up[x]; |
d[8*x+6] = yp[x2+3]; |
d[8*x+7] = vp[x]; |
} |
} |
__asm__( |
EMMS" \n\t" |
SFENCE" \n\t" |
::: "memory" |
); |
} |
#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ |
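/* |
 * The extract_* helpers below share one indexing idiom: the pointers are |
 * advanced past the end of the row, count is negated, and both the asm |
 * loop ("add ...; js 1b") and the C tail loop index upward from a negative |
 * offset toward zero, so the sign flag doubles as the loop condition. |
 */ |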
static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count) |
{ |
dst += count; |
src += 2*count; |
count= - count; |
if(count <= -16) { |
count += 15; |
__asm__ volatile( |
"pcmpeqw %%mm7, %%mm7 \n\t" |
"psrlw $8, %%mm7 \n\t" |
"1: \n\t" |
"movq -30(%1, %0, 2), %%mm0 \n\t" |
"movq -22(%1, %0, 2), %%mm1 \n\t" |
"movq -14(%1, %0, 2), %%mm2 \n\t" |
"movq -6(%1, %0, 2), %%mm3 \n\t" |
"pand %%mm7, %%mm0 \n\t" |
"pand %%mm7, %%mm1 \n\t" |
"pand %%mm7, %%mm2 \n\t" |
"pand %%mm7, %%mm3 \n\t" |
"packuswb %%mm1, %%mm0 \n\t" |
"packuswb %%mm3, %%mm2 \n\t" |
MOVNTQ" %%mm0,-15(%2, %0) \n\t" |
MOVNTQ" %%mm2,- 7(%2, %0) \n\t" |
"add $16, %0 \n\t" |
" js 1b \n\t" |
: "+r"(count) |
: "r"(src), "r"(dst) |
); |
count -= 15; |
} |
while(count<0) { |
dst[count]= src[2*count]; |
count++; |
} |
} |
static void RENAME(extract_odd)(const uint8_t *src, uint8_t *dst, x86_reg count) |
{ |
src ++; |
dst += count; |
src += 2*count; |
count= - count; |
if(count < -16) { |
count += 16; |
__asm__ volatile( |
"pcmpeqw %%mm7, %%mm7 \n\t" |
"psrlw $8, %%mm7 \n\t" |
"1: \n\t" |
"movq -32(%1, %0, 2), %%mm0 \n\t" |
"movq -24(%1, %0, 2), %%mm1 \n\t" |
"movq -16(%1, %0, 2), %%mm2 \n\t" |
"movq -8(%1, %0, 2), %%mm3 \n\t" |
"pand %%mm7, %%mm0 \n\t" |
"pand %%mm7, %%mm1 \n\t" |
"pand %%mm7, %%mm2 \n\t" |
"pand %%mm7, %%mm3 \n\t" |
"packuswb %%mm1, %%mm0 \n\t" |
"packuswb %%mm3, %%mm2 \n\t" |
MOVNTQ" %%mm0,-16(%2, %0) \n\t" |
MOVNTQ" %%mm2,- 8(%2, %0) \n\t" |
"add $16, %0 \n\t" |
" js 1b \n\t" |
: "+r"(count) |
: "r"(src), "r"(dst) |
); |
count -= 16; |
} |
while(count<0) { |
dst[count]= src[2*count]; |
count++; |
} |
} |
#if !COMPILE_TEMPLATE_AMD3DNOW |
static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count) |
{ |
dst0+= count; |
dst1+= count; |
src += 4*count; |
count= - count; |
if(count <= -8) { |
count += 7; |
__asm__ volatile( |
"pcmpeqw %%mm7, %%mm7 \n\t" |
"psrlw $8, %%mm7 \n\t" |
"1: \n\t" |
"movq -28(%1, %0, 4), %%mm0 \n\t" |
"movq -20(%1, %0, 4), %%mm1 \n\t" |
"movq -12(%1, %0, 4), %%mm2 \n\t" |
"movq -4(%1, %0, 4), %%mm3 \n\t" |
"pand %%mm7, %%mm0 \n\t" |
"pand %%mm7, %%mm1 \n\t" |
"pand %%mm7, %%mm2 \n\t" |
"pand %%mm7, %%mm3 \n\t" |
"packuswb %%mm1, %%mm0 \n\t" |
"packuswb %%mm3, %%mm2 \n\t" |
"movq %%mm0, %%mm1 \n\t" |
"movq %%mm2, %%mm3 \n\t" |
"psrlw $8, %%mm0 \n\t" |
"psrlw $8, %%mm2 \n\t" |
"pand %%mm7, %%mm1 \n\t" |
"pand %%mm7, %%mm3 \n\t" |
"packuswb %%mm2, %%mm0 \n\t" |
"packuswb %%mm3, %%mm1 \n\t" |
MOVNTQ" %%mm0,- 7(%3, %0) \n\t" |
MOVNTQ" %%mm1,- 7(%2, %0) \n\t" |
"add $8, %0 \n\t" |
" js 1b \n\t" |
: "+r"(count) |
: "r"(src), "r"(dst0), "r"(dst1) |
); |
count -= 7; |
} |
while(count<0) { |
dst0[count]= src[4*count+0]; |
dst1[count]= src[4*count+2]; |
count++; |
} |
} |
#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ |
static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count) |
{ |
dst0 += count; |
dst1 += count; |
src0 += 4*count; |
src1 += 4*count; |
count= - count; |
#ifdef PAVGB |
if(count <= -8) { |
count += 7; |
__asm__ volatile( |
"pcmpeqw %%mm7, %%mm7 \n\t" |
"psrlw $8, %%mm7 \n\t" |
"1: \n\t" |
"movq -28(%1, %0, 4), %%mm0 \n\t" |
"movq -20(%1, %0, 4), %%mm1 \n\t" |
"movq -12(%1, %0, 4), %%mm2 \n\t" |
"movq -4(%1, %0, 4), %%mm3 \n\t" |
PAVGB" -28(%2, %0, 4), %%mm0 \n\t" |
PAVGB" -20(%2, %0, 4), %%mm1 \n\t" |
PAVGB" -12(%2, %0, 4), %%mm2 \n\t" |
PAVGB" - 4(%2, %0, 4), %%mm3 \n\t" |
"pand %%mm7, %%mm0 \n\t" |
"pand %%mm7, %%mm1 \n\t" |
"pand %%mm7, %%mm2 \n\t" |
"pand %%mm7, %%mm3 \n\t" |
"packuswb %%mm1, %%mm0 \n\t" |
"packuswb %%mm3, %%mm2 \n\t" |
"movq %%mm0, %%mm1 \n\t" |
"movq %%mm2, %%mm3 \n\t" |
"psrlw $8, %%mm0 \n\t" |
"psrlw $8, %%mm2 \n\t" |
"pand %%mm7, %%mm1 \n\t" |
"pand %%mm7, %%mm3 \n\t" |
"packuswb %%mm2, %%mm0 \n\t" |
"packuswb %%mm3, %%mm1 \n\t" |
MOVNTQ" %%mm0,- 7(%4, %0) \n\t" |
MOVNTQ" %%mm1,- 7(%3, %0) \n\t" |
"add $8, %0 \n\t" |
" js 1b \n\t" |
: "+r"(count) |
: "r"(src0), "r"(src1), "r"(dst0), "r"(dst1) |
); |
count -= 7; |
} |
#endif |
while(count<0) { |
dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1; |
dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1; |
count++; |
} |
} |
#if !COMPILE_TEMPLATE_AMD3DNOW |
static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count) |
{ |
dst0+= count; |
dst1+= count; |
src += 4*count; |
count= - count; |
if(count <= -8) { |
count += 7; |
__asm__ volatile( |
"pcmpeqw %%mm7, %%mm7 \n\t" |
"psrlw $8, %%mm7 \n\t" |
"1: \n\t" |
"movq -28(%1, %0, 4), %%mm0 \n\t" |
"movq -20(%1, %0, 4), %%mm1 \n\t" |
"movq -12(%1, %0, 4), %%mm2 \n\t" |
"movq -4(%1, %0, 4), %%mm3 \n\t" |
"psrlw $8, %%mm0 \n\t" |
"psrlw $8, %%mm1 \n\t" |
"psrlw $8, %%mm2 \n\t" |
"psrlw $8, %%mm3 \n\t" |
"packuswb %%mm1, %%mm0 \n\t" |
"packuswb %%mm3, %%mm2 \n\t" |
"movq %%mm0, %%mm1 \n\t" |
"movq %%mm2, %%mm3 \n\t" |
"psrlw $8, %%mm0 \n\t" |
"psrlw $8, %%mm2 \n\t" |
"pand %%mm7, %%mm1 \n\t" |
"pand %%mm7, %%mm3 \n\t" |
"packuswb %%mm2, %%mm0 \n\t" |
"packuswb %%mm3, %%mm1 \n\t" |
MOVNTQ" %%mm0,- 7(%3, %0) \n\t" |
MOVNTQ" %%mm1,- 7(%2, %0) \n\t" |
"add $8, %0 \n\t" |
" js 1b \n\t" |
: "+r"(count) |
: "r"(src), "r"(dst0), "r"(dst1) |
); |
count -= 7; |
} |
src++; |
while(count<0) { |
dst0[count]= src[4*count+0]; |
dst1[count]= src[4*count+2]; |
count++; |
} |
} |
#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ |
static void RENAME(extract_odd2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count) |
{ |
dst0 += count; |
dst1 += count; |
src0 += 4*count; |
src1 += 4*count; |
count= - count; |
#ifdef PAVGB |
if(count <= -8) { |
count += 7; |
__asm__ volatile( |
"pcmpeqw %%mm7, %%mm7 \n\t" |
"psrlw $8, %%mm7 \n\t" |
"1: \n\t" |
"movq -28(%1, %0, 4), %%mm0 \n\t" |
"movq -20(%1, %0, 4), %%mm1 \n\t" |
"movq -12(%1, %0, 4), %%mm2 \n\t" |
"movq -4(%1, %0, 4), %%mm3 \n\t" |
PAVGB" -28(%2, %0, 4), %%mm0 \n\t" |
PAVGB" -20(%2, %0, 4), %%mm1 \n\t" |
PAVGB" -12(%2, %0, 4), %%mm2 \n\t" |
PAVGB" - 4(%2, %0, 4), %%mm3 \n\t" |
"psrlw $8, %%mm0 \n\t" |
"psrlw $8, %%mm1 \n\t" |
"psrlw $8, %%mm2 \n\t" |
"psrlw $8, %%mm3 \n\t" |
"packuswb %%mm1, %%mm0 \n\t" |
"packuswb %%mm3, %%mm2 \n\t" |
"movq %%mm0, %%mm1 \n\t" |
"movq %%mm2, %%mm3 \n\t" |
"psrlw $8, %%mm0 \n\t" |
"psrlw $8, %%mm2 \n\t" |
"pand %%mm7, %%mm1 \n\t" |
"pand %%mm7, %%mm3 \n\t" |
"packuswb %%mm2, %%mm0 \n\t" |
"packuswb %%mm3, %%mm1 \n\t" |
MOVNTQ" %%mm0,- 7(%4, %0) \n\t" |
MOVNTQ" %%mm1,- 7(%3, %0) \n\t" |
"add $8, %0 \n\t" |
" js 1b \n\t" |
: "+r"(count) |
: "r"(src0), "r"(src1), "r"(dst0), "r"(dst1) |
); |
count -= 7; |
} |
#endif |
src0++; |
src1++; |
while(count<0) { |
dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1; |
dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1; |
count++; |
} |
} |
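/* |
 * YUYV stores pixels as Y0 U0 Y1 V0: luma lives in the even bytes and |
 * interleaved U/V in the odd ones. extract_even() below therefore pulls |
 * the luma row, while extract_odd2avg() pulls U and V, averaging two |
 * source rows for the vertical 4:2:0 chroma subsampling. The UYVY |
 * variants further down do the same with even/odd swapped. |
 */ |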
static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, |
int width, int height, |
int lumStride, int chromStride, int srcStride) |
{ |
int y; |
const int chromWidth = FF_CEIL_RSHIFT(width, 1); |
for (y=0; y<height; y++) { |
RENAME(extract_even)(src, ydst, width); |
if(y&1) { |
RENAME(extract_odd2avg)(src-srcStride, src, udst, vdst, chromWidth); |
udst+= chromStride; |
vdst+= chromStride; |
} |
src += srcStride; |
ydst+= lumStride; |
} |
__asm__( |
EMMS" \n\t" |
SFENCE" \n\t" |
::: "memory" |
); |
} |
#if !COMPILE_TEMPLATE_AMD3DNOW |
static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, |
int width, int height, |
int lumStride, int chromStride, int srcStride) |
{ |
int y; |
const int chromWidth = FF_CEIL_RSHIFT(width, 1); |
for (y=0; y<height; y++) { |
RENAME(extract_even)(src, ydst, width); |
RENAME(extract_odd2)(src, udst, vdst, chromWidth); |
src += srcStride; |
ydst+= lumStride; |
udst+= chromStride; |
vdst+= chromStride; |
} |
__asm__( |
EMMS" \n\t" |
SFENCE" \n\t" |
::: "memory" |
); |
} |
#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ |
static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, |
int width, int height, |
int lumStride, int chromStride, int srcStride) |
{ |
int y; |
const int chromWidth = FF_CEIL_RSHIFT(width, 1); |
for (y=0; y<height; y++) { |
RENAME(extract_odd)(src, ydst, width); |
if(y&1) { |
RENAME(extract_even2avg)(src-srcStride, src, udst, vdst, chromWidth); |
udst+= chromStride; |
vdst+= chromStride; |
} |
src += srcStride; |
ydst+= lumStride; |
} |
__asm__( |
EMMS" \n\t" |
SFENCE" \n\t" |
::: "memory" |
); |
} |
#if !COMPILE_TEMPLATE_AMD3DNOW |
static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, |
int width, int height, |
int lumStride, int chromStride, int srcStride) |
{ |
int y; |
const int chromWidth = FF_CEIL_RSHIFT(width, 1); |
for (y=0; y<height; y++) { |
RENAME(extract_odd)(src, ydst, width); |
RENAME(extract_even2)(src, udst, vdst, chromWidth); |
src += srcStride; |
ydst+= lumStride; |
udst+= chromStride; |
vdst+= chromStride; |
} |
__asm__( |
EMMS" \n\t" |
SFENCE" \n\t" |
::: "memory" |
); |
} |
#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ |
#endif /* !COMPILE_TEMPLATE_SSE2 */ |
static av_cold void RENAME(rgb2rgb_init)(void) |
{ |
#if !COMPILE_TEMPLATE_SSE2 |
#if !COMPILE_TEMPLATE_AMD3DNOW |
rgb15to16 = RENAME(rgb15to16); |
rgb15tobgr24 = RENAME(rgb15tobgr24); |
rgb15to32 = RENAME(rgb15to32); |
rgb16tobgr24 = RENAME(rgb16tobgr24); |
rgb16to32 = RENAME(rgb16to32); |
rgb16to15 = RENAME(rgb16to15); |
rgb24tobgr16 = RENAME(rgb24tobgr16); |
rgb24tobgr15 = RENAME(rgb24tobgr15); |
rgb24tobgr32 = RENAME(rgb24tobgr32); |
rgb32to16 = RENAME(rgb32to16); |
rgb32to15 = RENAME(rgb32to15); |
rgb32tobgr24 = RENAME(rgb32tobgr24); |
rgb24to15 = RENAME(rgb24to15); |
rgb24to16 = RENAME(rgb24to16); |
rgb24tobgr24 = RENAME(rgb24tobgr24); |
shuffle_bytes_2103 = RENAME(shuffle_bytes_2103); |
rgb32tobgr16 = RENAME(rgb32tobgr16); |
rgb32tobgr15 = RENAME(rgb32tobgr15); |
yv12toyuy2 = RENAME(yv12toyuy2); |
yv12touyvy = RENAME(yv12touyvy); |
yuv422ptoyuy2 = RENAME(yuv422ptoyuy2); |
yuv422ptouyvy = RENAME(yuv422ptouyvy); |
yuy2toyv12 = RENAME(yuy2toyv12); |
vu9_to_vu12 = RENAME(vu9_to_vu12); |
yvu9_to_yuy2 = RENAME(yvu9_to_yuy2); |
uyvytoyuv422 = RENAME(uyvytoyuv422); |
yuyvtoyuv422 = RENAME(yuyvtoyuv422); |
#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ |
#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW |
planar2x = RENAME(planar2x); |
#endif /* COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW */ |
#if HAVE_7REGS |
ff_rgb24toyv12 = RENAME(rgb24toyv12); |
#endif /* HAVE_7REGS */ |
yuyvtoyuv420 = RENAME(yuyvtoyuv420); |
uyvytoyuv420 = RENAME(uyvytoyuv420); |
#endif /* !COMPILE_TEMPLATE_SSE2 */ |
#if !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX |
interleaveBytes = RENAME(interleaveBytes); |
#endif /* !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX */ |
#if !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL |
#if !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_YASM |
deinterleaveBytes = RENAME(deinterleaveBytes); |
#endif /* !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_YASM */ |
#endif /* !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL */ |
} |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/x86/scale.asm |
---|
0,0 → 1,431 |
;****************************************************************************** |
;* x86-optimized horizontal line scaling functions |
;* Copyright (c) 2011 Ronald S. Bultje <rsbultje@gmail.com> |
;* |
;* This file is part of FFmpeg. |
;* |
;* FFmpeg is free software; you can redistribute it and/or |
;* modify it under the terms of the GNU Lesser General Public |
;* License as published by the Free Software Foundation; either |
;* version 2.1 of the License, or (at your option) any later version. |
;* |
;* FFmpeg is distributed in the hope that it will be useful, |
;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
;* Lesser General Public License for more details. |
;* |
;* You should have received a copy of the GNU Lesser General Public |
;* License along with FFmpeg; if not, write to the Free Software |
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
;****************************************************************************** |
%include "libavutil/x86/x86util.asm" |
SECTION_RODATA |
max_19bit_int: times 4 dd 0x7ffff |
max_19bit_flt: times 4 dd 524287.0 |
minshort: times 8 dw 0x8000 |
unicoeff: times 4 dd 0x20000000 |
SECTION .text |
;----------------------------------------------------------------------------- |
; horizontal line scaling |
; |
; void hscale<source_width>to<intermediate_nbits>_<filterSize>_<opt> |
; (SwsContext *c, int{16,32}_t *dst, |
; int dstW, const uint{8,16}_t *src, |
; const int16_t *filter, |
; const int32_t *filterPos, int filterSize); |
; |
; Scale one horizontal line. Input is stored in either 8-bit or 16-bit units |
; ($source_width can be 8, 9, 10, 12, 14 or 16; the difference is whether we |
; have to downscale before multiplying). Filter coefficients are 14-bit. |
; Output is either 15-bit (in int16_t) or 19-bit (in int32_t), as given in |
; $intermediate_nbits. Each output pixel is generated from $filterSize input |
; pixels; the position of the first one is given in filterPos[nOutputPixel]. |
;----------------------------------------------------------------------------- |
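;------------------------------------------------------------------------------ |
; Editor's sketch, not part of the original file: a scalar C reference of the |
; functions below under the conventions just stated (the shift amount matches |
; the "psrad m0, 14 + %1 - %2" used before the store): |
; |
;     for (i = 0; i < dstW; i++) { |
;         int j, val = 0; |
;         const int srcPos = filterPos[i]; |
;         for (j = 0; j < filterSize; j++) |
;             val += src[srcPos + j] * filter[filterSize * i + j]; |
;         // 15-bit output saturates via packssdw; 19-bit clips to 0x7ffff |
;         dst[i] = FFMIN(val >> (14 + source_width - intermediate_nbits), max_val); |
;     } |
;------------------------------------------------------------------------------ |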
; SCALE_FUNC source_width, intermediate_nbits, filtersize, filtersuffix, n_args, n_xmm |
%macro SCALE_FUNC 6 |
%ifnidn %3, X |
cglobal hscale%1to%2_%4, %5, 7, %6, pos0, dst, w, src, filter, fltpos, pos1 |
%else |
cglobal hscale%1to%2_%4, %5, 10, %6, pos0, dst, w, srcmem, filter, fltpos, fltsize |
%endif |
%if ARCH_X86_64 |
movsxd wq, wd |
%define mov32 movsxd |
%else ; x86-32 |
%define mov32 mov |
%endif ; x86-64 |
%if %2 == 19 |
%if mmsize == 8 ; mmx |
mova m2, [max_19bit_int] |
%elif cpuflag(sse4) |
mova m2, [max_19bit_int] |
%else ; ssse3/sse2 |
mova m2, [max_19bit_flt] |
%endif ; mmx/sse2/ssse3/sse4 |
%endif ; %2 == 19 |
%if %1 == 16 |
mova m6, [minshort] |
mova m7, [unicoeff] |
%elif %1 == 8 |
pxor m3, m3 |
%endif ; %1 == 8/16 |
%if %1 == 8 |
%define movlh movd |
%define movbh movh |
%define srcmul 1 |
%else ; %1 == 9-16 |
%define movlh movq |
%define movbh movu |
%define srcmul 2 |
%endif ; %1 == 8/9-16 |
%ifnidn %3, X |
; setup loop |
%if %3 == 8 |
shl wq, 1 ; this allows *16 (i.e. now *8) in lea instructions for the 8-tap filter |
%define wshr 1 |
%else ; %3 == 4 |
%define wshr 0 |
%endif ; %3 == 8 |
lea filterq, [filterq+wq*8] |
%if %2 == 15 |
lea dstq, [dstq+wq*(2>>wshr)] |
%else ; %2 == 19 |
lea dstq, [dstq+wq*(4>>wshr)] |
%endif ; %2 == 15/19 |
lea fltposq, [fltposq+wq*(4>>wshr)] |
neg wq |
.loop: |
%if %3 == 4 ; filterSize == 4 scaling |
; load 2x4 or 4x4 source pixels into m0/m1 |
mov32 pos0q, dword [fltposq+wq*4+ 0] ; filterPos[0] |
mov32 pos1q, dword [fltposq+wq*4+ 4] ; filterPos[1] |
movlh m0, [srcq+pos0q*srcmul] ; src[filterPos[0] + {0,1,2,3}] |
%if mmsize == 8 |
movlh m1, [srcq+pos1q*srcmul] ; src[filterPos[1] + {0,1,2,3}] |
%else ; mmsize == 16 |
%if %1 > 8 |
movhps m0, [srcq+pos1q*srcmul] ; src[filterPos[1] + {0,1,2,3}] |
%else ; %1 == 8 |
movd m4, [srcq+pos1q*srcmul] ; src[filterPos[1] + {0,1,2,3}] |
%endif |
mov32 pos0q, dword [fltposq+wq*4+ 8] ; filterPos[2] |
mov32 pos1q, dword [fltposq+wq*4+12] ; filterPos[3] |
movlh m1, [srcq+pos0q*srcmul] ; src[filterPos[2] + {0,1,2,3}] |
%if %1 > 8 |
movhps m1, [srcq+pos1q*srcmul] ; src[filterPos[3] + {0,1,2,3}] |
%else ; %1 == 8 |
movd m5, [srcq+pos1q*srcmul] ; src[filterPos[3] + {0,1,2,3}] |
punpckldq m0, m4 |
punpckldq m1, m5 |
%endif ; %1 == 8 |
%endif ; mmsize == 8/16 |
%if %1 == 8 |
punpcklbw m0, m3 ; byte -> word |
punpcklbw m1, m3 ; byte -> word |
%endif ; %1 == 8 |
; multiply with filter coefficients |
%if %1 == 16 ; pmaddwd needs signed adds, so this moves unsigned -> signed, we'll |
; add back 0x8000 * sum(coeffs) after the horizontal add |
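; (identity: sum((x - 0x8000)*c) + 0x8000*sum(c) = sum(x*c); assuming the |
; usual sum(c) = 1<<14 filter normalization, that bias is 0x8000<<14 = |
; 0x20000000, i.e. the unicoeff constant added back from m7 before the shift) |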
psubw m0, m6 |
psubw m1, m6 |
%endif ; %1 == 16 |
pmaddwd m0, [filterq+wq*8+mmsize*0] ; *= filter[{0,1,..,6,7}] |
pmaddwd m1, [filterq+wq*8+mmsize*1] ; *= filter[{8,9,..,14,15}] |
; add up horizontally (4 srcpix * 4 coefficients -> 1 dstpix) |
%if mmsize == 8 ; mmx |
movq m4, m0 |
punpckldq m0, m1 |
punpckhdq m4, m1 |
paddd m0, m4 |
%elif notcpuflag(ssse3) ; sse2 |
mova m4, m0 |
shufps m0, m1, 10001000b |
shufps m4, m1, 11011101b |
paddd m0, m4 |
%else ; ssse3/sse4 |
phaddd m0, m1 ; filter[{ 0, 1, 2, 3}]*src[filterPos[0]+{0,1,2,3}], |
; filter[{ 4, 5, 6, 7}]*src[filterPos[1]+{0,1,2,3}], |
; filter[{ 8, 9,10,11}]*src[filterPos[2]+{0,1,2,3}], |
; filter[{12,13,14,15}]*src[filterPos[3]+{0,1,2,3}] |
%endif ; mmx/sse2/ssse3/sse4 |
%else ; %3 == 8, i.e. filterSize == 8 scaling |
; load 2x8 or 4x8 source pixels into m0, m1, m4 and m5 |
mov32 pos0q, dword [fltposq+wq*2+0] ; filterPos[0] |
mov32 pos1q, dword [fltposq+wq*2+4] ; filterPos[1] |
movbh m0, [srcq+ pos0q *srcmul] ; src[filterPos[0] + {0,1,2,3,4,5,6,7}] |
%if mmsize == 8 |
movbh m1, [srcq+(pos0q+4)*srcmul] ; src[filterPos[0] + {4,5,6,7}] |
movbh m4, [srcq+ pos1q *srcmul] ; src[filterPos[1] + {0,1,2,3}] |
movbh m5, [srcq+(pos1q+4)*srcmul] ; src[filterPos[1] + {4,5,6,7}] |
%else ; mmsize == 16 |
movbh m1, [srcq+ pos1q *srcmul] ; src[filterPos[1] + {0,1,2,3,4,5,6,7}] |
mov32 pos0q, dword [fltposq+wq*2+8] ; filterPos[2] |
mov32 pos1q, dword [fltposq+wq*2+12] ; filterPos[3] |
movbh m4, [srcq+ pos0q *srcmul] ; src[filterPos[2] + {0,1,2,3,4,5,6,7}] |
movbh m5, [srcq+ pos1q *srcmul] ; src[filterPos[3] + {0,1,2,3,4,5,6,7}] |
%endif ; mmsize == 8/16 |
%if %1 == 8 |
punpcklbw m0, m3 ; byte -> word |
punpcklbw m1, m3 ; byte -> word |
punpcklbw m4, m3 ; byte -> word |
punpcklbw m5, m3 ; byte -> word |
%endif ; %1 == 8 |
; multiply |
%if %1 == 16 ; pmaddwd needs signed adds, so this moves unsigned -> signed, we'll |
; add back 0x8000 * sum(coeffs) after the horizontal add |
psubw m0, m6 |
psubw m1, m6 |
psubw m4, m6 |
psubw m5, m6 |
%endif ; %1 == 16 |
pmaddwd m0, [filterq+wq*8+mmsize*0] ; *= filter[{0,1,..,6,7}] |
pmaddwd m1, [filterq+wq*8+mmsize*1] ; *= filter[{8,9,..,14,15}] |
pmaddwd m4, [filterq+wq*8+mmsize*2] ; *= filter[{16,17,..,22,23}] |
pmaddwd m5, [filterq+wq*8+mmsize*3] ; *= filter[{24,25,..,30,31}] |
; add up horizontally (8 srcpix * 8 coefficients -> 1 dstpix) |
%if mmsize == 8 |
paddd m0, m1 |
paddd m4, m5 |
movq m1, m0 |
punpckldq m0, m4 |
punpckhdq m1, m4 |
paddd m0, m1 |
%elif notcpuflag(ssse3) ; sse2 |
%if %1 == 8 |
%define mex m6 |
%else |
%define mex m3 |
%endif |
; emulate horizontal add as transpose + vertical add |
mova mex, m0 |
punpckldq m0, m1 |
punpckhdq mex, m1 |
paddd m0, mex |
mova m1, m4 |
punpckldq m4, m5 |
punpckhdq m1, m5 |
paddd m4, m1 |
mova m1, m0 |
punpcklqdq m0, m4 |
punpckhqdq m1, m4 |
paddd m0, m1 |
%else ; ssse3/sse4 |
; FIXME if we rearrange the filter in pairs of 4, we can |
; load pixels likewise and use 2 x paddd + phaddd instead |
; of 3 x phaddd here, faster on older cpus |
phaddd m0, m1 |
phaddd m4, m5 |
phaddd m0, m4 ; filter[{ 0, 1,..., 6, 7}]*src[filterPos[0]+{0,1,...,6,7}], |
; filter[{ 8, 9,...,14,15}]*src[filterPos[1]+{0,1,...,6,7}], |
; filter[{16,17,...,22,23}]*src[filterPos[2]+{0,1,...,6,7}], |
; filter[{24,25,...,30,31}]*src[filterPos[3]+{0,1,...,6,7}] |
%endif ; mmx/sse2/ssse3/sse4 |
%endif ; %3 == 4/8 |
%else ; %3 == X, i.e. any filterSize scaling |
%ifidn %4, X4 |
%define dlt 4 |
%else ; %4 == X || %4 == X8 |
%define dlt 0 |
%endif ; %4 ==/!= X4 |
%if ARCH_X86_64 |
%define srcq r8 |
%define pos1q r7 |
%define srcendq r9 |
movsxd fltsizeq, fltsized ; filterSize |
lea srcendq, [srcmemq+(fltsizeq-dlt)*srcmul] ; &src[filterSize&~4] |
%else ; x86-32 |
%define srcq srcmemq |
%define pos1q dstq |
%define srcendq r6m |
lea pos0q, [srcmemq+(fltsizeq-dlt)*srcmul] ; &src[filterSize&~4] |
mov srcendq, pos0q |
%endif ; x86-32/64 |
lea fltposq, [fltposq+wq*4] |
%if %2 == 15 |
lea dstq, [dstq+wq*2] |
%else ; %2 == 19 |
lea dstq, [dstq+wq*4] |
%endif ; %2 == 15/19 |
movifnidn dstmp, dstq |
neg wq |
.loop: |
mov32 pos0q, dword [fltposq+wq*4+0] ; filterPos[0] |
mov32 pos1q, dword [fltposq+wq*4+4] ; filterPos[1] |
; FIXME maybe do 4px/iteration on x86-64 (x86-32 wouldn't have enough regs)? |
pxor m4, m4 |
pxor m5, m5 |
mov srcq, srcmemmp |
.innerloop: |
; load 2x4 (mmx) or 2x8 (sse) source pixels into m0/m1 -> m4/m5 |
movbh m0, [srcq+ pos0q *srcmul] ; src[filterPos[0] + {0,1,2,3(,4,5,6,7)}] |
movbh m1, [srcq+(pos1q+dlt)*srcmul] ; src[filterPos[1] + {0,1,2,3(,4,5,6,7)}] |
%if %1 == 8 |
punpcklbw m0, m3 |
punpcklbw m1, m3 |
%endif ; %1 == 8 |
; multiply |
%if %1 == 16 ; pmaddwd needs signed adds, so this moves unsigned -> signed, we'll |
; add back 0x8000 * sum(coeffs) after the horizontal add |
psubw m0, m6 |
psubw m1, m6 |
%endif ; %1 == 16 |
pmaddwd m0, [filterq] ; filter[{0,1,2,3(,4,5,6,7)}] |
pmaddwd m1, [filterq+(fltsizeq+dlt)*2]; filter[filtersize+{0,1,2,3(,4,5,6,7)}] |
paddd m4, m0 |
paddd m5, m1 |
add filterq, mmsize |
add srcq, srcmul*mmsize/2 |
cmp srcq, srcendq ; while (src += 4) < &src[filterSize] |
jl .innerloop |
%ifidn %4, X4 |
mov32 pos1q, dword [fltposq+wq*4+4] ; filterPos[1] |
movlh m0, [srcq+ pos0q *srcmul] ; split last 4 srcpx of dstpx[0] |
sub pos1q, fltsizeq ; and first 4 srcpx of dstpx[1] |
%if %1 > 8 |
movhps m0, [srcq+(pos1q+dlt)*srcmul] |
%else ; %1 == 8 |
movd m1, [srcq+(pos1q+dlt)*srcmul] |
punpckldq m0, m1 |
%endif ; %1 == 8 |
%if %1 == 8 |
punpcklbw m0, m3 |
%endif ; %1 == 8 |
%if %1 == 16 ; pmaddwd needs signed adds, so this moves unsigned -> signed, we'll |
; add back 0x8000 * sum(coeffs) after the horizontal add |
psubw m0, m6 |
%endif ; %1 == 16 |
pmaddwd m0, [filterq] |
%endif ; %4 == X4 |
lea filterq, [filterq+(fltsizeq+dlt)*2] |
%if mmsize == 8 ; mmx |
movq m0, m4 |
punpckldq m4, m5 |
punpckhdq m0, m5 |
paddd m0, m4 |
%else ; mmsize == 16 |
%if notcpuflag(ssse3) ; sse2 |
mova m1, m4 |
punpcklqdq m4, m5 |
punpckhqdq m1, m5 |
paddd m4, m1 |
%else ; ssse3/sse4 |
phaddd m4, m5 |
%endif ; sse2/ssse3/sse4 |
%ifidn %4, X4 |
paddd m4, m0 |
%endif ; %3 == X4 |
%if notcpuflag(ssse3) ; sse2 |
pshufd m4, m4, 11011000b |
movhlps m0, m4 |
paddd m0, m4 |
%else ; ssse3/sse4 |
phaddd m4, m4 |
SWAP 0, 4 |
%endif ; sse2/ssse3/sse4 |
%endif ; mmsize == 8/16 |
%endif ; %3 ==/!= X |
%if %1 == 16 ; add 0x8000 * sum(coeffs), i.e. back from signed -> unsigned |
paddd m0, m7 |
%endif ; %1 == 16 |
; clip, store |
psrad m0, 14 + %1 - %2 |
%ifidn %3, X |
movifnidn dstq, dstmp |
%endif ; %3 == X |
%if %2 == 15 |
packssdw m0, m0 |
%ifnidn %3, X |
movh [dstq+wq*(2>>wshr)], m0 |
%else ; %3 == X |
movd [dstq+wq*2], m0 |
%endif ; %3 ==/!= X |
%else ; %2 == 19 |
%if mmsize == 8 |
PMINSD_MMX m0, m2, m4 |
%elif cpuflag(sse4) |
pminsd m0, m2 |
%else ; sse2/ssse3 |
cvtdq2ps m0, m0 |
minps m0, m2 |
cvtps2dq m0, m0 |
%endif ; mmx/sse2/ssse3/sse4 |
%ifnidn %3, X |
mova [dstq+wq*(4>>wshr)], m0 |
%else ; %3 == X |
movq [dstq+wq*4], m0 |
%endif ; %3 ==/!= X |
%endif ; %2 == 15/19 |
%ifnidn %3, X |
add wq, (mmsize<<wshr)/4 ; both the 8-tap and 4-tap variants really only do 4 pixels (2 for mmx) |
; per iteration; see the "shl wq, 1" above for why we do this |
%else ; %3 == X |
add wq, 2 |
%endif ; %3 ==/!= X |
jl .loop |
REP_RET |
%endmacro |
; SCALE_FUNCS source_width, intermediate_nbits, n_xmm |
%macro SCALE_FUNCS 3 |
SCALE_FUNC %1, %2, 4, 4, 6, %3 |
SCALE_FUNC %1, %2, 8, 8, 6, %3 |
%if mmsize == 8 |
SCALE_FUNC %1, %2, X, X, 7, %3 |
%else |
SCALE_FUNC %1, %2, X, X4, 7, %3 |
SCALE_FUNC %1, %2, X, X8, 7, %3 |
%endif |
%endmacro |
; SCALE_FUNCS2 8_xmm_args, 9to10_xmm_args, 16_xmm_args |
%macro SCALE_FUNCS2 3 |
%if notcpuflag(sse4) |
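; 15-bit output saturates via packssdw and needs nothing beyond ssse3; only |
; the 19-bit path gains from sse4, whose pminsd replaces the cvtdq2ps/minps |
; clip used on sse2/ssse3 |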
SCALE_FUNCS 8, 15, %1 |
SCALE_FUNCS 9, 15, %2 |
SCALE_FUNCS 10, 15, %2 |
SCALE_FUNCS 12, 15, %2 |
SCALE_FUNCS 14, 15, %2 |
SCALE_FUNCS 16, 15, %3 |
%endif ; !sse4 |
SCALE_FUNCS 8, 19, %1 |
SCALE_FUNCS 9, 19, %2 |
SCALE_FUNCS 10, 19, %2 |
SCALE_FUNCS 12, 19, %2 |
SCALE_FUNCS 14, 19, %2 |
SCALE_FUNCS 16, 19, %3 |
%endmacro |
%if ARCH_X86_32 |
INIT_MMX mmx |
SCALE_FUNCS2 0, 0, 0 |
%endif |
INIT_XMM sse2 |
SCALE_FUNCS2 7, 6, 8 |
INIT_XMM ssse3 |
SCALE_FUNCS2 6, 6, 8 |
INIT_XMM sse4 |
SCALE_FUNCS2 6, 6, 8 |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/x86/swscale.c |
---|
0,0 → 1,607 |
/* |
* Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at> |
* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
#include <inttypes.h> |
#include "config.h" |
#include "libswscale/swscale.h" |
#include "libswscale/swscale_internal.h" |
#include "libavutil/attributes.h" |
#include "libavutil/avassert.h" |
#include "libavutil/intreadwrite.h" |
#include "libavutil/x86/asm.h" |
#include "libavutil/x86/cpu.h" |
#include "libavutil/cpu.h" |
#include "libavutil/pixdesc.h" |
#if HAVE_INLINE_ASM |
#define DITHER1XBPP |
DECLARE_ASM_CONST(8, uint64_t, bF8)= 0xF8F8F8F8F8F8F8F8LL; |
DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL; |
DECLARE_ASM_CONST(8, uint64_t, w10)= 0x0010001000100010LL; |
DECLARE_ASM_CONST(8, uint64_t, w02)= 0x0002000200020002LL; |
const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = { |
0x0103010301030103LL, |
0x0200020002000200LL,}; |
const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = { |
0x0602060206020602LL, |
0x0004000400040004LL,}; |
DECLARE_ASM_CONST(8, uint64_t, b16Mask)= 0x001F001F001F001FLL; |
DECLARE_ASM_CONST(8, uint64_t, g16Mask)= 0x07E007E007E007E0LL; |
DECLARE_ASM_CONST(8, uint64_t, r16Mask)= 0xF800F800F800F800LL; |
DECLARE_ASM_CONST(8, uint64_t, b15Mask)= 0x001F001F001F001FLL; |
DECLARE_ASM_CONST(8, uint64_t, g15Mask)= 0x03E003E003E003E0LL; |
DECLARE_ASM_CONST(8, uint64_t, r15Mask)= 0x7C007C007C007C00LL; |
DECLARE_ALIGNED(8, const uint64_t, ff_M24A) = 0x00FF0000FF0000FFLL; |
DECLARE_ALIGNED(8, const uint64_t, ff_M24B) = 0xFF0000FF0000FF00LL; |
DECLARE_ALIGNED(8, const uint64_t, ff_M24C) = 0x0000FF0000FF0000LL; |
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL; |
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL; |
DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL; |
// MMX versions |
#if HAVE_MMX_INLINE |
#undef RENAME |
#define COMPILE_TEMPLATE_MMXEXT 0 |
#define RENAME(a) a ## _mmx |
#include "swscale_template.c" |
#endif |
// MMXEXT versions |
#if HAVE_MMXEXT_INLINE |
#undef RENAME |
#undef COMPILE_TEMPLATE_MMXEXT |
#define COMPILE_TEMPLATE_MMXEXT 1 |
#define RENAME(a) a ## _mmxext |
#include "swscale_template.c" |
#endif |
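/* |
 * Editor's note: swscale_template.c is compiled once per instruction set, |
 * with RENAME() pasting an ISA suffix onto every function it defines. A |
 * template definition RENAME(yuv2yuvX) thus becomes yuv2yuvX_mmx under the |
 * first inclusion and yuv2yuvX_mmxext under the second; |
 * ff_sws_init_swscale_x86 below then selects a variant at runtime from the |
 * detected cpu_flags. |
 */ |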
void ff_updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex, |
int lastInLumBuf, int lastInChrBuf) |
{ |
const int dstH= c->dstH; |
const int flags= c->flags; |
#ifdef NEW_FILTER |
SwsPlane *lumPlane = &c->slice[c->numSlice-2].plane[0]; |
SwsPlane *chrUPlane = &c->slice[c->numSlice-2].plane[1]; |
SwsPlane *alpPlane = &c->slice[c->numSlice-2].plane[3]; |
#else |
int16_t **lumPixBuf= c->lumPixBuf; |
int16_t **chrUPixBuf= c->chrUPixBuf; |
int16_t **alpPixBuf= c->alpPixBuf; |
const int vLumBufSize= c->vLumBufSize; |
const int vChrBufSize= c->vChrBufSize; |
#endif |
int hasAlpha = c->alpPixBuf != NULL; |
int32_t *vLumFilterPos= c->vLumFilterPos; |
int32_t *vChrFilterPos= c->vChrFilterPos; |
int16_t *vLumFilter= c->vLumFilter; |
int16_t *vChrFilter= c->vChrFilter; |
int32_t *lumMmxFilter= c->lumMmxFilter; |
int32_t *chrMmxFilter= c->chrMmxFilter; |
int32_t av_unused *alpMmxFilter= c->alpMmxFilter; |
const int vLumFilterSize= c->vLumFilterSize; |
const int vChrFilterSize= c->vChrFilterSize; |
const int chrDstY= dstY>>c->chrDstVSubSample; |
const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input |
const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input |
c->blueDither= ff_dither8[dstY&1]; |
if (c->dstFormat == AV_PIX_FMT_RGB555 || c->dstFormat == AV_PIX_FMT_BGR555) |
c->greenDither= ff_dither8[dstY&1]; |
else |
c->greenDither= ff_dither4[dstY&1]; |
c->redDither= ff_dither8[(dstY+1)&1]; |
if (dstY < dstH - 2) { |
#ifdef NEW_FILTER |
const int16_t **lumSrcPtr = (const int16_t **)(void*) lumPlane->line + firstLumSrcY - lumPlane->sliceY; |
const int16_t **chrUSrcPtr = (const int16_t **)(void*) chrUPlane->line + firstChrSrcY - chrUPlane->sliceY; |
const int16_t **alpSrcPtr = (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) ? (const int16_t **)(void*) alpPlane->line + firstLumSrcY - alpPlane->sliceY : NULL; |
#else |
const int16_t **lumSrcPtr= (const int16_t **)(void*) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; |
const int16_t **chrUSrcPtr= (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; |
const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; |
#endif |
int i; |
if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) { |
#ifdef NEW_FILTER |
const int16_t **tmpY = (const int16_t **) lumPlane->tmp; |
#else |
const int16_t **tmpY = (const int16_t **) lumPixBuf + 2 * vLumBufSize; |
#endif |
int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize); |
for (i = 0; i < neg; i++) |
tmpY[i] = lumSrcPtr[neg]; |
for ( ; i < end; i++) |
tmpY[i] = lumSrcPtr[i]; |
for ( ; i < vLumFilterSize; i++) |
tmpY[i] = tmpY[i-1]; |
lumSrcPtr = tmpY; |
if (alpSrcPtr) { |
#ifdef NEW_FILTER |
const int16_t **tmpA = (const int16_t **) alpPlane->tmp; |
#else |
const int16_t **tmpA = (const int16_t **) alpPixBuf + 2 * vLumBufSize; |
#endif |
for (i = 0; i < neg; i++) |
tmpA[i] = alpSrcPtr[neg]; |
for ( ; i < end; i++) |
tmpA[i] = alpSrcPtr[i]; |
for ( ; i < vLumFilterSize; i++) |
tmpA[i] = tmpA[i - 1]; |
alpSrcPtr = tmpA; |
} |
} |
if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) { |
#ifdef NEW_FILTER |
const int16_t **tmpU = (const int16_t **) chrUPlane->tmp; |
#else |
const int16_t **tmpU = (const int16_t **) chrUPixBuf + 2 * vChrBufSize; |
#endif |
int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize); |
for (i = 0; i < neg; i++) { |
tmpU[i] = chrUSrcPtr[neg]; |
} |
for ( ; i < end; i++) { |
tmpU[i] = chrUSrcPtr[i]; |
} |
for ( ; i < vChrFilterSize; i++) { |
tmpU[i] = tmpU[i - 1]; |
} |
chrUSrcPtr = tmpU; |
} |
if (flags & SWS_ACCURATE_RND) { |
int s= APCK_SIZE / 8; |
for (i=0; i<vLumFilterSize; i+=2) { |
*(const void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ]; |
*(const void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)]; |
lumMmxFilter[s*i+APCK_COEF/4 ]= |
lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ] |
+ (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0); |
if (CONFIG_SWSCALE_ALPHA && hasAlpha) { |
*(const void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ]; |
*(const void**)&alpMmxFilter[s*i+APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)]; |
alpMmxFilter[s*i+APCK_COEF/4 ]= |
alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4 ]; |
} |
} |
for (i=0; i<vChrFilterSize; i+=2) { |
*(const void**)&chrMmxFilter[s*i ]= chrUSrcPtr[i ]; |
*(const void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrUSrcPtr[i+(vChrFilterSize>1)]; |
chrMmxFilter[s*i+APCK_COEF/4 ]= |
chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i ] |
+ (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0); |
} |
} else { |
for (i=0; i<vLumFilterSize; i++) { |
*(const void**)&lumMmxFilter[4*i+0]= lumSrcPtr[i]; |
lumMmxFilter[4*i+2]= |
lumMmxFilter[4*i+3]= |
((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001U; |
if (CONFIG_SWSCALE_ALPHA && hasAlpha) { |
*(const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i]; |
alpMmxFilter[4*i+2]= |
alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2]; |
} |
} |
for (i=0; i<vChrFilterSize; i++) { |
*(const void**)&chrMmxFilter[4*i+0]= chrUSrcPtr[i]; |
chrMmxFilter[4*i+2]= |
chrMmxFilter[4*i+3]= |
((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001U; |
} |
} |
} |
} |
#if HAVE_MMXEXT |
static void yuv2yuvX_sse3(const int16_t *filter, int filterSize, |
const int16_t **src, uint8_t *dest, int dstW, |
const uint8_t *dither, int offset) |
{ |
if(((uintptr_t)dest) & 15){ |
yuv2yuvX_mmxext(filter, filterSize, src, dest, dstW, dither, offset); |
return; |
} |
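/* the non-temporal movntdq stores in MAIN_FUNCTION below require a |
 * 16-byte-aligned destination, hence the mmxext fallback above */ |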
filterSize--; |
#define MAIN_FUNCTION \ |
"pxor %%xmm0, %%xmm0 \n\t" \ |
"punpcklbw %%xmm0, %%xmm3 \n\t" \ |
"movd %4, %%xmm1 \n\t" \ |
"punpcklwd %%xmm1, %%xmm1 \n\t" \ |
"punpckldq %%xmm1, %%xmm1 \n\t" \ |
"punpcklqdq %%xmm1, %%xmm1 \n\t" \ |
"psllw $3, %%xmm1 \n\t" \ |
"paddw %%xmm1, %%xmm3 \n\t" \ |
"psraw $4, %%xmm3 \n\t" \ |
"movdqa %%xmm3, %%xmm4 \n\t" \ |
"movdqa %%xmm3, %%xmm7 \n\t" \ |
"movl %3, %%ecx \n\t" \ |
"mov %0, %%"REG_d" \n\t"\ |
"mov (%%"REG_d"), %%"REG_S" \n\t"\ |
".p2align 4 \n\t" /* FIXME Unroll? */\ |
"1: \n\t"\ |
"movddup 8(%%"REG_d"), %%xmm0 \n\t" /* filterCoeff */\ |
"movdqa (%%"REG_S", %%"REG_c", 2), %%xmm2 \n\t" /* srcData */\ |
"movdqa 16(%%"REG_S", %%"REG_c", 2), %%xmm5 \n\t" /* srcData */\ |
"add $16, %%"REG_d" \n\t"\ |
"mov (%%"REG_d"), %%"REG_S" \n\t"\ |
"test %%"REG_S", %%"REG_S" \n\t"\ |
"pmulhw %%xmm0, %%xmm2 \n\t"\ |
"pmulhw %%xmm0, %%xmm5 \n\t"\ |
"paddw %%xmm2, %%xmm3 \n\t"\ |
"paddw %%xmm5, %%xmm4 \n\t"\ |
" jnz 1b \n\t"\ |
"psraw $3, %%xmm3 \n\t"\ |
"psraw $3, %%xmm4 \n\t"\ |
"packuswb %%xmm4, %%xmm3 \n\t"\ |
"movntdq %%xmm3, (%1, %%"REG_c")\n\t"\ |
"add $16, %%"REG_c" \n\t"\ |
"cmp %2, %%"REG_c" \n\t"\ |
"movdqa %%xmm7, %%xmm3 \n\t" \ |
"movdqa %%xmm7, %%xmm4 \n\t" \ |
"mov %0, %%"REG_d" \n\t"\ |
"mov (%%"REG_d"), %%"REG_S" \n\t"\ |
"jb 1b \n\t" |
if (offset) { |
__asm__ volatile( |
"movq %5, %%xmm3 \n\t" |
"movdqa %%xmm3, %%xmm4 \n\t" |
"psrlq $24, %%xmm3 \n\t" |
"psllq $40, %%xmm4 \n\t" |
"por %%xmm4, %%xmm3 \n\t" |
MAIN_FUNCTION |
:: "g" (filter), |
"r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset), |
"m"(filterSize), "m"(((uint64_t *) dither)[0]) |
: XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" , "%xmm4" , "%xmm5" , "%xmm7" ,) |
"%"REG_d, "%"REG_S, "%"REG_c |
); |
} else { |
__asm__ volatile( |
"movq %5, %%xmm3 \n\t" |
MAIN_FUNCTION |
:: "g" (filter), |
"r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset), |
"m"(filterSize), "m"(((uint64_t *) dither)[0]) |
: XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" , "%xmm4" , "%xmm5" , "%xmm7" ,) |
"%"REG_d, "%"REG_S, "%"REG_c |
); |
} |
} |
#endif /* HAVE_MMXEXT */ |
#endif /* HAVE_INLINE_ASM */ |
#define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \ |
void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \ |
SwsContext *c, int16_t *data, \ |
int dstW, const uint8_t *src, \ |
const int16_t *filter, \ |
const int32_t *filterPos, int filterSize) |
#define SCALE_FUNCS(filter_n, opt) \ |
SCALE_FUNC(filter_n, 8, 15, opt); \ |
SCALE_FUNC(filter_n, 9, 15, opt); \ |
SCALE_FUNC(filter_n, 10, 15, opt); \ |
SCALE_FUNC(filter_n, 12, 15, opt); \ |
SCALE_FUNC(filter_n, 14, 15, opt); \ |
SCALE_FUNC(filter_n, 16, 15, opt); \ |
SCALE_FUNC(filter_n, 8, 19, opt); \ |
SCALE_FUNC(filter_n, 9, 19, opt); \ |
SCALE_FUNC(filter_n, 10, 19, opt); \ |
SCALE_FUNC(filter_n, 12, 19, opt); \ |
SCALE_FUNC(filter_n, 14, 19, opt); \ |
SCALE_FUNC(filter_n, 16, 19, opt) |
#define SCALE_FUNCS_MMX(opt) \ |
SCALE_FUNCS(4, opt); \ |
SCALE_FUNCS(8, opt); \ |
SCALE_FUNCS(X, opt) |
#define SCALE_FUNCS_SSE(opt) \ |
SCALE_FUNCS(4, opt); \ |
SCALE_FUNCS(8, opt); \ |
SCALE_FUNCS(X4, opt); \ |
SCALE_FUNCS(X8, opt) |
#if ARCH_X86_32 |
SCALE_FUNCS_MMX(mmx); |
#endif |
SCALE_FUNCS_SSE(sse2); |
SCALE_FUNCS_SSE(ssse3); |
SCALE_FUNCS_SSE(sse4); |
#define VSCALEX_FUNC(size, opt) \ |
void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter, int filterSize, \ |
const int16_t **src, uint8_t *dest, int dstW, \ |
const uint8_t *dither, int offset) |
#define VSCALEX_FUNCS(opt) \ |
VSCALEX_FUNC(8, opt); \ |
VSCALEX_FUNC(9, opt); \ |
VSCALEX_FUNC(10, opt) |
#if ARCH_X86_32 |
VSCALEX_FUNCS(mmxext); |
#endif |
VSCALEX_FUNCS(sse2); |
VSCALEX_FUNCS(sse4); |
VSCALEX_FUNC(16, sse4); |
VSCALEX_FUNCS(avx); |
#define VSCALE_FUNC(size, opt) \ |
void ff_yuv2plane1_ ## size ## _ ## opt(const int16_t *src, uint8_t *dst, int dstW, \ |
const uint8_t *dither, int offset) |
#define VSCALE_FUNCS(opt1, opt2) \ |
VSCALE_FUNC(8, opt1); \ |
VSCALE_FUNC(9, opt2); \ |
VSCALE_FUNC(10, opt2); \ |
VSCALE_FUNC(16, opt1) |
#if ARCH_X86_32 |
VSCALE_FUNCS(mmx, mmxext); |
#endif |
VSCALE_FUNCS(sse2, sse2); |
VSCALE_FUNC(16, sse4); |
VSCALE_FUNCS(avx, avx); |
#define INPUT_Y_FUNC(fmt, opt) \ |
void ff_ ## fmt ## ToY_ ## opt(uint8_t *dst, const uint8_t *src, \ |
const uint8_t *unused1, const uint8_t *unused2, \ |
int w, uint32_t *unused) |
#define INPUT_UV_FUNC(fmt, opt) \ |
void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \ |
const uint8_t *unused0, \ |
const uint8_t *src1, \ |
const uint8_t *src2, \ |
int w, uint32_t *unused) |
#define INPUT_FUNC(fmt, opt) \ |
INPUT_Y_FUNC(fmt, opt); \ |
INPUT_UV_FUNC(fmt, opt) |
#define INPUT_FUNCS(opt) \ |
INPUT_FUNC(uyvy, opt); \ |
INPUT_FUNC(yuyv, opt); \ |
INPUT_UV_FUNC(nv12, opt); \ |
INPUT_UV_FUNC(nv21, opt); \ |
INPUT_FUNC(rgba, opt); \ |
INPUT_FUNC(bgra, opt); \ |
INPUT_FUNC(argb, opt); \ |
INPUT_FUNC(abgr, opt); \ |
INPUT_FUNC(rgb24, opt); \ |
INPUT_FUNC(bgr24, opt) |
#if ARCH_X86_32 |
INPUT_FUNCS(mmx); |
#endif |
INPUT_FUNCS(sse2); |
INPUT_FUNCS(ssse3); |
INPUT_FUNCS(avx); |
av_cold void ff_sws_init_swscale_x86(SwsContext *c) |
{ |
int cpu_flags = av_get_cpu_flags(); |
#if HAVE_MMX_INLINE |
if (INLINE_MMX(cpu_flags)) |
sws_init_swscale_mmx(c); |
#endif |
#if HAVE_MMXEXT_INLINE |
if (INLINE_MMXEXT(cpu_flags)) |
sws_init_swscale_mmxext(c); |
if (cpu_flags & AV_CPU_FLAG_SSE3){ |
if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND)) |
c->yuv2planeX = yuv2yuvX_sse3; |
} |
#endif |
#define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \ |
if (c->srcBpc == 8) { \ |
hscalefn = c->dstBpc <= 14 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \ |
ff_hscale8to19_ ## filtersize ## _ ## opt1; \ |
} else if (c->srcBpc == 9) { \ |
hscalefn = c->dstBpc <= 14 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \ |
ff_hscale9to19_ ## filtersize ## _ ## opt1; \ |
} else if (c->srcBpc == 10) { \ |
hscalefn = c->dstBpc <= 14 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \ |
ff_hscale10to19_ ## filtersize ## _ ## opt1; \ |
} else if (c->srcBpc == 12) { \ |
hscalefn = c->dstBpc <= 14 ? ff_hscale12to15_ ## filtersize ## _ ## opt2 : \ |
ff_hscale12to19_ ## filtersize ## _ ## opt1; \ |
} else if (c->srcBpc == 14 || ((c->srcFormat==AV_PIX_FMT_PAL8||isAnyRGB(c->srcFormat)) && av_pix_fmt_desc_get(c->srcFormat)->comp[0].depth_minus1<15)) { \ |
hscalefn = c->dstBpc <= 14 ? ff_hscale14to15_ ## filtersize ## _ ## opt2 : \ |
ff_hscale14to19_ ## filtersize ## _ ## opt1; \ |
} else { /* c->srcBpc == 16 */ \ |
av_assert0(c->srcBpc == 16);\ |
hscalefn = c->dstBpc <= 14 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \ |
ff_hscale16to19_ ## filtersize ## _ ## opt1; \ |
} \ |
} while (0) |
#define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \ |
switch (filtersize) { \ |
case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \ |
case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \ |
default: ASSIGN_SCALE_FUNC2(hscalefn, X, opt1, opt2); break; \ |
} |
#define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit) \ |
switch(c->dstBpc){ \ |
case 16: do_16_case; break; \ |
case 10: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_10_ ## opt; break; \ |
case 9: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_9_ ## opt; break; \ |
case 8: if ((condition_8bit) && !c->use_mmx_vfilter) vscalefn = ff_yuv2planeX_8_ ## opt; break; \ |
} |
#define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk) \ |
switch(c->dstBpc){ \ |
case 16: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2plane1_16_ ## opt1; break; \ |
case 10: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_10_ ## opt2; break; \ |
case 9: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_9_ ## opt2; break; \ |
case 8: vscalefn = ff_yuv2plane1_8_ ## opt1; break; \ |
default: av_assert0(c->dstBpc>8); \ |
} |
#define case_rgb(x, X, opt) \ |
case AV_PIX_FMT_ ## X: \ |
c->lumToYV12 = ff_ ## x ## ToY_ ## opt; \ |
if (!c->chrSrcHSubSample) \ |
c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \ |
break |
#if ARCH_X86_32 |
if (EXTERNAL_MMX(cpu_flags)) { |
ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx); |
ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx); |
ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmxext, cpu_flags & AV_CPU_FLAG_MMXEXT); |
switch (c->srcFormat) { |
case AV_PIX_FMT_YA8: |
c->lumToYV12 = ff_yuyvToY_mmx; |
if (c->alpPixBuf) |
c->alpToYV12 = ff_uyvyToY_mmx; |
break; |
case AV_PIX_FMT_YUYV422: |
c->lumToYV12 = ff_yuyvToY_mmx; |
c->chrToYV12 = ff_yuyvToUV_mmx; |
break; |
case AV_PIX_FMT_UYVY422: |
c->lumToYV12 = ff_uyvyToY_mmx; |
c->chrToYV12 = ff_uyvyToUV_mmx; |
break; |
case AV_PIX_FMT_NV12: |
c->chrToYV12 = ff_nv12ToUV_mmx; |
break; |
case AV_PIX_FMT_NV21: |
c->chrToYV12 = ff_nv21ToUV_mmx; |
break; |
case_rgb(rgb24, RGB24, mmx); |
case_rgb(bgr24, BGR24, mmx); |
case_rgb(bgra, BGRA, mmx); |
case_rgb(rgba, RGBA, mmx); |
case_rgb(abgr, ABGR, mmx); |
case_rgb(argb, ARGB, mmx); |
default: |
break; |
} |
} |
if (EXTERNAL_MMXEXT(cpu_flags)) { |
ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmxext, , 1); |
} |
#endif /* ARCH_X86_32 */ |
#define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \ |
switch (filtersize) { \ |
case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \ |
case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \ |
default: if (filtersize & 4) ASSIGN_SCALE_FUNC2(hscalefn, X4, opt1, opt2); \ |
else ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \ |
break; \ |
} |
if (EXTERNAL_SSE2(cpu_flags)) { |
ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2); |
ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2); |
ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, , |
HAVE_ALIGNED_STACK || ARCH_X86_64); |
ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2, sse2, 1); |
switch (c->srcFormat) { |
case AV_PIX_FMT_YA8: |
c->lumToYV12 = ff_yuyvToY_sse2; |
if (c->alpPixBuf) |
c->alpToYV12 = ff_uyvyToY_sse2; |
break; |
case AV_PIX_FMT_YUYV422: |
c->lumToYV12 = ff_yuyvToY_sse2; |
c->chrToYV12 = ff_yuyvToUV_sse2; |
break; |
case AV_PIX_FMT_UYVY422: |
c->lumToYV12 = ff_uyvyToY_sse2; |
c->chrToYV12 = ff_uyvyToUV_sse2; |
break; |
case AV_PIX_FMT_NV12: |
c->chrToYV12 = ff_nv12ToUV_sse2; |
break; |
case AV_PIX_FMT_NV21: |
c->chrToYV12 = ff_nv21ToUV_sse2; |
break; |
case_rgb(rgb24, RGB24, sse2); |
case_rgb(bgr24, BGR24, sse2); |
case_rgb(bgra, BGRA, sse2); |
case_rgb(rgba, RGBA, sse2); |
case_rgb(abgr, ABGR, sse2); |
case_rgb(argb, ARGB, sse2); |
default: |
break; |
} |
} |
if (EXTERNAL_SSSE3(cpu_flags)) { |
ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3); |
ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3); |
switch (c->srcFormat) { |
case_rgb(rgb24, RGB24, ssse3); |
case_rgb(bgr24, BGR24, ssse3); |
default: |
break; |
} |
} |
if (EXTERNAL_SSE4(cpu_flags)) { |
        /* the Xto15 scalers don't need special SSE4 functions */ |
ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3); |
ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3); |
ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse4, |
if (!isBE(c->dstFormat)) c->yuv2planeX = ff_yuv2planeX_16_sse4, |
HAVE_ALIGNED_STACK || ARCH_X86_64); |
if (c->dstBpc == 16 && !isBE(c->dstFormat)) |
c->yuv2plane1 = ff_yuv2plane1_16_sse4; |
} |
if (EXTERNAL_AVX(cpu_flags)) { |
ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, , |
HAVE_ALIGNED_STACK || ARCH_X86_64); |
ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1); |
switch (c->srcFormat) { |
case AV_PIX_FMT_YUYV422: |
c->chrToYV12 = ff_yuyvToUV_avx; |
break; |
case AV_PIX_FMT_UYVY422: |
c->chrToYV12 = ff_uyvyToUV_avx; |
break; |
case AV_PIX_FMT_NV12: |
c->chrToYV12 = ff_nv12ToUV_avx; |
break; |
case AV_PIX_FMT_NV21: |
c->chrToYV12 = ff_nv21ToUV_avx; |
break; |
case_rgb(rgb24, RGB24, avx); |
case_rgb(bgr24, BGR24, avx); |
case_rgb(bgra, BGRA, avx); |
case_rgb(rgba, RGBA, avx); |
case_rgb(abgr, ABGR, avx); |
case_rgb(argb, ARGB, avx); |
default: |
break; |
} |
} |
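    /* Editor's note: the CPU-flag blocks above run from oldest to newest |
     * ISA (MMX, MMXEXT, SSE2, SSSE3, SSE4, AVX); each block overwrites the |
     * function pointers set by the previous one, so on a capable CPU the |
     * most advanced available variant ends up installed. */ |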
} |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/x86/swscale_template.c |
---|
0,0 → 1,1576 |
/* |
* Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at> |
* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
#include <stdint.h> |
#include "libavutil/x86/asm.h" |
#include "libswscale/swscale_internal.h" |
#undef REAL_MOVNTQ |
#undef MOVNTQ |
#undef MOVNTQ2 |
#undef PREFETCH |
#if COMPILE_TEMPLATE_MMXEXT |
#define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t" |
#define MOVNTQ2 "movntq " |
#else |
#define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t" |
#define MOVNTQ2 "movq " |
#endif |
#define MOVNTQ(a,b) REAL_MOVNTQ(a,b) |
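/* Editor's note: on MMXEXT builds MOVNTQ is the non-temporal store movntq, |
 * which bypasses the cache; that pays off here because the scaled output |
 * is written once and never read back. Plain MMX lacks movntq and falls |
 * back to an ordinary movq. */ |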
#if !COMPILE_TEMPLATE_MMXEXT |
static av_always_inline void |
dither_8to16(const uint8_t *srcDither, int rot) |
{ |
if (rot) { |
__asm__ volatile("pxor %%mm0, %%mm0\n\t" |
"movq (%0), %%mm3\n\t" |
"movq %%mm3, %%mm4\n\t" |
"psrlq $24, %%mm3\n\t" |
"psllq $40, %%mm4\n\t" |
"por %%mm4, %%mm3\n\t" |
"movq %%mm3, %%mm4\n\t" |
"punpcklbw %%mm0, %%mm3\n\t" |
"punpckhbw %%mm0, %%mm4\n\t" |
:: "r"(srcDither) |
); |
} else { |
__asm__ volatile("pxor %%mm0, %%mm0\n\t" |
"movq (%0), %%mm3\n\t" |
"movq %%mm3, %%mm4\n\t" |
"punpcklbw %%mm0, %%mm3\n\t" |
"punpckhbw %%mm0, %%mm4\n\t" |
:: "r"(srcDither) |
); |
} |
} |
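/* Editor's note: dither_8to16() above preloads the 8-byte dither pattern |
 * into %mm3/%mm4 as two groups of four 16-bit words; with rot != 0 the |
 * pattern is first rotated by three bytes (psrlq $24 / psllq $40 / por), |
 * so successive output lines see shifted dither offsets. */ |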
#endif |
static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize, |
const int16_t **src, uint8_t *dest, int dstW, |
const uint8_t *dither, int offset) |
{ |
dither_8to16(dither, offset); |
filterSize--; |
__asm__ volatile( |
"movd %0, %%mm1\n\t" |
"punpcklwd %%mm1, %%mm1\n\t" |
"punpckldq %%mm1, %%mm1\n\t" |
"psllw $3, %%mm1\n\t" |
"paddw %%mm1, %%mm3\n\t" |
"paddw %%mm1, %%mm4\n\t" |
"psraw $4, %%mm3\n\t" |
"psraw $4, %%mm4\n\t" |
::"m"(filterSize) |
); |
__asm__ volatile(\ |
"movq %%mm3, %%mm6\n\t" |
"movq %%mm4, %%mm7\n\t" |
"movl %3, %%ecx\n\t" |
"mov %0, %%"REG_d" \n\t"\ |
"mov (%%"REG_d"), %%"REG_S" \n\t"\ |
".p2align 4 \n\t" /* FIXME Unroll? */\ |
"1: \n\t"\ |
"movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\ |
"movq (%%"REG_S", %%"REG_c", 2), %%mm2 \n\t" /* srcData */\ |
"movq 8(%%"REG_S", %%"REG_c", 2), %%mm5 \n\t" /* srcData */\ |
"add $16, %%"REG_d" \n\t"\ |
"mov (%%"REG_d"), %%"REG_S" \n\t"\ |
"test %%"REG_S", %%"REG_S" \n\t"\ |
"pmulhw %%mm0, %%mm2 \n\t"\ |
"pmulhw %%mm0, %%mm5 \n\t"\ |
"paddw %%mm2, %%mm3 \n\t"\ |
"paddw %%mm5, %%mm4 \n\t"\ |
" jnz 1b \n\t"\ |
"psraw $3, %%mm3 \n\t"\ |
"psraw $3, %%mm4 \n\t"\ |
"packuswb %%mm4, %%mm3 \n\t" |
MOVNTQ2 " %%mm3, (%1, %%"REG_c")\n\t" |
"add $8, %%"REG_c" \n\t"\ |
"cmp %2, %%"REG_c" \n\t"\ |
"movq %%mm6, %%mm3\n\t" |
"movq %%mm7, %%mm4\n\t" |
"mov %0, %%"REG_d" \n\t"\ |
"mov (%%"REG_d"), %%"REG_S" \n\t"\ |
"jb 1b \n\t"\ |
:: "g" (filter), |
"r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset) |
: "%"REG_d, "%"REG_S, "%"REG_c |
); |
} |
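/* Editor's sketch (not part of the original source) of the scalar |
 * arithmetic yuv2yuvX() implements, following the generic C version in |
 * libswscale/output.c: 12-bit filter coefficients are accumulated over |
 * ~15-bit samples, dither is folded in, and the sum is scaled to 8 bits: |
 * |
 *     for (i = 0; i < dstW; i++) { |
 *         int val = dither[(i + offset) & 7] << 12; |
 *         for (j = 0; j < filterSize; j++) |
 *             val += src[j][i] * filter[j]; |
 *         dest[i] = av_clip_uint8(val >> 19); |
 *     } |
 */ |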
#define YSCALEYUV2PACKEDX_UV \ |
__asm__ volatile(\ |
"xor %%"REG_a", %%"REG_a" \n\t"\ |
".p2align 4 \n\t"\ |
"nop \n\t"\ |
"1: \n\t"\ |
"lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\ |
"mov (%%"REG_d"), %%"REG_S" \n\t"\ |
"movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\ |
"movq %%mm3, %%mm4 \n\t"\ |
".p2align 4 \n\t"\ |
"2: \n\t"\ |
"movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\ |
"movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* UsrcData */\ |
"add %6, %%"REG_S" \n\t" \ |
"movq (%%"REG_S", %%"REG_a"), %%mm5 \n\t" /* VsrcData */\ |
"add $16, %%"REG_d" \n\t"\ |
"mov (%%"REG_d"), %%"REG_S" \n\t"\ |
"pmulhw %%mm0, %%mm2 \n\t"\ |
"pmulhw %%mm0, %%mm5 \n\t"\ |
"paddw %%mm2, %%mm3 \n\t"\ |
"paddw %%mm5, %%mm4 \n\t"\ |
"test %%"REG_S", %%"REG_S" \n\t"\ |
" jnz 2b \n\t"\ |
#define YSCALEYUV2PACKEDX_YA(offset,coeff,src1,src2,dst1,dst2) \ |
"lea "offset"(%0), %%"REG_d" \n\t"\ |
"mov (%%"REG_d"), %%"REG_S" \n\t"\ |
"movq "VROUNDER_OFFSET"(%0), "#dst1" \n\t"\ |
"movq "#dst1", "#dst2" \n\t"\ |
".p2align 4 \n\t"\ |
"2: \n\t"\ |
"movq 8(%%"REG_d"), "#coeff" \n\t" /* filterCoeff */\ |
"movq (%%"REG_S", %%"REG_a", 2), "#src1" \n\t" /* Y1srcData */\ |
"movq 8(%%"REG_S", %%"REG_a", 2), "#src2" \n\t" /* Y2srcData */\ |
"add $16, %%"REG_d" \n\t"\ |
"mov (%%"REG_d"), %%"REG_S" \n\t"\ |
"pmulhw "#coeff", "#src1" \n\t"\ |
"pmulhw "#coeff", "#src2" \n\t"\ |
"paddw "#src1", "#dst1" \n\t"\ |
"paddw "#src2", "#dst2" \n\t"\ |
"test %%"REG_S", %%"REG_S" \n\t"\ |
" jnz 2b \n\t"\ |
#define YSCALEYUV2PACKEDX \ |
YSCALEYUV2PACKEDX_UV \ |
YSCALEYUV2PACKEDX_YA(LUM_MMX_FILTER_OFFSET,%%mm0,%%mm2,%%mm5,%%mm1,%%mm7) \ |
#define YSCALEYUV2PACKEDX_END \ |
:: "r" (&c->redDither), \ |
"m" (dummy), "m" (dummy), "m" (dummy),\ |
"r" (dest), "m" (dstW_reg), "m"(uv_off) \ |
NAMED_CONSTRAINTS_ADD(bF8,bFC) \ |
: "%"REG_a, "%"REG_d, "%"REG_S \ |
); |
#define YSCALEYUV2PACKEDX_ACCURATE_UV \ |
__asm__ volatile(\ |
"xor %%"REG_a", %%"REG_a" \n\t"\ |
".p2align 4 \n\t"\ |
"nop \n\t"\ |
"1: \n\t"\ |
"lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\ |
"mov (%%"REG_d"), %%"REG_S" \n\t"\ |
"pxor %%mm4, %%mm4 \n\t"\ |
"pxor %%mm5, %%mm5 \n\t"\ |
"pxor %%mm6, %%mm6 \n\t"\ |
"pxor %%mm7, %%mm7 \n\t"\ |
".p2align 4 \n\t"\ |
"2: \n\t"\ |
"movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" /* UsrcData */\ |
"add %6, %%"REG_S" \n\t" \ |
"movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* VsrcData */\ |
"mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\ |
"movq (%%"REG_S", %%"REG_a"), %%mm1 \n\t" /* UsrcData */\ |
"movq %%mm0, %%mm3 \n\t"\ |
"punpcklwd %%mm1, %%mm0 \n\t"\ |
"punpckhwd %%mm1, %%mm3 \n\t"\ |
"movq "STR(APCK_COEF)"(%%"REG_d"),%%mm1 \n\t" /* filterCoeff */\ |
"pmaddwd %%mm1, %%mm0 \n\t"\ |
"pmaddwd %%mm1, %%mm3 \n\t"\ |
"paddd %%mm0, %%mm4 \n\t"\ |
"paddd %%mm3, %%mm5 \n\t"\ |
"add %6, %%"REG_S" \n\t" \ |
"movq (%%"REG_S", %%"REG_a"), %%mm3 \n\t" /* VsrcData */\ |
"mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\ |
"add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\ |
"test %%"REG_S", %%"REG_S" \n\t"\ |
"movq %%mm2, %%mm0 \n\t"\ |
"punpcklwd %%mm3, %%mm2 \n\t"\ |
"punpckhwd %%mm3, %%mm0 \n\t"\ |
"pmaddwd %%mm1, %%mm2 \n\t"\ |
"pmaddwd %%mm1, %%mm0 \n\t"\ |
"paddd %%mm2, %%mm6 \n\t"\ |
"paddd %%mm0, %%mm7 \n\t"\ |
" jnz 2b \n\t"\ |
"psrad $16, %%mm4 \n\t"\ |
"psrad $16, %%mm5 \n\t"\ |
"psrad $16, %%mm6 \n\t"\ |
"psrad $16, %%mm7 \n\t"\ |
"movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\ |
"packssdw %%mm5, %%mm4 \n\t"\ |
"packssdw %%mm7, %%mm6 \n\t"\ |
"paddw %%mm0, %%mm4 \n\t"\ |
"paddw %%mm0, %%mm6 \n\t"\ |
"movq %%mm4, "U_TEMP"(%0) \n\t"\ |
"movq %%mm6, "V_TEMP"(%0) \n\t"\ |
#define YSCALEYUV2PACKEDX_ACCURATE_YA(offset) \ |
"lea "offset"(%0), %%"REG_d" \n\t"\ |
"mov (%%"REG_d"), %%"REG_S" \n\t"\ |
"pxor %%mm1, %%mm1 \n\t"\ |
"pxor %%mm5, %%mm5 \n\t"\ |
"pxor %%mm7, %%mm7 \n\t"\ |
"pxor %%mm6, %%mm6 \n\t"\ |
".p2align 4 \n\t"\ |
"2: \n\t"\ |
"movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* Y1srcData */\ |
"movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y2srcData */\ |
"mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\ |
"movq (%%"REG_S", %%"REG_a", 2), %%mm4 \n\t" /* Y1srcData */\ |
"movq %%mm0, %%mm3 \n\t"\ |
"punpcklwd %%mm4, %%mm0 \n\t"\ |
"punpckhwd %%mm4, %%mm3 \n\t"\ |
"movq "STR(APCK_COEF)"(%%"REG_d"), %%mm4 \n\t" /* filterCoeff */\ |
"pmaddwd %%mm4, %%mm0 \n\t"\ |
"pmaddwd %%mm4, %%mm3 \n\t"\ |
"paddd %%mm0, %%mm1 \n\t"\ |
"paddd %%mm3, %%mm5 \n\t"\ |
"movq 8(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* Y2srcData */\ |
"mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\ |
"add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\ |
"test %%"REG_S", %%"REG_S" \n\t"\ |
"movq %%mm2, %%mm0 \n\t"\ |
"punpcklwd %%mm3, %%mm2 \n\t"\ |
"punpckhwd %%mm3, %%mm0 \n\t"\ |
"pmaddwd %%mm4, %%mm2 \n\t"\ |
"pmaddwd %%mm4, %%mm0 \n\t"\ |
"paddd %%mm2, %%mm7 \n\t"\ |
"paddd %%mm0, %%mm6 \n\t"\ |
" jnz 2b \n\t"\ |
"psrad $16, %%mm1 \n\t"\ |
"psrad $16, %%mm5 \n\t"\ |
"psrad $16, %%mm7 \n\t"\ |
"psrad $16, %%mm6 \n\t"\ |
"movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\ |
"packssdw %%mm5, %%mm1 \n\t"\ |
"packssdw %%mm6, %%mm7 \n\t"\ |
"paddw %%mm0, %%mm1 \n\t"\ |
"paddw %%mm0, %%mm7 \n\t"\ |
"movq "U_TEMP"(%0), %%mm3 \n\t"\ |
"movq "V_TEMP"(%0), %%mm4 \n\t"\ |
#define YSCALEYUV2PACKEDX_ACCURATE \ |
YSCALEYUV2PACKEDX_ACCURATE_UV \ |
YSCALEYUV2PACKEDX_ACCURATE_YA(LUM_MMX_FILTER_OFFSET) |
#define YSCALEYUV2RGBX \ |
"psubw "U_OFFSET"(%0), %%mm3 \n\t" /* (U-128)8*/\ |
"psubw "V_OFFSET"(%0), %%mm4 \n\t" /* (V-128)8*/\ |
"movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ |
"movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ |
"pmulhw "UG_COEFF"(%0), %%mm3 \n\t"\ |
"pmulhw "VG_COEFF"(%0), %%mm4 \n\t"\ |
/* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ |
"pmulhw "UB_COEFF"(%0), %%mm2 \n\t"\ |
"pmulhw "VR_COEFF"(%0), %%mm5 \n\t"\ |
"psubw "Y_OFFSET"(%0), %%mm1 \n\t" /* 8(Y-16)*/\ |
"psubw "Y_OFFSET"(%0), %%mm7 \n\t" /* 8(Y-16)*/\ |
"pmulhw "Y_COEFF"(%0), %%mm1 \n\t"\ |
"pmulhw "Y_COEFF"(%0), %%mm7 \n\t"\ |
/* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ |
"paddw %%mm3, %%mm4 \n\t"\ |
"movq %%mm2, %%mm0 \n\t"\ |
"movq %%mm5, %%mm6 \n\t"\ |
"movq %%mm4, %%mm3 \n\t"\ |
"punpcklwd %%mm2, %%mm2 \n\t"\ |
"punpcklwd %%mm5, %%mm5 \n\t"\ |
"punpcklwd %%mm4, %%mm4 \n\t"\ |
"paddw %%mm1, %%mm2 \n\t"\ |
"paddw %%mm1, %%mm5 \n\t"\ |
"paddw %%mm1, %%mm4 \n\t"\ |
"punpckhwd %%mm0, %%mm0 \n\t"\ |
"punpckhwd %%mm6, %%mm6 \n\t"\ |
"punpckhwd %%mm3, %%mm3 \n\t"\ |
"paddw %%mm7, %%mm0 \n\t"\ |
"paddw %%mm7, %%mm6 \n\t"\ |
"paddw %%mm7, %%mm3 \n\t"\ |
/* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ |
"packuswb %%mm0, %%mm2 \n\t"\ |
"packuswb %%mm6, %%mm5 \n\t"\ |
"packuswb %%mm3, %%mm4 \n\t"\ |
#define REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) \ |
"movq "#b", "#q2" \n\t" /* B */\ |
"movq "#r", "#t" \n\t" /* R */\ |
"punpcklbw "#g", "#b" \n\t" /* GBGBGBGB 0 */\ |
"punpcklbw "#a", "#r" \n\t" /* ARARARAR 0 */\ |
"punpckhbw "#g", "#q2" \n\t" /* GBGBGBGB 2 */\ |
"punpckhbw "#a", "#t" \n\t" /* ARARARAR 2 */\ |
"movq "#b", "#q0" \n\t" /* GBGBGBGB 0 */\ |
"movq "#q2", "#q3" \n\t" /* GBGBGBGB 2 */\ |
"punpcklwd "#r", "#q0" \n\t" /* ARGBARGB 0 */\ |
"punpckhwd "#r", "#b" \n\t" /* ARGBARGB 1 */\ |
"punpcklwd "#t", "#q2" \n\t" /* ARGBARGB 2 */\ |
"punpckhwd "#t", "#q3" \n\t" /* ARGBARGB 3 */\ |
\ |
MOVNTQ( q0, (dst, index, 4))\ |
MOVNTQ( b, 8(dst, index, 4))\ |
MOVNTQ( q2, 16(dst, index, 4))\ |
MOVNTQ( q3, 24(dst, index, 4))\ |
\ |
"add $8, "#index" \n\t"\ |
"cmp "dstw", "#index" \n\t"\ |
" jb 1b \n\t" |
#define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) |
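/* Editor's note: WRITEBGR32 interleaves four registers of packed bytes |
 * (b, g, r, a) into eight 32-bit pixels: punpck{l,h}bw pairs B with G and |
 * A with R, punpck{l,h}wd then merges the pairs into the little-endian |
 * B,G,R,A byte order in memory, stored four quadwords at a time. */ |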
static void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter, |
const int16_t **lumSrc, int lumFilterSize, |
const int16_t *chrFilter, const int16_t **chrUSrc, |
const int16_t **chrVSrc, |
int chrFilterSize, const int16_t **alpSrc, |
uint8_t *dest, int dstW, int dstY) |
{ |
x86_reg dummy=0; |
x86_reg dstW_reg = dstW; |
x86_reg uv_off = c->uv_offx2; |
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { |
YSCALEYUV2PACKEDX_ACCURATE |
YSCALEYUV2RGBX |
"movq %%mm2, "U_TEMP"(%0) \n\t" |
"movq %%mm4, "V_TEMP"(%0) \n\t" |
"movq %%mm5, "Y_TEMP"(%0) \n\t" |
YSCALEYUV2PACKEDX_ACCURATE_YA(ALP_MMX_FILTER_OFFSET) |
"movq "Y_TEMP"(%0), %%mm5 \n\t" |
"psraw $3, %%mm1 \n\t" |
"psraw $3, %%mm7 \n\t" |
"packuswb %%mm7, %%mm1 \n\t" |
WRITEBGR32(%4, "%5", %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6) |
YSCALEYUV2PACKEDX_END |
} else { |
YSCALEYUV2PACKEDX_ACCURATE |
YSCALEYUV2RGBX |
"pcmpeqd %%mm7, %%mm7 \n\t" |
WRITEBGR32(%4, "%5", %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) |
YSCALEYUV2PACKEDX_END |
} |
} |
static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter, |
const int16_t **lumSrc, int lumFilterSize, |
const int16_t *chrFilter, const int16_t **chrUSrc, |
const int16_t **chrVSrc, |
int chrFilterSize, const int16_t **alpSrc, |
uint8_t *dest, int dstW, int dstY) |
{ |
x86_reg dummy=0; |
x86_reg dstW_reg = dstW; |
x86_reg uv_off = c->uv_offx2; |
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { |
YSCALEYUV2PACKEDX |
YSCALEYUV2RGBX |
YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7) |
"psraw $3, %%mm1 \n\t" |
"psraw $3, %%mm7 \n\t" |
"packuswb %%mm7, %%mm1 \n\t" |
WRITEBGR32(%4, "%5", %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6) |
YSCALEYUV2PACKEDX_END |
} else { |
YSCALEYUV2PACKEDX |
YSCALEYUV2RGBX |
"pcmpeqd %%mm7, %%mm7 \n\t" |
WRITEBGR32(%4, "%5", %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) |
YSCALEYUV2PACKEDX_END |
} |
} |
static void RENAME(yuv2bgr32_X)(SwsContext *c, const int16_t *lumFilter, |
const int16_t **lumSrc, int lumFilterSize, |
const int16_t *chrFilter, const int16_t **chrUSrc, |
const int16_t **chrVSrc, |
int chrFilterSize, const int16_t **alpSrc, |
uint8_t *dest, int dstW, int dstY) |
{ |
x86_reg dummy=0; |
x86_reg dstW_reg = dstW; |
x86_reg uv_off = c->uv_offx2; |
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { |
YSCALEYUV2PACKEDX |
YSCALEYUV2RGBX |
YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7) |
"psraw $3, %%mm1 \n\t" |
"psraw $3, %%mm7 \n\t" |
"packuswb %%mm7, %%mm1 \n\t" |
WRITEBGR32(%4, "%5", %%REGa, %%mm5, %%mm4, %%mm2, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6) |
YSCALEYUV2PACKEDX_END |
} else { |
YSCALEYUV2PACKEDX |
YSCALEYUV2RGBX |
"pcmpeqd %%mm7, %%mm7 \n\t" |
WRITEBGR32(%4, "%5", %%REGa, %%mm5, %%mm4, %%mm2, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) |
YSCALEYUV2PACKEDX_END |
} |
} |
#define REAL_WRITERGB16(dst, dstw, index) \ |
"pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\ |
"pand "MANGLE(bFC)", %%mm4 \n\t" /* G */\ |
"pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\ |
"psrlq $3, %%mm2 \n\t"\ |
\ |
"movq %%mm2, %%mm1 \n\t"\ |
"movq %%mm4, %%mm3 \n\t"\ |
\ |
"punpcklbw %%mm7, %%mm3 \n\t"\ |
"punpcklbw %%mm5, %%mm2 \n\t"\ |
"punpckhbw %%mm7, %%mm4 \n\t"\ |
"punpckhbw %%mm5, %%mm1 \n\t"\ |
\ |
"psllq $3, %%mm3 \n\t"\ |
"psllq $3, %%mm4 \n\t"\ |
\ |
"por %%mm3, %%mm2 \n\t"\ |
"por %%mm4, %%mm1 \n\t"\ |
\ |
MOVNTQ(%%mm2, (dst, index, 2))\ |
MOVNTQ(%%mm1, 8(dst, index, 2))\ |
\ |
"add $8, "#index" \n\t"\ |
"cmp "dstw", "#index" \n\t"\ |
" jb 1b \n\t" |
#define WRITERGB16(dst, dstw, index) REAL_WRITERGB16(dst, dstw, index) |
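/* Editor's sketch: per pixel, WRITERGB16 performs the usual RGB565 pack |
 * (blue in the low bits), equivalent to: |
 * |
 *     uint16_t px = ( b >> 3) |
 *                 | ((g >> 2) << 5) |
 *                 | ((r >> 3) << 11); |
 */ |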
static void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter, |
const int16_t **lumSrc, int lumFilterSize, |
const int16_t *chrFilter, const int16_t **chrUSrc, |
const int16_t **chrVSrc, |
int chrFilterSize, const int16_t **alpSrc, |
uint8_t *dest, int dstW, int dstY) |
{ |
x86_reg dummy=0; |
x86_reg dstW_reg = dstW; |
x86_reg uv_off = c->uv_offx2; |
YSCALEYUV2PACKEDX_ACCURATE |
YSCALEYUV2RGBX |
"pxor %%mm7, %%mm7 \n\t" |
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
#ifdef DITHER1XBPP |
"paddusb "BLUE_DITHER"(%0), %%mm2\n\t" |
"paddusb "GREEN_DITHER"(%0), %%mm4\n\t" |
"paddusb "RED_DITHER"(%0), %%mm5\n\t" |
#endif |
WRITERGB16(%4, "%5", %%REGa) |
YSCALEYUV2PACKEDX_END |
} |
static void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter, |
const int16_t **lumSrc, int lumFilterSize, |
const int16_t *chrFilter, const int16_t **chrUSrc, |
const int16_t **chrVSrc, |
int chrFilterSize, const int16_t **alpSrc, |
uint8_t *dest, int dstW, int dstY) |
{ |
x86_reg dummy=0; |
x86_reg dstW_reg = dstW; |
x86_reg uv_off = c->uv_offx2; |
YSCALEYUV2PACKEDX |
YSCALEYUV2RGBX |
"pxor %%mm7, %%mm7 \n\t" |
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
#ifdef DITHER1XBPP |
"paddusb "BLUE_DITHER"(%0), %%mm2 \n\t" |
"paddusb "GREEN_DITHER"(%0), %%mm4 \n\t" |
"paddusb "RED_DITHER"(%0), %%mm5 \n\t" |
#endif |
WRITERGB16(%4, "%5", %%REGa) |
YSCALEYUV2PACKEDX_END |
} |
#define REAL_WRITERGB15(dst, dstw, index) \ |
"pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\ |
"pand "MANGLE(bF8)", %%mm4 \n\t" /* G */\ |
"pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\ |
"psrlq $3, %%mm2 \n\t"\ |
"psrlq $1, %%mm5 \n\t"\ |
\ |
"movq %%mm2, %%mm1 \n\t"\ |
"movq %%mm4, %%mm3 \n\t"\ |
\ |
"punpcklbw %%mm7, %%mm3 \n\t"\ |
"punpcklbw %%mm5, %%mm2 \n\t"\ |
"punpckhbw %%mm7, %%mm4 \n\t"\ |
"punpckhbw %%mm5, %%mm1 \n\t"\ |
\ |
"psllq $2, %%mm3 \n\t"\ |
"psllq $2, %%mm4 \n\t"\ |
\ |
"por %%mm3, %%mm2 \n\t"\ |
"por %%mm4, %%mm1 \n\t"\ |
\ |
MOVNTQ(%%mm2, (dst, index, 2))\ |
MOVNTQ(%%mm1, 8(dst, index, 2))\ |
\ |
"add $8, "#index" \n\t"\ |
"cmp "dstw", "#index" \n\t"\ |
" jb 1b \n\t" |
#define WRITERGB15(dst, dstw, index) REAL_WRITERGB15(dst, dstw, index) |
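/* Editor's sketch: the RGB555 pack keeps 5 bits per component and leaves |
 * the top bit clear, equivalent to: |
 * |
 *     uint16_t px = ( b >> 3) |
 *                 | ((g >> 3) << 5) |
 *                 | ((r >> 3) << 10); |
 */ |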
static void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter, |
const int16_t **lumSrc, int lumFilterSize, |
const int16_t *chrFilter, const int16_t **chrUSrc, |
const int16_t **chrVSrc, |
int chrFilterSize, const int16_t **alpSrc, |
uint8_t *dest, int dstW, int dstY) |
{ |
x86_reg dummy=0; |
x86_reg dstW_reg = dstW; |
x86_reg uv_off = c->uv_offx2; |
YSCALEYUV2PACKEDX_ACCURATE |
YSCALEYUV2RGBX |
"pxor %%mm7, %%mm7 \n\t" |
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
#ifdef DITHER1XBPP |
"paddusb "BLUE_DITHER"(%0), %%mm2\n\t" |
"paddusb "GREEN_DITHER"(%0), %%mm4\n\t" |
"paddusb "RED_DITHER"(%0), %%mm5\n\t" |
#endif |
WRITERGB15(%4, "%5", %%REGa) |
YSCALEYUV2PACKEDX_END |
} |
static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter, |
const int16_t **lumSrc, int lumFilterSize, |
const int16_t *chrFilter, const int16_t **chrUSrc, |
const int16_t **chrVSrc, |
int chrFilterSize, const int16_t **alpSrc, |
uint8_t *dest, int dstW, int dstY) |
{ |
x86_reg dummy=0; |
x86_reg dstW_reg = dstW; |
x86_reg uv_off = c->uv_offx2; |
YSCALEYUV2PACKEDX |
YSCALEYUV2RGBX |
"pxor %%mm7, %%mm7 \n\t" |
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
#ifdef DITHER1XBPP |
"paddusb "BLUE_DITHER"(%0), %%mm2 \n\t" |
"paddusb "GREEN_DITHER"(%0), %%mm4 \n\t" |
"paddusb "RED_DITHER"(%0), %%mm5 \n\t" |
#endif |
WRITERGB15(%4, "%5", %%REGa) |
YSCALEYUV2PACKEDX_END |
} |
#define WRITEBGR24MMX(dst, dstw, index) \ |
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ |
"movq %%mm2, %%mm1 \n\t" /* B */\ |
"movq %%mm5, %%mm6 \n\t" /* R */\ |
"punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\ |
"punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\ |
"punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\ |
"punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\ |
"movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\ |
"movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\ |
"punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\ |
"punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\ |
"punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\ |
"punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\ |
\ |
"movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\ |
"movq %%mm2, %%mm6 \n\t" /* 0RGB0RGB 1 */\ |
"movq %%mm1, %%mm5 \n\t" /* 0RGB0RGB 2 */\ |
"movq %%mm3, %%mm7 \n\t" /* 0RGB0RGB 3 */\ |
\ |
"psllq $40, %%mm0 \n\t" /* RGB00000 0 */\ |
"psllq $40, %%mm2 \n\t" /* RGB00000 1 */\ |
"psllq $40, %%mm1 \n\t" /* RGB00000 2 */\ |
"psllq $40, %%mm3 \n\t" /* RGB00000 3 */\ |
\ |
"punpckhdq %%mm4, %%mm0 \n\t" /* 0RGBRGB0 0 */\ |
"punpckhdq %%mm6, %%mm2 \n\t" /* 0RGBRGB0 1 */\ |
"punpckhdq %%mm5, %%mm1 \n\t" /* 0RGBRGB0 2 */\ |
"punpckhdq %%mm7, %%mm3 \n\t" /* 0RGBRGB0 3 */\ |
\ |
"psrlq $8, %%mm0 \n\t" /* 00RGBRGB 0 */\ |
"movq %%mm2, %%mm6 \n\t" /* 0RGBRGB0 1 */\ |
"psllq $40, %%mm2 \n\t" /* GB000000 1 */\ |
"por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\ |
MOVNTQ(%%mm0, (dst))\ |
\ |
"psrlq $24, %%mm6 \n\t" /* 0000RGBR 1 */\ |
"movq %%mm1, %%mm5 \n\t" /* 0RGBRGB0 2 */\ |
"psllq $24, %%mm1 \n\t" /* BRGB0000 2 */\ |
"por %%mm1, %%mm6 \n\t" /* BRGBRGBR 1 */\ |
MOVNTQ(%%mm6, 8(dst))\ |
\ |
"psrlq $40, %%mm5 \n\t" /* 000000RG 2 */\ |
"psllq $8, %%mm3 \n\t" /* RGBRGB00 3 */\ |
"por %%mm3, %%mm5 \n\t" /* RGBRGBRG 2 */\ |
MOVNTQ(%%mm5, 16(dst))\ |
\ |
"add $24, "#dst" \n\t"\ |
\ |
"add $8, "#index" \n\t"\ |
"cmp "dstw", "#index" \n\t"\ |
" jb 1b \n\t" |
#define WRITEBGR24MMXEXT(dst, dstw, index) \ |
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ |
"movq "MANGLE(ff_M24A)", %%mm0 \n\t"\ |
"movq "MANGLE(ff_M24C)", %%mm7 \n\t"\ |
"pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */\ |
"pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2 G1 G0 G1 G0 */\ |
"pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0 R1 R0 R1 R0 */\ |
\ |
"pand %%mm0, %%mm1 \n\t" /* B2 B1 B0 */\ |
"pand %%mm0, %%mm3 \n\t" /* G2 G1 G0 */\ |
"pand %%mm7, %%mm6 \n\t" /* R1 R0 */\ |
\ |
"psllq $8, %%mm3 \n\t" /* G2 G1 G0 */\ |
"por %%mm1, %%mm6 \n\t"\ |
"por %%mm3, %%mm6 \n\t"\ |
MOVNTQ(%%mm6, (dst))\ |
\ |
"psrlq $8, %%mm4 \n\t" /* 00 G7 G6 G5 G4 G3 G2 G1 */\ |
"pshufw $0xA5, %%mm2, %%mm1 \n\t" /* B5 B4 B5 B4 B3 B2 B3 B2 */\ |
"pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3 G4 G3 G4 G3 */\ |
"pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4 R3 R2 R3 R2 */\ |
\ |
"pand "MANGLE(ff_M24B)", %%mm1 \n\t" /* B5 B4 B3 */\ |
"pand %%mm7, %%mm3 \n\t" /* G4 G3 */\ |
"pand %%mm0, %%mm6 \n\t" /* R4 R3 R2 */\ |
\ |
"por %%mm1, %%mm3 \n\t" /* B5 G4 B4 G3 B3 */\ |
"por %%mm3, %%mm6 \n\t"\ |
MOVNTQ(%%mm6, 8(dst))\ |
\ |
"pshufw $0xFF, %%mm2, %%mm1 \n\t" /* B7 B6 B7 B6 B7 B6 B6 B7 */\ |
"pshufw $0xFA, %%mm4, %%mm3 \n\t" /* 00 G7 00 G7 G6 G5 G6 G5 */\ |
"pshufw $0xFA, %%mm5, %%mm6 \n\t" /* R7 R6 R7 R6 R5 R4 R5 R4 */\ |
\ |
"pand %%mm7, %%mm1 \n\t" /* B7 B6 */\ |
"pand %%mm0, %%mm3 \n\t" /* G7 G6 G5 */\ |
"pand "MANGLE(ff_M24B)", %%mm6 \n\t" /* R7 R6 R5 */\ |
\ |
"por %%mm1, %%mm3 \n\t"\ |
"por %%mm3, %%mm6 \n\t"\ |
MOVNTQ(%%mm6, 16(dst))\ |
\ |
"add $24, "#dst" \n\t"\ |
\ |
"add $8, "#index" \n\t"\ |
"cmp "dstw", "#index" \n\t"\ |
" jb 1b \n\t" |
#if COMPILE_TEMPLATE_MMXEXT |
#undef WRITEBGR24 |
#define WRITEBGR24(dst, dstw, index) WRITEBGR24MMXEXT(dst, dstw, index) |
#else |
#undef WRITEBGR24 |
#define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX(dst, dstw, index) |
#endif |
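/* Editor's note: packing eight pixels into 24 bytes of B,G,R has no single |
 * MMX shuffle, so the plain-MMX writer assembles the byte stream with |
 * shifts and ORs, while the MMXEXT writer uses pshufw together with the |
 * ff_M24A/ff_M24B/ff_M24C byte masks to compose each output quadword |
 * directly. */ |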
#if HAVE_6REGS |
static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter, |
const int16_t **lumSrc, int lumFilterSize, |
const int16_t *chrFilter, const int16_t **chrUSrc, |
const int16_t **chrVSrc, |
int chrFilterSize, const int16_t **alpSrc, |
uint8_t *dest, int dstW, int dstY) |
{ |
x86_reg dummy=0; |
x86_reg dstW_reg = dstW; |
x86_reg uv_off = c->uv_offx2; |
YSCALEYUV2PACKEDX_ACCURATE |
YSCALEYUV2RGBX |
"pxor %%mm7, %%mm7 \n\t" |
"lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize |
"add %4, %%"REG_c" \n\t" |
WRITEBGR24(%%REGc, "%5", %%REGa) |
:: "r" (&c->redDither), |
"m" (dummy), "m" (dummy), "m" (dummy), |
"r" (dest), "m" (dstW_reg), "m"(uv_off) |
NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B) |
: "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S |
); |
} |
static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter, |
const int16_t **lumSrc, int lumFilterSize, |
const int16_t *chrFilter, const int16_t **chrUSrc, |
const int16_t **chrVSrc, |
int chrFilterSize, const int16_t **alpSrc, |
uint8_t *dest, int dstW, int dstY) |
{ |
x86_reg dummy=0; |
x86_reg dstW_reg = dstW; |
x86_reg uv_off = c->uv_offx2; |
YSCALEYUV2PACKEDX |
YSCALEYUV2RGBX |
"pxor %%mm7, %%mm7 \n\t" |
"lea (%%"REG_a", %%"REG_a", 2), %%"REG_c" \n\t" //FIXME optimize |
"add %4, %%"REG_c" \n\t" |
WRITEBGR24(%%REGc, "%5", %%REGa) |
:: "r" (&c->redDither), |
"m" (dummy), "m" (dummy), "m" (dummy), |
"r" (dest), "m" (dstW_reg), "m"(uv_off) |
NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B) |
: "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S |
); |
} |
#endif /* HAVE_6REGS */ |
#define REAL_WRITEYUY2(dst, dstw, index) \ |
"packuswb %%mm3, %%mm3 \n\t"\ |
"packuswb %%mm4, %%mm4 \n\t"\ |
"packuswb %%mm7, %%mm1 \n\t"\ |
"punpcklbw %%mm4, %%mm3 \n\t"\ |
"movq %%mm1, %%mm7 \n\t"\ |
"punpcklbw %%mm3, %%mm1 \n\t"\ |
"punpckhbw %%mm3, %%mm7 \n\t"\ |
\ |
MOVNTQ(%%mm1, (dst, index, 2))\ |
MOVNTQ(%%mm7, 8(dst, index, 2))\ |
\ |
"add $8, "#index" \n\t"\ |
"cmp "dstw", "#index" \n\t"\ |
" jb 1b \n\t" |
#define WRITEYUY2(dst, dstw, index) REAL_WRITEYUY2(dst, dstw, index) |
static void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter, |
const int16_t **lumSrc, int lumFilterSize, |
const int16_t *chrFilter, const int16_t **chrUSrc, |
const int16_t **chrVSrc, |
int chrFilterSize, const int16_t **alpSrc, |
uint8_t *dest, int dstW, int dstY) |
{ |
x86_reg dummy=0; |
x86_reg dstW_reg = dstW; |
x86_reg uv_off = c->uv_offx2; |
YSCALEYUV2PACKEDX_ACCURATE |
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
"psraw $3, %%mm3 \n\t" |
"psraw $3, %%mm4 \n\t" |
"psraw $3, %%mm1 \n\t" |
"psraw $3, %%mm7 \n\t" |
WRITEYUY2(%4, "%5", %%REGa) |
YSCALEYUV2PACKEDX_END |
} |
static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter, |
const int16_t **lumSrc, int lumFilterSize, |
const int16_t *chrFilter, const int16_t **chrUSrc, |
const int16_t **chrVSrc, |
int chrFilterSize, const int16_t **alpSrc, |
uint8_t *dest, int dstW, int dstY) |
{ |
x86_reg dummy=0; |
x86_reg dstW_reg = dstW; |
x86_reg uv_off = c->uv_offx2; |
YSCALEYUV2PACKEDX |
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
"psraw $3, %%mm3 \n\t" |
"psraw $3, %%mm4 \n\t" |
"psraw $3, %%mm1 \n\t" |
"psraw $3, %%mm7 \n\t" |
WRITEYUY2(%4, "%5", %%REGa) |
YSCALEYUV2PACKEDX_END |
} |
#define REAL_YSCALEYUV2RGB_UV(index, c) \ |
"xor "#index", "#index" \n\t"\ |
".p2align 4 \n\t"\ |
"1: \n\t"\ |
"movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ |
"movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ |
"add "UV_OFF_BYTE"("#c"), "#index" \n\t" \ |
"movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ |
"movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ |
"sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \ |
"psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ |
"psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ |
"movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\ |
"pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\ |
"pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\ |
"psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ |
"psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ |
"paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\ |
"paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\ |
"psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\ |
"psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\ |
"movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ |
"movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ |
"pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\ |
"pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\ |
/* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ |
#define REAL_YSCALEYUV2RGB_YA(index, c, b1, b2) \ |
"movq ("#b1", "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\ |
"movq ("#b2", "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\ |
"movq 8("#b1", "#index", 2), %%mm6 \n\t" /*buf0[eax]*/\ |
"movq 8("#b2", "#index", 2), %%mm7 \n\t" /*buf1[eax]*/\ |
"psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\ |
"psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\ |
"pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ |
"pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ |
"psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ |
"psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ |
"paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ |
"paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ |
#define REAL_YSCALEYUV2RGB_COEFF(c) \ |
"pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\ |
"pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\ |
"psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\ |
"psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\ |
"pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\ |
"pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\ |
/* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ |
"paddw %%mm3, %%mm4 \n\t"\ |
"movq %%mm2, %%mm0 \n\t"\ |
"movq %%mm5, %%mm6 \n\t"\ |
"movq %%mm4, %%mm3 \n\t"\ |
"punpcklwd %%mm2, %%mm2 \n\t"\ |
"punpcklwd %%mm5, %%mm5 \n\t"\ |
"punpcklwd %%mm4, %%mm4 \n\t"\ |
"paddw %%mm1, %%mm2 \n\t"\ |
"paddw %%mm1, %%mm5 \n\t"\ |
"paddw %%mm1, %%mm4 \n\t"\ |
"punpckhwd %%mm0, %%mm0 \n\t"\ |
"punpckhwd %%mm6, %%mm6 \n\t"\ |
"punpckhwd %%mm3, %%mm3 \n\t"\ |
"paddw %%mm7, %%mm0 \n\t"\ |
"paddw %%mm7, %%mm6 \n\t"\ |
"paddw %%mm7, %%mm3 \n\t"\ |
/* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ |
"packuswb %%mm0, %%mm2 \n\t"\ |
"packuswb %%mm6, %%mm5 \n\t"\ |
"packuswb %%mm3, %%mm4 \n\t"\ |
#define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2) |
#define YSCALEYUV2RGB(index, c) \ |
REAL_YSCALEYUV2RGB_UV(index, c) \ |
REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) \ |
REAL_YSCALEYUV2RGB_COEFF(c) |
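/* Editor's sketch (fixed-point details elided) of the math YSCALEYUV2RGB |
 * implements: two input lines are blended vertically, then the standard |
 * YUV->RGB matrix is applied with per-context coefficients (Y_COEFF, |
 * UB_COEFF, UG_COEFF, VG_COEFF, VR_COEFF; the green ones are negative): |
 * |
 *     Y' = blend(buf0, buf1)   - y_offset |
 *     U' = blend(ubuf0, ubuf1) - 128,  V' likewise |
 *     R  = cy*Y' + vr*V' |
 *     G  = cy*Y' + ug*U' + vg*V' |
 *     B  = cy*Y' + ub*U' |
 */ |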
/** |
 * Vertical bilinear scaling of YV12 to RGB. |
*/ |
static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2], |
const int16_t *ubuf[2], const int16_t *vbuf[2], |
const int16_t *abuf[2], uint8_t *dest, |
int dstW, int yalpha, int uvalpha, int y) |
{ |
const int16_t *buf0 = buf[0], *buf1 = buf[1], |
*ubuf0 = ubuf[0], *ubuf1 = ubuf[1]; |
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { |
const int16_t *abuf0 = abuf[0], *abuf1 = abuf[1]; |
#if ARCH_X86_64 |
__asm__ volatile( |
YSCALEYUV2RGB(%%r8, %5) |
YSCALEYUV2RGB_YA(%%r8, %5, %6, %7) |
"psraw $3, %%mm1 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/ |
"psraw $3, %%mm7 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/ |
"packuswb %%mm7, %%mm1 \n\t" |
WRITEBGR32(%4, DSTW_OFFSET"(%5)", %%r8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6) |
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "r" (dest), |
"a" (&c->redDither), |
"r" (abuf0), "r" (abuf1) |
: "%r8" |
); |
#else |
c->u_temp=(intptr_t)abuf0; |
c->v_temp=(intptr_t)abuf1; |
__asm__ volatile( |
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" |
"mov %4, %%"REG_b" \n\t" |
"push %%"REG_BP" \n\t" |
YSCALEYUV2RGB(%%REGBP, %5) |
"push %0 \n\t" |
"push %1 \n\t" |
"mov "U_TEMP"(%5), %0 \n\t" |
"mov "V_TEMP"(%5), %1 \n\t" |
YSCALEYUV2RGB_YA(%%REGBP, %5, %0, %1) |
"psraw $3, %%mm1 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/ |
"psraw $3, %%mm7 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/ |
"packuswb %%mm7, %%mm1 \n\t" |
"pop %1 \n\t" |
"pop %0 \n\t" |
WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6) |
"pop %%"REG_BP" \n\t" |
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" |
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), |
"a" (&c->redDither) |
); |
#endif |
} else { |
__asm__ volatile( |
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" |
"mov %4, %%"REG_b" \n\t" |
"push %%"REG_BP" \n\t" |
YSCALEYUV2RGB(%%REGBP, %5) |
"pcmpeqd %%mm7, %%mm7 \n\t" |
WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) |
"pop %%"REG_BP" \n\t" |
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" |
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), |
"a" (&c->redDither) |
); |
} |
} |
static void RENAME(yuv2bgr24_2)(SwsContext *c, const int16_t *buf[2], |
const int16_t *ubuf[2], const int16_t *vbuf[2], |
const int16_t *abuf[2], uint8_t *dest, |
int dstW, int yalpha, int uvalpha, int y) |
{ |
const int16_t *buf0 = buf[0], *buf1 = buf[1], |
*ubuf0 = ubuf[0], *ubuf1 = ubuf[1]; |
__asm__ volatile( |
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" |
"mov %4, %%"REG_b" \n\t" |
"push %%"REG_BP" \n\t" |
YSCALEYUV2RGB(%%REGBP, %5) |
"pxor %%mm7, %%mm7 \n\t" |
WRITEBGR24(%%REGb, DSTW_OFFSET"(%5)", %%REGBP) |
"pop %%"REG_BP" \n\t" |
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" |
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), |
"a" (&c->redDither) |
NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B) |
); |
} |
static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2], |
const int16_t *ubuf[2], const int16_t *vbuf[2], |
const int16_t *abuf[2], uint8_t *dest, |
int dstW, int yalpha, int uvalpha, int y) |
{ |
const int16_t *buf0 = buf[0], *buf1 = buf[1], |
*ubuf0 = ubuf[0], *ubuf1 = ubuf[1]; |
__asm__ volatile( |
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" |
"mov %4, %%"REG_b" \n\t" |
"push %%"REG_BP" \n\t" |
YSCALEYUV2RGB(%%REGBP, %5) |
"pxor %%mm7, %%mm7 \n\t" |
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
#ifdef DITHER1XBPP |
"paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" |
"paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" |
"paddusb "RED_DITHER"(%5), %%mm5 \n\t" |
#endif |
WRITERGB15(%%REGb, DSTW_OFFSET"(%5)", %%REGBP) |
"pop %%"REG_BP" \n\t" |
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" |
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), |
"a" (&c->redDither) |
NAMED_CONSTRAINTS_ADD(bF8) |
); |
} |
static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2], |
const int16_t *ubuf[2], const int16_t *vbuf[2], |
const int16_t *abuf[2], uint8_t *dest, |
int dstW, int yalpha, int uvalpha, int y) |
{ |
const int16_t *buf0 = buf[0], *buf1 = buf[1], |
*ubuf0 = ubuf[0], *ubuf1 = ubuf[1]; |
__asm__ volatile( |
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" |
"mov %4, %%"REG_b" \n\t" |
"push %%"REG_BP" \n\t" |
YSCALEYUV2RGB(%%REGBP, %5) |
"pxor %%mm7, %%mm7 \n\t" |
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
#ifdef DITHER1XBPP |
"paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" |
"paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" |
"paddusb "RED_DITHER"(%5), %%mm5 \n\t" |
#endif |
WRITERGB16(%%REGb, DSTW_OFFSET"(%5)", %%REGBP) |
"pop %%"REG_BP" \n\t" |
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" |
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), |
"a" (&c->redDither) |
NAMED_CONSTRAINTS_ADD(bF8,bFC) |
); |
} |
#define REAL_YSCALEYUV2PACKED(index, c) \ |
"movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\ |
"movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1 \n\t"\ |
"psraw $3, %%mm0 \n\t"\ |
"psraw $3, %%mm1 \n\t"\ |
"movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t"\ |
"movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\ |
"xor "#index", "#index" \n\t"\ |
".p2align 4 \n\t"\ |
"1: \n\t"\ |
"movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ |
"movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ |
"add "UV_OFF_BYTE"("#c"), "#index" \n\t" \ |
"movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ |
"movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ |
"sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \ |
"psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ |
"psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ |
"movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\ |
"pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\ |
"pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\ |
"psraw $7, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ |
"psraw $7, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ |
"paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\ |
"paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\ |
"movq (%0, "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\ |
"movq (%1, "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\ |
"movq 8(%0, "#index", 2), %%mm6 \n\t" /*buf0[eax]*/\ |
"movq 8(%1, "#index", 2), %%mm7 \n\t" /*buf1[eax]*/\ |
"psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\ |
"psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\ |
"pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ |
"pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ |
"psraw $7, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ |
"psraw $7, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ |
"paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ |
"paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ |
#define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c) |
static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2], |
const int16_t *ubuf[2], const int16_t *vbuf[2], |
const int16_t *abuf[2], uint8_t *dest, |
int dstW, int yalpha, int uvalpha, int y) |
{ |
const int16_t *buf0 = buf[0], *buf1 = buf[1], |
*ubuf0 = ubuf[0], *ubuf1 = ubuf[1]; |
__asm__ volatile( |
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" |
"mov %4, %%"REG_b" \n\t" |
"push %%"REG_BP" \n\t" |
YSCALEYUV2PACKED(%%REGBP, %5) |
WRITEYUY2(%%REGb, DSTW_OFFSET"(%5)", %%REGBP) |
"pop %%"REG_BP" \n\t" |
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" |
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), |
"a" (&c->redDither) |
); |
} |
#define REAL_YSCALEYUV2RGB1(index, c) \ |
"xor "#index", "#index" \n\t"\ |
".p2align 4 \n\t"\ |
"1: \n\t"\ |
"movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\ |
"add "UV_OFF_BYTE"("#c"), "#index" \n\t" \ |
"movq (%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ |
"sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \ |
"psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ |
"psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ |
"psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\ |
"psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\ |
"movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ |
"movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ |
"pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\ |
"pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\ |
/* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ |
"movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\ |
"movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\ |
"psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ |
"psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ |
"pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\ |
"pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\ |
"psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\ |
"psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\ |
"pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\ |
"pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\ |
/* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ |
"paddw %%mm3, %%mm4 \n\t"\ |
"movq %%mm2, %%mm0 \n\t"\ |
"movq %%mm5, %%mm6 \n\t"\ |
"movq %%mm4, %%mm3 \n\t"\ |
"punpcklwd %%mm2, %%mm2 \n\t"\ |
"punpcklwd %%mm5, %%mm5 \n\t"\ |
"punpcklwd %%mm4, %%mm4 \n\t"\ |
"paddw %%mm1, %%mm2 \n\t"\ |
"paddw %%mm1, %%mm5 \n\t"\ |
"paddw %%mm1, %%mm4 \n\t"\ |
"punpckhwd %%mm0, %%mm0 \n\t"\ |
"punpckhwd %%mm6, %%mm6 \n\t"\ |
"punpckhwd %%mm3, %%mm3 \n\t"\ |
"paddw %%mm7, %%mm0 \n\t"\ |
"paddw %%mm7, %%mm6 \n\t"\ |
"paddw %%mm7, %%mm3 \n\t"\ |
/* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ |
"packuswb %%mm0, %%mm2 \n\t"\ |
"packuswb %%mm6, %%mm5 \n\t"\ |
"packuswb %%mm3, %%mm4 \n\t"\ |
#define YSCALEYUV2RGB1(index, c) REAL_YSCALEYUV2RGB1(index, c) |
// variant that averages the two chroma input lines (vertical chrominance interpolation) |
#define REAL_YSCALEYUV2RGB1b(index, c) \ |
"xor "#index", "#index" \n\t"\ |
".p2align 4 \n\t"\ |
"1: \n\t"\ |
"movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ |
"movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ |
"add "UV_OFF_BYTE"("#c"), "#index" \n\t" \ |
"movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ |
"movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ |
"sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \ |
"paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\ |
"paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\ |
"psrlw $5, %%mm3 \n\t" /*FIXME might overflow*/\ |
"psrlw $5, %%mm4 \n\t" /*FIXME might overflow*/\ |
"psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\ |
"psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\ |
"movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ |
"movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ |
"pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\ |
"pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\ |
/* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ |
"movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\ |
"movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\ |
"psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ |
"psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ |
"pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\ |
"pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\ |
"psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\ |
"psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\ |
"pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\ |
"pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\ |
/* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ |
"paddw %%mm3, %%mm4 \n\t"\ |
"movq %%mm2, %%mm0 \n\t"\ |
"movq %%mm5, %%mm6 \n\t"\ |
"movq %%mm4, %%mm3 \n\t"\ |
"punpcklwd %%mm2, %%mm2 \n\t"\ |
"punpcklwd %%mm5, %%mm5 \n\t"\ |
"punpcklwd %%mm4, %%mm4 \n\t"\ |
"paddw %%mm1, %%mm2 \n\t"\ |
"paddw %%mm1, %%mm5 \n\t"\ |
"paddw %%mm1, %%mm4 \n\t"\ |
"punpckhwd %%mm0, %%mm0 \n\t"\ |
"punpckhwd %%mm6, %%mm6 \n\t"\ |
"punpckhwd %%mm3, %%mm3 \n\t"\ |
"paddw %%mm7, %%mm0 \n\t"\ |
"paddw %%mm7, %%mm6 \n\t"\ |
"paddw %%mm7, %%mm3 \n\t"\ |
/* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ |
"packuswb %%mm0, %%mm2 \n\t"\ |
"packuswb %%mm6, %%mm5 \n\t"\ |
"packuswb %%mm3, %%mm4 \n\t"\ |
#define YSCALEYUV2RGB1b(index, c) REAL_YSCALEYUV2RGB1b(index, c) |
#define REAL_YSCALEYUV2RGB1_ALPHA(index) \ |
"movq (%1, "#index", 2), %%mm7 \n\t" /* abuf0[index ] */\ |
"movq 8(%1, "#index", 2), %%mm1 \n\t" /* abuf0[index+4] */\ |
"psraw $7, %%mm7 \n\t" /* abuf0[index ] >>7 */\ |
"psraw $7, %%mm1 \n\t" /* abuf0[index+4] >>7 */\ |
"packuswb %%mm1, %%mm7 \n\t" |
#define YSCALEYUV2RGB1_ALPHA(index) REAL_YSCALEYUV2RGB1_ALPHA(index) |
/** |
 * YV12 to RGB conversion without scaling or interpolation. |
*/ |
static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0, |
const int16_t *ubuf[2], const int16_t *vbuf[2], |
const int16_t *abuf0, uint8_t *dest, |
int dstW, int uvalpha, int y) |
{ |
const int16_t *ubuf0 = ubuf[0]; |
const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 |
if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster |
const int16_t *ubuf1 = ubuf[0]; |
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { |
__asm__ volatile( |
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" |
"mov %4, %%"REG_b" \n\t" |
"push %%"REG_BP" \n\t" |
YSCALEYUV2RGB1(%%REGBP, %5) |
YSCALEYUV2RGB1_ALPHA(%%REGBP) |
WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) |
"pop %%"REG_BP" \n\t" |
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" |
:: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest), |
"a" (&c->redDither) |
); |
} else { |
__asm__ volatile( |
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" |
"mov %4, %%"REG_b" \n\t" |
"push %%"REG_BP" \n\t" |
YSCALEYUV2RGB1(%%REGBP, %5) |
"pcmpeqd %%mm7, %%mm7 \n\t" |
WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) |
"pop %%"REG_BP" \n\t" |
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" |
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), |
"a" (&c->redDither) |
); |
} |
} else { |
const int16_t *ubuf1 = ubuf[1]; |
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { |
__asm__ volatile( |
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" |
"mov %4, %%"REG_b" \n\t" |
"push %%"REG_BP" \n\t" |
YSCALEYUV2RGB1b(%%REGBP, %5) |
YSCALEYUV2RGB1_ALPHA(%%REGBP) |
WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) |
"pop %%"REG_BP" \n\t" |
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" |
:: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest), |
"a" (&c->redDither) |
); |
} else { |
__asm__ volatile( |
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" |
"mov %4, %%"REG_b" \n\t" |
"push %%"REG_BP" \n\t" |
YSCALEYUV2RGB1b(%%REGBP, %5) |
"pcmpeqd %%mm7, %%mm7 \n\t" |
WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) |
"pop %%"REG_BP" \n\t" |
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" |
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), |
"a" (&c->redDither) |
); |
} |
} |
} |
static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0, |
const int16_t *ubuf[2], const int16_t *vbuf[2], |
const int16_t *abuf0, uint8_t *dest, |
int dstW, int uvalpha, int y) |
{ |
const int16_t *ubuf0 = ubuf[0]; |
const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 |
if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster |
const int16_t *ubuf1 = ubuf[0]; |
__asm__ volatile( |
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" |
"mov %4, %%"REG_b" \n\t" |
"push %%"REG_BP" \n\t" |
YSCALEYUV2RGB1(%%REGBP, %5) |
"pxor %%mm7, %%mm7 \n\t" |
WRITEBGR24(%%REGb, DSTW_OFFSET"(%5)", %%REGBP) |
"pop %%"REG_BP" \n\t" |
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" |
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), |
"a" (&c->redDither) |
NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B) |
); |
} else { |
const int16_t *ubuf1 = ubuf[1]; |
__asm__ volatile( |
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" |
"mov %4, %%"REG_b" \n\t" |
"push %%"REG_BP" \n\t" |
YSCALEYUV2RGB1b(%%REGBP, %5) |
"pxor %%mm7, %%mm7 \n\t" |
WRITEBGR24(%%REGb, DSTW_OFFSET"(%5)", %%REGBP) |
"pop %%"REG_BP" \n\t" |
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" |
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), |
"a" (&c->redDither) |
NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B) |
); |
} |
} |
static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0, |
const int16_t *ubuf[2], const int16_t *vbuf[2], |
const int16_t *abuf0, uint8_t *dest, |
int dstW, int uvalpha, int y) |
{ |
const int16_t *ubuf0 = ubuf[0]; |
const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 |
if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster |
const int16_t *ubuf1 = ubuf[0]; |
__asm__ volatile( |
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" |
"mov %4, %%"REG_b" \n\t" |
"push %%"REG_BP" \n\t" |
YSCALEYUV2RGB1(%%REGBP, %5) |
"pxor %%mm7, %%mm7 \n\t" |
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
#ifdef DITHER1XBPP |
"paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" |
"paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" |
"paddusb "RED_DITHER"(%5), %%mm5 \n\t" |
#endif |
WRITERGB15(%%REGb, DSTW_OFFSET"(%5)", %%REGBP) |
"pop %%"REG_BP" \n\t" |
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" |
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), |
"a" (&c->redDither) |
NAMED_CONSTRAINTS_ADD(bF8) |
); |
} else { |
const int16_t *ubuf1 = ubuf[1]; |
__asm__ volatile( |
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" |
"mov %4, %%"REG_b" \n\t" |
"push %%"REG_BP" \n\t" |
YSCALEYUV2RGB1b(%%REGBP, %5) |
"pxor %%mm7, %%mm7 \n\t" |
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
#ifdef DITHER1XBPP |
"paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" |
"paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" |
"paddusb "RED_DITHER"(%5), %%mm5 \n\t" |
#endif |
WRITERGB15(%%REGb, DSTW_OFFSET"(%5)", %%REGBP) |
"pop %%"REG_BP" \n\t" |
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" |
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), |
"a" (&c->redDither) |
NAMED_CONSTRAINTS_ADD(bF8) |
); |
} |
} |
static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0, |
const int16_t *ubuf[2], const int16_t *vbuf[2], |
const int16_t *abuf0, uint8_t *dest, |
int dstW, int uvalpha, int y) |
{ |
const int16_t *ubuf0 = ubuf[0]; |
const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 |
if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster |
const int16_t *ubuf1 = ubuf[0]; |
__asm__ volatile( |
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" |
"mov %4, %%"REG_b" \n\t" |
"push %%"REG_BP" \n\t" |
YSCALEYUV2RGB1(%%REGBP, %5) |
"pxor %%mm7, %%mm7 \n\t" |
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
#ifdef DITHER1XBPP |
"paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" |
"paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" |
"paddusb "RED_DITHER"(%5), %%mm5 \n\t" |
#endif |
WRITERGB16(%%REGb, DSTW_OFFSET"(%5)", %%REGBP) |
"pop %%"REG_BP" \n\t" |
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" |
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), |
"a" (&c->redDither) |
NAMED_CONSTRAINTS_ADD(bF8,bFC) |
); |
} else { |
const int16_t *ubuf1 = ubuf[1]; |
__asm__ volatile( |
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" |
"mov %4, %%"REG_b" \n\t" |
"push %%"REG_BP" \n\t" |
YSCALEYUV2RGB1b(%%REGBP, %5) |
"pxor %%mm7, %%mm7 \n\t" |
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
#ifdef DITHER1XBPP |
"paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" |
"paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" |
"paddusb "RED_DITHER"(%5), %%mm5 \n\t" |
#endif |
WRITERGB16(%%REGb, DSTW_OFFSET"(%5)", %%REGBP) |
"pop %%"REG_BP" \n\t" |
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" |
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), |
"a" (&c->redDither) |
NAMED_CONSTRAINTS_ADD(bF8,bFC) |
); |
} |
} |
#define REAL_YSCALEYUV2PACKED1(index, c) \ |
"xor "#index", "#index" \n\t"\ |
".p2align 4 \n\t"\ |
"1: \n\t"\ |
"movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\ |
"add "UV_OFF_BYTE"("#c"), "#index" \n\t" \ |
"movq (%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ |
"sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \ |
"psraw $7, %%mm3 \n\t" \ |
"psraw $7, %%mm4 \n\t" \ |
"movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\ |
"movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\ |
"psraw $7, %%mm1 \n\t" \ |
"psraw $7, %%mm7 \n\t" \ |
#define YSCALEYUV2PACKED1(index, c) REAL_YSCALEYUV2PACKED1(index, c) |
#define REAL_YSCALEYUV2PACKED1b(index, c) \ |
"xor "#index", "#index" \n\t"\ |
".p2align 4 \n\t"\ |
"1: \n\t"\ |
"movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ |
"movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ |
"add "UV_OFF_BYTE"("#c"), "#index" \n\t" \ |
"movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ |
"movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ |
"sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \ |
"paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\ |
"paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\ |
"psrlw $8, %%mm3 \n\t" \ |
"psrlw $8, %%mm4 \n\t" \ |
"movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\ |
"movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\ |
"psraw $7, %%mm1 \n\t" \ |
"psraw $7, %%mm7 \n\t" |
#define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c) |
static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0, |
const int16_t *ubuf[2], const int16_t *vbuf[2], |
const int16_t *abuf0, uint8_t *dest, |
int dstW, int uvalpha, int y) |
{ |
const int16_t *ubuf0 = ubuf[0]; |
const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 |
if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster |
const int16_t *ubuf1 = ubuf[0]; |
__asm__ volatile( |
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" |
"mov %4, %%"REG_b" \n\t" |
"push %%"REG_BP" \n\t" |
YSCALEYUV2PACKED1(%%REGBP, %5) |
WRITEYUY2(%%REGb, DSTW_OFFSET"(%5)", %%REGBP) |
"pop %%"REG_BP" \n\t" |
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" |
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), |
"a" (&c->redDither) |
); |
} else { |
const int16_t *ubuf1 = ubuf[1]; |
__asm__ volatile( |
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" |
"mov %4, %%"REG_b" \n\t" |
"push %%"REG_BP" \n\t" |
YSCALEYUV2PACKED1b(%%REGBP, %5) |
WRITEYUY2(%%REGb, DSTW_OFFSET"(%5)", %%REGBP) |
"pop %%"REG_BP" \n\t" |
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" |
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), |
"a" (&c->redDither) |
); |
} |
} |
static av_cold void RENAME(sws_init_swscale)(SwsContext *c) |
{ |
enum AVPixelFormat dstFormat = c->dstFormat; |
c->use_mmx_vfilter= 0; |
if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) && dstFormat != AV_PIX_FMT_NV12 |
&& dstFormat != AV_PIX_FMT_NV21 && !(c->flags & SWS_BITEXACT)) { |
if (c->flags & SWS_ACCURATE_RND) { |
if (!(c->flags & SWS_FULL_CHR_H_INT)) { |
switch (c->dstFormat) { |
case AV_PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X_ar); break; |
#if HAVE_6REGS |
case AV_PIX_FMT_BGR24: c->yuv2packedX = RENAME(yuv2bgr24_X_ar); break; |
#endif |
case AV_PIX_FMT_RGB555: c->yuv2packedX = RENAME(yuv2rgb555_X_ar); break; |
case AV_PIX_FMT_RGB565: c->yuv2packedX = RENAME(yuv2rgb565_X_ar); break; |
case AV_PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X_ar); break; |
default: break; |
} |
} |
} else { |
c->use_mmx_vfilter= 1; |
c->yuv2planeX = RENAME(yuv2yuvX ); |
if (!(c->flags & SWS_FULL_CHR_H_INT)) { |
switch (c->dstFormat) { |
case AV_PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X); break; |
case AV_PIX_FMT_BGR32: c->yuv2packedX = RENAME(yuv2bgr32_X); break; |
#if HAVE_6REGS |
case AV_PIX_FMT_BGR24: c->yuv2packedX = RENAME(yuv2bgr24_X); break; |
#endif |
case AV_PIX_FMT_RGB555: c->yuv2packedX = RENAME(yuv2rgb555_X); break; |
case AV_PIX_FMT_RGB565: c->yuv2packedX = RENAME(yuv2rgb565_X); break; |
case AV_PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X); break; |
default: break; |
} |
} |
} |
if (!(c->flags & SWS_FULL_CHR_H_INT)) { |
switch (c->dstFormat) { |
case AV_PIX_FMT_RGB32: |
c->yuv2packed1 = RENAME(yuv2rgb32_1); |
c->yuv2packed2 = RENAME(yuv2rgb32_2); |
break; |
case AV_PIX_FMT_BGR24: |
c->yuv2packed1 = RENAME(yuv2bgr24_1); |
c->yuv2packed2 = RENAME(yuv2bgr24_2); |
break; |
case AV_PIX_FMT_RGB555: |
c->yuv2packed1 = RENAME(yuv2rgb555_1); |
c->yuv2packed2 = RENAME(yuv2rgb555_2); |
break; |
case AV_PIX_FMT_RGB565: |
c->yuv2packed1 = RENAME(yuv2rgb565_1); |
c->yuv2packed2 = RENAME(yuv2rgb565_2); |
break; |
case AV_PIX_FMT_YUYV422: |
c->yuv2packed1 = RENAME(yuv2yuyv422_1); |
c->yuv2packed2 = RENAME(yuv2yuyv422_2); |
break; |
default: |
break; |
} |
} |
} |
if (c->srcBpc == 8 && c->dstBpc <= 14) { |
    // Use the new MMX scaler if the MMXEXT fast-bilinear one can't be used (the MMX scaler is faster than the generic x86 ASM one). |
#if COMPILE_TEMPLATE_MMXEXT |
if (c->flags & SWS_FAST_BILINEAR && c->canMMXEXTBeUsed) { |
c->hyscale_fast = ff_hyscale_fast_mmxext; |
c->hcscale_fast = ff_hcscale_fast_mmxext; |
} else { |
#endif /* COMPILE_TEMPLATE_MMXEXT */ |
c->hyscale_fast = NULL; |
c->hcscale_fast = NULL; |
#if COMPILE_TEMPLATE_MMXEXT |
} |
#endif /* COMPILE_TEMPLATE_MMXEXT */ |
} |
} |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/x86/w64xmmtest.c |
---|
0,0 → 1,31 |
/* |
* check XMM registers for clobbers on Win64 |
* Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com> |
* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
#include "libavutil/x86/w64xmmtest.h" |
#include "libswscale/swscale.h" |
wrap(sws_scale(struct SwsContext *c, const uint8_t *const srcSlice[], |
const int srcStride[], int srcSliceY, int srcSliceH, |
uint8_t *const dst[], const int dstStride[])) |
{ |
testxmmclobbers(sws_scale, c, srcSlice, srcStride, srcSliceY, |
srcSliceH, dst, dstStride); |
} |
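/* Editor's note: wrap() and testxmmclobbers() come from w64xmmtest.h; they |
 * seed the Win64 callee-saved XMM registers with sentinel values around |
 * the real sws_scale() call and flag an error if the callee clobbered any |
 * of them. */ |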
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/x86/yuv2rgb.c |
---|
0,0 → 1,119 |
/* |
* software YUV to RGB converter |
* |
* Copyright (C) 2009 Konstantin Shishkov |
* |
* MMX/MMXEXT template stuff (needed for fast movntq support), |
* 1,4,8bpp support and context / deglobalize stuff |
* by Michael Niedermayer (michaelni@gmx.at) |
* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
#include <stdio.h> |
#include <stdlib.h> |
#include <inttypes.h> |
#include "config.h" |
#include "libswscale/rgb2rgb.h" |
#include "libswscale/swscale.h" |
#include "libswscale/swscale_internal.h" |
#include "libavutil/attributes.h" |
#include "libavutil/x86/asm.h" |
#include "libavutil/x86/cpu.h" |
#include "libavutil/cpu.h" |
#if HAVE_INLINE_ASM |
#define DITHER1XBPP // only for MMX |
/* hope these constant values are cache line aligned */ |
DECLARE_ASM_CONST(8, uint64_t, mmx_00ffw) = 0x00ff00ff00ff00ffULL; |
DECLARE_ASM_CONST(8, uint64_t, mmx_redmask) = 0xf8f8f8f8f8f8f8f8ULL; |
DECLARE_ASM_CONST(8, uint64_t, mmx_grnmask) = 0xfcfcfcfcfcfcfcfcULL; |
DECLARE_ASM_CONST(8, uint64_t, pb_e0) = 0xe0e0e0e0e0e0e0e0ULL; |
DECLARE_ASM_CONST(8, uint64_t, pb_03) = 0x0303030303030303ULL; |
DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL; |
//MMX versions |
#if HAVE_MMX_INLINE && HAVE_6REGS |
#undef RENAME |
#undef COMPILE_TEMPLATE_MMXEXT |
#define COMPILE_TEMPLATE_MMXEXT 0 |
#define RENAME(a) a ## _mmx |
#include "yuv2rgb_template.c" |
#endif /* HAVE_MMX_INLINE && HAVE_6REGS */ |
// MMXEXT versions |
#if HAVE_MMXEXT_INLINE && HAVE_6REGS |
#undef RENAME |
#undef COMPILE_TEMPLATE_MMXEXT |
#define COMPILE_TEMPLATE_MMXEXT 1 |
#define RENAME(a) a ## _mmxext |
#include "yuv2rgb_template.c" |
#endif /* HAVE_MMXEXT_INLINE && HAVE_6REGS */ |
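/* The template is compiled twice: the first pass gives every RENAME()d |
 * symbol an _mmx suffix and stores through plain movq, the second uses an |
 * _mmxext suffix and the non-temporal movntq/sfence pair selected by |
 * COMPILE_TEMPLATE_MMXEXT inside yuv2rgb_template.c. */ |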
#endif /* HAVE_INLINE_ASM */ |
av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c) |
{ |
#if HAVE_MMX_INLINE && HAVE_6REGS |
int cpu_flags = av_get_cpu_flags(); |
#if HAVE_MMXEXT_INLINE |
if (INLINE_MMXEXT(cpu_flags)) { |
switch (c->dstFormat) { |
case AV_PIX_FMT_RGB24: |
return yuv420_rgb24_mmxext; |
case AV_PIX_FMT_BGR24: |
return yuv420_bgr24_mmxext; |
} |
} |
#endif |
if (INLINE_MMX(cpu_flags)) { |
switch (c->dstFormat) { |
case AV_PIX_FMT_RGB32: |
if (c->srcFormat == AV_PIX_FMT_YUVA420P) { |
#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA |
return yuva420_rgb32_mmx; |
#endif |
break; |
} else |
return yuv420_rgb32_mmx; |
case AV_PIX_FMT_BGR32: |
if (c->srcFormat == AV_PIX_FMT_YUVA420P) { |
#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA |
return yuva420_bgr32_mmx; |
#endif |
break; |
} else |
return yuv420_bgr32_mmx; |
case AV_PIX_FMT_RGB24: |
return yuv420_rgb24_mmx; |
case AV_PIX_FMT_BGR24: |
return yuv420_bgr24_mmx; |
case AV_PIX_FMT_RGB565: |
return yuv420_rgb16_mmx; |
case AV_PIX_FMT_RGB555: |
return yuv420_rgb15_mmx; |
} |
} |
#endif /* HAVE_MMX_INLINE && HAVE_6REGS */ |
return NULL; |
} |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/x86/yuv2rgb_template.c |
---|
0,0 → 1,467 |
/* |
* software YUV to RGB converter |
* |
* Copyright (C) 2001-2007 Michael Niedermayer |
* (c) 2010 Konstantin Shishkov |
* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
#include <stdint.h> |
#include "libavutil/x86/asm.h" |
#include "libswscale/swscale_internal.h" |
#undef MOVNTQ |
#undef EMMS |
#undef SFENCE |
#if COMPILE_TEMPLATE_MMXEXT |
#define MOVNTQ "movntq" |
#define SFENCE "sfence" |
#else |
#define MOVNTQ "movq" |
#define SFENCE " # nop" |
#endif |
#define REG_BLUE "0" |
#define REG_RED "1" |
#define REG_GREEN "2" |
#define REG_ALPHA "3" |
#define YUV2RGB_LOOP(depth) \ |
h_size = (c->dstW + 7) & ~7; \ |
if (h_size * depth > FFABS(dstStride[0])) \ |
h_size -= 8; \ |
\ |
vshift = c->srcFormat != AV_PIX_FMT_YUV422P; \ |
\ |
__asm__ volatile ("pxor %mm4, %mm4\n\t"); \ |
for (y = 0; y < srcSliceH; y++) { \ |
uint8_t *image = dst[0] + (y + srcSliceY) * dstStride[0]; \ |
const uint8_t *py = src[0] + y * srcStride[0]; \ |
const uint8_t *pu = src[1] + (y >> vshift) * srcStride[1]; \ |
const uint8_t *pv = src[2] + (y >> vshift) * srcStride[2]; \ |
x86_reg index = -h_size / 2; \ |
#define YUV2RGB_INITIAL_LOAD \ |
__asm__ volatile ( \ |
"movq (%5, %0, 2), %%mm6\n\t" \ |
"movd (%2, %0), %%mm0\n\t" \ |
"movd (%3, %0), %%mm1\n\t" \ |
"1: \n\t" \ |
/* YUV2RGB core |
 * Conversion is performed in the usual way: |
* R = Y' * Ycoef + Vred * V' |
* G = Y' * Ycoef + Vgreen * V' + Ugreen * U' |
* B = Y' * Ycoef + Ublue * U' |
* |
* where X' = X * 8 - Xoffset (multiplication is performed to increase |
* precision a bit). |
 * Since it operates in the YUV420 colorspace, the Y component is |
 * additionally split into Y1 and Y2 for even and odd pixels. |
* |
* Input: |
* mm0 - U (4 elems), mm1 - V (4 elems), mm6 - Y (8 elems), mm4 - zero register |
* Output: |
* mm1 - R, mm2 - G, mm0 - B |
*/ |
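/* In scalar terms (a sketch of the block below, using the fact that pmulhw |
 * keeps the high 16 bits of a signed 16x16 product), each red sample is |
 *     R = sat( ((Y*8 - Y_OFFSET) * Y_COEFF ) >> 16 |
 *            + ((V*8 - V_OFFSET) * VR_COEFF) >> 16 ) |
 * and likewise for G and B; the final clamp to [0,255] happens in the |
 * unsigned pack step of RGB_PACK_INTERLEAVE. */ |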
#define YUV2RGB \ |
/* convert Y, U, V into Y1', Y2', U', V' */ \ |
"movq %%mm6, %%mm7\n\t" \ |
"punpcklbw %%mm4, %%mm0\n\t" \ |
"punpcklbw %%mm4, %%mm1\n\t" \ |
"pand "MANGLE(mmx_00ffw)", %%mm6\n\t" \ |
"psrlw $8, %%mm7\n\t" \ |
"psllw $3, %%mm0\n\t" \ |
"psllw $3, %%mm1\n\t" \ |
"psllw $3, %%mm6\n\t" \ |
"psllw $3, %%mm7\n\t" \ |
"psubsw "U_OFFSET"(%4), %%mm0\n\t" \ |
"psubsw "V_OFFSET"(%4), %%mm1\n\t" \ |
"psubw "Y_OFFSET"(%4), %%mm6\n\t" \ |
"psubw "Y_OFFSET"(%4), %%mm7\n\t" \ |
\ |
/* multiply by coefficients */ \ |
"movq %%mm0, %%mm2\n\t" \ |
"movq %%mm1, %%mm3\n\t" \ |
"pmulhw "UG_COEFF"(%4), %%mm2\n\t" \ |
"pmulhw "VG_COEFF"(%4), %%mm3\n\t" \ |
"pmulhw "Y_COEFF" (%4), %%mm6\n\t" \ |
"pmulhw "Y_COEFF" (%4), %%mm7\n\t" \ |
"pmulhw "UB_COEFF"(%4), %%mm0\n\t" \ |
"pmulhw "VR_COEFF"(%4), %%mm1\n\t" \ |
"paddsw %%mm3, %%mm2\n\t" \ |
/* now: mm0 = UB, mm1 = VR, mm2 = CG */ \ |
/* mm6 = Y1, mm7 = Y2 */ \ |
\ |
/* produce RGB */ \ |
"movq %%mm7, %%mm3\n\t" \ |
"movq %%mm7, %%mm5\n\t" \ |
"paddsw %%mm0, %%mm3\n\t" \ |
"paddsw %%mm1, %%mm5\n\t" \ |
"paddsw %%mm2, %%mm7\n\t" \ |
"paddsw %%mm6, %%mm0\n\t" \ |
"paddsw %%mm6, %%mm1\n\t" \ |
"paddsw %%mm6, %%mm2\n\t" \ |
#define RGB_PACK_INTERLEAVE \ |
/* pack and interleave even/odd pixels */ \ |
"packuswb %%mm1, %%mm0\n\t" \ |
"packuswb %%mm5, %%mm3\n\t" \ |
"packuswb %%mm2, %%mm2\n\t" \ |
"movq %%mm0, %%mm1\n\n" \ |
"packuswb %%mm7, %%mm7\n\t" \ |
"punpcklbw %%mm3, %%mm0\n\t" \ |
"punpckhbw %%mm3, %%mm1\n\t" \ |
"punpcklbw %%mm7, %%mm2\n\t" \ |
#define YUV2RGB_ENDLOOP(depth) \ |
"movq 8 (%5, %0, 2), %%mm6\n\t" \ |
"movd 4 (%3, %0), %%mm1\n\t" \ |
"movd 4 (%2, %0), %%mm0\n\t" \ |
"add $"AV_STRINGIFY(depth * 8)", %1\n\t" \ |
"add $4, %0\n\t" \ |
"js 1b\n\t" \ |
#if COMPILE_TEMPLATE_MMXEXT |
#undef RGB_PACK24_B_OPERANDS |
#define RGB_PACK24_B_OPERANDS NAMED_CONSTRAINTS_ARRAY_ADD(mask1101,mask0110,mask0100,mask0010,mask1001) |
#else |
#undef RGB_PACK24_B_OPERANDS |
#define RGB_PACK24_B_OPERANDS |
#endif |
#define YUV2RGB_OPERANDS \ |
: "+r" (index), "+r" (image) \ |
: "r" (pu - index), "r" (pv - index), "r"(&c->redDither), \ |
"r" (py - 2*index) \ |
NAMED_CONSTRAINTS_ADD(mmx_00ffw,pb_03,pb_07,mmx_redmask,pb_e0) \ |
RGB_PACK24_B_OPERANDS \ |
: "memory" \ |
); \ |
} \ |
#define YUV2RGB_OPERANDS_ALPHA \ |
: "+r" (index), "+r" (image) \ |
: "r" (pu - index), "r" (pv - index), "r"(&c->redDither), \ |
"r" (py - 2*index), "r" (pa - 2*index) \ |
NAMED_CONSTRAINTS_ADD(mmx_00ffw) \ |
: "memory" \ |
); \ |
} \ |
#define YUV2RGB_ENDFUNC \ |
__asm__ volatile (SFENCE"\n\t" \ |
"emms \n\t"); \ |
return srcSliceH; \ |
#define IF0(x) |
#define IF1(x) x |
#define RGB_PACK16(gmask, is15) \ |
"pand "MANGLE(mmx_redmask)", %%mm0\n\t" \ |
"pand "MANGLE(mmx_redmask)", %%mm1\n\t" \ |
"movq %%mm2, %%mm3\n\t" \ |
"psllw $"AV_STRINGIFY(3-is15)", %%mm2\n\t" \ |
"psrlw $"AV_STRINGIFY(5+is15)", %%mm3\n\t" \ |
"psrlw $3, %%mm0\n\t" \ |
IF##is15("psrlw $1, %%mm1\n\t") \ |
"pand "MANGLE(pb_e0)", %%mm2\n\t" \ |
"pand "MANGLE(gmask)", %%mm3\n\t" \ |
"por %%mm2, %%mm0\n\t" \ |
"por %%mm3, %%mm1\n\t" \ |
"movq %%mm0, %%mm2\n\t" \ |
"punpcklbw %%mm1, %%mm0\n\t" \ |
"punpckhbw %%mm1, %%mm2\n\t" \ |
MOVNTQ " %%mm0, (%1)\n\t" \ |
MOVNTQ " %%mm2, 8(%1)\n\t" \ |
#define DITHER_RGB \ |
"paddusb "BLUE_DITHER"(%4), %%mm0\n\t" \ |
"paddusb "GREEN_DITHER"(%4), %%mm2\n\t" \ |
"paddusb "RED_DITHER"(%4), %%mm1\n\t" \ |
#if !COMPILE_TEMPLATE_MMXEXT |
static inline int RENAME(yuv420_rgb15)(SwsContext *c, const uint8_t *src[], |
int srcStride[], |
int srcSliceY, int srcSliceH, |
uint8_t *dst[], int dstStride[]) |
{ |
int y, h_size, vshift; |
YUV2RGB_LOOP(2) |
#ifdef DITHER1XBPP |
c->blueDither = ff_dither8[y & 1]; |
c->greenDither = ff_dither8[y & 1]; |
c->redDither = ff_dither8[(y + 1) & 1]; |
#endif |
YUV2RGB_INITIAL_LOAD |
YUV2RGB |
RGB_PACK_INTERLEAVE |
#ifdef DITHER1XBPP |
DITHER_RGB |
#endif |
RGB_PACK16(pb_03, 1) |
YUV2RGB_ENDLOOP(2) |
YUV2RGB_OPERANDS |
YUV2RGB_ENDFUNC |
} |
static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[], |
int srcStride[], |
int srcSliceY, int srcSliceH, |
uint8_t *dst[], int dstStride[]) |
{ |
int y, h_size, vshift; |
YUV2RGB_LOOP(2) |
#ifdef DITHER1XBPP |
c->blueDither = ff_dither8[y & 1]; |
c->greenDither = ff_dither4[y & 1]; |
c->redDither = ff_dither8[(y + 1) & 1]; |
#endif |
YUV2RGB_INITIAL_LOAD |
YUV2RGB |
RGB_PACK_INTERLEAVE |
#ifdef DITHER1XBPP |
DITHER_RGB |
#endif |
RGB_PACK16(pb_07, 0) |
YUV2RGB_ENDLOOP(2) |
YUV2RGB_OPERANDS |
YUV2RGB_ENDFUNC |
} |
#endif /* !COMPILE_TEMPLATE_MMXEXT */ |
#define RGB_PACK24(blue, red)\ |
"packuswb %%mm3, %%mm0 \n" /* R0 R2 R4 R6 R1 R3 R5 R7 */\ |
"packuswb %%mm5, %%mm1 \n" /* B0 B2 B4 B6 B1 B3 B5 B7 */\ |
"packuswb %%mm7, %%mm2 \n" /* G0 G2 G4 G6 G1 G3 G5 G7 */\ |
"movq %%mm"red", %%mm3 \n"\ |
"movq %%mm"blue", %%mm6 \n"\ |
"psrlq $32, %%mm"red" \n" /* R1 R3 R5 R7 */\ |
"punpcklbw %%mm2, %%mm3 \n" /* R0 G0 R2 G2 R4 G4 R6 G6 */\ |
"punpcklbw %%mm"red", %%mm6 \n" /* B0 R1 B2 R3 B4 R5 B6 R7 */\ |
"movq %%mm3, %%mm5 \n"\ |
"punpckhbw %%mm"blue", %%mm2 \n" /* G1 B1 G3 B3 G5 B5 G7 B7Â */\ |
"punpcklwd %%mm6, %%mm3 \n" /* R0 G0 B0 R1 R2 G2 B2 R3 */\ |
"punpckhwd %%mm6, %%mm5 \n" /* R4 G4 B4 R5 R6 G6 B6 R7 */\ |
RGB_PACK24_B |
#if COMPILE_TEMPLATE_MMXEXT |
DECLARE_ASM_CONST(8, int16_t, mask1101[4]) = {-1,-1, 0,-1}; |
DECLARE_ASM_CONST(8, int16_t, mask0010[4]) = { 0, 0,-1, 0}; |
DECLARE_ASM_CONST(8, int16_t, mask0110[4]) = { 0,-1,-1, 0}; |
DECLARE_ASM_CONST(8, int16_t, mask1001[4]) = {-1, 0, 0,-1}; |
DECLARE_ASM_CONST(8, int16_t, mask0100[4]) = { 0,-1, 0, 0}; |
#undef RGB_PACK24_B |
#define RGB_PACK24_B\ |
"pshufw $0xc6, %%mm2, %%mm1 \n"\ |
"pshufw $0x84, %%mm3, %%mm6 \n"\ |
"pshufw $0x38, %%mm5, %%mm7 \n"\ |
"pand "MANGLE(mask1101)", %%mm6 \n" /* R0 G0 B0 R1 -- -- R2 G2 */\ |
"movq %%mm1, %%mm0 \n"\ |
"pand "MANGLE(mask0110)", %%mm7 \n" /* -- -- R6 G6 B6 R7 -- -- */\ |
"movq %%mm1, %%mm2 \n"\ |
"pand "MANGLE(mask0100)", %%mm1 \n" /* -- -- G3 B3 -- -- -- -- */\ |
"psrlq $48, %%mm3 \n" /* B2 R3 -- -- -- -- -- -- */\ |
"pand "MANGLE(mask0010)", %%mm0 \n" /* -- -- -- -- G1 B1 -- -- */\ |
"psllq $32, %%mm5 \n" /* -- -- -- -- R4 G4 B4 R5 */\ |
"pand "MANGLE(mask1001)", %%mm2 \n" /* G5 B5 -- -- -- -- G7 B7 */\ |
"por %%mm3, %%mm1 \n"\ |
"por %%mm6, %%mm0 \n"\ |
"por %%mm5, %%mm1 \n"\ |
"por %%mm7, %%mm2 \n"\ |
MOVNTQ" %%mm0, (%1) \n"\ |
MOVNTQ" %%mm1, 8(%1) \n"\ |
MOVNTQ" %%mm2, 16(%1) \n"\ |
#else |
#undef RGB_PACK24_B |
#define RGB_PACK24_B\ |
"movd %%mm3, (%1) \n" /* R0 G0 B0 R1 */\ |
"movd %%mm2, 4(%1) \n" /* G1 B1 */\ |
"psrlq $32, %%mm3 \n"\ |
"psrlq $16, %%mm2 \n"\ |
"movd %%mm3, 6(%1) \n" /* R2 G2 B2 R3 */\ |
"movd %%mm2, 10(%1) \n" /* G3 B3 */\ |
"psrlq $16, %%mm2 \n"\ |
"movd %%mm5, 12(%1) \n" /* R4 G4 B4 R5 */\ |
"movd %%mm2, 16(%1) \n" /* G5 B5 */\ |
"psrlq $32, %%mm5 \n"\ |
"movd %%mm2, 20(%1) \n" /* -- -- G7 B7 */\ |
"movd %%mm5, 18(%1) \n" /* R6 G6 B6 R7 */\ |
#endif |
static inline int RENAME(yuv420_rgb24)(SwsContext *c, const uint8_t *src[], |
int srcStride[], |
int srcSliceY, int srcSliceH, |
uint8_t *dst[], int dstStride[]) |
{ |
int y, h_size, vshift; |
YUV2RGB_LOOP(3) |
YUV2RGB_INITIAL_LOAD |
YUV2RGB |
RGB_PACK24(REG_BLUE, REG_RED) |
YUV2RGB_ENDLOOP(3) |
YUV2RGB_OPERANDS |
YUV2RGB_ENDFUNC |
} |
static inline int RENAME(yuv420_bgr24)(SwsContext *c, const uint8_t *src[], |
int srcStride[], |
int srcSliceY, int srcSliceH, |
uint8_t *dst[], int dstStride[]) |
{ |
int y, h_size, vshift; |
YUV2RGB_LOOP(3) |
YUV2RGB_INITIAL_LOAD |
YUV2RGB |
RGB_PACK24(REG_RED, REG_BLUE) |
YUV2RGB_ENDLOOP(3) |
YUV2RGB_OPERANDS |
YUV2RGB_ENDFUNC |
} |
#define SET_EMPTY_ALPHA \ |
"pcmpeqd %%mm"REG_ALPHA", %%mm"REG_ALPHA"\n\t" /* set alpha to 0xFF */ \ |
#define LOAD_ALPHA \ |
"movq (%6, %0, 2), %%mm"REG_ALPHA"\n\t" \ |
#define RGB_PACK32(red, green, blue, alpha) \ |
"movq %%mm"blue", %%mm5\n\t" \ |
"movq %%mm"red", %%mm6\n\t" \ |
"punpckhbw %%mm"green", %%mm5\n\t" \ |
"punpcklbw %%mm"green", %%mm"blue"\n\t" \ |
"punpckhbw %%mm"alpha", %%mm6\n\t" \ |
"punpcklbw %%mm"alpha", %%mm"red"\n\t" \ |
"movq %%mm"blue", %%mm"green"\n\t" \ |
"movq %%mm5, %%mm"alpha"\n\t" \ |
"punpcklwd %%mm"red", %%mm"blue"\n\t" \ |
"punpckhwd %%mm"red", %%mm"green"\n\t" \ |
"punpcklwd %%mm6, %%mm5\n\t" \ |
"punpckhwd %%mm6, %%mm"alpha"\n\t" \ |
MOVNTQ " %%mm"blue", 0(%1)\n\t" \ |
MOVNTQ " %%mm"green", 8(%1)\n\t" \ |
MOVNTQ " %%mm5, 16(%1)\n\t" \ |
MOVNTQ " %%mm"alpha", 24(%1)\n\t" \ |
#if !COMPILE_TEMPLATE_MMXEXT |
static inline int RENAME(yuv420_rgb32)(SwsContext *c, const uint8_t *src[], |
int srcStride[], |
int srcSliceY, int srcSliceH, |
uint8_t *dst[], int dstStride[]) |
{ |
int y, h_size, vshift; |
YUV2RGB_LOOP(4) |
YUV2RGB_INITIAL_LOAD |
YUV2RGB |
RGB_PACK_INTERLEAVE |
SET_EMPTY_ALPHA |
RGB_PACK32(REG_RED, REG_GREEN, REG_BLUE, REG_ALPHA) |
YUV2RGB_ENDLOOP(4) |
YUV2RGB_OPERANDS |
YUV2RGB_ENDFUNC |
} |
#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA |
static inline int RENAME(yuva420_rgb32)(SwsContext *c, const uint8_t *src[], |
int srcStride[], |
int srcSliceY, int srcSliceH, |
uint8_t *dst[], int dstStride[]) |
{ |
int y, h_size, vshift; |
YUV2RGB_LOOP(4) |
const uint8_t *pa = src[3] + y * srcStride[3]; |
YUV2RGB_INITIAL_LOAD |
YUV2RGB |
RGB_PACK_INTERLEAVE |
LOAD_ALPHA |
RGB_PACK32(REG_RED, REG_GREEN, REG_BLUE, REG_ALPHA) |
YUV2RGB_ENDLOOP(4) |
YUV2RGB_OPERANDS_ALPHA |
YUV2RGB_ENDFUNC |
} |
#endif |
static inline int RENAME(yuv420_bgr32)(SwsContext *c, const uint8_t *src[], |
int srcStride[], |
int srcSliceY, int srcSliceH, |
uint8_t *dst[], int dstStride[]) |
{ |
int y, h_size, vshift; |
YUV2RGB_LOOP(4) |
YUV2RGB_INITIAL_LOAD |
YUV2RGB |
RGB_PACK_INTERLEAVE |
SET_EMPTY_ALPHA |
RGB_PACK32(REG_BLUE, REG_GREEN, REG_RED, REG_ALPHA) |
YUV2RGB_ENDLOOP(4) |
YUV2RGB_OPERANDS |
YUV2RGB_ENDFUNC |
} |
#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA |
static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t *src[], |
int srcStride[], |
int srcSliceY, int srcSliceH, |
uint8_t *dst[], int dstStride[]) |
{ |
int y, h_size, vshift; |
YUV2RGB_LOOP(4) |
const uint8_t *pa = src[3] + y * srcStride[3]; |
YUV2RGB_INITIAL_LOAD |
YUV2RGB |
RGB_PACK_INTERLEAVE |
LOAD_ALPHA |
RGB_PACK32(REG_BLUE, REG_GREEN, REG_RED, REG_ALPHA) |
YUV2RGB_ENDLOOP(4) |
YUV2RGB_OPERANDS_ALPHA |
YUV2RGB_ENDFUNC |
} |
#endif |
#endif /* !COMPILE_TEMPLATE_MMXEXT */ |
/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libswscale/yuv2rgb.c |
---|
0,0 → 1,977 |
/* |
* software YUV to RGB converter |
* |
* Copyright (C) 2009 Konstantin Shishkov |
* |
* 1,4,8bpp support and context / deglobalize stuff |
* by Michael Niedermayer (michaelni@gmx.at) |
* |
* This file is part of FFmpeg. |
* |
* FFmpeg is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2.1 of the License, or (at your option) any later version. |
* |
* FFmpeg is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with FFmpeg; if not, write to the Free Software |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
*/ |
#include <stdio.h> |
#include <stdlib.h> |
#include <inttypes.h> |
#include "libavutil/cpu.h" |
#include "libavutil/bswap.h" |
#include "config.h" |
#include "rgb2rgb.h" |
#include "swscale.h" |
#include "swscale_internal.h" |
#include "libavutil/pixdesc.h" |
const int32_t ff_yuv2rgb_coeffs[8][4] = { |
{ 117504, 138453, 13954, 34903 }, /* no sequence_display_extension */ |
{ 117504, 138453, 13954, 34903 }, /* ITU-R Rec. 709 (1990) */ |
{ 104597, 132201, 25675, 53279 }, /* unspecified */ |
{ 104597, 132201, 25675, 53279 }, /* reserved */ |
{ 104448, 132798, 24759, 53109 }, /* FCC */ |
{ 104597, 132201, 25675, 53279 }, /* ITU-R Rec. 624-4 System B, G */ |
{ 104597, 132201, 25675, 53279 }, /* SMPTE 170M */ |
{ 117579, 136230, 16907, 35559 } /* SMPTE 240M (1987) */ |
}; |
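/* Each row holds {crv, cbu, cgu, cgv} in 16.16 fixed point, pre-scaled for |
 * limited-range (MPEG) chroma; e.g. for Rec.601, 104597 / 65536 is roughly |
 * 1.596, i.e. 1.402 * 255/224. ff_yuv2rgb_c_init_tables() below removes |
 * that pre-scale again when fullRange is set. */ |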
const int *sws_getCoefficients(int colorspace) |
{ |
if (colorspace > 7 || colorspace < 0) |
colorspace = SWS_CS_DEFAULT; |
return ff_yuv2rgb_coeffs[colorspace]; |
} |
#define LOADCHROMA(i) \ |
U = pu[i]; \ |
V = pv[i]; \ |
r = (void *)c->table_rV[V+YUVRGB_TABLE_HEADROOM]; \ |
g = (void *)(c->table_gU[U+YUVRGB_TABLE_HEADROOM] + c->table_gV[V+YUVRGB_TABLE_HEADROOM]); \ |
b = (void *)c->table_bU[U+YUVRGB_TABLE_HEADROOM]; |
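/* The C converters are table driven: r, g and b point into per-channel |
 * lookup tables that fill_table() / fill_gv_table() below have already |
 * biased by this U/V pair's chroma contribution, so the PUTRGB* macros |
 * fetch each channel with one load, r[Y] + g[Y] + b[Y], and no multiplies. */ |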
#define PUTRGB(dst, src, i) \ |
Y = src[2 * i]; \ |
dst[2 * i] = r[Y] + g[Y] + b[Y]; \ |
Y = src[2 * i + 1]; \ |
dst[2 * i + 1] = r[Y] + g[Y] + b[Y]; |
#define PUTRGB24(dst, src, i) \ |
Y = src[2 * i]; \ |
dst[6 * i + 0] = r[Y]; \ |
dst[6 * i + 1] = g[Y]; \ |
dst[6 * i + 2] = b[Y]; \ |
Y = src[2 * i + 1]; \ |
dst[6 * i + 3] = r[Y]; \ |
dst[6 * i + 4] = g[Y]; \ |
dst[6 * i + 5] = b[Y]; |
#define PUTBGR24(dst, src, i) \ |
Y = src[2 * i]; \ |
dst[6 * i + 0] = b[Y]; \ |
dst[6 * i + 1] = g[Y]; \ |
dst[6 * i + 2] = r[Y]; \ |
Y = src[2 * i + 1]; \ |
dst[6 * i + 3] = b[Y]; \ |
dst[6 * i + 4] = g[Y]; \ |
dst[6 * i + 5] = r[Y]; |
#define PUTRGBA(dst, ysrc, asrc, i, s) \ |
Y = ysrc[2 * i]; \ |
dst[2 * i] = r[Y] + g[Y] + b[Y] + (asrc[2 * i] << s); \ |
Y = ysrc[2 * i + 1]; \ |
dst[2 * i + 1] = r[Y] + g[Y] + b[Y] + (asrc[2 * i + 1] << s); |
#define PUTRGB48(dst, src, i) \ |
Y = src[ 2 * i]; \ |
dst[12 * i + 0] = dst[12 * i + 1] = r[Y]; \ |
dst[12 * i + 2] = dst[12 * i + 3] = g[Y]; \ |
dst[12 * i + 4] = dst[12 * i + 5] = b[Y]; \ |
Y = src[ 2 * i + 1]; \ |
dst[12 * i + 6] = dst[12 * i + 7] = r[Y]; \ |
dst[12 * i + 8] = dst[12 * i + 9] = g[Y]; \ |
dst[12 * i + 10] = dst[12 * i + 11] = b[Y]; |
#define PUTBGR48(dst, src, i) \ |
Y = src[2 * i]; \ |
dst[12 * i + 0] = dst[12 * i + 1] = b[Y]; \ |
dst[12 * i + 2] = dst[12 * i + 3] = g[Y]; \ |
dst[12 * i + 4] = dst[12 * i + 5] = r[Y]; \ |
Y = src[2 * i + 1]; \ |
dst[12 * i + 6] = dst[12 * i + 7] = b[Y]; \ |
dst[12 * i + 8] = dst[12 * i + 9] = g[Y]; \ |
dst[12 * i + 10] = dst[12 * i + 11] = r[Y]; |
#define YUV2RGBFUNC(func_name, dst_type, alpha) \ |
static int func_name(SwsContext *c, const uint8_t *src[], \ |
int srcStride[], int srcSliceY, int srcSliceH, \ |
uint8_t *dst[], int dstStride[]) \ |
{ \ |
int y; \ |
\ |
if (!alpha && c->srcFormat == AV_PIX_FMT_YUV422P) { \ |
srcStride[1] *= 2; \ |
srcStride[2] *= 2; \ |
} \ |
for (y = 0; y < srcSliceH; y += 2) { \ |
dst_type *dst_1 = \ |
(dst_type *)(dst[0] + (y + srcSliceY) * dstStride[0]); \ |
dst_type *dst_2 = \ |
(dst_type *)(dst[0] + (y + srcSliceY + 1) * dstStride[0]); \ |
dst_type av_unused *r, *g, *b; \ |
const uint8_t *py_1 = src[0] + y * srcStride[0]; \ |
const uint8_t *py_2 = py_1 + srcStride[0]; \ |
const uint8_t *pu = src[1] + (y >> 1) * srcStride[1]; \ |
const uint8_t *pv = src[2] + (y >> 1) * srcStride[2]; \ |
const uint8_t av_unused *pa_1, *pa_2; \ |
unsigned int h_size = c->dstW >> 3; \ |
if (alpha) { \ |
pa_1 = src[3] + y * srcStride[3]; \ |
pa_2 = pa_1 + srcStride[3]; \ |
} \ |
while (h_size--) { \ |
int av_unused U, V, Y; \ |
#define ENDYUV2RGBLINE(dst_delta, ss) \ |
pu += 4 >> ss; \ |
pv += 4 >> ss; \ |
py_1 += 8 >> ss; \ |
py_2 += 8 >> ss; \ |
dst_1 += dst_delta >> ss; \ |
dst_2 += dst_delta >> ss; \ |
} \ |
if (c->dstW & (4 >> ss)) { \ |
int av_unused Y, U, V; \ |
#define ENDYUV2RGBFUNC() \ |
} \ |
} \ |
return srcSliceH; \ |
} |
#define CLOSEYUV2RGBFUNC(dst_delta) \ |
ENDYUV2RGBLINE(dst_delta, 0) \ |
ENDYUV2RGBFUNC() |
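/* Every converter below is assembled from these macros: YUV2RGBFUNC opens |
 * the function and its two-scanline loop, ENDYUV2RGBLINE closes the |
 * current block, advances the pointers and opens a tail block for a |
 * remaining 4 (ss = 0) or 2 (ss = 1) pixels, and ENDYUV2RGBFUNC closes the |
 * blocks and returns srcSliceH. */ |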
YUV2RGBFUNC(yuv2rgb_c_48, uint8_t, 0) |
LOADCHROMA(0); |
PUTRGB48(dst_1, py_1, 0); |
PUTRGB48(dst_2, py_2, 0); |
LOADCHROMA(1); |
PUTRGB48(dst_2, py_2, 1); |
PUTRGB48(dst_1, py_1, 1); |
LOADCHROMA(2); |
PUTRGB48(dst_1, py_1, 2); |
PUTRGB48(dst_2, py_2, 2); |
LOADCHROMA(3); |
PUTRGB48(dst_2, py_2, 3); |
PUTRGB48(dst_1, py_1, 3); |
ENDYUV2RGBLINE(48, 0) |
LOADCHROMA(0); |
PUTRGB48(dst_1, py_1, 0); |
PUTRGB48(dst_2, py_2, 0); |
LOADCHROMA(1); |
PUTRGB48(dst_2, py_2, 1); |
PUTRGB48(dst_1, py_1, 1); |
ENDYUV2RGBLINE(48, 1) |
LOADCHROMA(0); |
PUTRGB48(dst_1, py_1, 0); |
PUTRGB48(dst_2, py_2, 0); |
ENDYUV2RGBFUNC() |
YUV2RGBFUNC(yuv2rgb_c_bgr48, uint8_t, 0) |
LOADCHROMA(0); |
PUTBGR48(dst_1, py_1, 0); |
PUTBGR48(dst_2, py_2, 0); |
LOADCHROMA(1); |
PUTBGR48(dst_2, py_2, 1); |
PUTBGR48(dst_1, py_1, 1); |
LOADCHROMA(2); |
PUTBGR48(dst_1, py_1, 2); |
PUTBGR48(dst_2, py_2, 2); |
LOADCHROMA(3); |
PUTBGR48(dst_2, py_2, 3); |
PUTBGR48(dst_1, py_1, 3); |
ENDYUV2RGBLINE(48, 0) |
LOADCHROMA(0); |
PUTBGR48(dst_1, py_1, 0); |
PUTBGR48(dst_2, py_2, 0); |
LOADCHROMA(1); |
PUTBGR48(dst_2, py_2, 1); |
PUTBGR48(dst_1, py_1, 1); |
ENDYUV2RGBLINE(48, 1) |
LOADCHROMA(0); |
PUTBGR48(dst_1, py_1, 0); |
PUTBGR48(dst_2, py_2, 0); |
ENDYUV2RGBFUNC() |
YUV2RGBFUNC(yuv2rgb_c_32, uint32_t, 0) |
LOADCHROMA(0); |
PUTRGB(dst_1, py_1, 0); |
PUTRGB(dst_2, py_2, 0); |
LOADCHROMA(1); |
PUTRGB(dst_2, py_2, 1); |
PUTRGB(dst_1, py_1, 1); |
LOADCHROMA(2); |
PUTRGB(dst_1, py_1, 2); |
PUTRGB(dst_2, py_2, 2); |
LOADCHROMA(3); |
PUTRGB(dst_2, py_2, 3); |
PUTRGB(dst_1, py_1, 3); |
ENDYUV2RGBLINE(8, 0) |
LOADCHROMA(0); |
PUTRGB(dst_1, py_1, 0); |
PUTRGB(dst_2, py_2, 0); |
LOADCHROMA(1); |
PUTRGB(dst_2, py_2, 1); |
PUTRGB(dst_1, py_1, 1); |
ENDYUV2RGBLINE(8, 1) |
LOADCHROMA(0); |
PUTRGB(dst_1, py_1, 0); |
PUTRGB(dst_2, py_2, 0); |
ENDYUV2RGBFUNC() |
YUV2RGBFUNC(yuva2rgba_c, uint32_t, 1) |
LOADCHROMA(0); |
PUTRGBA(dst_1, py_1, pa_1, 0, 24); |
PUTRGBA(dst_2, py_2, pa_2, 0, 24); |
LOADCHROMA(1); |
PUTRGBA(dst_2, py_2, pa_2, 1, 24); |
PUTRGBA(dst_1, py_1, pa_1, 1, 24); |
LOADCHROMA(2); |
PUTRGBA(dst_1, py_1, pa_1, 2, 24); |
PUTRGBA(dst_2, py_2, pa_2, 2, 24); |
LOADCHROMA(3); |
PUTRGBA(dst_2, py_2, pa_2, 3, 24); |
PUTRGBA(dst_1, py_1, pa_1, 3, 24); |
pa_1 += 8; |
pa_2 += 8; |
ENDYUV2RGBLINE(8, 0) |
LOADCHROMA(0); |
PUTRGBA(dst_1, py_1, pa_1, 0, 24); |
PUTRGBA(dst_2, py_2, pa_2, 0, 24); |
LOADCHROMA(1); |
PUTRGBA(dst_2, py_2, pa_2, 1, 24); |
PUTRGBA(dst_1, py_1, pa_1, 1, 24); |
pa_1 += 4; |
pa_2 += 4; |
ENDYUV2RGBLINE(8, 1) |
LOADCHROMA(0); |
PUTRGBA(dst_1, py_1, pa_1, 0, 24); |
PUTRGBA(dst_2, py_2, pa_2, 0, 24); |
ENDYUV2RGBFUNC() |
YUV2RGBFUNC(yuva2argb_c, uint32_t, 1) |
LOADCHROMA(0); |
PUTRGBA(dst_1, py_1, pa_1, 0, 0); |
PUTRGBA(dst_2, py_2, pa_2, 0, 0); |
LOADCHROMA(1); |
PUTRGBA(dst_2, py_2, pa_2, 1, 0); |
PUTRGBA(dst_1, py_1, pa_1, 1, 0); |
LOADCHROMA(2); |
PUTRGBA(dst_1, py_1, pa_1, 2, 0); |
PUTRGBA(dst_2, py_2, pa_2, 2, 0); |
LOADCHROMA(3); |
PUTRGBA(dst_2, py_2, pa_2, 3, 0); |
PUTRGBA(dst_1, py_1, pa_1, 3, 0); |
pa_1 += 8; |
pa_2 += 8; |
ENDYUV2RGBLINE(8, 0) |
LOADCHROMA(0); |
PUTRGBA(dst_1, py_1, pa_1, 0, 0); |
PUTRGBA(dst_2, py_2, pa_2, 0, 0); |
LOADCHROMA(1); |
PUTRGBA(dst_2, py_2, pa_2, 1, 0); |
PUTRGBA(dst_1, py_1, pa_1, 1, 0); |
pa_1 += 4; |
pa_2 += 4; |
ENDYUV2RGBLINE(8, 1) |
LOADCHROMA(0); |
PUTRGBA(dst_1, py_1, pa_1, 0, 0); |
PUTRGBA(dst_2, py_2, pa_2, 0, 0); |
ENDYUV2RGBFUNC() |
YUV2RGBFUNC(yuv2rgb_c_24_rgb, uint8_t, 0) |
LOADCHROMA(0); |
PUTRGB24(dst_1, py_1, 0); |
PUTRGB24(dst_2, py_2, 0); |
LOADCHROMA(1); |
PUTRGB24(dst_2, py_2, 1); |
PUTRGB24(dst_1, py_1, 1); |
LOADCHROMA(2); |
PUTRGB24(dst_1, py_1, 2); |
PUTRGB24(dst_2, py_2, 2); |
LOADCHROMA(3); |
PUTRGB24(dst_2, py_2, 3); |
PUTRGB24(dst_1, py_1, 3); |
ENDYUV2RGBLINE(24, 0) |
LOADCHROMA(0); |
PUTRGB24(dst_1, py_1, 0); |
PUTRGB24(dst_2, py_2, 0); |
LOADCHROMA(1); |
PUTRGB24(dst_2, py_2, 1); |
PUTRGB24(dst_1, py_1, 1); |
ENDYUV2RGBLINE(24, 1) |
LOADCHROMA(0); |
PUTRGB24(dst_1, py_1, 0); |
PUTRGB24(dst_2, py_2, 0); |
ENDYUV2RGBFUNC() |
// only trivial mods from yuv2rgb_c_24_rgb |
YUV2RGBFUNC(yuv2rgb_c_24_bgr, uint8_t, 0) |
LOADCHROMA(0); |
PUTBGR24(dst_1, py_1, 0); |
PUTBGR24(dst_2, py_2, 0); |
LOADCHROMA(1); |
PUTBGR24(dst_2, py_2, 1); |
PUTBGR24(dst_1, py_1, 1); |
LOADCHROMA(2); |
PUTBGR24(dst_1, py_1, 2); |
PUTBGR24(dst_2, py_2, 2); |
LOADCHROMA(3); |
PUTBGR24(dst_2, py_2, 3); |
PUTBGR24(dst_1, py_1, 3); |
ENDYUV2RGBLINE(24, 0) |
LOADCHROMA(0); |
PUTBGR24(dst_1, py_1, 0); |
PUTBGR24(dst_2, py_2, 0); |
LOADCHROMA(1); |
PUTBGR24(dst_2, py_2, 1); |
PUTBGR24(dst_1, py_1, 1); |
ENDYUV2RGBLINE(24, 1) |
LOADCHROMA(0); |
PUTBGR24(dst_1, py_1, 0); |
PUTBGR24(dst_2, py_2, 0); |
ENDYUV2RGBFUNC() |
YUV2RGBFUNC(yuv2rgb_c_16_ordered_dither, uint16_t, 0) |
const uint8_t *d16 = ff_dither_2x2_8[y & 1]; |
const uint8_t *e16 = ff_dither_2x2_4[y & 1]; |
const uint8_t *f16 = ff_dither_2x2_8[(y & 1)^1]; |
#define PUTRGB16(dst, src, i, o) \ |
Y = src[2 * i]; \ |
dst[2 * i] = r[Y + d16[0 + o]] + \ |
g[Y + e16[0 + o]] + \ |
b[Y + f16[0 + o]]; \ |
Y = src[2 * i + 1]; \ |
dst[2 * i + 1] = r[Y + d16[1 + o]] + \ |
g[Y + e16[1 + o]] + \ |
b[Y + f16[1 + o]]; |
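/* d16/e16/f16 are 2x2 ordered-dither rows; adding them to Y before the |
 * table lookup dithers the truncation to 5/6/5 bits, and the +8 used in |
 * the dst_2 calls below steps into the next table row so the second |
 * scanline of the pair gets its own pattern. */ |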
LOADCHROMA(0); |
PUTRGB16(dst_1, py_1, 0, 0); |
PUTRGB16(dst_2, py_2, 0, 0 + 8); |
LOADCHROMA(1); |
PUTRGB16(dst_2, py_2, 1, 2 + 8); |
PUTRGB16(dst_1, py_1, 1, 2); |
LOADCHROMA(2); |
PUTRGB16(dst_1, py_1, 2, 4); |
PUTRGB16(dst_2, py_2, 2, 4 + 8); |
LOADCHROMA(3); |
PUTRGB16(dst_2, py_2, 3, 6 + 8); |
PUTRGB16(dst_1, py_1, 3, 6); |
CLOSEYUV2RGBFUNC(8) |
YUV2RGBFUNC(yuv2rgb_c_15_ordered_dither, uint16_t, 0) |
const uint8_t *d16 = ff_dither_2x2_8[y & 1]; |
const uint8_t *e16 = ff_dither_2x2_8[(y & 1)^1]; |
#define PUTRGB15(dst, src, i, o) \ |
Y = src[2 * i]; \ |
dst[2 * i] = r[Y + d16[0 + o]] + \ |
g[Y + d16[1 + o]] + \ |
b[Y + e16[0 + o]]; \ |
Y = src[2 * i + 1]; \ |
dst[2 * i + 1] = r[Y + d16[1 + o]] + \ |
g[Y + d16[0 + o]] + \ |
b[Y + e16[1 + o]]; |
LOADCHROMA(0); |
PUTRGB15(dst_1, py_1, 0, 0); |
PUTRGB15(dst_2, py_2, 0, 0 + 8); |
LOADCHROMA(1); |
PUTRGB15(dst_2, py_2, 1, 2 + 8); |
PUTRGB15(dst_1, py_1, 1, 2); |
LOADCHROMA(2); |
PUTRGB15(dst_1, py_1, 2, 4); |
PUTRGB15(dst_2, py_2, 2, 4 + 8); |
LOADCHROMA(3); |
PUTRGB15(dst_2, py_2, 3, 6 + 8); |
PUTRGB15(dst_1, py_1, 3, 6); |
CLOSEYUV2RGBFUNC(8) |
// r, g, b, dst_1, dst_2 |
YUV2RGBFUNC(yuv2rgb_c_12_ordered_dither, uint16_t, 0) |
const uint8_t *d16 = ff_dither_4x4_16[y & 3]; |
#define PUTRGB12(dst, src, i, o) \ |
Y = src[2 * i]; \ |
dst[2 * i] = r[Y + d16[0 + o]] + \ |
g[Y + d16[0 + o]] + \ |
b[Y + d16[0 + o]]; \ |
Y = src[2 * i + 1]; \ |
dst[2 * i + 1] = r[Y + d16[1 + o]] + \ |
g[Y + d16[1 + o]] + \ |
b[Y + d16[1 + o]]; |
LOADCHROMA(0); |
PUTRGB12(dst_1, py_1, 0, 0); |
PUTRGB12(dst_2, py_2, 0, 0 + 8); |
LOADCHROMA(1); |
PUTRGB12(dst_2, py_2, 1, 2 + 8); |
PUTRGB12(dst_1, py_1, 1, 2); |
LOADCHROMA(2); |
PUTRGB12(dst_1, py_1, 2, 4); |
PUTRGB12(dst_2, py_2, 2, 4 + 8); |
LOADCHROMA(3); |
PUTRGB12(dst_2, py_2, 3, 6 + 8); |
PUTRGB12(dst_1, py_1, 3, 6); |
CLOSEYUV2RGBFUNC(8) |
// r, g, b, dst_1, dst_2 |
YUV2RGBFUNC(yuv2rgb_c_8_ordered_dither, uint8_t, 0) |
const uint8_t *d32 = ff_dither_8x8_32[y & 7]; |
const uint8_t *d64 = ff_dither_8x8_73[y & 7]; |
#define PUTRGB8(dst, src, i, o) \ |
Y = src[2 * i]; \ |
dst[2 * i] = r[Y + d32[0 + o]] + \ |
g[Y + d32[0 + o]] + \ |
b[Y + d64[0 + o]]; \ |
Y = src[2 * i + 1]; \ |
dst[2 * i + 1] = r[Y + d32[1 + o]] + \ |
g[Y + d32[1 + o]] + \ |
b[Y + d64[1 + o]]; |
LOADCHROMA(0); |
PUTRGB8(dst_1, py_1, 0, 0); |
PUTRGB8(dst_2, py_2, 0, 0 + 8); |
LOADCHROMA(1); |
PUTRGB8(dst_2, py_2, 1, 2 + 8); |
PUTRGB8(dst_1, py_1, 1, 2); |
LOADCHROMA(2); |
PUTRGB8(dst_1, py_1, 2, 4); |
PUTRGB8(dst_2, py_2, 2, 4 + 8); |
LOADCHROMA(3); |
PUTRGB8(dst_2, py_2, 3, 6 + 8); |
PUTRGB8(dst_1, py_1, 3, 6); |
ENDYUV2RGBLINE(8, 0) |
const uint8_t *d32 = ff_dither_8x8_32[y & 7]; |
const uint8_t *d64 = ff_dither_8x8_73[y & 7]; |
LOADCHROMA(0); |
PUTRGB8(dst_1, py_1, 0, 0); |
PUTRGB8(dst_2, py_2, 0, 0 + 8); |
LOADCHROMA(1); |
PUTRGB8(dst_2, py_2, 1, 2 + 8); |
PUTRGB8(dst_1, py_1, 1, 2); |
ENDYUV2RGBLINE(8, 1) |
const uint8_t *d32 = ff_dither_8x8_32[y & 7]; |
const uint8_t *d64 = ff_dither_8x8_73[y & 7]; |
LOADCHROMA(0); |
PUTRGB8(dst_1, py_1, 0, 0); |
PUTRGB8(dst_2, py_2, 0, 0 + 8); |
ENDYUV2RGBFUNC() |
YUV2RGBFUNC(yuv2rgb_c_4_ordered_dither, uint8_t, 0) |
const uint8_t * d64 = ff_dither_8x8_73[y & 7]; |
const uint8_t *d128 = ff_dither_8x8_220[y & 7]; |
int acc; |
#define PUTRGB4D(dst, src, i, o) \ |
Y = src[2 * i]; \ |
acc = r[Y + d128[0 + o]] + \ |
g[Y + d64[0 + o]] + \ |
b[Y + d128[0 + o]]; \ |
Y = src[2 * i + 1]; \ |
acc |= (r[Y + d128[1 + o]] + \ |
g[Y + d64[1 + o]] + \ |
b[Y + d128[1 + o]]) << 4; \ |
dst[i] = acc; |
LOADCHROMA(0); |
PUTRGB4D(dst_1, py_1, 0, 0); |
PUTRGB4D(dst_2, py_2, 0, 0 + 8); |
LOADCHROMA(1); |
PUTRGB4D(dst_2, py_2, 1, 2 + 8); |
PUTRGB4D(dst_1, py_1, 1, 2); |
LOADCHROMA(2); |
PUTRGB4D(dst_1, py_1, 2, 4); |
PUTRGB4D(dst_2, py_2, 2, 4 + 8); |
LOADCHROMA(3); |
PUTRGB4D(dst_2, py_2, 3, 6 + 8); |
PUTRGB4D(dst_1, py_1, 3, 6); |
ENDYUV2RGBLINE(4, 0) |
const uint8_t * d64 = ff_dither_8x8_73[y & 7]; |
const uint8_t *d128 = ff_dither_8x8_220[y & 7]; |
int acc; |
LOADCHROMA(0); |
PUTRGB4D(dst_1, py_1, 0, 0); |
PUTRGB4D(dst_2, py_2, 0, 0 + 8); |
LOADCHROMA(1); |
PUTRGB4D(dst_2, py_2, 1, 2 + 8); |
PUTRGB4D(dst_1, py_1, 1, 2); |
ENDYUV2RGBLINE(4, 1) |
const uint8_t * d64 = ff_dither_8x8_73[y & 7]; |
const uint8_t *d128 = ff_dither_8x8_220[y & 7]; |
int acc; |
LOADCHROMA(0); |
PUTRGB4D(dst_1, py_1, 0, 0); |
PUTRGB4D(dst_2, py_2, 0, 0 + 8); |
ENDYUV2RGBFUNC() |
YUV2RGBFUNC(yuv2rgb_c_4b_ordered_dither, uint8_t, 0) |
const uint8_t *d64 = ff_dither_8x8_73[y & 7]; |
const uint8_t *d128 = ff_dither_8x8_220[y & 7]; |
#define PUTRGB4DB(dst, src, i, o) \ |
Y = src[2 * i]; \ |
dst[2 * i] = r[Y + d128[0 + o]] + \ |
g[Y + d64[0 + o]] + \ |
b[Y + d128[0 + o]]; \ |
Y = src[2 * i + 1]; \ |
dst[2 * i + 1] = r[Y + d128[1 + o]] + \ |
g[Y + d64[1 + o]] + \ |
b[Y + d128[1 + o]]; |
LOADCHROMA(0); |
PUTRGB4DB(dst_1, py_1, 0, 0); |
PUTRGB4DB(dst_2, py_2, 0, 0 + 8); |
LOADCHROMA(1); |
PUTRGB4DB(dst_2, py_2, 1, 2 + 8); |
PUTRGB4DB(dst_1, py_1, 1, 2); |
LOADCHROMA(2); |
PUTRGB4DB(dst_1, py_1, 2, 4); |
PUTRGB4DB(dst_2, py_2, 2, 4 + 8); |
LOADCHROMA(3); |
PUTRGB4DB(dst_2, py_2, 3, 6 + 8); |
PUTRGB4DB(dst_1, py_1, 3, 6); |
ENDYUV2RGBLINE(8, 0) |
const uint8_t *d64 = ff_dither_8x8_73[y & 7]; |
const uint8_t *d128 = ff_dither_8x8_220[y & 7]; |
LOADCHROMA(0); |
PUTRGB4DB(dst_1, py_1, 0, 0); |
PUTRGB4DB(dst_2, py_2, 0, 0 + 8); |
LOADCHROMA(1); |
PUTRGB4DB(dst_2, py_2, 1, 2 + 8); |
PUTRGB4DB(dst_1, py_1, 1, 2); |
ENDYUV2RGBLINE(8, 1) |
const uint8_t *d64 = ff_dither_8x8_73[y & 7]; |
const uint8_t *d128 = ff_dither_8x8_220[y & 7]; |
LOADCHROMA(0); |
PUTRGB4DB(dst_1, py_1, 0, 0); |
PUTRGB4DB(dst_2, py_2, 0, 0 + 8); |
ENDYUV2RGBFUNC() |
YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t, 0) |
const uint8_t *d128 = ff_dither_8x8_220[y & 7]; |
char out_1 = 0, out_2 = 0; |
g = c->table_gU[128 + YUVRGB_TABLE_HEADROOM] + c->table_gV[128 + YUVRGB_TABLE_HEADROOM]; |
#define PUTRGB1(out, src, i, o) \ |
Y = src[2 * i]; \ |
out += out + g[Y + d128[0 + o]]; \ |
Y = src[2 * i + 1]; \ |
out += out + g[Y + d128[1 + o]]; |
PUTRGB1(out_1, py_1, 0, 0); |
PUTRGB1(out_2, py_2, 0, 0 + 8); |
PUTRGB1(out_2, py_2, 1, 2 + 8); |
PUTRGB1(out_1, py_1, 1, 2); |
PUTRGB1(out_1, py_1, 2, 4); |
PUTRGB1(out_2, py_2, 2, 4 + 8); |
PUTRGB1(out_2, py_2, 3, 6 + 8); |
PUTRGB1(out_1, py_1, 3, 6); |
dst_1[0] = out_1; |
dst_2[0] = out_2; |
CLOSEYUV2RGBFUNC(1) |
SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c) |
{ |
SwsFunc t = NULL; |
if (ARCH_PPC) |
t = ff_yuv2rgb_init_ppc(c); |
if (ARCH_X86) |
t = ff_yuv2rgb_init_x86(c); |
if (t) |
return t; |
av_log(c, AV_LOG_WARNING, |
"No accelerated colorspace conversion found from %s to %s.\n", |
av_get_pix_fmt_name(c->srcFormat), av_get_pix_fmt_name(c->dstFormat)); |
switch (c->dstFormat) { |
case AV_PIX_FMT_BGR48BE: |
case AV_PIX_FMT_BGR48LE: |
return yuv2rgb_c_bgr48; |
case AV_PIX_FMT_RGB48BE: |
case AV_PIX_FMT_RGB48LE: |
return yuv2rgb_c_48; |
case AV_PIX_FMT_ARGB: |
case AV_PIX_FMT_ABGR: |
if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat)) |
return yuva2argb_c; |
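/* no alpha in the source: fall through, since the 32-bit tables built by |
 * ff_yuv2rgb_c_init_tables() already store an opaque 0xFF in the alpha |
 * byte when the source has none */ |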
case AV_PIX_FMT_RGBA: |
case AV_PIX_FMT_BGRA: |
return (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat)) ? yuva2rgba_c : yuv2rgb_c_32; |
case AV_PIX_FMT_RGB24: |
return yuv2rgb_c_24_rgb; |
case AV_PIX_FMT_BGR24: |
return yuv2rgb_c_24_bgr; |
case AV_PIX_FMT_RGB565: |
case AV_PIX_FMT_BGR565: |
return yuv2rgb_c_16_ordered_dither; |
case AV_PIX_FMT_RGB555: |
case AV_PIX_FMT_BGR555: |
return yuv2rgb_c_15_ordered_dither; |
case AV_PIX_FMT_RGB444: |
case AV_PIX_FMT_BGR444: |
return yuv2rgb_c_12_ordered_dither; |
case AV_PIX_FMT_RGB8: |
case AV_PIX_FMT_BGR8: |
return yuv2rgb_c_8_ordered_dither; |
case AV_PIX_FMT_RGB4: |
case AV_PIX_FMT_BGR4: |
return yuv2rgb_c_4_ordered_dither; |
case AV_PIX_FMT_RGB4_BYTE: |
case AV_PIX_FMT_BGR4_BYTE: |
return yuv2rgb_c_4b_ordered_dither; |
case AV_PIX_FMT_MONOBLACK: |
return yuv2rgb_c_1_ordered_dither; |
} |
return NULL; |
} |
static void fill_table(uint8_t* table[256 + 2*YUVRGB_TABLE_HEADROOM], const int elemsize, |
const int64_t inc, void *y_tab) |
{ |
int i; |
uint8_t *y_table = y_tab; |
y_table -= elemsize * (inc >> 9); |
for (i = 0; i < 256 + 2*YUVRGB_TABLE_HEADROOM; i++) { |
int64_t cb = av_clip_uint8(i-YUVRGB_TABLE_HEADROOM)*inc; |
table[i] = y_table + elemsize * (cb >> 16); |
} |
} |
static void fill_gv_table(int table[256 + 2*YUVRGB_TABLE_HEADROOM], const int elemsize, const int64_t inc) |
{ |
int i; |
int off = -(inc >> 9); |
for (i = 0; i < 256 + 2*YUVRGB_TABLE_HEADROOM; i++) { |
int64_t cb = av_clip_uint8(i-YUVRGB_TABLE_HEADROOM)*inc; |
table[i] = elemsize * (off + (cb >> 16)); |
} |
} |
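/* The -(inc >> 9) bias centres the ramp so that mid chroma (128, whose |
 * contribution is 128 * inc >> 16 = inc >> 9) maps to zero. fill_table() |
 * hands out pointers pre-advanced by each chroma value's contribution, so |
 * indexing with [Y] adds luma and chroma in one load; fill_gv_table() |
 * stores plain element offsets instead, which is what lets LOADCHROMA() |
 * add a second chroma term onto the green-channel pointer. */ |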
static uint16_t roundToInt16(int64_t f) |
{ |
int r = (f + (1 << 15)) >> 16; |
if (r < -0x7FFF) |
return 0x8000; |
else if (r > 0x7FFF) |
return 0x7FFF; |
else |
return r; |
} |
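/* Rounds a 16.16 value to the nearest integer and clamps it to |
 * [-0x7FFF, 0x7FFF]; anything below that range returns the bit pattern |
 * 0x8000 (-32768), carried in a uint16_t. */ |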
av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], |
int fullRange, int brightness, |
int contrast, int saturation) |
{ |
const int isRgb = c->dstFormat == AV_PIX_FMT_RGB32 || |
c->dstFormat == AV_PIX_FMT_RGB32_1 || |
c->dstFormat == AV_PIX_FMT_BGR24 || |
c->dstFormat == AV_PIX_FMT_RGB565BE || |
c->dstFormat == AV_PIX_FMT_RGB565LE || |
c->dstFormat == AV_PIX_FMT_RGB555BE || |
c->dstFormat == AV_PIX_FMT_RGB555LE || |
c->dstFormat == AV_PIX_FMT_RGB444BE || |
c->dstFormat == AV_PIX_FMT_RGB444LE || |
c->dstFormat == AV_PIX_FMT_RGB8 || |
c->dstFormat == AV_PIX_FMT_RGB4 || |
c->dstFormat == AV_PIX_FMT_RGB4_BYTE || |
c->dstFormat == AV_PIX_FMT_MONOBLACK; |
const int isNotNe = c->dstFormat == AV_PIX_FMT_NE(RGB565LE, RGB565BE) || |
c->dstFormat == AV_PIX_FMT_NE(RGB555LE, RGB555BE) || |
c->dstFormat == AV_PIX_FMT_NE(RGB444LE, RGB444BE) || |
c->dstFormat == AV_PIX_FMT_NE(BGR565LE, BGR565BE) || |
c->dstFormat == AV_PIX_FMT_NE(BGR555LE, BGR555BE) || |
c->dstFormat == AV_PIX_FMT_NE(BGR444LE, BGR444BE); |
const int bpp = c->dstFormatBpp; |
uint8_t *y_table; |
uint16_t *y_table16; |
uint32_t *y_table32; |
int i, base, rbase, gbase, bbase, av_uninit(abase), needAlpha; |
const int yoffs = (fullRange ? 384 : 326) + YUVRGB_TABLE_LUMA_HEADROOM; |
const int table_plane_size = 1024 + 2*YUVRGB_TABLE_LUMA_HEADROOM; |
int64_t crv = inv_table[0]; |
int64_t cbu = inv_table[1]; |
int64_t cgu = -inv_table[2]; |
int64_t cgv = -inv_table[3]; |
int64_t cy = 1 << 16; |
int64_t oy = 0; |
int64_t yb = 0; |
if (!fullRange) { |
cy = (cy * 255) / 219; |
oy = 16 << 16; |
} else { |
crv = (crv * 224) / 255; |
cbu = (cbu * 224) / 255; |
cgu = (cgu * 224) / 255; |
cgv = (cgv * 224) / 255; |
} |
cy = (cy * contrast) >> 16; |
crv = (crv * contrast * saturation) >> 32; |
cbu = (cbu * contrast * saturation) >> 32; |
cgu = (cgu * contrast * saturation) >> 32; |
cgv = (cgv * contrast * saturation) >> 32; |
oy -= 256 * brightness; |
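/* Limited-range input stretches luma by 255/219 and keeps the 255/224 |
 * chroma pre-scale built into ff_yuv2rgb_coeffs; full-range input divides |
 * the chroma coefficients back down by 255/224 instead. contrast and |
 * saturation arrive as 16.16 factors, hence the >> 16 and >> 32 above. */ |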
c->uOffset = 0x0400040004000400LL; |
c->vOffset = 0x0400040004000400LL; |
c->yCoeff = roundToInt16(cy * 8192) * 0x0001000100010001ULL; |
c->vrCoeff = roundToInt16(crv * 8192) * 0x0001000100010001ULL; |
c->ubCoeff = roundToInt16(cbu * 8192) * 0x0001000100010001ULL; |
c->vgCoeff = roundToInt16(cgv * 8192) * 0x0001000100010001ULL; |
c->ugCoeff = roundToInt16(cgu * 8192) * 0x0001000100010001ULL; |
c->yOffset = roundToInt16(oy * 8) * 0x0001000100010001ULL; |
c->yuv2rgb_y_coeff = (int16_t)roundToInt16(cy << 13); |
c->yuv2rgb_y_offset = (int16_t)roundToInt16(oy << 9); |
c->yuv2rgb_v2r_coeff = (int16_t)roundToInt16(crv << 13); |
c->yuv2rgb_v2g_coeff = (int16_t)roundToInt16(cgv << 13); |
c->yuv2rgb_u2g_coeff = (int16_t)roundToInt16(cgu << 13); |
c->yuv2rgb_u2b_coeff = (int16_t)roundToInt16(cbu << 13); |
//scale coefficients by cy |
crv = ((crv << 16) + 0x8000) / FFMAX(cy, 1); |
cbu = ((cbu << 16) + 0x8000) / FFMAX(cy, 1); |
cgu = ((cgu << 16) + 0x8000) / FFMAX(cy, 1); |
cgv = ((cgv << 16) + 0x8000) / FFMAX(cy, 1); |
av_freep(&c->yuvTable); |
#define ALLOC_YUV_TABLE(x) \ |
c->yuvTable = av_malloc(x); \ |
if (!c->yuvTable) \ |
return AVERROR(ENOMEM); |
switch (bpp) { |
case 1: |
ALLOC_YUV_TABLE(table_plane_size); |
y_table = c->yuvTable; |
yb = -(384 << 16) - YUVRGB_TABLE_LUMA_HEADROOM*cy - oy; |
for (i = 0; i < table_plane_size - 110; i++) { |
y_table[i + 110] = av_clip_uint8((yb + 0x8000) >> 16) >> 7; |
yb += cy; |
} |
fill_table(c->table_gU, 1, cgu, y_table + yoffs); |
fill_gv_table(c->table_gV, 1, cgv); |
break; |
case 4: |
case 4 | 128: |
rbase = isRgb ? 3 : 0; |
gbase = 1; |
bbase = isRgb ? 0 : 3; |
ALLOC_YUV_TABLE(table_plane_size * 3); |
y_table = c->yuvTable; |
yb = -(384 << 16) - YUVRGB_TABLE_LUMA_HEADROOM*cy - oy; |
for (i = 0; i < table_plane_size - 110; i++) { |
int yval = av_clip_uint8((yb + 0x8000) >> 16); |
y_table[i + 110] = (yval >> 7) << rbase; |
y_table[i + 37 + table_plane_size] = ((yval + 43) / 85) << gbase; |
y_table[i + 110 + 2*table_plane_size] = (yval >> 7) << bbase; |
yb += cy; |
} |
fill_table(c->table_rV, 1, crv, y_table + yoffs); |
fill_table(c->table_gU, 1, cgu, y_table + yoffs + table_plane_size); |
fill_table(c->table_bU, 1, cbu, y_table + yoffs + 2*table_plane_size); |
fill_gv_table(c->table_gV, 1, cgv); |
break; |
case 8: |
rbase = isRgb ? 5 : 0; |
gbase = isRgb ? 2 : 3; |
bbase = isRgb ? 0 : 6; |
ALLOC_YUV_TABLE(table_plane_size * 3); |
y_table = c->yuvTable; |
yb = -(384 << 16) - YUVRGB_TABLE_LUMA_HEADROOM*cy - oy; |
for (i = 0; i < table_plane_size - 38; i++) { |
int yval = av_clip_uint8((yb + 0x8000) >> 16); |
y_table[i + 16] = ((yval + 18) / 36) << rbase; |
y_table[i + 16 + table_plane_size] = ((yval + 18) / 36) << gbase; |
y_table[i + 37 + 2*table_plane_size] = ((yval + 43) / 85) << bbase; |
yb += cy; |
} |
fill_table(c->table_rV, 1, crv, y_table + yoffs); |
fill_table(c->table_gU, 1, cgu, y_table + yoffs + table_plane_size); |
fill_table(c->table_bU, 1, cbu, y_table + yoffs + 2*table_plane_size); |
fill_gv_table(c->table_gV, 1, cgv); |
break; |
case 12: |
rbase = isRgb ? 8 : 0; |
gbase = 4; |
bbase = isRgb ? 0 : 8; |
ALLOC_YUV_TABLE(table_plane_size * 3 * 2); |
y_table16 = c->yuvTable; |
yb = -(384 << 16) - YUVRGB_TABLE_LUMA_HEADROOM*cy - oy; |
for (i = 0; i < table_plane_size; i++) { |
uint8_t yval = av_clip_uint8((yb + 0x8000) >> 16); |
y_table16[i] = (yval >> 4) << rbase; |
y_table16[i + table_plane_size] = (yval >> 4) << gbase; |
y_table16[i + 2*table_plane_size] = (yval >> 4) << bbase; |
yb += cy; |
} |
if (isNotNe) |
for (i = 0; i < table_plane_size * 3; i++) |
y_table16[i] = av_bswap16(y_table16[i]); |
fill_table(c->table_rV, 2, crv, y_table16 + yoffs); |
fill_table(c->table_gU, 2, cgu, y_table16 + yoffs + table_plane_size); |
fill_table(c->table_bU, 2, cbu, y_table16 + yoffs + 2*table_plane_size); |
fill_gv_table(c->table_gV, 2, cgv); |
break; |
case 15: |
case 16: |
rbase = isRgb ? bpp - 5 : 0; |
gbase = 5; |
bbase = isRgb ? 0 : (bpp - 5); |
ALLOC_YUV_TABLE(table_plane_size * 3 * 2); |
y_table16 = c->yuvTable; |
yb = -(384 << 16) - YUVRGB_TABLE_LUMA_HEADROOM*cy - oy; |
for (i = 0; i < table_plane_size; i++) { |
uint8_t yval = av_clip_uint8((yb + 0x8000) >> 16); |
y_table16[i] = (yval >> 3) << rbase; |
y_table16[i + table_plane_size] = (yval >> (18 - bpp)) << gbase; |
y_table16[i + 2*table_plane_size] = (yval >> 3) << bbase; |
yb += cy; |
} |
if (isNotNe) |
for (i = 0; i < table_plane_size * 3; i++) |
y_table16[i] = av_bswap16(y_table16[i]); |
fill_table(c->table_rV, 2, crv, y_table16 + yoffs); |
fill_table(c->table_gU, 2, cgu, y_table16 + yoffs + table_plane_size); |
fill_table(c->table_bU, 2, cbu, y_table16 + yoffs + 2*table_plane_size); |
fill_gv_table(c->table_gV, 2, cgv); |
break; |
case 24: |
case 48: |
ALLOC_YUV_TABLE(table_plane_size); |
y_table = c->yuvTable; |
yb = -(384 << 16) - YUVRGB_TABLE_LUMA_HEADROOM*cy - oy; |
for (i = 0; i < table_plane_size; i++) { |
y_table[i] = av_clip_uint8((yb + 0x8000) >> 16); |
yb += cy; |
} |
fill_table(c->table_rV, 1, crv, y_table + yoffs); |
fill_table(c->table_gU, 1, cgu, y_table + yoffs); |
fill_table(c->table_bU, 1, cbu, y_table + yoffs); |
fill_gv_table(c->table_gV, 1, cgv); |
break; |
case 32: |
case 64: |
base = (c->dstFormat == AV_PIX_FMT_RGB32_1 || |
c->dstFormat == AV_PIX_FMT_BGR32_1) ? 8 : 0; |
rbase = base + (isRgb ? 16 : 0); |
gbase = base + 8; |
bbase = base + (isRgb ? 0 : 16); |
needAlpha = CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat); |
if (!needAlpha) |
abase = (base + 24) & 31; |
ALLOC_YUV_TABLE(table_plane_size * 3 * 4); |
y_table32 = c->yuvTable; |
yb = -(384 << 16) - YUVRGB_TABLE_LUMA_HEADROOM*cy - oy; |
for (i = 0; i < table_plane_size; i++) { |
unsigned yval = av_clip_uint8((yb + 0x8000) >> 16); |
y_table32[i] = (yval << rbase) + |
(needAlpha ? 0 : (255u << abase)); |
y_table32[i + table_plane_size] = yval << gbase; |
y_table32[i + 2*table_plane_size] = yval << bbase; |
yb += cy; |
} |
fill_table(c->table_rV, 4, crv, y_table32 + yoffs); |
fill_table(c->table_gU, 4, cgu, y_table32 + yoffs + table_plane_size); |
fill_table(c->table_bU, 4, cbu, y_table32 + yoffs + 2*table_plane_size); |
fill_gv_table(c->table_gV, 4, cgv); |
break; |
default: |
if(!isPlanar(c->dstFormat) || bpp <= 24) |
av_log(c, AV_LOG_ERROR, "%ibpp not supported by yuv2rgb\n", bpp); |
return -1; |
} |
return 0; |
} |