WebSVN – Kolibri OS – Blame – /contrib/sdk/sources/ffmpeg/libavfilter/x86/vf_hqdn3d.asm

Rev	Author	Line No.	Line
4349	Serge	1	;******************************************************************************
		2	;* Copyright (c) 2012 Loren Merritt
		3	;*
		4	;* This file is part of FFmpeg.
		5	;*
		6	;* FFmpeg is free software; you can redistribute it and/or
		7	;* modify it under the terms of the GNU Lesser General Public
		8	;* License as published by the Free Software Foundation; either
		9	;* version 2.1 of the License, or (at your option) any later version.
		10	;*
		11	;* FFmpeg is distributed in the hope that it will be useful,
		12	;* but WITHOUT ANY WARRANTY; without even the implied warranty of
		13	;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
		14	;* Lesser General Public License for more details.
		15	;*
		16	;* You should have received a copy of the GNU Lesser General Public
		17	;* License along with FFmpeg; if not, write to the Free Software
		18	;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
		19	;******************************************************************************
		20
		21	%include "libavutil/x86/x86util.asm"
		22
		23	SECTION .text
		24
		25	%macro LOWPASS 3 ; prevsample, cursample, lut -- leaves cursample + lut[(prevsample - cursample) >> (8-lut_bits)] in prevsample; cursample and lut are not written. Arguments are register base names (the q/d suffixes are appended here). lut_bits must already be %assign'ed at expansion time.
		26	sub %1q, %2q ; %1 = prevsample - cursample; this signed difference becomes the table index
		27	%if lut_bits != 8
		28	sar %1q, 8-lut_bits ; arithmetic shift: drops the low (8-lut_bits) bits of the difference while keeping its sign
		29	%endif
		30	movsx %1d, word [%3q+%1q*2] ; sign-extending load of the 16-bit table entry; *2 because entries are words. The index can be negative, so lut is expected to point inside the table rather than at its start.
		31	add %1d, %2d ; result = cursample + table entry
		32	%endmacro
		33
		34	%macro LOAD 3 ; dstreg, x, bitdepth -- reads source sample number x (an index relative to srcq) into dstreg and scales it up to a 16-bit range. dstreg is used as given (callers in this file pass a 32-bit register name).
		35	%if %3 == 8
		36	movzx %1, byte [srcq+%2] ; 8-bit input: one byte per sample, zero-extended
		37	%else
		38	movzx %1, word [srcq+(%2)*2] ; any other depth: one 16-bit word per sample, zero-extended; x is parenthesised so expressions like xq+1 scale correctly
		39	%endif
		40	%if %3 != 16
		41	shl %1, 16-%3 ; move the sample's bits to the top of a 16-bit value
		42	add %1, (1<<(15-%3))-1 ; fill the vacated low bits with one less than half an input step (presumably a rounding bias -- confirm against the C reference)
		43	%endif
		44	%endmacro
		45
		46	%macro HQDN3D_ROW 1 ; bitdepth -- defines hqdn3d_row_<bitdepth>_x86, which filters one row: each sample is low-passed through the spatial table (along the row, then against lineant[x]) and the result is low-passed through the temporal table against frameant[x]. lineant[x] and frameant[x] are updated in place and the final value, scaled back to <bitdepth> bits, is written to dst[x]. width must be >= 1.
		47	%if ARCH_X86_64
		48	cglobal hqdn3d_row_%1_x86, 7,10,0, src, dst, lineant, frameant, width, spatial, temporal, pixelant, t0, t1 ; 7 arguments plus three named scratch registers (pixelant, t0, t1), no vector registers
		49	%else
		50	cglobal hqdn3d_row_%1_x86, 7,7,0, src, dst, lineant, frameant, width, spatial, temporal ; only 7 registers here, so the scratch registers are carved out by hand below
		51	%endif
		52	%assign bytedepth (%1+7)>>3 ; bytes per src/dst sample: 1 for 8-bit, 2 for 9..16-bit
		53	%assign lut_bits 4+4*(%1/16) ; 4 for depths below 16, 8 for 16-bit; consumed by LOWPASS
		54	dec widthq ; width-1 = index of the last sample in the row
		55	lea srcq, [srcq+widthq*bytedepth] ; point every array at its last element ...
		56	lea dstq, [dstq+widthq*bytedepth]
		57	lea frameantq, [frameantq+widthq*2] ; ... lineant/frameant always hold 16-bit entries
		58	lea lineantq, [lineantq+widthq*2]
		59	neg widthq ; the counter now runs from -(width-1) up to 0 and doubles as a negative index from the row end
		60	%define xq widthq
		61	%if ARCH_X86_32
		62	mov dstmp, dstq ; write the advanced pointers back to the arguments' memory homes (the *mp names; presumably stack slots on x86-32 -- see x86inc) so their registers can be reused
		63	mov srcmp, srcq
		64	mov frameantmp, frameantq
		65	mov lineantmp, lineantq
		66	%define dstq r0 ; src, dst, frameant and lineant now all share r0 and are reloaded via movifnidn before each use
		67	%define frameantq r0
		68	%define lineantq r0
		69	%define pixelantq r1 ; r1 is free because dst was saved to dstmp above
		70	%define pixelantd r1d
		71	DECLARE_REG_TMP 2,3 ; t0/t1 take r2/r3, free because lineant/frameant were saved above
		72	%endif
		73	LOAD pixelantd, xq, %1 ; prime pixelant with the first sample of the row
			mov t0d, pixelantd ; give t0 a defined, in-range value: a single-sample row enters at .loop2 without ever loading a lookahead, and LOWPASS still indexes the spatial table with pixelant - t0
			test xq, xq ; xq == 0 here means width == 1
			jz .loop2 ; single sample: skip the lookahead load so src[1] is never read
		74	ALIGN 16
		75	.loop: ; one pass per sample; entered here only while a following sample exists
		76	movifnidn srcq, srcmp ; reload src where it is not already live in srcq (x86-32 shares r0)
		77	LOAD t0d, xq+1, %1 ; t0 = next source sample. Skipped on the last iteration (entered at .loop2) to avoid overread
		78	.loop2: ; entry point for the last sample of the row (xq == 0)
		79	movifnidn lineantq, lineantmp
		80	movzx t1d, word [lineantq+xq*2] ; t1 = lineant[x]
		81	LOWPASS t1, pixelant, spatial ; t1 = pixelant + spatial[lineant[x] - pixelant]
		82	mov [lineantq+xq*2], t1w ; lineant[x] = t1
		83	LOWPASS pixelant, t0, spatial ; pixelant for the next sample = t0 + spatial[pixelant - t0]; on the .loop2 pass t0 is not a fresh sample, but the result is never read because the loop exits
		84	movifnidn frameantq, frameantmp
		85	movzx t0d, word [frameantq+xq*2] ; t0 = frameant[x]
		86	LOWPASS t0, t1, temporal ; t0 = t1 + temporal[frameant[x] - t1]
		87	mov [frameantq+xq*2], t0w ; frameant[x] = t0, kept at 16-bit scale
		88	movifnidn dstq, dstmp
		89	%if %1 != 16
		90	shr t0d, 16-%1 ; scale back down to the output bit depth. Could eliminate this by storing from t0h, but only with some constraints on register allocation
		91	%endif
		92	%if %1 == 8
		93	mov [dstq+xq], t0b ; dst[x], one byte per sample
		94	%else
		95	mov [dstq+xq*2], t0w ; dst[x], one word per sample
		96	%endif
		97	inc xq ; the flags from this inc drive both branches below
		98	jl .loop ; xq < 0: at least two samples remain, so a lookahead exists
		99	je .loop2 ; xq == 0: exactly one sample remains, process it without a lookahead
		100	REP_RET ; xq == 1: row finished
		101	%endmacro ; HQDN3D_ROW
		102
		103	HQDN3D_ROW 8 ; hqdn3d_row_8_x86: byte samples, lut_bits = 4
		104	HQDN3D_ROW 9 ; hqdn3d_row_9_x86: word samples, lut_bits = 4
		105	HQDN3D_ROW 10 ; hqdn3d_row_10_x86: word samples, lut_bits = 4
		106	HQDN3D_ROW 16 ; hqdn3d_row_16_x86: word samples, lut_bits = 8, no input scaling or output shift

Subversion Repositories Kolibri OS

(root)/contrib/sdk/sources/ffmpeg/libavfilter/x86/vf_hqdn3d.asm – Rev 4349