Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
6148 | serge | 1 | ;****************************************************************************** |
2 | ;* V210 SIMD unpack |
||
3 | ;* Copyright (c) 2011 Loren Merritt |
||
4 | ;* Copyright (c) 2011 Kieran Kunhya |
||
5 | ;* |
||
6 | ;* This file is part of FFmpeg. |
||
7 | ;* |
||
8 | ;* FFmpeg is free software; you can redistribute it and/or |
||
9 | ;* modify it under the terms of the GNU Lesser General Public |
||
10 | ;* License as published by the Free Software Foundation; either |
||
11 | ;* version 2.1 of the License, or (at your option) any later version. |
||
12 | ;* |
||
13 | ;* FFmpeg is distributed in the hope that it will be useful, |
||
14 | ;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
15 | ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||
16 | ;* Lesser General Public License for more details. |
||
17 | ;* |
||
18 | ;* You should have received a copy of the GNU Lesser General Public |
||
19 | ;* License along with FFmpeg; if not, write to the Free Software |
||
20 | ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||
21 | ;****************************************************************************** |
||
22 | |||
23 | %include "libavutil/x86/x86util.asm" |
||
24 | |||
25 | SECTION_RODATA |
||
26 | |||
; Constant tables for v210_planar_unpack. Each table is 16 bytes, i.e. one
; XMM register's worth, and is loaded whole with mova before the loop.

; Per-dword mask that keeps the low 10 bits (applied after a 10-bit right
; shift to isolate the middle sample of every 32-bit input word).
v210_mask:        dd 0x3ff, 0x3ff, 0x3ff, 0x3ff

; Per-word multipliers: x64 (<<6) for the low word and x4 (<<2) for the high
; word of each dword, so that a following 6-bit right shift leaves a 10-bit
; sample in every 16-bit lane.
v210_mult:        times 4 dw 64, 4

; pshufb byte selectors (-1 zeroes the destination byte).
; luma:   y1 y2 y4 y5 y0 __ y3 __  ->  y0 y1 y2 y3 y4 y5 __ __
v210_luma_shuf:   db  8,  9,  0,  1,  2,  3, 12, 13,  4,  5,  6,  7, -1, -1, -1, -1
; chroma: u0 v0 v1 u2 u1 __ v2 __  ->  u0 u1 u2 __ v0 v1 v2 __
v210_chroma_shuf: db  0,  1,  8,  9,  6,  7, -1, -1,  2,  3,  4,  5, 12, 13, -1, -1
||
31 | |||
32 | SECTION .text |
||
33 | |||
;------------------------------------------------------------------------------
; v210_planar_unpack <alignment>, <suffix>
;
;   %1  "unaligned" selects movu for the source load; any other value
;       (the file uses "aligned") selects mova.
;   %2  suffix appended to the function name (the file uses ssse3 / avx).
;
; Emits one function, v210_planar_unpack_%1_%2, with the C-side prototype
; given just above the cglobal line below.
;
; Register roles (x86inc numbering, arguments arrive in r0..r4):
;   r0  src   advanced by mmsize bytes per iteration
;   r1  y     biased to y + 2*width bytes, indexed with 2*r4
;   r2  u     biased to u + width bytes,   indexed with r4
;   r3  v     biased to v + width bytes,   indexed with r4
;   r4  width, sign-extended, then negated: counts up towards zero in
;       steps of 6
;   m3..m6    loop-invariant constants (multipliers, mask, two shuffles)
;   m0..m2    per-iteration scratch
;
; Each iteration consumes four 32-bit words and produces 6 luma samples and
; 3 + 3 chroma samples, all as 16-bit values.
;
; Things a caller has to be aware of (all visible in the code below):
;   * The loop is tested at the bottom, so the body always runs at least
;     once, and it keeps running until the counter reaches >= 0, i.e.
;     ceil(width / 6) iterations for positive width.
;   * Stores are wider than the payload: 16 bytes go to y (12 bytes of
;     samples + 4 zero bytes) and 8 bytes go to each of u and v (6 bytes of
;     samples + 2 zero bytes). The destination buffers must tolerate this.
;------------------------------------------------------------------------------
%macro v210_planar_unpack 2

; v210_planar_unpack(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width)
cglobal v210_planar_unpack_%1_%2, 5, 5, 7
    movsxdifnidn r4, r4d
    ; Point every destination at its end and walk a negative index up to 0,
    ; so a single add/jl pair both advances and terminates the loop.
    lea    r1, [r1+2*r4]
    add    r2, r4
    add    r3, r4
    neg    r4

    mova   m3, [v210_mult]
    mova   m4, [v210_mask]
    mova   m5, [v210_luma_shuf]
    mova   m6, [v210_chroma_shuf]

.loop:
%ifidn %1, unaligned
    movu   m0, [r0]
%else
    mova   m0, [r0]
%endif

    ; Low and high 10-bit fields of each dword: shift each 16-bit lane left
    ; by multiplying (x64 / x4), then shift right by 6.
    pmullw m1, m0, m3
    ; Middle 10-bit field of each dword: shift right by 10 and mask.
    psrld  m0, 10
    psrlw  m1, 6                ; u0 v0 y1 y2 v1 u2 y4 y5
    pand   m0, m4               ; y0 __ u1 __ y3 __ v2 __

    ; Luma: gather the dwords holding y samples, then put them in order.
    shufps m2, m1, m0, 0x8d     ; y1 y2 y4 y5 y0 __ y3 __
    pshufb m2, m5               ; y0 y1 y2 y3 y4 y5 __ __
    movu   [r1+2*r4], m2

    ; Chroma: gather the dwords holding u/v samples, then split them so that
    ; u lands in the low half and v in the high half of the register.
    shufps m1, m0, 0xd8         ; u0 v0 v1 u2 u1 __ v2 __
    pshufb m1, m6               ; u0 u1 u2 __ v0 v1 v2 __
    movq   [r2+r4], m1
    movhps [r3+r4], m1

    add    r0, mmsize
    add    r4, 6                ; 6 pixels done; index is still negative while work remains
    jl     .loop

    REP_RET
%endmacro
||
75 | |||
; Instantiate v210_planar_unpack for one source-alignment flavour (%1):
; always as an SSSE3 function, and additionally as an AVX function when
; HAVE_AVX_EXTERNAL is non-zero.
%macro V210_UNPACK_CPU_VARIANTS 1
INIT_XMM
v210_planar_unpack %1, ssse3
%if HAVE_AVX_EXTERNAL
INIT_AVX
v210_planar_unpack %1, avx
%endif
%endmacro

V210_UNPACK_CPU_VARIANTS unaligned
V210_UNPACK_CPU_VARIANTS aligned