; Source: KolibriOS Subversion repository web view, revision 6147 (author: serge).
;*****************************************************************************
;* SIMD-optimized pixel operations
;*****************************************************************************
;* Copyright (c) 2000, 2001 Fabrice Bellard
;* Copyright (c) 2002-2004 Michael Niedermayer
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;*****************************************************************************
 
; Macro layer used by every function below (cglobal, INIT_MMX/INIT_XMM,
; mova/movh, REP_RET, movsxdifnidn are not defined in this file and are
; presumably provided by this include).
%include "libavutil/x86/x86util.asm"

SECTION .text
 
INIT_MMX mmx
; void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, ptrdiff_t line_size)
;
; Widens an 8x8 block of unsigned bytes to 16-bit words: 8 rows of 8 bytes are
; read from pixels (consecutive rows are line_size bytes apart) and stored as
; 64 consecutive words (128 bytes) starting at block.
;
; Register roles:
;   r0 = block, biased by +128 so that [r0+r3] walks the output buffer
;   r1 = pixels (current source row)
;   r2 = line_size
;   r3 = output byte offset, counts up from -128 to 0
;   m7 = zero, interleaved as the high byte of every widened pixel
cglobal get_pixels, 3,4
    add          r0, 128            ; point r0 at the end of the 128-byte output
    mov          r3, -128           ; so that r0+r3 starts at block[0]
    pxor         m7, m7
.loop:                              ; two source rows (32 output bytes) per pass
    mova         m0, [r1]           ; row n,   8 pixels
    mova         m2, [r1+r2]        ; row n+1, 8 pixels
    mova         m1, m0
    mova         m3, m2
    punpcklbw    m0, m7             ; row n,   pixels 0..3 -> words
    punpckhbw    m1, m7             ; row n,   pixels 4..7 -> words
    punpcklbw    m2, m7             ; row n+1, pixels 0..3 -> words
    punpckhbw    m3, m7             ; row n+1, pixels 4..7 -> words
    mova [r0+r3+ 0], m0
    mova [r0+r3+ 8], m1
    mova [r0+r3+16], m2
    mova [r0+r3+24], m3
    lea          r1, [r1+r2*2]      ; advance two rows without touching flags
    add          r3, 32
    js .loop                        ; offset still negative -> rows remain (4 passes)
    REP_RET
 
INIT_XMM sse2
; SSE2 version of get_pixels, arguments in the same order as the MMX version:
;   r0 = int16_t *block, r1 = const uint8_t *pixels, r2 = line_size
;
; Fully unrolled: widens 8 rows of 8 unsigned bytes to 16-bit words and stores
; them as eight 16-byte rows at block+0x00 .. block+0x70.
; NOTE(review): the stores use mova, the aligned move of the macro layer, so
; block is presumably required to be 16-byte aligned - confirm against callers.
;
; Register roles:
;   r3 = 3 * line_size (offset of the fourth row of each group)
;   m4 = zero, interleaved as the high byte of every widened pixel
cglobal get_pixels, 3, 4, 5
    lea          r3, [r2*3]
    pxor         m4, m4

    ; rows 0..3
    movh         m0, [r1]
    movh         m1, [r1+r2]
    movh         m2, [r1+r2*2]
    movh         m3, [r1+r3]
    lea          r1, [r1+r2*4]      ; r1 -> row 4
    punpcklbw    m0, m4
    punpcklbw    m1, m4
    punpcklbw    m2, m4
    punpcklbw    m3, m4
    mova       [r0], m0
    mova  [r0+0x10], m1
    mova  [r0+0x20], m2
    mova  [r0+0x30], m3

    ; rows 4..7
    movh         m0, [r1]
    movh         m1, [r1+r2*1]
    movh         m2, [r1+r2*2]
    movh         m3, [r1+r3]
    punpcklbw    m0, m4
    punpcklbw    m1, m4
    punpcklbw    m2, m4
    punpcklbw    m3, m4
    mova  [r0+0x40], m0
    mova  [r0+0x50], m1
    mova  [r0+0x60], m2
    mova  [r0+0x70], m3
    RET
 
INIT_MMX mmx
; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
;                         int stride);
;
; Computes the per-pixel difference of two 8x8 byte blocks as 16-bit words:
; for each of 8 rows, 8 bytes are read from s1 and from s2 (both advance by
; stride per row), widened to words, and s1 - s2 is stored to block
; (64 words, 128 bytes in total).
;
; Register roles:
;   r0 = block, biased by +128 so that [r0+r4] walks the output buffer
;   r1 = s1, r2 = s2 (current rows)
;   r3 = stride, widened from its 32-bit form by movsxdifnidn
;   r4 = output byte offset, counts up from -128 to 0
;   m7 = zero, interleaved as the high byte of every widened pixel
cglobal diff_pixels, 4,5
    movsxdifnidn r3, r3d
    pxor         m7, m7
    add          r0,  128           ; point r0 at the end of the 128-byte output
    mov          r4, -128           ; so that r0+r4 starts at block[0]
.loop:                              ; one row (16 output bytes) per pass
    mova         m0, [r1]           ; s1 row, 8 pixels
    mova         m2, [r2]           ; s2 row, 8 pixels
    mova         m1, m0
    mova         m3, m2
    punpcklbw    m0, m7             ; s1 pixels 0..3 -> words
    punpckhbw    m1, m7             ; s1 pixels 4..7 -> words
    punpcklbw    m2, m7             ; s2 pixels 0..3 -> words
    punpckhbw    m3, m7             ; s2 pixels 4..7 -> words
    psubw        m0, m2             ; s1 - s2, pixels 0..3
    psubw        m1, m3             ; s1 - s2, pixels 4..7
    mova  [r0+r4+0], m0
    mova  [r0+r4+8], m1
    add          r1, r3
    add          r2, r3
    add          r4, 16             ; must stay last: its zero flag drives the branch
    jne .loop                       ; offset reaches 0 after 8 rows
    REP_RET
 
INIT_XMM sse2
; SSE2 version of diff_pixels, arguments in the same order as the MMX version:
;   r0 = int16_t *block, r1 = const uint8_t *s1, r2 = const uint8_t *s2,
;   r3 = int stride
;
; Stores s1 - s2 for an 8x8 block of bytes as 64 16-bit words (128 bytes) at
; block, handling two rows per loop pass.
; NOTE(review): the stores use mova, the aligned move of the macro layer, so
; block is presumably required to be 16-byte aligned - confirm against callers.
;
; Register roles:
;   r0 = block, biased by +128 so that [r0+r4] walks the output buffer
;   r4 = output byte offset, counts up from -128 to 0
;   m4 = zero, interleaved as the high byte of every widened pixel
cglobal diff_pixels, 4, 5, 5
    movsxdifnidn r3, r3d
    pxor         m4, m4
    add          r0,  128           ; point r0 at the end of the 128-byte output
    mov          r4, -128           ; so that r0+r4 starts at block[0]
.loop:                              ; two rows (32 output bytes) per pass
    movh         m0, [r1]           ; s1 row n
    movh         m2, [r2]           ; s2 row n
    movh         m1, [r1+r3]        ; s1 row n+1
    movh         m3, [r2+r3]        ; s2 row n+1
    punpcklbw    m0, m4
    punpcklbw    m1, m4
    punpcklbw    m2, m4
    punpcklbw    m3, m4
    psubw        m0, m2             ; row n:   s1 - s2
    psubw        m1, m3             ; row n+1: s1 - s2
    mova [r0+r4+0 ], m0
    mova [r0+r4+16], m1
    lea          r1, [r1+r3*2]      ; lea keeps flags untouched
    lea          r2, [r2+r3*2]
    add          r4, 32             ; must stay last: its zero flag drives the branch
    jne .loop                       ; offset reaches 0 after 4 passes (8 rows)
    RET