Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4349 Serge 1
;******************************************************************************
2
;*
3
;* Copyright (c) 2000-2001 Fabrice Bellard 
4
;* Copyright (c)      Nick Kurshev 
5
;* Copyright (c) 2002 Michael Niedermayer 
6
;* Copyright (c) 2002 Zdenek Kabelac 
7
;* Copyright (c) 2013 Daniel Kang
8
;*
9
;* MMX optimized hpel functions
10
;*
11
;* This file is part of FFmpeg.
12
;*
13
;* FFmpeg is free software; you can redistribute it and/or
14
;* modify it under the terms of the GNU Lesser General Public
15
;* License as published by the Free Software Foundation; either
16
;* version 2.1 of the License, or (at your option) any later version.
17
;*
18
;* FFmpeg is distributed in the hope that it will be useful,
19
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
20
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21
;* Lesser General Public License for more details.
22
;*
23
;* You should have received a copy of the GNU Lesser General Public
24
;* License along with FFmpeg; if not, write to the Free Software
25
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26
;******************************************************************************
27
 
28
%include "libavutil/x86/x86util.asm"
29
 
30
SECTION_RODATA
31
cextern pb_1
32
 
33
SECTION_TEXT
34
 
35
; put_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
;
; Each output row is PAVGB(pixels[x], pixels[x+1]) over one MMX register
; (8 bytes), written to block.
;
; Argument names: dst = block, src = pixels, stride = line_size, h = h.
; Scratch:        stride2 = 2 * line_size.
; Four rows are produced per loop iteration and the loop exits only when the
; row counter reaches exactly zero, so h must be a positive multiple of 4.
%macro PUT_PIXELS8_X2 0
cglobal put_pixels8_x2, 4,5,0, dst, src, stride, h, stride2
    lea         stride2q, [strideq*2]
.loop:
    ; rows 0 and 1 of this group
    mova        m0, [srcq]
    mova        m1, [srcq+strideq]
    PAVGB       m0, [srcq+1]
    PAVGB       m1, [srcq+strideq+1]
    mova        [dstq], m0
    mova        [dstq+strideq], m1
    add         srcq, stride2q
    add         dstq, stride2q
    ; rows 2 and 3 of this group
    mova        m0, [srcq]
    mova        m1, [srcq+strideq]
    PAVGB       m0, [srcq+1]
    PAVGB       m1, [srcq+strideq+1]
    add         srcq, stride2q
    mova        [dstq], m0
    mova        [dstq+strideq], m1
    add         dstq, stride2q
    sub         hd, 4                   ; four rows done
    jne .loop
    REP_RET
%endmacro

INIT_MMX mmxext
PUT_PIXELS8_X2
INIT_MMX 3dnow
PUT_PIXELS8_X2
65
 
66
 
67
; put_pixels16_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
;
; Same operation as a horizontal byte average of pixels[x] and pixels[x+1],
; but each row is covered by two MMX registers: bytes 0..7 and bytes 8..15.
;
; Argument names: dst = block, src = pixels, stride = line_size, h = h.
; Scratch:        stride2 = 2 * line_size.
; Four rows are produced per loop iteration and the loop exits only when the
; row counter reaches exactly zero, so h must be a positive multiple of 4.
%macro PUT_PIXELS_16 0
cglobal put_pixels16_x2, 4,5,0, dst, src, stride, h, stride2
    lea         stride2q, [strideq*2]
.loop:
    ; rows 0 and 1: m0/m1 = left halves, m2/m3 = right halves
    mova        m0, [srcq]
    mova        m1, [srcq+strideq]
    mova        m2, [srcq+8]
    mova        m3, [srcq+strideq+8]
    PAVGB       m0, [srcq+1]
    PAVGB       m1, [srcq+strideq+1]
    PAVGB       m2, [srcq+9]
    PAVGB       m3, [srcq+strideq+9]
    mova        [dstq], m0
    mova        [dstq+strideq], m1
    mova        [dstq+8], m2
    mova        [dstq+strideq+8], m3
    add         srcq, stride2q
    add         dstq, stride2q
    ; rows 2 and 3: same pattern
    mova        m0, [srcq]
    mova        m1, [srcq+strideq]
    mova        m2, [srcq+8]
    mova        m3, [srcq+strideq+8]
    PAVGB       m0, [srcq+1]
    PAVGB       m1, [srcq+strideq+1]
    PAVGB       m2, [srcq+9]
    PAVGB       m3, [srcq+strideq+9]
    add         srcq, stride2q
    mova        [dstq], m0
    mova        [dstq+strideq], m1
    mova        [dstq+8], m2
    mova        [dstq+strideq+8], m3
    add         dstq, stride2q
    sub         hd, 4                   ; four rows done
    jne .loop
    REP_RET
%endmacro

INIT_MMX mmxext
PUT_PIXELS_16
INIT_MMX 3dnow
PUT_PIXELS_16
109
 
110
 
111
; put_no_rnd_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
;
; Horizontal average of pixels[x] and pixels[x+1] where pb_1 is first
; subtracted (unsigned, saturating) from the pixels[x] operand before PAVGB.
; NOTE(review): presumably this approximates a round-down average without
; being bit-exact; confirm against the C reference.
;
; Argument names: dst = block, src = pixels, stride = line_size, h = h.
; Scratch:        stride2 = 2 * line_size.
; m6 holds the external constant pb_1 for the whole function.
; Four rows are produced per loop iteration and the loop exits only when the
; row counter reaches exactly zero, so h must be a positive multiple of 4.
%macro PUT_NO_RND_PIXELS8_X2 0
cglobal put_no_rnd_pixels8_x2, 4,5,0, dst, src, stride, h, stride2
    mova        m6, [pb_1]
    lea         stride2q, [strideq*2]
.loop:
    ; rows 0 and 1: m0/m2 = pixels[x], m1/m3 = pixels[x+1]
    mova        m0, [srcq]
    mova        m2, [srcq+strideq]
    mova        m1, [srcq+1]
    mova        m3, [srcq+strideq+1]
    add         srcq, stride2q
    psubusb     m0, m6
    psubusb     m2, m6
    PAVGB       m0, m1
    PAVGB       m2, m3
    mova        [dstq], m0
    mova        [dstq+strideq], m2
    ; rows 2 and 3 (src already points at them, dst is advanced below)
    mova        m0, [srcq]
    mova        m1, [srcq+1]
    mova        m2, [srcq+strideq]
    mova        m3, [srcq+strideq+1]
    add         dstq, stride2q
    add         srcq, stride2q
    psubusb     m0, m6
    psubusb     m2, m6
    PAVGB       m0, m1
    PAVGB       m2, m3
    mova        [dstq], m0
    mova        [dstq+strideq], m2
    add         dstq, stride2q
    sub         hd, 4                   ; four rows done
    jne .loop
    REP_RET
%endmacro

INIT_MMX mmxext
PUT_NO_RND_PIXELS8_X2
INIT_MMX 3dnow
PUT_NO_RND_PIXELS8_X2
150
 
151
 
152
; put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
;
; Horizontal average of pixels[x] and pixels[x+1] computed as
; NOT(PAVGB(NOT a, NOT b)): both inputs are inverted with an all-ones mask,
; averaged, and the result is inverted again.
; NOTE(review): if PAVGB rounds up like the pavgb instruction does, this
; produces the average rounded down; confirm against the PAVGB macro.
;
; Argument names: dst = block, src = pixels, stride = line_size, h = h.
; Scratch:        stride3 = 3 * line_size.
; m6 is the all-ones mask (pcmpeqb of a register with itself).
; Four rows are produced per iteration; the loop continues while the
; remaining row count is still greater than zero.
%macro PUT_NO_RND_PIXELS8_X2_EXACT 0
cglobal put_no_rnd_pixels8_x2_exact, 4,5,0, dst, src, stride, h, stride3
    lea         stride3q, [strideq*3]
    pcmpeqb     m6, m6
.loop:
    ; rows 0 and 1: m0/m2 = pixels[x], m1/m3 = pixels[x+1]
    mova        m0, [srcq]
    mova        m2, [srcq+strideq]
    mova        m1, [srcq+1]
    mova        m3, [srcq+strideq+1]
    pxor        m0, m6
    pxor        m2, m6
    pxor        m1, m6
    pxor        m3, m6
    PAVGB       m0, m1
    PAVGB       m2, m3
    pxor        m0, m6
    pxor        m2, m6
    mova        [dstq], m0
    mova        [dstq+strideq], m2
    ; rows 2 and 3, addressed relative to the unchanged base pointers
    mova        m0, [srcq+strideq*2]
    mova        m1, [srcq+strideq*2+1]
    mova        m2, [srcq+stride3q]
    mova        m3, [srcq+stride3q+1]
    pxor        m0, m6
    pxor        m1, m6
    pxor        m2, m6
    pxor        m3, m6
    PAVGB       m0, m1
    PAVGB       m2, m3
    pxor        m0, m6
    pxor        m2, m6
    mova        [dstq+strideq*2], m0
    mova        [dstq+stride3q], m2
    ; step both pointers down four rows
    lea         srcq, [srcq+strideq*4]
    lea         dstq, [dstq+strideq*4]
    sub         hd, 4
    jg .loop
    REP_RET
%endmacro

INIT_MMX mmxext
PUT_NO_RND_PIXELS8_X2_EXACT
INIT_MMX 3dnow
PUT_NO_RND_PIXELS8_X2_EXACT
197
 
198
 
199
; put_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
;
; Vertical average: output row n is PAVGB(source row n, source row n+1).
; Every source row is loaded once and carried in a register to the next
; average (m0 -> m1 -> m2 -> m1 -> m0 within one iteration).
;
; Argument names: dst = block, src = pixels, stride = line_size, h = h.
; Scratch:        stride2 = 2 * line_size.
; dst is moved back by one row before the loop so that the stores can use
; the same [+stride] / [+stride2] offsets as the source loads.
; Four rows are produced per loop iteration and the loop exits only when the
; row counter reaches exactly zero, so h must be a positive multiple of 4.
%macro PUT_PIXELS8_Y2 0
cglobal put_pixels8_y2, 4,5,0, dst, src, stride, h, stride2
    lea         stride2q, [strideq*2]
    mova        m0, [srcq]              ; first source row
    sub         dstq, strideq
.loop:
    mova        m1, [srcq+strideq]      ; row n+1
    mova        m2, [srcq+stride2q]     ; row n+2
    add         srcq, stride2q
    PAVGB       m0, m1                  ; out n   = avg(n,   n+1)
    PAVGB       m1, m2                  ; out n+1 = avg(n+1, n+2)
    mova        [dstq+strideq], m0
    mova        [dstq+stride2q], m1
    mova        m1, [srcq+strideq]      ; row n+3
    mova        m0, [srcq+stride2q]     ; row n+4, reused by the next iteration
    add         dstq, stride2q
    add         srcq, stride2q
    PAVGB       m2, m1                  ; out n+2 = avg(n+2, n+3)
    PAVGB       m1, m0                  ; out n+3 = avg(n+3, n+4)
    mova        [dstq+strideq], m2
    mova        [dstq+stride2q], m1
    add         dstq, stride2q
    sub         hd, 4                   ; four rows done
    jne .loop
    REP_RET
%endmacro

INIT_MMX mmxext
PUT_PIXELS8_Y2
INIT_MMX 3dnow
PUT_PIXELS8_Y2
231
 
232
 
233
; put_no_rnd_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
;
; Vertical average of source rows n and n+1, where pb_1 is subtracted
; (unsigned, saturating) from every second source row (n+1, n+3) before it
; takes part in the two averages it contributes to.
; NOTE(review): presumably an approximation of the round-down average, not
; bit-exact; confirm against the C reference.
;
; Argument names: dst = block, src = pixels, stride = line_size, h = h.
; Scratch:        stride2 = 2 * line_size.
; m6 holds the external constant pb_1 for the whole function.
; dst is moved back by one row before the loop so that the stores can use
; the same [+stride] / [+stride2] offsets as the source loads.
; Four rows are produced per loop iteration and the loop exits only when the
; row counter reaches exactly zero, so h must be a positive multiple of 4.
%macro PUT_NO_RND_PIXELS8_Y2 0
cglobal put_no_rnd_pixels8_y2, 4,5,0, dst, src, stride, h, stride2
    mova        m6, [pb_1]
    lea         stride2q, [strideq+strideq]
    mova        m0, [srcq]              ; first source row
    sub         dstq, strideq
.loop:
    mova        m1, [srcq+strideq]      ; row n+1
    mova        m2, [srcq+stride2q]     ; row n+2
    add         srcq, stride2q
    psubusb     m1, m6                  ; bias row n+1
    PAVGB       m0, m1                  ; out n   from rows n,   n+1
    PAVGB       m1, m2                  ; out n+1 from rows n+1, n+2
    mova        [dstq+strideq], m0
    mova        [dstq+stride2q], m1
    mova        m1, [srcq+strideq]      ; row n+3
    mova        m0, [srcq+stride2q]     ; row n+4, reused by the next iteration
    add         dstq, stride2q
    add         srcq, stride2q
    psubusb     m1, m6                  ; bias row n+3
    PAVGB       m2, m1                  ; out n+2 from rows n+2, n+3
    PAVGB       m1, m0                  ; out n+3 from rows n+3, n+4
    mova        [dstq+strideq], m2
    mova        [dstq+stride2q], m1
    add         dstq, stride2q
    sub         hd, 4                   ; four rows done
    jne .loop
    REP_RET
%endmacro

INIT_MMX mmxext
PUT_NO_RND_PIXELS8_Y2
INIT_MMX 3dnow
PUT_NO_RND_PIXELS8_Y2
268
 
269
 
270
; put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
;
; Vertical average of source rows n and n+1 computed as
; NOT(PAVGB(NOT a, NOT b)). Source rows are kept in inverted form in the
; registers; only the averaged results are inverted back before the store.
; NOTE(review): if PAVGB rounds up like the pavgb instruction does, this
; produces the average rounded down; confirm against the PAVGB macro.
;
; Argument names: dst = block, src = pixels, stride = line_size, h = h.
; Scratch:        stride3 = 3 * line_size.
; m6 is the all-ones mask (pcmpeqb of a register with itself).
; src is advanced one row before the loop, so inside the loop [src] is
; source row n+1 while [dst] is output row n.
; Four rows are produced per iteration; the loop continues while the
; remaining row count is still greater than zero.
%macro PUT_NO_RND_PIXELS8_Y2_EXACT 0
cglobal put_no_rnd_pixels8_y2_exact, 4,5,0, dst, src, stride, h, stride3
    lea         stride3q, [strideq*3]
    mova        m0, [srcq]              ; first source row
    pcmpeqb     m6, m6
    add         srcq, strideq
    pxor        m0, m6                  ; keep it inverted
.loop:
    mova        m1, [srcq]              ; row n+1
    mova        m2, [srcq+strideq]      ; row n+2
    pxor        m1, m6
    pxor        m2, m6
    PAVGB       m0, m1                  ; rows n,   n+1 (inverted domain)
    PAVGB       m1, m2                  ; rows n+1, n+2 (inverted domain)
    pxor        m0, m6
    pxor        m1, m6
    mova        [dstq], m0
    mova        [dstq+strideq], m1
    mova        m1, [srcq+strideq*2]    ; row n+3
    mova        m0, [srcq+stride3q]     ; row n+4, reused by the next iteration
    pxor        m1, m6
    pxor        m0, m6
    PAVGB       m2, m1                  ; rows n+2, n+3 (inverted domain)
    PAVGB       m1, m0                  ; rows n+3, n+4 (inverted domain)
    pxor        m2, m6
    pxor        m1, m6
    mova        [dstq+strideq*2], m2
    mova        [dstq+stride3q], m1
    ; step both pointers down four rows
    lea         srcq, [srcq+strideq*4]
    lea         dstq, [dstq+strideq*4]
    sub         hd, 4
    jg .loop
    REP_RET
%endmacro

INIT_MMX mmxext
PUT_NO_RND_PIXELS8_Y2_EXACT
INIT_MMX 3dnow
PUT_NO_RND_PIXELS8_Y2_EXACT
310
 
311
 
312
; avg_pixels8(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
;
; Read-modify-write: every row of block is replaced by PAVGB(block, pixels)
; over one MMX register (8 bytes).
;
; Argument names: dst = block, src = pixels, stride = line_size, h = h.
; Scratch:        stride2 = 2 * line_size.
; Four rows are produced per loop iteration and the loop exits only when the
; row counter reaches exactly zero, so h must be a positive multiple of 4.
; Only a 3dnow instance is generated from this macro in this file.
%macro AVG_PIXELS8 0
cglobal avg_pixels8, 4,5,0, dst, src, stride, h, stride2
    lea         stride2q, [strideq*2]
.loop:
    ; rows 0 and 1
    mova        m0, [dstq]
    mova        m1, [dstq+strideq]
    PAVGB       m0, [srcq]
    PAVGB       m1, [srcq+strideq]
    mova        [dstq], m0
    mova        [dstq+strideq], m1
    add         srcq, stride2q
    add         dstq, stride2q
    ; rows 2 and 3
    mova        m0, [dstq]
    mova        m1, [dstq+strideq]
    PAVGB       m0, [srcq]
    PAVGB       m1, [srcq+strideq]
    add         srcq, stride2q
    mova        [dstq], m0
    mova        [dstq+strideq], m1
    add         dstq, stride2q
    sub         hd, 4                   ; four rows done
    jne .loop
    REP_RET
%endmacro

INIT_MMX 3dnow
AVG_PIXELS8
340
 
341
 
342
; avg_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
;
; Two chained averages per row: first PAVGB(pixels[x], pixels[x+1]), then
; PAVGB of that result with the bytes already in block; the result is
; written back to block.
;
; Argument names: dst = block, src = pixels, stride = line_size, h = h.
; Scratch:        stride2 = 2 * line_size.
; Four rows are produced per loop iteration and the loop exits only when the
; row counter reaches exactly zero, so h must be a positive multiple of 4.
%macro AVG_PIXELS8_X2 0
cglobal avg_pixels8_x2, 4,5,0, dst, src, stride, h, stride2
    lea         stride2q, [strideq*2]
.loop:
    ; rows 0 and 1
    mova        m0, [srcq]
    mova        m2, [srcq+strideq]
    PAVGB       m0, [srcq+1]            ; horizontal average
    PAVGB       m2, [srcq+strideq+1]
    PAVGB       m0, [dstq]              ; merge with existing block contents
    PAVGB       m2, [dstq+strideq]
    add         srcq, stride2q
    mova        [dstq], m0
    mova        [dstq+strideq], m2
    ; rows 2 and 3
    mova        m0, [srcq]
    mova        m2, [srcq+strideq]
    PAVGB       m0, [srcq+1]
    PAVGB       m2, [srcq+strideq+1]
    add         dstq, stride2q
    add         srcq, stride2q
    PAVGB       m0, [dstq]
    PAVGB       m2, [dstq+strideq]
    mova        [dstq], m0
    mova        [dstq+strideq], m2
    add         dstq, stride2q
    sub         hd, 4                   ; four rows done
    jne .loop
    REP_RET
%endmacro

INIT_MMX mmxext
AVG_PIXELS8_X2
INIT_MMX 3dnow
AVG_PIXELS8_X2
376
 
377
 
378
; avg_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
;
; Two chained averages per row: first PAVGB(source row n, source row n+1),
; then PAVGB of that result with the bytes already in block; the result is
; written back to block. Source rows are carried between averages in
; m0/m1/m2; m3/m4 hold the existing block rows.
;
; Argument names: dst = block, src = pixels, stride = line_size, h = h.
; Scratch:        stride2 = 2 * line_size.
; dst is moved back by one row before the loop so that block rows are
; addressed with the same [+stride] / [+stride2] offsets as source rows.
; Four rows are produced per loop iteration and the loop exits only when the
; row counter reaches exactly zero, so h must be a positive multiple of 4.
%macro AVG_PIXELS8_Y2 0
cglobal avg_pixels8_y2, 4,5,0, dst, src, stride, h, stride2
    lea         stride2q, [strideq*2]
    mova        m0, [srcq]              ; first source row
    sub         dstq, strideq
.loop:
    mova        m1, [srcq+strideq]      ; row n+1
    mova        m2, [srcq+stride2q]     ; row n+2
    add         srcq, stride2q
    PAVGB       m0, m1                  ; vertical avg of rows n,   n+1
    PAVGB       m1, m2                  ; vertical avg of rows n+1, n+2
    mova        m3, [dstq+strideq]      ; existing block rows n, n+1
    mova        m4, [dstq+stride2q]
    PAVGB       m0, m3
    PAVGB       m1, m4
    mova        [dstq+strideq], m0
    mova        [dstq+stride2q], m1
    mova        m1, [srcq+strideq]      ; row n+3
    mova        m0, [srcq+stride2q]     ; row n+4, reused by the next iteration
    PAVGB       m2, m1                  ; vertical avg of rows n+2, n+3
    PAVGB       m1, m0                  ; vertical avg of rows n+3, n+4
    add         dstq, stride2q
    add         srcq, stride2q
    mova        m3, [dstq+strideq]      ; existing block rows n+2, n+3
    mova        m4, [dstq+stride2q]
    PAVGB       m2, m3
    PAVGB       m1, m4
    mova        [dstq+strideq], m2
    mova        [dstq+stride2q], m1
    add         dstq, stride2q
    sub         hd, 4                   ; four rows done
    jne .loop
    REP_RET
%endmacro

INIT_MMX mmxext
AVG_PIXELS8_Y2
INIT_MMX 3dnow
AVG_PIXELS8_Y2
418
 
419
 
420
; avg_pixels8_xy2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
;
; Per output row: horizontal average of pixels[x] and pixels[x+1] for two
; consecutive source rows, vertical average of those two results, then an
; average with the bytes already in block; the result is written to block.
;
; Registers: r0 = block, r1 = pixels, r2 = line_size, r3d = h,
;            r4 = 2 * line_size, m6 = external constant pb_1.
; The horizontally averaged row is carried across rows/iterations in
; m0/m1/m2, so every source row is loaded and averaged only once.
; Four rows are produced per loop iteration and the loop exits only when the
; row counter reaches exactly zero, so h must be a positive multiple of 4.
;
; All byte averages go through the PAVGB macro rather than a hard-coded
; pavgb: this macro is also instantiated for 3dnow, and pavgb is an
; MMXEXT/SSE instruction that 3DNow!-only CPUs do not implement.
%macro AVG_PIXELS8_XY2 0
cglobal avg_pixels8_xy2, 4,5
    mova         m6, [pb_1]
    lea          r4, [r2*2]
    mova         m0, [r1]
    PAVGB        m0, [r1+1]             ; h-avg of the first source row
.loop:
    mova         m2, [r1+r4]            ; source row n+2
    mova         m1, [r1+r2]            ; source row n+1
    ; NOTE(review): only this row of each group of four gets the -1 bias;
    ; presumably a rounding compensation for the chained averages - confirm.
    psubusb      m2, m6
    PAVGB        m1, [r1+r2+1]          ; h-avg of row n+1
    PAVGB        m2, [r1+r4+1]          ; h-avg of row n+2
    add          r1, r4
    PAVGB        m0, m1                 ; v-avg of rows n,   n+1
    PAVGB        m1, m2                 ; v-avg of rows n+1, n+2
    PAVGB        m0, [r0]               ; merge with existing block rows
    PAVGB        m1, [r0+r2]
    mova       [r0], m0
    mova    [r0+r2], m1
    mova         m1, [r1+r2]            ; source row n+3
    mova         m0, [r1+r4]            ; source row n+4
    PAVGB        m1, [r1+r2+1]          ; h-avg of row n+3
    PAVGB        m0, [r1+r4+1]          ; h-avg of row n+4, reused next iteration
    add          r0, r4
    add          r1, r4
    PAVGB        m2, m1                 ; v-avg of rows n+2, n+3
    PAVGB        m1, m0                 ; v-avg of rows n+3, n+4
    PAVGB        m2, [r0]               ; merge with existing block rows
    PAVGB        m1, [r0+r2]
    mova       [r0], m2
    mova    [r0+r2], m1
    add          r0, r4
    sub         r3d, 4                  ; four rows done
    jne .loop
    REP_RET
%endmacro

INIT_MMX mmxext
AVG_PIXELS8_XY2
INIT_MMX 3dnow
AVG_PIXELS8_XY2