Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
;******************************************************************************
;* SIMD optimized SAO functions for HEVC decoding
;*
;* Copyright (c) 2013 Pierre-Edouard LEPERE
;* Copyright (c) 2014 James Almer
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
23
 
24
%include "libavutil/x86/x86util.asm"
25
 
26
SECTION_RODATA 32
27
 
28
pw_mask10: times 16 dw 0x03FF
29
pw_mask12: times 16 dw 0x0FFF
30
pw_m2:     times 16 dw -2
31
pb_edge_shuffle: times 2 db 1, 2, 0, 3, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
32
pb_eo:                   db -1, 0, 1, 0, 0, -1, 0, 1, -1, -1, 1, 1, 1, -1, -1, 1
33
cextern pw_m1
34
cextern pw_1
35
cextern pw_2
36
cextern pb_1
37
cextern pb_2
38
 
39
SECTION .text
40
 
41
%define MAX_PB_SIZE  64
42
%define PADDING_SIZE 32 ; AV_INPUT_BUFFER_PADDING_SIZE
43
 
44
;******************************************************************************
45
;SAO Band Filter
46
;******************************************************************************
47
 
48
%macro HEVC_SAO_BAND_FILTER_INIT 1
49
    and            leftq, 31
50
    movd             xm0, leftd
51
    add            leftq, 1
52
    and            leftq, 31
53
    movd             xm1, leftd
54
    add            leftq, 1
55
    and            leftq, 31
56
    movd             xm2, leftd
57
    add            leftq, 1
58
    and            leftq, 31
59
    movd             xm3, leftd
60
 
61
    SPLATW            m0, xm0
62
    SPLATW            m1, xm1
63
    SPLATW            m2, xm2
64
    SPLATW            m3, xm3
65
%if mmsize > 16
66
    SPLATW            m4, [offsetq + 2]
67
    SPLATW            m5, [offsetq + 4]
68
    SPLATW            m6, [offsetq + 6]
69
    SPLATW            m7, [offsetq + 8]
70
%else
71
    movq              m7, [offsetq + 2]
72
    SPLATW            m4, m7, 0
73
    SPLATW            m5, m7, 1
74
    SPLATW            m6, m7, 2
75
    SPLATW            m7, m7, 3
76
%endif
77
 
78
%if ARCH_X86_64
79
%if %1 > 8
80
    mova             m13, [pw_mask %+ %1]
81
%endif
82
    pxor             m14, m14
83
 
84
%else ; ARCH_X86_32
85
    mova  [rsp+mmsize*0], m0
86
    mova  [rsp+mmsize*1], m1
87
    mova  [rsp+mmsize*2], m2
88
    mova  [rsp+mmsize*3], m3
89
    mova  [rsp+mmsize*4], m4
90
    mova  [rsp+mmsize*5], m5
91
    mova  [rsp+mmsize*6], m6
92
    pxor              m0, m0
93
%if %1 > 8
94
    mova              m1, [pw_mask %+ %1]
95
%endif
96
    %assign MMSIZE mmsize
97
    %define m14 m0
98
    %define m13 m1
99
    %define  m9 m2
100
    %define  m8 m3
101
%endif ; ARCH
102
DEFINE_ARGS dst, src, dststride, srcstride, offset, height
103
    mov          heightd, r7m
104
%endmacro
105
 
106
%macro HEVC_SAO_BAND_FILTER_COMPUTE 3
107
    psraw             %2, %3, %1-5
108
%if ARCH_X86_64
109
    pcmpeqw          m10, %2, m0
110
    pcmpeqw          m11, %2, m1
111
    pcmpeqw          m12, %2, m2
112
    pcmpeqw           %2, m3
113
    pand             m10, m4
114
    pand             m11, m5
115
    pand             m12, m6
116
    pand              %2, m7
117
    por              m10, m11
118
    por              m12, %2
119
    por              m10, m12
120
    paddw             %3, m10
121
%else ; ARCH_X86_32
122
    pcmpeqw           m4, %2, [rsp+MMSIZE*0]
123
    pcmpeqw           m5, %2, [rsp+MMSIZE*1]
124
    pcmpeqw           m6, %2, [rsp+MMSIZE*2]
125
    pcmpeqw           %2, [rsp+MMSIZE*3]
126
    pand              m4, [rsp+MMSIZE*4]
127
    pand              m5, [rsp+MMSIZE*5]
128
    pand              m6, [rsp+MMSIZE*6]
129
    pand              %2, m7
130
    por               m4, m5
131
    por               m6, %2
132
    por               m4, m6
133
    paddw             %3, m4
134
%endif ; ARCH
135
%endmacro
136
 
137
;void ff_hevc_sao_band_filter_<width>_8_<opt>(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src,
;                                             int16_t *sao_offset_val, int sao_left_class, int width, int height);
139
%macro HEVC_SAO_BAND_FILTER_8 2
140
cglobal hevc_sao_band_filter_%1_8, 6, 6, 15, 7*mmsize*ARCH_X86_32, dst, src, dststride, srcstride, offset, left
141
    HEVC_SAO_BAND_FILTER_INIT 8
142
 
143
align 16
144
.loop:
145
%if %1 == 8
146
    movq              m8, [srcq]
147
    punpcklbw         m8, m14
148
    HEVC_SAO_BAND_FILTER_COMPUTE 8, m9, m8
149
    packuswb          m8, m14
150
    movq          [dstq], m8
151
%endif ; %1 == 8
152
 
153
%assign i 0
154
%rep %2
155
    mova             m13, [srcq + i]
156
    punpcklbw         m8, m13, m14
157
    HEVC_SAO_BAND_FILTER_COMPUTE 8, m9,  m8
158
    punpckhbw        m13, m14
159
    HEVC_SAO_BAND_FILTER_COMPUTE 8, m9, m13
160
    packuswb          m8, m13
161
    mova      [dstq + i], m8
162
%assign i i+mmsize
163
%endrep
164
 
165
%if %1 == 48
166
INIT_XMM cpuname
167
 
168
    mova             m13, [srcq + i]
169
    punpcklbw         m8, m13, m14
170
    HEVC_SAO_BAND_FILTER_COMPUTE 8, m9,  m8
171
    punpckhbw        m13, m14
172
    HEVC_SAO_BAND_FILTER_COMPUTE 8, m9, m13
173
    packuswb          m8, m13
174
    mova      [dstq + i], m8
175
%if cpuflag(avx2)
176
INIT_YMM cpuname
177
%endif
178
%endif ; %1 == 48
179
 
180
    add             dstq, dststrideq             ; dst += dststride
181
    add             srcq, srcstrideq             ; src += srcstride
182
    dec          heightd                         ; cmp height
183
    jnz               .loop                      ; height loop
184
    REP_RET
185
%endmacro
186
 
187
;void ff_hevc_sao_band_filter_<width>_<depth>_<opt>(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src,
;                                                   int16_t *sao_offset_val, int sao_left_class, int width, int height);
189
%macro HEVC_SAO_BAND_FILTER_16 3
190
cglobal hevc_sao_band_filter_%2_%1, 6, 6, 15, 7*mmsize*ARCH_X86_32, dst, src, dststride, srcstride, offset, left
191
    HEVC_SAO_BAND_FILTER_INIT %1
192
 
193
align 16
194
.loop:
195
%if %2 == 8
196
    movu              m8, [srcq]
197
    HEVC_SAO_BAND_FILTER_COMPUTE %1, m9, m8
198
    CLIPW             m8, m14, m13
199
    movu          [dstq], m8
200
%endif
201
 
202
%assign i 0
203
%rep %3
204
    mova              m8, [srcq + i]
205
    HEVC_SAO_BAND_FILTER_COMPUTE %1, m9, m8
206
    CLIPW             m8, m14, m13
207
    mova      [dstq + i], m8
208
 
209
    mova              m9, [srcq + i + mmsize]
210
    HEVC_SAO_BAND_FILTER_COMPUTE %1, m8, m9
211
    CLIPW             m9, m14, m13
212
    mova      [dstq + i + mmsize], m9
213
%assign i i+mmsize*2
214
%endrep
215
 
216
%if %2 == 48
217
INIT_XMM cpuname
218
    mova              m8, [srcq + i]
219
    HEVC_SAO_BAND_FILTER_COMPUTE %1, m9, m8
220
    CLIPW             m8, m14, m13
221
    mova      [dstq + i], m8
222
 
223
    mova              m9, [srcq + i + mmsize]
224
    HEVC_SAO_BAND_FILTER_COMPUTE %1, m8, m9
225
    CLIPW             m9, m14, m13
226
    mova      [dstq + i + mmsize], m9
227
%if cpuflag(avx2)
228
INIT_YMM cpuname
229
%endif
230
%endif ; %1 == 48
231
 
232
    add             dstq, dststrideq
233
    add             srcq, srcstrideq
234
    dec          heightd
235
    jg .loop
236
    REP_RET
237
%endmacro
238
 
239
%macro HEVC_SAO_BAND_FILTER_FUNCS 0
240
HEVC_SAO_BAND_FILTER_8       8, 0
241
HEVC_SAO_BAND_FILTER_8      16, 1
242
HEVC_SAO_BAND_FILTER_8      32, 2
243
HEVC_SAO_BAND_FILTER_8      48, 2
244
HEVC_SAO_BAND_FILTER_8      64, 4
245
 
246
HEVC_SAO_BAND_FILTER_16 10,  8, 0
247
HEVC_SAO_BAND_FILTER_16 10, 16, 1
248
HEVC_SAO_BAND_FILTER_16 10, 32, 2
249
HEVC_SAO_BAND_FILTER_16 10, 48, 2
250
HEVC_SAO_BAND_FILTER_16 10, 64, 4
251
 
252
HEVC_SAO_BAND_FILTER_16 12,  8, 0
253
HEVC_SAO_BAND_FILTER_16 12, 16, 1
254
HEVC_SAO_BAND_FILTER_16 12, 32, 2
255
HEVC_SAO_BAND_FILTER_16 12, 48, 2
256
HEVC_SAO_BAND_FILTER_16 12, 64, 4
257
%endmacro
258
 
259
INIT_XMM sse2
260
HEVC_SAO_BAND_FILTER_FUNCS
261
INIT_XMM avx
262
HEVC_SAO_BAND_FILTER_FUNCS
263
 
264
%if HAVE_AVX2_EXTERNAL
265
INIT_XMM avx2
266
HEVC_SAO_BAND_FILTER_8       8, 0
267
HEVC_SAO_BAND_FILTER_8      16, 1
268
INIT_YMM avx2
269
HEVC_SAO_BAND_FILTER_8      32, 1
270
HEVC_SAO_BAND_FILTER_8      48, 1
271
HEVC_SAO_BAND_FILTER_8      64, 2
272
 
273
INIT_XMM avx2
274
HEVC_SAO_BAND_FILTER_16 10,  8, 0
275
HEVC_SAO_BAND_FILTER_16 10, 16, 1
276
INIT_YMM avx2
277
HEVC_SAO_BAND_FILTER_16 10, 32, 1
278
HEVC_SAO_BAND_FILTER_16 10, 48, 1
279
HEVC_SAO_BAND_FILTER_16 10, 64, 2
280
 
281
INIT_XMM avx2
282
HEVC_SAO_BAND_FILTER_16 12,  8, 0
283
HEVC_SAO_BAND_FILTER_16 12, 16, 1
284
INIT_YMM avx2
285
HEVC_SAO_BAND_FILTER_16 12, 32, 1
286
HEVC_SAO_BAND_FILTER_16 12, 48, 1
287
HEVC_SAO_BAND_FILTER_16 12, 64, 2
288
%endif
289
 
290
;******************************************************************************
291
;SAO Edge Filter
292
;******************************************************************************
293
 
294
%define EDGE_SRCSTRIDE 2 * MAX_PB_SIZE + PADDING_SIZE
295
 
296
%macro HEVC_SAO_EDGE_FILTER_INIT 1
297
%if WIN64
298
    movsxd           eoq, dword eom
299
%elif ARCH_X86_64
300
    movsxd           eoq, eod
301
%else
302
    mov              eoq, r4m
303
%endif
304
    lea            tmp2q, [pb_eo]
305
    movsx      a_strideq, byte [tmp2q+eoq*4+1]
306
    movsx      b_strideq, byte [tmp2q+eoq*4+3]
307
    imul       a_strideq, EDGE_SRCSTRIDE>>%1
308
    imul       b_strideq, EDGE_SRCSTRIDE>>%1
309
    movsx           tmpq, byte [tmp2q+eoq*4]
310
    add        a_strideq, tmpq
311
    movsx           tmpq, byte [tmp2q+eoq*4+2]
312
    add        b_strideq, tmpq
313
%endmacro
314
 
315
%macro HEVC_SAO_EDGE_FILTER_COMPUTE_8 1
316
    pminub            m4, m1, m2
317
    pminub            m5, m1, m3
318
    pcmpeqb           m2, m4
319
    pcmpeqb           m3, m5
320
    pcmpeqb           m4, m1
321
    pcmpeqb           m5, m1
322
    psubb             m4, m2
323
    psubb             m5, m3
324
    paddb             m4, m6
325
    paddb             m4, m5
326
 
327
    pshufb            m2, m0, m4
328
%if %1 > 8
329
    punpckhbw         m5, m7, m1
330
    punpckhbw         m4, m2, m7
331
    punpcklbw         m3, m7, m1
332
    punpcklbw         m2, m7
333
    pmaddubsw         m5, m4
334
    pmaddubsw         m3, m2
335
    packuswb          m3, m5
336
%else
337
    punpcklbw         m3, m7, m1
338
    punpcklbw         m2, m7
339
    pmaddubsw         m3, m2
340
    packuswb          m3, m3
341
%endif
342
%endmacro
343
 
344
;void ff_hevc_sao_edge_filter_<width>_8_<opt>(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val,
;                                             int eo, int width, int height);
346
%macro HEVC_SAO_EDGE_FILTER_8 2-3
347
%if ARCH_X86_64
348
cglobal hevc_sao_edge_filter_%1_8, 4, 9, 8, dst, src, dststride, offset, eo, a_stride, b_stride, height, tmp
349
%define tmp2q heightq
350
    HEVC_SAO_EDGE_FILTER_INIT 0
351
    mov          heightd, r6m
352
 
353
%else ; ARCH_X86_32
354
cglobal hevc_sao_edge_filter_%1_8, 1, 6, 8, dst, src, dststride, a_stride, b_stride, height
355
%define eoq   srcq
356
%define tmpq  heightq
357
%define tmp2q dststrideq
358
%define offsetq heightq
359
    HEVC_SAO_EDGE_FILTER_INIT 0
360
    mov             srcq, srcm
361
    mov          offsetq, r3m
362
    mov       dststrideq, dststridem
363
%endif ; ARCH
364
 
365
%if mmsize > 16
366
    vbroadcasti128    m0, [offsetq]
367
%else
368
    movu              m0, [offsetq]
369
%endif
370
    mova              m1, [pb_edge_shuffle]
371
    packsswb          m0, m0
372
    mova              m7, [pb_1]
373
    pshufb            m0, m1
374
    mova              m6, [pb_2]
375
%if ARCH_X86_32
376
    mov          heightd, r6m
377
%endif
378
 
379
align 16
380
.loop:
381
 
382
%if %1 == 8
383
    movq              m1, [srcq]
384
    movq              m2, [srcq + a_strideq]
385
    movq              m3, [srcq + b_strideq]
386
    HEVC_SAO_EDGE_FILTER_COMPUTE_8 %1
387
    movq          [dstq], m3
388
%endif
389
 
390
%assign i 0
391
%rep %2
392
    mova              m1, [srcq + i]
393
    movu              m2, [srcq + a_strideq + i]
394
    movu              m3, [srcq + b_strideq + i]
395
    HEVC_SAO_EDGE_FILTER_COMPUTE_8 %1
396
    mov%3     [dstq + i], m3
397
%assign i i+mmsize
398
%endrep
399
 
400
%if %1 == 48
401
INIT_XMM cpuname
402
 
403
    mova              m1, [srcq + i]
404
    movu              m2, [srcq + a_strideq + i]
405
    movu              m3, [srcq + b_strideq + i]
406
    HEVC_SAO_EDGE_FILTER_COMPUTE_8 %1
407
    mova      [dstq + i], m3
408
%if cpuflag(avx2)
409
INIT_YMM cpuname
410
%endif
411
%endif
412
 
413
    add             dstq, dststrideq
414
    add             srcq, EDGE_SRCSTRIDE
415
    dec          heightd
416
    jg .loop
417
    RET
418
%endmacro
419
 
420
%macro PMINUW 4
421
%if cpuflag(sse4)
422
    pminuw            %1, %2, %3
423
%else
424
    psubusw           %4, %2, %3
425
    psubw             %1, %2, %4
426
%endif
427
%endmacro
428
 
429
%macro HEVC_SAO_EDGE_FILTER_COMPUTE_10 0
430
    PMINUW            m4, m1, m2, m6
431
    PMINUW            m5, m1, m3, m7
432
    pcmpeqw           m2, m4
433
    pcmpeqw           m3, m5
434
    pcmpeqw           m4, m1
435
    pcmpeqw           m5, m1
436
    psubw             m4, m2
437
    psubw             m5, m3
438
 
439
    paddw             m4, m5
440
    pcmpeqw           m2, m4, [pw_m2]
441
%if ARCH_X86_64
442
    pcmpeqw           m3, m4, m13
443
    pcmpeqw           m5, m4, m0
444
    pcmpeqw           m6, m4, m14
445
    pcmpeqw           m7, m4, m15
446
    pand              m2, m8
447
    pand              m3, m9
448
    pand              m5, m10
449
    pand              m6, m11
450
    pand              m7, m12
451
%else
452
    pcmpeqw           m3, m4, [pw_m1]
453
    pcmpeqw           m5, m4, m0
454
    pcmpeqw           m6, m4, [pw_1]
455
    pcmpeqw           m7, m4, [pw_2]
456
    pand              m2, [rsp+MMSIZE*0]
457
    pand              m3, [rsp+MMSIZE*1]
458
    pand              m5, [rsp+MMSIZE*2]
459
    pand              m6, [rsp+MMSIZE*3]
460
    pand              m7, [rsp+MMSIZE*4]
461
%endif
462
    paddw             m2, m3
463
    paddw             m5, m6
464
    paddw             m2, m7
465
    paddw             m2, m1
466
    paddw             m2, m5
467
%endmacro
468
 
469
;void ff_hevc_sao_edge_filter_<width>_<depth>_<opt>(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val,
;                                                   int eo, int width, int height);
471
%macro HEVC_SAO_EDGE_FILTER_16 3
472
%if ARCH_X86_64
473
cglobal hevc_sao_edge_filter_%2_%1, 4, 9, 16, dst, src, dststride, offset, eo, a_stride, b_stride, height, tmp
474
%define tmp2q heightq
475
    HEVC_SAO_EDGE_FILTER_INIT 1
476
    mov          heightd, r6m
477
    add        a_strideq, a_strideq
478
    add        b_strideq, b_strideq
479
 
480
%else ; ARCH_X86_32
481
cglobal hevc_sao_edge_filter_%2_%1, 1, 6, 8, 5*mmsize, dst, src, dststride, a_stride, b_stride, height
482
%assign MMSIZE mmsize
483
%define eoq   srcq
484
%define tmpq  heightq
485
%define tmp2q dststrideq
486
%define offsetq heightq
487
%define m8 m1
488
%define m9 m2
489
%define m10 m3
490
%define m11 m4
491
%define m12 m5
492
    HEVC_SAO_EDGE_FILTER_INIT 1
493
    mov             srcq, srcm
494
    mov          offsetq, r3m
495
    mov       dststrideq, dststridem
496
    add        a_strideq, a_strideq
497
    add        b_strideq, b_strideq
498
 
499
%endif ; ARCH
500
 
501
%if cpuflag(avx2)
502
    SPLATW            m8, [offsetq+2]
503
    SPLATW            m9, [offsetq+4]
504
    SPLATW           m10, [offsetq+0]
505
    SPLATW           m11, [offsetq+6]
506
    SPLATW           m12, [offsetq+8]
507
%else
508
    movq             m10, [offsetq+0]
509
    movd             m12, [offsetq+6]
510
    SPLATW            m8, xm10, 1
511
    SPLATW            m9, xm10, 2
512
    SPLATW           m10, xm10, 0
513
    SPLATW           m11, xm12, 0
514
    SPLATW           m12, xm12, 1
515
%endif
516
    pxor              m0, m0
517
%if ARCH_X86_64
518
    mova             m13, [pw_m1]
519
    mova             m14, [pw_1]
520
    mova             m15, [pw_2]
521
%else
522
    mov          heightd, r6m
523
    mova  [rsp+mmsize*0], m8
524
    mova  [rsp+mmsize*1], m9
525
    mova  [rsp+mmsize*2], m10
526
    mova  [rsp+mmsize*3], m11
527
    mova  [rsp+mmsize*4], m12
528
%endif
529
 
530
align 16
531
.loop:
532
 
533
%if %2 == 8
534
    mova              m1, [srcq]
535
    movu              m2, [srcq+a_strideq]
536
    movu              m3, [srcq+b_strideq]
537
 
538
    HEVC_SAO_EDGE_FILTER_COMPUTE_10
539
    CLIPW             m2, m0, [pw_mask %+ %1]
540
    movu          [dstq], m2
541
%endif
542
 
543
%assign i 0
544
%rep %3
545
    mova              m1, [srcq + i]
546
    movu              m2, [srcq+a_strideq + i]
547
    movu              m3, [srcq+b_strideq + i]
548
    HEVC_SAO_EDGE_FILTER_COMPUTE_10
549
    CLIPW             m2, m0, [pw_mask %+ %1]
550
    mova      [dstq + i], m2
551
 
552
    mova              m1, [srcq + i + mmsize]
553
    movu              m2, [srcq+a_strideq + i + mmsize]
554
    movu              m3, [srcq+b_strideq + i + mmsize]
555
    HEVC_SAO_EDGE_FILTER_COMPUTE_10
556
    CLIPW             m2, m0, [pw_mask %+ %1]
557
    mova [dstq + i + mmsize], m2
558
%assign i i+mmsize*2
559
%endrep
560
 
561
%if %2 == 48
562
INIT_XMM cpuname
563
    mova              m1, [srcq + i]
564
    movu              m2, [srcq+a_strideq + i]
565
    movu              m3, [srcq+b_strideq + i]
566
    HEVC_SAO_EDGE_FILTER_COMPUTE_10
567
    CLIPW             m2, m0, [pw_mask %+ %1]
568
    mova              [dstq + i], m2
569
 
570
    mova              m1, [srcq + i + mmsize]
571
    movu              m2, [srcq+a_strideq + i + mmsize]
572
    movu              m3, [srcq+b_strideq + i + mmsize]
573
    HEVC_SAO_EDGE_FILTER_COMPUTE_10
574
    CLIPW             m2, m0, [pw_mask %+ %1]
575
    mova [dstq + i + mmsize], m2
576
%if cpuflag(avx2)
577
INIT_YMM cpuname
578
%endif
579
%endif
580
 
581
    add             dstq, dststrideq
582
    add             srcq, EDGE_SRCSTRIDE
583
    dec          heightd
584
    jg .loop
585
    RET
586
%endmacro
587
 
588
INIT_XMM ssse3
589
HEVC_SAO_EDGE_FILTER_8       8, 0
590
HEVC_SAO_EDGE_FILTER_8      16, 1, a
591
HEVC_SAO_EDGE_FILTER_8      32, 2, a
592
HEVC_SAO_EDGE_FILTER_8      48, 2, a
593
HEVC_SAO_EDGE_FILTER_8      64, 4, a
594
 
595
%if HAVE_AVX2_EXTERNAL
596
INIT_YMM avx2
597
HEVC_SAO_EDGE_FILTER_8      32, 1, a
598
HEVC_SAO_EDGE_FILTER_8      48, 1, u
599
HEVC_SAO_EDGE_FILTER_8      64, 2, a
600
%endif
601
 
602
INIT_XMM sse2
603
HEVC_SAO_EDGE_FILTER_16 10,  8, 0
604
HEVC_SAO_EDGE_FILTER_16 10, 16, 1
605
HEVC_SAO_EDGE_FILTER_16 10, 32, 2
606
HEVC_SAO_EDGE_FILTER_16 10, 48, 2
607
HEVC_SAO_EDGE_FILTER_16 10, 64, 4
608
 
609
HEVC_SAO_EDGE_FILTER_16 12,  8, 0
610
HEVC_SAO_EDGE_FILTER_16 12, 16, 1
611
HEVC_SAO_EDGE_FILTER_16 12, 32, 2
612
HEVC_SAO_EDGE_FILTER_16 12, 48, 2
613
HEVC_SAO_EDGE_FILTER_16 12, 64, 4
614
 
615
%if HAVE_AVX2_EXTERNAL
616
INIT_YMM avx2
617
HEVC_SAO_EDGE_FILTER_16 10, 32, 1
618
HEVC_SAO_EDGE_FILTER_16 10, 48, 1
619
HEVC_SAO_EDGE_FILTER_16 10, 64, 2
620
 
621
HEVC_SAO_EDGE_FILTER_16 12, 32, 1
622
HEVC_SAO_EDGE_FILTER_16 12, 48, 1
623
HEVC_SAO_EDGE_FILTER_16 12, 64, 2
624
%endif