/* Retrieved from the Kolibri OS Subversion repository web viewer (rev 6147, author serge). */
/*
 * Copyright (c) 2014 RISC OS Open Ltd
 * Author: Ben Avison
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/arm/asm.S"

// Fixed dimensions used below to derive byte offsets into the coefficient and
// state arrays and the stride of the sample / bypassed-LSB buffers.
// NOTE(review): presumably these must match the C decoder's definitions - confirm.
#define MAX_CHANNELS        8
#define MAX_FIR_ORDER       8
#define MAX_IIR_ORDER       4
#define MAX_RATEFACTOR      4
#define MAX_BLOCKSIZE       (40 * MAX_RATEFACTOR)

// Register aliases for ff_mlp_filter_channel_arm and its helper macros.
// a3/a4 are read as firorder/iirorder by the jump-table dispatch macros and
// are reused afterwards as the accumulator pair AC0 (low) : AC1 (high).
// ST0/ST1/I/PSAMP are filled from the stacked arguments by the function
// prologue (filter_shift, mask, blocksize, sample_buffer respectively).
PST     .req    a1      // state pointer
PCO     .req    a2      // coefficient pointer
AC0     .req    a3      // accumulator, low word
AC1     .req    a4      // accumulator, high word
CO0     .req    v1      // CO0-CO3: coefficient registers
CO1     .req    v2
CO2     .req    v3
CO3     .req    v4
ST0     .req    v5      // ST0-ST3: state registers
ST1     .req    v6
ST2     .req    sl
ST3     .req    fp
I       .req    ip      // loop counter (samples remaining)
PSAMP   .req    lr      // sample buffer pointer


// Emit one position-independent jump-table entry per argument.
// Each argument is an offset expression relative to the table base label.
// Lines prefixed A emit a 32-bit word (offset - 4) and lines prefixed T emit
// a 16-bit halfword (offset / 2); A and T come from asm.S and presumably
// select the ARM and Thumb builds respectively.
// The macro recurses until the vararg list is exhausted.
.macro branch_pic_label first, remainder:vararg
A       .word           \first   - 4
T       .hword          (\first) / 2
.ifnb   \remainder
        branch_pic_label \remainder
.endif
.endm

// Some macros that do loads/multiplies where the register number is determined
// from an assembly-time expression. Boy is GNU assembler's syntax ugly...

// load: single-word load into register <group><index>, where index is an
// assembly-time expression. The expression is evaluated to a plain number via
// the altmacro percent operator and handed to load_, which pastes it onto the
// group name (e.g. group CO and index 2 give CO2).
.macro load  group, index, base, offset
       .altmacro
       load_ \group, %(\index), \base, \offset
       .noaltmacro
.endm

// load_: worker for load. index must already be a literal number so that
// group and index concatenate into a register alias name.
.macro load_ group, index, base, offset
        ldr     \group\index, [\base, #\offset]
.endm

// loadd: double-word load into registers <group><index> and <group><index+1>.
// Both register numbers are evaluated at assembly time and passed to loadd_.
.macro loadd  group, index, base, offset
       .altmacro
       loadd_ \group, %(\index), %(\index+1), \base, \offset
       .noaltmacro
.endm

// loadd_: worker for loadd. index0/index1 must already be literal numbers.
// On the A-prefixed (ARM) path, offsets of 256 or more are split into two
// single-word loads, because the ARM-mode ldrd immediate offset cannot reach
// that far; smaller offsets use a single ldrd. When the A lines are absent
// the ldrd is emitted unconditionally.
.macro loadd_ group, index0, index1, base, offset
A .if \offset >= 256
A       ldr     \group\index0, [\base, #\offset]
A       ldr     \group\index1, [\base, #(\offset) + 4]
A .else
        ldrd    \group\index0, \group\index1, [\base, #\offset]
A .endif
.endm

// multiply: multiply CO<index> by ST<index>, where index is an assembly-time
// expression. accumulate and long are assembly-time flags forwarded to
// multiply_ after the index has been evaluated to a literal number.
.macro multiply  index, accumulate, long
        .altmacro
        multiply_ %(\index), \accumulate, \long
        .noaltmacro
.endm

// multiply_: worker for multiply. Selects one of four instructions:
//   long, accumulate     : smlal  (AC1:AC0 += CO * ST, 64-bit)
//   long, no accumulate  : smull  (AC1:AC0  = CO * ST, 64-bit)
//   short, accumulate    : mla    (AC0 += CO * ST, 32-bit)
//   short, no accumulate : mul    (AC0  = CO * ST, 32-bit)
.macro multiply_  index, accumulate, long
 .if \long
  .if \accumulate
        smlal   AC0, AC1, CO\index, ST\index
  .else
        smull   AC0, AC1, CO\index, ST\index
  .endif
 .else
  .if \accumulate
        mla     AC0, CO\index, ST\index, AC0
  .else
        mul     AC0, CO\index, ST\index
  .endif
 .endif
.endm

// A macro to update the load register number and load offsets

// Advances the assembly-time bookkeeping by howmany taps:
//  - LOAD_REG wraps modulo 4 (there are four CO and four ST registers)
//  - OFFSET_CO / OFFSET_ST advance by one word per tap
//  - while FIR taps remain they are counted down first; when the last FIR tap
//    has been consumed both offsets jump to the start of the IIR section
//  - otherwise the IIR tap count is decremented
.macro inc  howmany
  .set LOAD_REG, (LOAD_REG + \howmany) & 3
  .set OFFSET_CO, OFFSET_CO + 4 * \howmany
  .set OFFSET_ST, OFFSET_ST + 4 * \howmany
  .if FIR_REMAIN > 0
    .set FIR_REMAIN, FIR_REMAIN - \howmany
    .if FIR_REMAIN == 0
      .set OFFSET_CO, 4 * MAX_FIR_ORDER
      .set OFFSET_ST, 4 * (MAX_BLOCKSIZE + MAX_FIR_ORDER)
    .endif
  .elseif IIR_REMAIN > 0
    .set IIR_REMAIN, IIR_REMAIN - \howmany
  .endif
.endm

// Macro to implement the inner loop for one specific combination of parameters

// Parameters (all assembly-time constants):
//   mask_minus1  nonzero when the caller established mask == -1, so the
//                masking step can be omitted
//   shift_0      nonzero when filter_shift == 0 (32-bit multiplies suffice)
//   shift_8      nonzero when filter_shift == 8 (immediate shift amounts)
//   iir_taps     number of IIR taps
//   fir_taps     number of FIR taps
// On entry ST0 holds filter_shift and ST1 holds mask (loaded by the function
// prologue). The generated code loops I times and then branches to label 99.
.macro implement_filter  mask_minus1, shift_0, shift_8, iir_taps, fir_taps
  .set TOTAL_TAPS, \iir_taps + \fir_taps

  // Deal with register allocation...
  // DEFINED_x: alias x was created and must be released at the end
  // SHUFFLE_x: x must be copied out of ST0/ST1 before the loop
  // SPILL_x:   no free register; x is reloaded from the stack every iteration
  .set DEFINED_SHIFT, 0
  .set DEFINED_MASK, 0
  .set SHUFFLE_SHIFT, 0
  .set SHUFFLE_MASK, 0
  .set SPILL_SHIFT, 0
  .set SPILL_MASK, 0
  .if TOTAL_TAPS == 0
    // Little register pressure in this case - just keep MASK where it was
    .if !\mask_minus1
      MASK .req ST1
      .set DEFINED_MASK, 1
    .endif
  .else
    .if \shift_0
      .if !\mask_minus1
        // AC1 is unused with shift 0
        MASK .req AC1
        .set DEFINED_MASK, 1
        .set SHUFFLE_MASK, 1
      .endif
    .elseif \shift_8
      .if !\mask_minus1
        .if TOTAL_TAPS <= 4
        // All coefficients are preloaded (so pointer not needed)
          MASK .req PCO
          .set DEFINED_MASK, 1
          .set SHUFFLE_MASK, 1
        .else
          .set SPILL_MASK, 1
        .endif
      .endif
    .else // shift not 0 or 8
      .if TOTAL_TAPS <= 3
        // All coefficients are preloaded, and at least one CO register is unused
        .if \fir_taps & 1
          SHIFT .req CO0
          .set DEFINED_SHIFT, 1
          .set SHUFFLE_SHIFT, 1
        .else
          SHIFT .req CO3
          .set DEFINED_SHIFT, 1
          .set SHUFFLE_SHIFT, 1
        .endif
        .if !\mask_minus1
          MASK .req PCO
          .set DEFINED_MASK, 1
          .set SHUFFLE_MASK, 1
        .endif
      .elseif TOTAL_TAPS == 4
        // All coefficients are preloaded
        SHIFT .req PCO
        .set DEFINED_SHIFT, 1
        .set SHUFFLE_SHIFT, 1
        .if !\mask_minus1
          .set SPILL_MASK, 1
        .endif
      .else
        .set SPILL_SHIFT, 1
        .if !\mask_minus1
          .set SPILL_MASK, 1
        .endif
      .endif
    .endif
  .endif
  // Spilled values share ST0/ST1 with the state loads, hence the per-iteration
  // reload from the stack in the post-processing section below
  .if SPILL_SHIFT
    SHIFT .req ST0
    .set DEFINED_SHIFT, 1
  .endif
  .if SPILL_MASK
    MASK .req ST1
    .set DEFINED_MASK, 1
  .endif

        // Preload coefficients if possible
        // FIR coefficients start at offset 0, IIR coefficients at
        // 4 * MAX_FIR_ORDER; the starting register depends on FIR tap parity
  .if TOTAL_TAPS <= 4
    .set OFFSET_CO, 0
    .if \fir_taps & 1
      .set LOAD_REG, 1
    .else
      .set LOAD_REG, 0
    .endif
    .rept \fir_taps
        load    CO, LOAD_REG, PCO, OFFSET_CO
      .set LOAD_REG, (LOAD_REG + 1) & 3
      .set OFFSET_CO, OFFSET_CO + 4
    .endr
    .set OFFSET_CO, 4 * MAX_FIR_ORDER
    .rept \iir_taps
        load    CO, LOAD_REG, PCO, OFFSET_CO
      .set LOAD_REG, (LOAD_REG + 1) & 3
      .set OFFSET_CO, OFFSET_CO + 4
    .endr
  .endif

        // Move mask/shift to final positions if necessary
        // Need to do this after preloading, because in some cases we
        // reuse the coefficient pointer register
  .if SHUFFLE_SHIFT
        mov     SHIFT, ST0
  .endif
  .if SHUFFLE_MASK
        mov     MASK, ST1
  .endif

        // Begin loop
01:
  .if TOTAL_TAPS == 0
        // Things simplify a lot in this case
        // In fact this could be pipelined further if it's worth it...
        ldr     ST0, [PSAMP]
        subs    I, I, #1
    .if !\mask_minus1
        and     ST0, ST0, MASK
    .endif
        str     ST0, [PST, #-4]!
        str     ST0, [PST, #4 * (MAX_BLOCKSIZE + MAX_FIR_ORDER)]
        str     ST0, [PSAMP], #4 * MAX_CHANNELS
        bne     01b
  .else
    .if \fir_taps & 1
      .set LOAD_REG, 1
    .else
      .set LOAD_REG, 0
    .endif
    .set LOAD_BANK, 0
    .set FIR_REMAIN, \fir_taps
    .set IIR_REMAIN, \iir_taps
    .if FIR_REMAIN == 0 // only IIR terms
      .set OFFSET_CO, 4 * MAX_FIR_ORDER
      .set OFFSET_ST, 4 * (MAX_BLOCKSIZE + MAX_FIR_ORDER)
    .else
      .set OFFSET_CO, 0
      .set OFFSET_ST, 0
    .endif
    .set MUL_REG, LOAD_REG
    .set COUNTER, 0
    // Each pass emits at most one load step and, from the third pass on, one
    // multiply, so multiplies trail the loads that feed them
    .rept TOTAL_TAPS + 2
        // Do load(s)
     .if FIR_REMAIN != 0 || IIR_REMAIN != 0
      .if COUNTER == 0
       .if TOTAL_TAPS > 4
        load    CO, LOAD_REG, PCO, OFFSET_CO
       .endif
        load    ST, LOAD_REG, PST, OFFSET_ST
        inc     1
      .elseif COUNTER == 1 && (\fir_taps & 1) == 0
       .if TOTAL_TAPS > 4
        load    CO, LOAD_REG, PCO, OFFSET_CO
       .endif
        load    ST, LOAD_REG, PST, OFFSET_ST
        inc     1
      .elseif LOAD_BANK == 0
       // Coefficient half of a paired load; the matching state load and the
       // bookkeeping update happen on the next pass (LOAD_BANK == 1)
       .if TOTAL_TAPS > 4
        .if FIR_REMAIN == 0 && IIR_REMAIN == 1
        load    CO, LOAD_REG, PCO, OFFSET_CO
        .else
        loadd   CO, LOAD_REG, PCO, OFFSET_CO
        .endif
       .endif
       .set LOAD_BANK, 1
      .else
       .if FIR_REMAIN == 0 && IIR_REMAIN == 1
        load    ST, LOAD_REG, PST, OFFSET_ST
        inc     1
       .else
        loadd   ST, LOAD_REG, PST, OFFSET_ST
        inc     2
       .endif
       .set LOAD_BANK, 0
      .endif
     .endif

        // Do interleaved multiplies, slightly delayed
        // (the first multiply initialises the accumulator, later ones
        // accumulate; 64-bit products are used unless the shift is 0)
     .if COUNTER >= 2
        multiply MUL_REG, COUNTER > 2, !\shift_0
      .set MUL_REG, (MUL_REG + 1) & 3
     .endif
     .set COUNTER, COUNTER + 1
    .endr

        // Post-process the result of the multiplies
        // Spilled shift/mask are re-read from the caller's stacked arguments,
        // which sit above the 9 registers pushed by the prologue
    .if SPILL_SHIFT
        ldr     SHIFT, [sp, #9*4 + 0*4]
    .endif
    .if SPILL_MASK
        ldr     MASK, [sp, #9*4 + 1*4]
    .endif
        ldr     ST2, [PSAMP]
        subs    I, I, #1
        // Shift the 64-bit accumulator right, keeping the low 32 bits in AC0
    .if \shift_8
        mov     AC0, AC0, lsr #8
        orr     AC0, AC0, AC1, lsl #24
    .elseif !\shift_0
        rsb     ST3, SHIFT, #32
        mov     AC0, AC0, lsr SHIFT
A       orr     AC0, AC0, AC1, lsl ST3
T       mov     AC1, AC1, lsl ST3
T       orr     AC0, AC0, AC1
    .endif
        // ST3 = (sample + accumulator), masked unless mask is -1
        // ST2 = ST3 - accumulator (unmodified sample when mask is -1)
    .if \mask_minus1
        add     ST3, ST2, AC0
    .else
        add     ST2, ST2, AC0
        and     ST3, ST2, MASK
        sub     ST2, ST3, AC0
    .endif
        // Push ST3 onto the first state history (pre-decrementing PST), store
        // ST2 into the second history, and write ST3 back to the sample buffer
        str     ST3, [PST, #-4]!
        str     ST2, [PST, #4 * (MAX_BLOCKSIZE + MAX_FIR_ORDER)]
        str     ST3, [PSAMP], #4 * MAX_CHANNELS
        bne     01b
  .endif
        b       99f

  // Release the per-variant aliases so the next expansion can redefine them
  .if DEFINED_SHIFT
    .unreq SHIFT
  .endif
  .if DEFINED_MASK
    .unreq MASK
  .endif
.endm

// Dispatch on the FIR order held in a3 through a position-independent jump
// table, then expand one implement_filter variant per permitted FIR order.
// Only orders with fir_taps + iir_taps <= 8 get a table entry and a variant;
// a3 is not range-checked here, so the caller must guarantee it is valid.
.macro switch_on_fir_taps  mask_minus1, shift_0, shift_8, iir_taps
A       ldr     CO0, [pc, a3, lsl #2]   // firorder is in range 0-(8-iir_taps)
A       add     pc,  pc,  CO0
T       tbh     [pc, a3, lsl #1]
0:
        branch_pic_label (70f - 0b), (71f - 0b), (72f - 0b), (73f - 0b)
        branch_pic_label (74f - 0b)
 .if \iir_taps <= 3
        branch_pic_label (75f - 0b)
  .if \iir_taps <= 2
        branch_pic_label (76f - 0b)
   .if \iir_taps <= 1
        branch_pic_label (77f - 0b)
    .if \iir_taps == 0
        branch_pic_label (78f - 0b)
    .endif
   .endif
  .endif
 .endif
70:     implement_filter  \mask_minus1, \shift_0, \shift_8, \iir_taps, 0
71:     implement_filter  \mask_minus1, \shift_0, \shift_8, \iir_taps, 1
72:     implement_filter  \mask_minus1, \shift_0, \shift_8, \iir_taps, 2
73:     implement_filter  \mask_minus1, \shift_0, \shift_8, \iir_taps, 3
74:     implement_filter  \mask_minus1, \shift_0, \shift_8, \iir_taps, 4
 .if \iir_taps <= 3
75:     implement_filter  \mask_minus1, \shift_0, \shift_8, \iir_taps, 5
  .if \iir_taps <= 2
76:     implement_filter  \mask_minus1, \shift_0, \shift_8, \iir_taps, 6
   .if \iir_taps <= 1
77:     implement_filter  \mask_minus1, \shift_0, \shift_8, \iir_taps, 7
    .if \iir_taps == 0
78:     implement_filter  \mask_minus1, \shift_0, \shift_8, \iir_taps, 8
    .endif
   .endif
  .endif
 .endif
.endm

// Dispatch on the IIR order held in a4 through a position-independent jump
// table, then expand switch_on_fir_taps once for each IIR order 0-4.
// a4 is not range-checked here, so the caller must guarantee it is valid.
.macro switch_on_iir_taps  mask_minus1, shift_0, shift_8
A       ldr     CO0, [pc, a4, lsl #2]   // iirorder is in range 0-4
A       add     pc,  pc,  CO0
T       tbh     [pc, a4, lsl #1]
0:
        branch_pic_label (60f - 0b), (61f - 0b), (62f - 0b), (63f - 0b)
        branch_pic_label (64f - 0b)
60:     switch_on_fir_taps  \mask_minus1, \shift_0, \shift_8, 0
61:     switch_on_fir_taps  \mask_minus1, \shift_0, \shift_8, 1
62:     switch_on_fir_taps  \mask_minus1, \shift_0, \shift_8, 2
63:     switch_on_fir_taps  \mask_minus1, \shift_0, \shift_8, 3
64:     switch_on_fir_taps  \mask_minus1, \shift_0, \shift_8, 4
.endm

/* void ff_mlp_filter_channel_arm(int32_t *state, const int32_t *coeff,
 *                                int firorder, int iirorder,
 *                                unsigned int filter_shift, int32_t mask,
 *                                int blocksize, int32_t *sample_buffer);
 *
 * The first four arguments arrive in a1-a4; the remaining four are read from
 * the stack into ST0 (filter_shift), ST1 (mask), I (blocksize) and PSAMP
 * (sample_buffer). The function selects one of six macro-generated families
 * (mask == -1 or not, crossed with shift 0 / 8 / other) and each family then
 * dispatches on iirorder and firorder. Every variant returns via label 99.
 */
function ff_mlp_filter_channel_arm, export=1
        push    {v1-fp,lr}
        add     v1, sp, #9*4 // point at arguments on stack
        ldm     v1, {ST0,ST1,I,PSAMP}
        cmp     ST1, #-1
        bne     30f
        // After the shift: Z is set when the low 3 bits of filter_shift are
        // zero (shift is 0 or 8) and C holds bit 3 (distinguishes 8 from 0)
        movs    ST2, ST0, lsl #29 // shift is in range 0-15; we want to special-case 0 and 8
        bne     20f
        bcs     10f
        switch_on_iir_taps 1, 1, 0
10:     switch_on_iir_taps 1, 0, 1
20:     switch_on_iir_taps 1, 0, 0
30:     movs    ST2, ST0, lsl #29 // shift is in range 0-15; we want to special-case 0 and 8
        bne     50f
        bcs     40f
        switch_on_iir_taps 0, 1, 0
40:     switch_on_iir_taps 0, 0, 1
50:     switch_on_iir_taps 0, 0, 0
99:     pop     {v1-fp,pc}
endfunc

        // Release the filter routine's register aliases; several of these
        // names are given new meanings for the rematrix routine below
        .unreq  PST
        .unreq  PCO
        .unreq  AC0
        .unreq  AC1
        .unreq  CO0
        .unreq  CO1
        .unreq  CO2
        .unreq  CO3
        .unreq  ST0
        .unreq  ST1
        .unreq  ST2
        .unreq  ST3
        .unreq  I
        .unreq  PSAMP

/********************************************************************/

// Register aliases for ff_mlp_rematrix_channel_arm and its helper macros.
// NOISE, DCH and MASK share registers with SA0, SA2 and SA3: they are reloaded
// from the stack each iteration once those sample registers are finished with.
// LSB shares SA1.
PSA     .req    a1 // samples
PCO     .req    a2 // coeffs
PBL     .req    a3 // bypassed_lsbs
INDEX   .req    a4
CO0     .req    v1
CO1     .req    v2
CO2     .req    v3
CO3     .req    v4
SA0     .req    v5
SA1     .req    v6
SA2     .req    sl
SA3     .req    fp
AC0     .req    ip
AC1     .req    lr
NOISE   .req    SA0
LSB     .req    SA1
DCH     .req    SA2 // dest_ch
MASK    .req    SA3

    // INDEX is used as follows:
    // bits 0..6   index2 (values up to 17, but wider so that we can
    //               add to index field without needing to mask)
    // bits 7..14  i (values up to 160)
    // bit 15      underflow detect for i
    // bits 25..31 (if access_unit_size_pow2 == 128)  \ index
    // bits 26..31 (if access_unit_size_pow2 == 64)   /

// Generate the rematrix loop for one parameter combination.
// Parameters (all assembly-time constants):
//   shift        matrix_noise_shift; 0 omits the noise term entirely
//   index_mask   63 or 127, selecting the width of the index field held in
//                the top bits of INDEX (only examined when shift != 0)
//   mask_minus1  nonzero when mask == -1, so the masking step is omitted
//   maxchan      1, 5 or 7; the loop multiplies maxchan + 1 coefficients by
//                the corresponding samples
// The stack top must hold {NOISE, DCH, MASK} as pushed by the function.
// The loop ends when the i field of INDEX underflows into bit 15, and the
// generated code then branches to label 98.
.macro implement_rematrix  shift, index_mask, mask_minus1, maxchan
    .if \maxchan == 1
        // We can just leave the coefficients in registers in this case
        ldrd    CO0, CO1, [PCO]
    .endif
1:
    .if \maxchan == 1
        ldrd    SA0, SA1, [PSA]
        smull   AC0, AC1, CO0, SA0
    .elseif \maxchan == 5
        ldr     CO0, [PCO, #0]
        ldr     SA0, [PSA, #0]
        ldr     CO1, [PCO, #4]
        ldr     SA1, [PSA, #4]
        ldrd    CO2, CO3, [PCO, #8]
        smull   AC0, AC1, CO0, SA0
        ldrd    SA2, SA3, [PSA, #8]
        smlal   AC0, AC1, CO1, SA1
        ldrd    CO0, CO1, [PCO, #16]
        smlal   AC0, AC1, CO2, SA2
        ldrd    SA0, SA1, [PSA, #16]
        smlal   AC0, AC1, CO3, SA3
        smlal   AC0, AC1, CO0, SA0
    .else // \maxchan == 7
        ldr     CO2, [PCO, #0]
        ldr     SA2, [PSA, #0]
        ldr     CO3, [PCO, #4]
        ldr     SA3, [PSA, #4]
        ldrd    CO0, CO1, [PCO, #8]
        smull   AC0, AC1, CO2, SA2
        ldrd    SA0, SA1, [PSA, #8]
        smlal   AC0, AC1, CO3, SA3
        ldrd    CO2, CO3, [PCO, #16]
        smlal   AC0, AC1, CO0, SA0
        ldrd    SA2, SA3, [PSA, #16]
        smlal   AC0, AC1, CO1, SA1
        ldrd    CO0, CO1, [PCO, #24]
        smlal   AC0, AC1, CO2, SA2
        ldrd    SA0, SA1, [PSA, #24]
        smlal   AC0, AC1, CO3, SA3
        smlal   AC0, AC1, CO0, SA0
    .endif
        // Reloading NOISE/DCH/MASK overwrites SA0, SA2 and SA3 but not SA1,
        // so the final CO1 * SA1 product can still be accumulated afterwards
        ldm     sp, {NOISE, DCH, MASK}
        smlal   AC0, AC1, CO1, SA1
    .if \shift != 0
      .if \index_mask == 63
        add     NOISE, NOISE, INDEX, lsr #32-6
        ldrb    LSB, [PBL], #MAX_CHANNELS
        ldrsb   NOISE, [NOISE]
        add     INDEX, INDEX, INDEX, lsl #32-6
      .else // \index_mask == 127
        add     NOISE, NOISE, INDEX, lsr #32-7
        ldrb    LSB, [PBL], #MAX_CHANNELS
        ldrsb   NOISE, [NOISE]
        add     INDEX, INDEX, INDEX, lsl #32-7
      .endif
        sub     INDEX, INDEX, #1<<7
        // Add the sign-extended noise byte, scaled by 2^(shift + 7), to the
        // 64-bit accumulator
        adds    AC0, AC0, NOISE, lsl #\shift + 7
        adc     AC1, AC1, NOISE, asr #31
    .else
        ldrb    LSB, [PBL], #MAX_CHANNELS
        sub     INDEX, INDEX, #1<<7
    .endif
        add     PSA, PSA, #MAX_CHANNELS*4
        // Take bits 14..45 of the 64-bit accumulator
        mov     AC0, AC0, lsr #14
        orr     AC0, AC0, AC1, lsl #18
    .if !\mask_minus1
        and     AC0, AC0, MASK
    .endif
        add     AC0, AC0, LSB
        tst     INDEX, #1<<15
        str     AC0, [PSA, DCH, lsl #2]  // DCH is precompensated for the early increment of PSA
        beq     1b
        b       98f
.endm

// Select the rematrix loop by maxchan (held in v4): values below 5 take the
// maxchan == 1 variant, 5 takes the 5 variant, anything higher the 7 variant.
// The function entry has already restricted maxchan to 1, 5 or 7.
.macro switch_on_maxchan  shift, index_mask, mask_minus1
        cmp     v4, #5
        blo     51f
        beq     50f
        implement_rematrix  \shift, \index_mask, \mask_minus1, 7
50:     implement_rematrix  \shift, \index_mask, \mask_minus1, 5
51:     implement_rematrix  \shift, \index_mask, \mask_minus1, 1
.endm

// Select between the variants that skip masking (mask == -1) and those that
// apply it. The mask argument is held in sl at this point.
.macro switch_on_mask  shift, index_mask
        cmp     sl, #-1
        bne     40f
        switch_on_maxchan  \shift, \index_mask, 1
40:     switch_on_maxchan  \shift, \index_mask, 0
.endm

// Select variants by access_unit_size_pow2 (held in v6) and merge the starting
// noise index (held in v1) into the top bits of INDEX: the top 6 bits for a
// size of 64, the top 7 bits otherwise (the function entry only admits 64 or
// 128). With shift == 0 no noise is added, so the index field is unused and a
// placeholder index_mask is passed down.
.macro switch_on_au_size  shift
  .if \shift == 0
        switch_on_mask  \shift, undefined
  .else
        teq     v6, #64
        bne     30f
        orr     INDEX, INDEX, v1, lsl #32-6
        switch_on_mask  \shift, 63
30:     orr     INDEX, INDEX, v1, lsl #32-7
        switch_on_mask  \shift, 127
  .endif
.endm

/* void ff_mlp_rematrix_channel_arm(int32_t *samples,
 *                                  const int32_t *coeffs,
 *                                  const uint8_t *bypassed_lsbs,
 *                                  const int8_t *noise_buffer,
 *                                  int index,
 *                                  unsigned int dest_ch,
 *                                  uint16_t blockpos,
 *                                  unsigned int maxchan,
 *                                  int matrix_noise_shift,
 *                                  int access_unit_size_pow2,
 *                                  int32_t mask);
 *
 * The first four arguments arrive in a1-a4; the remaining seven are loaded
 * from the stack into v1-sl in declaration order (v1 = index, v2 = dest_ch,
 * v3 = blockpos, v4 = maxchan, v5 = matrix_noise_shift,
 * v6 = access_unit_size_pow2, sl = mask).
 * Only maxchan of 1, 5 or 7 and access_unit_size_pow2 of 64 or 128 are
 * handled here; anything else tail-calls the C ff_mlp_rematrix_channel with
 * the original arguments intact.
 */
function ff_mlp_rematrix_channel_arm, export=1
        push    {v1-fp,lr}
        add     v1, sp, #9*4 // point at arguments on stack
        ldm     v1, {v1-sl}
        teq     v4, #1
        itt     ne
        teqne   v4, #5
        teqne   v4, #7
        bne     99f
        teq     v6, #64
        it      ne
        teqne   v6, #128
        bne     99f
        // Bias dest_ch so stores issued after PSA has already advanced by one
        // row of MAX_CHANNELS words still hit the current row
        sub     v2, v2, #MAX_CHANNELS
        push    {a4,v2,sl}          // initialise NOISE,DCH,MASK; make sp dword-aligned
        movs    INDEX, v3, lsl #7
        beq     98f                 // just in case, do nothing if blockpos = 0
        subs    INDEX, INDEX, #1<<7 // offset by 1 so we borrow at the right time
        adc     lr, v1, v1          // calculate index2 (C was set by preceding subs)
        orr     INDEX, INDEX, lr
        // Switch on matrix_noise_shift: values 0 and 1 are
        // disproportionately common so do those in a form the branch
        // predictor can accelerate. Values can only go up to 15.
        cmp     v5, #1
        beq     11f
        blo     10f
A       ldr     v5,  [pc,  v5,  lsl #2]
A       add     pc,  pc,  v5
T       tbh     [pc, v5, lsl #1]
0:
        // The first two table slots are placeholders: shifts 0 and 1 were
        // dispatched by the compare above and never index the table
        branch_pic_label          0,          0, (12f - 0b), (13f - 0b)
        branch_pic_label (14f - 0b), (15f - 0b), (16f - 0b), (17f - 0b)
        branch_pic_label (18f - 0b), (19f - 0b), (20f - 0b), (21f - 0b)
        branch_pic_label (22f - 0b), (23f - 0b), (24f - 0b), (25f - 0b)
10:     switch_on_au_size  0
11:     switch_on_au_size  1
12:     switch_on_au_size  2
13:     switch_on_au_size  3
14:     switch_on_au_size  4
15:     switch_on_au_size  5
16:     switch_on_au_size  6
17:     switch_on_au_size  7
18:     switch_on_au_size  8
19:     switch_on_au_size  9
20:     switch_on_au_size  10
21:     switch_on_au_size  11
22:     switch_on_au_size  12
23:     switch_on_au_size  13
24:     switch_on_au_size  14
25:     switch_on_au_size  15

        // Common exit: drop the three words pushed for NOISE/DCH/MASK
98:     add     sp, sp, #3*4
        pop     {v1-fp,pc}
99:     // Can't handle these parameters, drop back to C
        pop     {v1-fp,lr}
        b       X(ff_mlp_rematrix_channel)
endfunc

        // Release the rematrix routine's register aliases
        .unreq  PSA
        .unreq  PCO
        .unreq  PBL
        .unreq  INDEX
        .unreq  CO0
        .unreq  CO1
        .unreq  CO2
        .unreq  CO3
        .unreq  SA0
        .unreq  SA1
        .unreq  SA2
        .unreq  SA3
        .unreq  AC0
        .unreq  AC1
        .unreq  NOISE
        .unreq  LSB
        .unreq  DCH
        .unreq  MASK