Subversion Repositories Kolibri OS

Rev

Rev 2455 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2288 clevermous 1
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
;;                                                              ;;
5077 clevermous 3
;; Copyright (C) KolibriOS team 2006-2014. All rights reserved. ;;
2288 clevermous 4
;; Distributed under terms of the GNU General Public License    ;;
5
;;                                                              ;;
6
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7
 
8
;   (C) copyright Serge 2006
9
;   email: infinity_sound@mail.ru
10
 
11
 
12
align 4
13
 
14
; mix_list: table of 32 entries, 3 dwords each (96 dwords reserved).
; The mixers in this file step through it 12 bytes at a time and read an
; entry as: [+0] pointer to the stream's sample data, [+4] and [+8] volume
; values. new_mix passes its address to do_mix_list (defined elsewhere;
; presumably that routine fills the table - confirm).
mix_list rd 32*3
15
 
16
align 4
17
; new_mix: fill the 16 KiB buffer at [output] with 32 blocks of 512 bytes.
;   * prepare_playlist is called first; if [play_count] is then zero the
;     whole buffer (4096 dwords) is zero-filled and nothing else happens.
;   * Otherwise FpuSave is invoked with eax = a 16-byte aligned address
;     inside fpu_state, update_streams runs once, and for every block
;     do_mix_list is called with eax = mix_list; its result in eax is used
;     as the number of streams to mix.
;   * A zero result from do_mix_list zero-fills all blocks not yet produced.
;   * FpuRestore is invoked with the same aligned address before returning.
proc new_mix stdcall, output:dword
           locals
             main_count   rd 1      ; blocks still to be produced
             fpu_state    rb 528    ; 512 + 16 so that an aligned area fits
           endl

        mov     [main_count], 32
        call    prepare_playlist
        cmp     [play_count], 0
        je      .silence_all

        lea     eax, [fpu_state+16]
        and     eax, -16                ; round down to a 16-byte boundary
        invoke  FpuSave

        call    update_streams

.next_block:
        lea     eax, [mix_list]
        call    do_mix_list
        test    eax, eax
        jz      .silence_rest           ; nothing to mix for this block

if USE_SSE2_MIXER

        cmp     eax, 1
        ja      .not_single
                                        ; one stream: dedicated fast path
        mov     edi, [output]
        lea     edx, [mix_list]
        call    mix_fast
        jmp     .block_done
.not_single:
        cmp     eax, 2
        ja      .generic
                                        ; two streams: dedicated fast path
        mov     edi, [output]
        lea     edx, [mix_list]
        call    mix_fast_2_stream
        jmp     .block_done
.generic:

end if

        lea     ebx, [mix_list]
        stdcall mix_all, [output], ebx, eax

.block_done:
        add     [output], 512
        dec     [main_count]
        jnz     .next_block

.restore:
        lea     eax, [fpu_state+16]
        and     eax, -16
        invoke  FpuRestore
        ret

.silence_rest:
        mov     edi, [output]
        mov     ecx, [main_count]
        shl     ecx, 7                  ; remaining blocks * (512/4) dwords
        xor     eax, eax
        cld
        rep stosd
        jmp     .restore

.silence_all:
        mov     edi, [output]
        mov     ecx, 4096               ; 32 blocks * 512 bytes / 4
        xor     eax, eax
        cld
        rep stosd
        ret
endp
88
 
89
align 4
90
; update_streams: bookkeeping for every stream in play_list.
; For each of the [play_count] entries:
;   * advance the 64-bit STREAM.time_stamp by 4096 and clear STREAM.last_ts;
;   * pull STREAM.out_rp back by 64 KiB once it has reached STREAM.out_top;
;   * if STREAM.out_count is not above 16384, top the stream up with
;     refill_ring (PCM_RING bit set in STREAM.format) or refill.
; Side effect: [play_count] is counted down to zero. The counter is tested
; only after the first pass, so the caller must ensure it is non-zero
; (new_mix checks this before calling).
; The index is kept on the stack, not in a register: the refill routines
; overwrite ebx, ecx, edx, esi and edi.
proc update_streams
           locals
             stream_index  dd ?         ; play_list entry being processed
           endl

        mov     [stream_index], 0
.next_stream:
        mov     edx, [stream_index]
        mov     esi, [play_list+edx*4]  ; esi = current stream

        add     dword [esi+STREAM.time_stamp], 4096
        adc     dword [esi+STREAM.time_stamp+4], 0
        mov     dword [esi+STREAM.last_ts], 0

        mov     eax, [esi+STREAM.out_rp]
        cmp     eax, [esi+STREAM.out_top]
        jb      .rp_ok
        sub     eax, 64*1024            ; wrap the read pointer
.rp_ok:
        mov     [esi+STREAM.out_rp], eax

        cmp     [esi+STREAM.out_count], 16384
        ja      .advance                ; enough output buffered already

        test    [esi+STREAM.format], PCM_RING
        jnz     .ring

        stdcall refill, esi
.advance:
        inc     [stream_index]
        dec     [play_count]
        jnz     .next_stream
        ret
.ring:
        stdcall refill_ring, esi
        jmp     .advance
endp
128
 
129
align 4
130
; refill: convert pending input of stream [str] into its output buffer.
;   1. Wrap STREAM.out_wp by 0x10000 if it has reached STREAM.out_top.
;   2. If STREAM.in_count is non-zero, take min(in_count, STREAM.r_size)
;      bytes (unsigned comparison), derive the r_end argument from that
;      byte count and call the stream's STREAM.resample routine.
;   3. Add the routine's result (eax) to out_count/out_wp, move the consumed
;      bytes from in_count to in_free and advance in_rp, wrapping by
;      STREAM.in_size once it reaches STREAM.in_top.
;   4. If STREAM.notify_event is non-zero, invoke RaiseEvent with
;      eax = notify_event, ebx = notify_id, edx = EVENT_WATCHED, esi = 0.
proc refill stdcall, str:dword
           locals
             r_size    rd 1             ; bytes of input handed to the resampler
           endl

        mov     ebx, [str]
        mov     edi, [ebx+STREAM.out_wp]
        cmp     edi, [ebx+STREAM.out_top]
        jb      .wp_ok
        sub     edi, 0x10000            ; wrap the write pointer
        mov     [ebx+STREAM.out_wp], edi
.wp_ok:
        mov     eax, [ebx+STREAM.in_count]
        test    eax, eax
        jz      .done                   ; no input: only notify

        mov     ecx, [ebx+STREAM.r_size]
        cmp     eax, ecx
        jbe     .clamped                ; sizes are unsigned byte counts

        mov     eax, ecx
.clamped:
        mov     ecx, eax                ; ecx = bytes to consume
        cmp     word [ebx+STREAM.format], PCM_1_16_8
        ja      .width_done

        shr     eax, 1                  ; formats up to PCM_1_16_8: halve
                                        ; (presumably 2-byte samples - confirm)
.width_done:
        test    [ebx+STREAM.format], 1  ; even format numbers are mono
        jz      .channels_done

        shr     eax, 1                  ; odd format: halve again, eax = samples
.channels_done:
        shl     eax, 15                 ; eax *= 32768 -> r_end

        mov     [r_size], ecx

        mov     esi, [ebx+STREAM.in_rp]
        mov     edi, [ebx+STREAM.out_wp]

        stdcall [ebx+STREAM.resample], edi, esi, \
                [ebx+STREAM.r_dt], ecx, eax

        mov     ebx, [str]              ; the resampler does not preserve ebx

        add     [ebx+STREAM.out_count], eax
        add     [ebx+STREAM.out_wp], eax

        mov     eax, [ebx+STREAM.in_rp]
        mov     ecx, [r_size]
        add     eax, ecx
        add     [ebx+STREAM.in_free], ecx
        sub     [ebx+STREAM.in_count], ecx

        cmp     eax, [ebx+STREAM.in_top]
        jb      .rp_ok

        sub     eax, [ebx+STREAM.in_size]
.rp_ok:
        mov     [ebx+STREAM.in_rp], eax

.done:
        mov     eax, [ebx+STREAM.notify_event]
        test    eax, eax
        jz      .exit

        mov     ebx, [ebx+STREAM.notify_id]
        mov     edx, EVENT_WATCHED
        xor     esi, esi
        invoke  RaiseEvent              ; eax, ebx, edx, esi
.exit:
        ret
endp
203
 
204
align 4
205
; refill_ring: variant of refill for streams with PCM_RING set.
; Always consumes STREAM.r_size bytes (in_count is not consulted), calls the
; stream's STREAM.resample routine, updates out_count/out_wp with its result
; and in_rp/in_free/in_count with the consumed size. It then builds a
; 4-dword record in the local 'event' buffer:
;   [+0] RT_INP_EMPTY, [+4] 0, [+8] stream pointer,
;   [+12] new in_rp - STREAM.in_base - 128
; and, if STREAM.notify_event is non-zero, invokes RaiseEvent with
; eax = notify_event, ebx = notify_id, edx = 0, esi = address of the record.
proc refill_ring stdcall, str:dword
           locals
             event     rd 6
           endl

        mov     ebx, [str]
        mov     edi, [ebx+STREAM.out_wp]
        cmp     edi, [ebx+STREAM.out_top]
        jb      .wp_ok
        sub     edi, 0x10000            ; wrap the write pointer
        mov     [ebx+STREAM.out_wp], edi
.wp_ok:
        mov     ecx, [ebx+STREAM.r_size]
        mov     eax, ecx
        cmp     word [ebx+STREAM.format], PCM_1_16_8
        ja      .width_done

        shr     eax, 1                  ; formats up to PCM_1_16_8: halve
.width_done:
        test    [ebx+STREAM.format], 1
        jz      .channels_done

        shr     eax, 1                  ; odd format number: halve again
.channels_done:
        shl     eax, 15                 ; eax *= 32768 -> r_end

        mov     esi, [ebx+STREAM.in_rp]
        mov     edi, [ebx+STREAM.out_wp]

        stdcall [ebx+STREAM.resample], edi, esi, \
                [ebx+STREAM.r_dt], ecx, eax

        mov     ebx, [str]

        add     [ebx+STREAM.out_count], eax
        add     [ebx+STREAM.out_wp], eax

        mov     eax, [ebx+STREAM.in_rp]
        mov     ecx, [ebx+STREAM.r_size]
        add     eax, ecx
        add     [ebx+STREAM.in_free], ecx
        sub     [ebx+STREAM.in_count], ecx

        cmp     eax, [ebx+STREAM.in_top]
        jb      .rp_ok

        sub     eax, [ebx+STREAM.in_size]
.rp_ok:
        mov     [ebx+STREAM.in_rp], eax

        sub     eax, [ebx+STREAM.in_base]
        sub     eax, 128
        lea     esi, [event]

        mov     dword [esi], RT_INP_EMPTY
        mov     dword [esi+4], 0
        mov     dword [esi+8], ebx
        mov     dword [esi+12], eax

        mov     eax, [ebx+STREAM.notify_event]
        test    eax, eax
        jz      .exit

        mov     ebx, [ebx+STREAM.notify_id]
        xor     edx, edx
        invoke  RaiseEvent              ; eax, ebx, edx, esi
.exit:
        ret
endp
274
 
275
if USE_SSE2_MIXER
276
 
277
align 4
278
; mix_all (SSE2 build): mix [count] streams into 512 bytes at [dest].
; [list] points to 12-byte entries: [+0] source pointer, [+4] and [+8] two
; single-precision volume factors. The output is produced in 32 chunks of
; 16 bytes. For every chunk each entry contributes 16 bytes of 16-bit
; samples, which are sign-extended, converted to float, scaled by the
; entry's volume pair and summed. The sum is converted back with signed
; saturation and written with a non-temporal store.
; Each entry's source pointer is advanced by 16 in place for every chunk.
; [count] must be at least 1 (the counter is decremented before it is tested).
proc mix_all stdcall, dest:dword, list:dword, count:dword

        mov     edi, [dest]
        mov     ebx, 32                 ; chunks per call
.chunk:
        mov     edx, [list]
        mov     ecx, [count]

        mov     eax, [edx]              ; first stream seeds the accumulators

        movss   xmm2, [edx+4]
        movss   xmm3, [edx+8]
        movdqa  xmm1, [eax]

        shufps  xmm2, xmm3, 0
        shufps  xmm2, xmm2, 0x88        ; volume pair repeated twice

        punpcklwd xmm0, xmm1            ; samples 0..3 into the high halves
        punpckhwd xmm1, xmm1            ; samples 4..7 into the high halves

        psrad   xmm0, 16                ; sign-extend to dwords
        psrad   xmm1, 16
        cvtdq2ps xmm0, xmm0
        cvtdq2ps xmm1, xmm1
        mulps   xmm0, xmm2
        mulps   xmm1, xmm2

.next_stream:
        add     dword [edx], 16         ; advance this entry's source pointer
        add     edx, 12                 ; next list entry
        dec     ecx
        jz      .store

        mov     eax, [edx]

        movss   xmm4, [edx+4]
        movss   xmm5, [edx+8]
        movdqa  xmm3, [eax]

        shufps  xmm4, xmm5, 0
        shufps  xmm4, xmm4, 0x88

        punpcklwd xmm2, xmm3
        punpckhwd xmm3, xmm3

        psrad   xmm2, 16
        psrad   xmm3, 16

        cvtdq2ps xmm2, xmm2
        cvtdq2ps xmm3, xmm3

        mulps   xmm2, xmm4
        mulps   xmm3, xmm4
        addps   xmm0, xmm2
        addps   xmm1, xmm3

        jmp     .next_stream
.store:
        cvtps2dq xmm0, xmm0
        cvtps2dq xmm1, xmm1
        packssdw xmm0, xmm0             ; saturate to 16-bit
        packssdw xmm1, xmm1
        punpcklqdq xmm0, xmm1           ; join the two halves
        movntdq [edi], xmm0

        add     edi, 16
        dec     ebx
        jnz     .chunk

        ret
endp
349
 
350
; param
351
;  edi = dest
352
;  edx = mix_list
353
 
354
align 4
355
; mix_fast: single-stream path - scale one stream into 512 output bytes.
; In:  edi = destination
;      edx = mix_list entry: [+0] source pointer, [+4] vol L, [+8] vol R
; Processes 32 chunks of 16 bytes. The source pointer is advanced in eax
; only; the list entry itself is not modified.
; Overwrites eax, ebx, edi, xmm0-xmm3.
mix_fast:

        mov     eax, [edx]
        mov     ebx, 32                 ; chunks to produce

        movss   xmm2, [edx+4]           ; vol Lf
        movss   xmm3, [edx+8]           ; vol Rf
        shufps  xmm2, xmm3, 0           ; Rf Rf Lf Lf
        shufps  xmm2, xmm2, 0x88        ; Rf Lf Rf Lf
.chunk:
        movdqa  xmm1, [eax]             ; R3 L3 R2 L2 R1 L1 R0 L0 (words)
        punpcklwd xmm0, xmm1            ; frames 0,1: sample in high half of each dword
        punpckhwd xmm1, xmm1            ; frames 2,3: sample in high half of each dword

        psrad   xmm0, 16                ; R1d L1d R0d L0d
        psrad   xmm1, 16                ; R3d L3d R2d L2d

        cvtdq2ps xmm0, xmm0
        cvtdq2ps xmm1, xmm1

        mulps   xmm0, xmm2              ; apply volume
        mulps   xmm1, xmm2

        cvtps2dq xmm0, xmm0
        cvtps2dq xmm1, xmm1
        packssdw xmm0, xmm0             ; saturate; result in the low qword
        packssdw xmm1, xmm1
        punpcklqdq xmm0, xmm1           ; R3' L3' R2' L2' R1' L1' R0' L0'
        movntdq [edi], xmm0

        add     eax, 16
        add     edi, 16
        dec     ebx
        jnz     .chunk

        ret
391
 
392
align 4
393
; mix_fast_2_stream: two-stream path - scale and add two streams into
; 512 output bytes.
; In:  edi = destination
;      edx = two consecutive mix_list entries:
;            [+0] src A, [+4] vol L, [+8] vol R,
;            [+12] src B, [+16] vol L, [+20] vol R
; Processes 32 chunks of 16 bytes. The source pointers are advanced in
; eax/ecx only; the list entries are not modified.
; Overwrites eax, ebx, ecx, edi, xmm0-xmm7.
mix_fast_2_stream:

        mov     eax, [edx]              ; source A
        mov     ecx, [edx+12]           ; source B
        mov     ebx, 32                 ; chunks to produce

        movss   xmm4, [edx+4]           ; A: vol Lf
        movss   xmm5, [edx+8]           ; A: vol Rf
        movss   xmm6, [edx+16]          ; B: vol Lf
        movss   xmm7, [edx+20]          ; B: vol Rf

        shufps  xmm4, xmm5, 0           ; Rf Rf Lf Lf
        shufps  xmm4, xmm4, 0x88        ; Rf Lf Rf Lf

        shufps  xmm6, xmm7, 0           ; Rf Rf Lf Lf
        shufps  xmm6, xmm6, 0x88        ; Rf Lf Rf Lf

.chunk:
        movdqa  xmm1, [eax]             ; A: R3 L3 R2 L2 R1 L1 R0 L0 (words)
        movdqa  xmm3, [ecx]             ; B: R3 L3 R2 L2 R1 L1 R0 L0 (words)

        punpcklwd xmm0, xmm1            ; A frames 0,1 in the high halves
        punpckhwd xmm1, xmm1            ; A frames 2,3 in the high halves

        psrad   xmm0, 16
        psrad   xmm1, 16

        cvtdq2ps xmm0, xmm0
        cvtdq2ps xmm1, xmm1

        mulps   xmm0, xmm4              ; A * volume A
        mulps   xmm1, xmm4

        punpcklwd xmm2, xmm3            ; B frames 0,1 in the high halves
        punpckhwd xmm3, xmm3            ; B frames 2,3 in the high halves

        psrad   xmm2, 16
        psrad   xmm3, 16

        cvtdq2ps xmm2, xmm2
        cvtdq2ps xmm3, xmm3

        mulps   xmm2, xmm6              ; B * volume B
        mulps   xmm3, xmm6

        addps   xmm0, xmm2              ; A + B
        addps   xmm1, xmm3

        cvtps2dq xmm0, xmm0
        cvtps2dq xmm1, xmm1
        packssdw xmm0, xmm0             ; saturate; result in the low qword
        packssdw xmm1, xmm1
        punpcklqdq xmm0, xmm1           ; R3' L3' R2' L2' R1' L1' R0' L0'
        movntdq [edi], xmm0

        add     eax, 16
        add     ecx, 16
        add     edi, 16
        dec     ebx
        jnz     .chunk

        ret
457
 
458
else                                    ; fixed point mmx version
459
 
460
align 4
461
; mix_all (fixed-point MMX build): mix [count] streams into 512 bytes at
; [dest], produced as 64 chunks of 8 bytes.
; [list] points to 12-byte entries: [+0] source pointer, [+4] a dword that
; is duplicated across the MMX register and used as four 16-bit factors.
; Each sample word is multiplied with pmulhw (high half of the signed
; product) and doubled, and the streams are summed with signed saturation.
; Each entry's source pointer is advanced by 8 in place for every chunk.
; [count] must be at least 1 (the counter is decremented before it is tested).
proc mix_all stdcall, dest:dword, list:dword, count:dword

        mov     edi, [dest]
        mov     ebx, 64                 ; chunks per call
.chunk:
        mov     edx, [list]
        mov     ecx, [count]

        mov     eax, [edx]              ; first stream seeds the accumulator

        movd    mm1, [edx+4]
        movq    mm0, [eax]
        punpckldq mm1, mm1              ; volume dword in both halves
        pmulhw  mm0, mm1
        psllw   mm0, 1

.next_stream:
        add     dword [edx], 8          ; advance this entry's source pointer
        add     edx, 12                 ; next list entry
        dec     ecx
        jz      .store

        mov     eax, [edx]
        movd    mm2, [edx+4]
        movq    mm1, [eax]
        punpckldq mm2, mm2
        pmulhw  mm1, mm2
        psllw   mm1, 1
        paddsw  mm0, mm1                ; saturating accumulate
        jmp     .next_stream
.store:
        movq    [edi], mm0
        add     edi, 8
        dec     ebx
        jnz     .chunk

        ret
endp
500
 
501
end if
502
 
503
 
504
align 4
505
; resample_1: linear-interpolating resampler, 16-bit single-channel input,
; writing each result sample twice (low and high word of one dword).
;   dest  - output buffer
;   src   - input position; reading starts 32 samples (64 bytes) before it
;   r_dt  - position step per output sample (15 fractional bits)
;   r_end - position at which to stop
;   r_size is not used here.
; The position starts at 16. For each step:
;   out = clamp16((s[i]*(32768-frac) + s[i+1]*frac + 16384) >> 15)
; Returns eax = number of bytes written to dest.
;
; ebp is used as a scratch register inside the loop, so r_dt and r_end are
; addressed through esp there (the frame has no locals):
;   dest = esp+8, src = esp+12, r_dt = esp+16, r_size = esp+20, r_end = esp+24
; ebp is reloaded from esp before the arguments are used by name again.
proc resample_1 stdcall, dest:dword,src:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

        mov     edi, [dest]
        mov     edx, [src]
        sub     edx, 32*2               ; back up 32 input samples
        mov     eax, 16                 ; starting position

align 4
.next_sample:
        mov     ecx, eax
        mov     esi, eax
        and     ecx, 0x7FFF             ; ecx = fractional part
        shr     esi, 15                 ; esi = sample index
        lea     esi, [edx+esi*2]

        movsx   ebp, word [esi]         ; s[i]    (ebp is scratch from here on)
        movsx   esi, word [esi+2]       ; s[i+1]
        mov     ebx, 32768
        imul    esi, ecx                ; s[i+1] * frac
        sub     ebx, ecx
        imul    ebx, ebp                ; s[i] * (32768 - frac)
        lea     ecx, [ebx+esi+16384]    ; sum plus rounding term
        sar     ecx, 15
        cmp     ecx, 32767              ; 00007fffH
        jle     .check_low
        mov     ecx, 32767              ; clamp high
        jmp     .store
.check_low:
        cmp     ecx, -32768             ; ffff8000H
        jge     .store
        mov     ecx, -32768             ; clamp low
.store:
        mov     ebx, ecx
        shl     ebx, 16
        mov     bx, cx                  ; same sample in both words
        mov     [edi], ebx
        add     edi, 4

        add     eax, [esp+16]           ; position += r_dt
        cmp     eax, [esp+24]           ; reached r_end?
        jb      .next_sample

        mov     ebp, esp                ; re-establish the frame pointer

        sub     edi, [dest]
        mov     eax, edi                ; bytes written
        ret
endp
560
 
561
align 4
562
; resample_18: linear-interpolating resampler, 8-bit single-channel input,
; writing each 16-bit result twice (low and high word of one dword).
;   dest  - output buffer
;   src   - input position; reading starts 32 bytes before it
;   r_dt  - position step per output sample (15 fractional bits)
;   r_end - position at which to stop
;   r_size is not used here.
; Each input byte has 0x80 subtracted and is scaled by 256 before it is
; interpolated. The position starts at 16.
; Returns eax = number of bytes written to dest.
;
; ebp is used as a scratch register inside the loop, so r_dt and r_end are
; read through esp ([esp+16] and [esp+24]); ebp is reloaded from esp before
; the arguments are used by name again.
proc resample_18 stdcall, dest:dword,src:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

        mov     edi, [dest]
        mov     edx, [src]
        sub     edx, 32                 ; back up 32 input samples

        mov     esi, 16                 ; starting position

align 4
.next_sample:
        mov     ecx, esi
        mov     eax, esi
        and     ecx, 0x7FFF             ; ecx = fractional part
        shr     eax, 15                 ; eax = sample index
        lea     eax, [edx+eax]

        mov     bx, word [eax]          ; bl = s[i], bh = s[i+1]
        sub     bh, 0x80
        sub     bl, 0x80
        movsx   eax, bh
        shl     eax, 8                  ; s[i+1] scaled by 256
        movsx   ebp, bl                 ; (ebp is scratch from here on)
        shl     ebp, 8                  ; s[i] scaled by 256
        mov     ebx, 32768
        imul    eax, ecx                ; s[i+1] * frac
        sub     ebx, ecx
        imul    ebx, ebp                ; s[i] * (32768 - frac)
        lea     ecx, [ebx+eax+16384]    ; sum plus rounding term
        sar     ecx, 15
        cmp     ecx, 32767              ; 00007fffH
        jle     .check_low
        mov     ecx, 32767              ; clamp high
        jmp     .store
.check_low:
        cmp     ecx, -32768             ; ffff8000H
        jge     .store
        mov     ecx, -32768             ; clamp low
.store:
        mov     ebx, ecx
        shl     ebx, 16
        mov     bx, cx                  ; same sample in both words
        mov     [edi], ebx
        add     edi, 4

        add     esi, [esp+16]           ; position += r_dt
        cmp     esi, [esp+24]           ; reached r_end?
        jb      .next_sample

        mov     ebp, esp                ; re-establish the frame pointer
        sub     edi, [dest]
        mov     eax, edi                ; bytes written
        ret
endp
617
 
618
align 4
619
; copy_stream: pass-through "resampler" with the same argument list as the
; resample_* routines. Copies [r_size]/4 dwords from src to dest (a
; remainder of 1-3 bytes is not copied) and returns eax = [r_size].
; r_dt and r_end are not used.
proc copy_stream stdcall, dest:dword,src:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

        mov     esi, [src]
        mov     edi, [dest]
        mov     eax, [r_size]           ; return value: requested byte count
        mov     ecx, eax
        shr     ecx, 2                  ; bytes -> dwords
        cld
        rep movsd
        ret
endp
631
 
632
align 4
633
; resample_2: MMX linear-interpolating resampler for input frames of two
; 16-bit words (4 bytes per frame).
;   dest  - output buffer
;   src   - input position; reading starts 32 frames (128 bytes) before it
;   r_dt  - position step per output frame (15 fractional bits)
;   r_end - position at which to stop
;   r_size is not used here.
; The position starts at 16. Each step loads frames i and i+1, weights
; frame i by ([m7] - frac) and frame i+1 by frac, adds the 32-bit products,
; shifts right by 15 and stores one saturated 4-byte frame.
; Returns eax = number of bytes written to dest.
proc resample_2 stdcall, dest:dword,src:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

        mov     edx, [src]
        sub     edx, 32*4               ; back up 32 input frames
        mov     edi, [dest]
        mov     ebx, [r_dt]
        mov     eax, 16                 ; starting position
        emms

align 4
.next_frame:
        mov     ecx, eax
        mov     esi, eax
        and     ecx, 0x7FFF             ; ecx = fractional part
        shr     esi, 15                 ; esi = frame index
        lea     esi, [edx+esi*4]

        movq    mm0, [esi]              ; frames i and i+1
        movq    mm1, mm0

        movd    mm2, ecx
        punpcklwd mm2, mm2              ; frac in the two low words
        movq    mm3, qword [m7]         ; 0x8000 per the original note

        psubw   mm3, mm2                ; 0x8000 - frac
        punpckldq mm3, mm2              ; weights: frame i | frame i+1

        pmulhw  mm0, mm3                ; high words of the products
        pmullw  mm1, mm3                ; low words of the products

        movq    mm4, mm1
        punpcklwd mm1, mm0              ; 32-bit products of frame i
        punpckhwd mm4, mm0              ; 32-bit products of frame i+1
        paddd   mm1, mm4
        psrad   mm1, 15
        packssdw mm1, mm1               ; saturate to 16-bit
        movd    [edi], mm1
        add     edi, 4

        add     eax, ebx                ; position += r_dt
        cmp     eax, [r_end]
        jb      .next_frame
        emms

        sub     edi, [dest]
        mov     eax, edi                ; bytes written
        ret
endp
682
 
683
align 4
684
; resample_28: MMX linear-interpolating resampler for input frames of two
; bytes (2 bytes per frame), producing 4-byte output frames.
;   dest  - output buffer
;   src   - input position; reading starts 32 frames (64 bytes) before it
;   r_dt  - position step per output frame (15 fractional bits)
;   r_end - position at which to stop
;   r_size is not used here.
; Each input byte has [mm80] subtracted, is duplicated into a word and
; masked with [mm_mask] before interpolation; the rest matches resample_2.
; Returns eax = number of bytes written to dest.
proc resample_28 stdcall, dest:dword,src:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

        mov     edx, [src]
        sub     edx, 32*2               ; back up 32 input frames
        mov     edi, [dest]
        mov     ebx, [r_dt]
        mov     eax, 16                 ; starting position
        emms
        movq    mm7, [mm80]             ; per-byte bias
        movq    mm6, [mm_mask]          ; per-word mask

align 4
.next_frame:
        mov     ecx, eax
        mov     esi, eax
        and     ecx, 0x7FFF             ; ecx = fractional part
        shr     esi, 15                 ; esi = frame index
        lea     esi, [edx+esi*2]

        movq    mm0, [esi]
        psubb   mm0, mm7                ; remove the bias
        punpcklbw mm0, mm0              ; each byte duplicated into a word
        pand    mm0, mm6

        movq    mm1, mm0

        movd    mm2, ecx
        punpcklwd mm2, mm2              ; frac in the two low words
        movq    mm3, qword [m7]         ; 0x8000 per the original note

        psubw   mm3, mm2                ; 0x8000 - frac
        punpckldq mm3, mm2              ; weights: frame i | frame i+1

        pmulhw  mm0, mm3                ; high words of the products
        pmullw  mm1, mm3                ; low words of the products

        movq    mm4, mm1
        punpcklwd mm1, mm0              ; 32-bit products of frame i
        punpckhwd mm4, mm0              ; 32-bit products of frame i+1
        paddd   mm1, mm4
        psrad   mm1, 15
        packssdw mm1, mm1               ; saturate to 16-bit
        movd    [edi], mm1
        add     edi, 4

        add     eax, ebx                ; position += r_dt
        cmp     eax, [r_end]
        jb      .next_frame
        emms

        sub     edi, [dest]
        mov     eax, edi                ; bytes written
        ret
endp
740
 
741
 
742
; m16_stereo: duplicate every 16-bit input word (2 bytes in -> 4 bytes out).
;   dest   - output buffer
;   src    - input buffer
;   r_size - input size in bytes; processed in whole 256-byte blocks
;   r_dt and r_end are not used.
; Returns eax = number of bytes written to dest, i.e. 2 * r_size when
; r_size is a multiple of 256.
; An r_size below 256 now writes nothing and returns 0. Previously the
; zero block counter was decremented before its test, so the loop ran
; about 2^32 times and overran both buffers.
proc m16_stereo stdcall, dest:dword,src:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

        mov     esi, [src]
        mov     edi, [dest]
        mov     ecx, [r_size]
        shr     ecx, 8                  ; whole 256-byte input blocks
        jz      .done                   ; none: do not enter the do-while loop
.block:
repeat 4                                ; 4 * 64 bytes in, 4 * 128 bytes out
        call    m16_s_mmx
        add     edi, 128
        add     esi, 64
end repeat
        dec     ecx
        jnz     .block
.done:
        mov     eax, edi
        sub     eax, [dest]             ; bytes actually produced
        ret
endp
769
 
770
align 4
771
; s8_stereo: widen every input byte to a 16-bit word (1 byte in -> 2 bytes
; out) using s8_s_mmx, with mm7 = [mm80] and mm6 = [mm_mask].
;   dest   - output buffer
;   src    - input buffer
;   r_size - input size in bytes; processed in whole 128-byte blocks
;   r_dt and r_end are not used.
; Returns eax = number of bytes written to dest, i.e. 2 * r_size when
; r_size is a multiple of 128.
; An r_size below 128 now writes nothing and returns 0. Previously the
; zero block counter was decremented before its test, so the loop ran
; about 2^32 times and overran both buffers.
proc s8_stereo stdcall, dest:dword,src:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

        mov     esi, [src]
        mov     edi, [dest]
        mov     ecx, [r_size]
        shr     ecx, 7                  ; whole 128-byte input blocks
        jz      .done                   ; none: do not enter the do-while loop

        movq    mm7, [mm80]
        movq    mm6, [mm_mask]
.block:
repeat 4                                ; 4 * 32 bytes in, 4 * 64 bytes out
        call    s8_s_mmx
        add     edi, 64
        add     esi, 32
end repeat
        dec     ecx
        jnz     .block
.done:
        mov     eax, edi
        sub     eax, [dest]             ; bytes actually produced
        ret
endp
801
 
802
; m8_stereo: widen every input byte to a 16-bit word and write it twice
; (1 byte in -> 4 bytes out) using m8_s_mmx, with mm7 = [mm80] and
; mm6 = [mm_mask].
;   dest   - output buffer
;   src    - input buffer
;   r_size - input size in bytes; processed in whole 64-byte blocks
;   r_dt and r_end are not used.
; Returns eax = number of bytes written to dest, i.e. 4 * r_size when
; r_size is a multiple of 64.
; An r_size below 64 now writes nothing and returns 0. Previously the
; zero block counter was decremented before its test, so the loop ran
; about 2^32 times and overran both buffers.
proc m8_stereo stdcall, dest:dword,src:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

        mov     esi, [src]
        mov     edi, [dest]
        mov     ecx, [r_size]
        shr     ecx, 6                  ; whole 64-byte input blocks
        jz      .done                   ; none: do not enter the do-while loop

        movq    mm7, [mm80]
        movq    mm6, [mm_mask]
.block:
repeat 4                                ; 4 * 16 bytes in, 4 * 64 bytes out
        call    m8_s_mmx
        add     edi, 64
        add     esi, 16
end repeat
        dec     ecx
        jnz     .block
.done:
        mov     eax, edi
        sub     eax, [dest]             ; bytes actually produced
        ret
endp
833
 
834
align 4
835
; alloc_mix_buff: claim one 512-byte scratch buffer.
; Finds the lowest set bit in [mix_buff_map], clears it and returns
; eax = [mix_buff] + index*512. Returns eax = 0 when no bit is set.
proc alloc_mix_buff

        bsf     eax, [mix_buff_map]     ; ZF=1 when the map is all zeroes
        jz      .none
        btr     [mix_buff_map], eax     ; mark the slot as taken
        shl     eax, 9                  ; index * 512
        add     eax, [mix_buff]
        ret
.none:
        xor     eax, eax
        ret
endp
847
 
848
align 4
849
; m16_s_mmx: duplicate each of 32 input words.
; In:  esi = source (64 bytes read), edi = destination (128 bytes written)
; Every 16-bit word is written twice in a row. esi and edi are left
; unchanged; mm0 and mm1 are overwritten.
; The eight identical 8-byte steps are generated with 'repeat'; '%' is the
; 1-based iteration number.
proc m16_s_mmx

repeat 8
        movq    mm0, [esi+(%-1)*8]
        movq    mm1, mm0
        punpcklwd mm0, mm0              ; words 0,1 doubled
        punpckhwd mm1, mm1              ; words 2,3 doubled
        movq    [edi+(%-1)*16], mm0
        movq    [edi+(%-1)*16+8], mm1
end repeat

        ret
endp
910
 
911
align 4
912
; s8_s_mmx: widen 32 input bytes to 32 words.
; In:  esi = source (32 bytes read), edi = destination (64 bytes written)
;      mm7 = value subtracted from every byte, mm6 = mask applied to every
;      resulting word (the callers load them from [mm80] and [mm_mask])
; Each byte is duplicated into both halves of a word and then masked.
; esi and edi are left unchanged; mm0 and mm1 are overwritten.
; The four identical 8-byte steps are generated with 'repeat'; '%' is the
; 1-based iteration number.
proc s8_s_mmx

repeat 4
        movq    mm0, [esi+(%-1)*8]
        psubb   mm0, mm7
        movq    mm1, mm0
        punpcklbw mm0, mm0              ; bytes 0..3 -> words
        pand    mm0, mm6
        punpckhbw mm1, mm1              ; bytes 4..7 -> words
        pand    mm1, mm6
        movq    [edi+(%-1)*16], mm0
        movq    [edi+(%-1)*16+8], mm1
end repeat

        ret

endp
957
 
958
align 4
959
; m8_s_mmx: widen 16 input bytes to words and write each word twice.
; In:  esi = source (16 bytes read), edi = destination (64 bytes written)
;      mm7 = value subtracted from every byte, mm6 = mask applied to every
;      resulting word (the callers load them from [mm80] and [mm_mask])
; esi and edi are left unchanged; mm0-mm3 are overwritten.
; The two identical 8-byte steps are generated with 'repeat'; '%' is the
; 1-based iteration number.
proc m8_s_mmx

repeat 2
        movq    mm0, [esi+(%-1)*8]
        psubb   mm0, mm7
        movq    mm1, mm0
        punpcklbw mm0, mm0              ; bytes 0..3 -> words
        pand    mm0, mm6
        punpckhbw mm1, mm1              ; bytes 4..7 -> words
        pand    mm1, mm6
        movq    mm2, mm0
        punpcklwd mm0, mm0              ; words 0,1 doubled
        punpckhwd mm2, mm2              ; words 2,3 doubled

        movq    mm3, mm1
        punpcklwd mm1, mm1              ; words 4,5 doubled
        punpckhwd mm3, mm3              ; words 6,7 doubled

        movq    [edi+(%-1)*32], mm0
        movq    [edi+(%-1)*32+8], mm2
        movq    [edi+(%-1)*32+16], mm1
        movq    [edi+(%-1)*32+24], mm3
end repeat

        ret
endp
1003
 
1004
align 4
1005
; mix_2_1: run the routine at [mix_2_core] four times over consecutive
; 128-byte slices, with edi = output and eax, ebx = the two inputs.
; All three pointers are advanced by 128 (held in esi) between calls, so
; the core routine is expected to leave edi, eax, ebx and esi intact.
proc mix_2_1 stdcall, output:dword, str0:dword, str1:dword

        mov     edi, [output]
        mov     eax, [str0]
        mov     ebx, [str1]
        mov     esi, 128                ; slice size
        call    [mix_2_core]            ; edi, eax, ebx

repeat 3
        add     edi, esi
        add     eax, esi
        add     ebx, esi
        call    [mix_2_core]            ; edi, eax, ebx
end repeat
        ret
endp
1029
 
1030
align 4
1031
; mix_3_1: run the routine at [mix_3_core] four times over consecutive
; 128-byte slices, with edi = output and eax, ebx, ecx = the three inputs.
; All four pointers are advanced by 128 (held in esi) between calls, so
; the core routine is expected to leave those registers intact.
proc mix_3_1 stdcall, output:dword, str0:dword, str1:dword, str2:dword

        mov     edi, [output]
        mov     eax, [str0]
        mov     ebx, [str1]
        mov     ecx, [str2]
        mov     esi, 128                ; slice size
        call    [mix_3_core]            ; edi, eax, ebx, ecx

repeat 3
        add     edi, esi
        add     eax, esi
        add     ebx, esi
        add     ecx, esi
        call    [mix_3_core]            ; edi, eax, ebx, ecx
end repeat
        ret
endp
1059
 
1060
align 4
1061
; mix_4_1: mix four inputs into a freshly allocated scratch buffer.
; Obtains a buffer from alloc_mix_buff, then runs the routine at
; [mix_4_core] four times over consecutive 128-byte slices, with
; edi = buffer and eax, ebx, ecx, edx = the four inputs.
; Returns eax = address of the buffer, or 0 if none could be allocated.
proc mix_4_1 stdcall, str0:dword, str1:dword,\
                      str2:dword, str3:dword

           local output:DWORD

        call    alloc_mix_buff
        test    eax, eax
        jz      .no_buffer

        mov     [output], eax           ; remembered for the return value

        mov     edi, eax
        mov     eax, [str0]
        mov     ebx, [str1]
        mov     ecx, [str2]
        mov     edx, [str3]
        mov     esi, 128                ; slice size
        call    [mix_4_core]            ; edi, eax, ebx, ecx, edx

repeat 3
        add     edi, esi
        add     eax, esi
        add     ebx, esi
        add     ecx, esi
        add     edx, esi
        call    [mix_4_core]            ; edi, eax, ebx, ecx, edx
end repeat
        mov     eax, [output]
        ret
.no_buffer:
        xor     eax, eax
        ret
endp
1106
 
1107
 
1108
align 4
1109
; final_mix: mix four inputs directly into [output].
; Runs the routine at [mix_4_core] four times over consecutive 128-byte
; slices, with edi = output and eax, ebx, ecx, edx = the four inputs.
; All five pointers are advanced by 128 (held in esi) between calls.
proc final_mix stdcall, output:dword, str0:dword, str1:dword,\
                        str2:dword, str3:dword

        mov     edi, [output]

        mov     eax, [str0]
        mov     ebx, [str1]
        mov     ecx, [str2]
        mov     edx, [str3]
        mov     esi, 128                ; slice size
        call    [mix_4_core]            ; edi, eax, ebx, ecx, edx

repeat 3
        add     edi, esi
        add     eax, esi
        add     ebx, esi
        add     ecx, esi
        add     edx, esi
        call    [mix_4_core]            ; edi, eax, ebx, ecx, edx
end repeat
        ret
endp
1143
 
1144
align 4
1145
; copy_mem: copy 0x80 dwords (512 bytes) from [input] to [output].
; Overwrites eax, ecx, esi and edi. Uses dec/jnz instead of the legacy
; 'loop' instruction, like memcpy below.
proc copy_mem stdcall, output:dword, input:dword

        mov     edi, [output]
        mov     esi, [input]
        mov     ecx, 0x80               ; dwords to copy
.next_dword:
        mov     eax, [esi]
        mov     [edi], eax
        add     esi, 4
        add     edi, 4
        dec     ecx
        jnz     .next_dword

        ret
endp
1159
 
1160
; memcpy: register-argument dword copy.
; In:  esi = source, edi = destination, ecx = number of dwords
; Out: esi and edi advanced past the copied data, ecx = 0;
;      eax holds the last dword copied (unchanged when ecx was 0).
; A zero count is now a no-op. Previously the counter was decremented
; before its test, so ecx = 0 copied 2^32 dwords.
proc memcpy
        test    ecx, ecx
        jz      .done
.next_dword:
        mov     eax, [esi]
        mov     [edi], eax
        add     esi, 4
        add     edi, 4
        dec     ecx
        jnz     .next_dword
.done:
        ret
endp
1170
 
1171
if 0
; Everything up to the matching 'end if' is excluded from assembly by the
; constant-false condition above. It is an alternative new_mix that gathers
; the play_list streams in groups of four through mix_4_1 and finishes each
; 512-byte block with copy_mem, mix_2_1, mix_3_1 or final_mix.
; NOTE(review): it calls 'update_stream', FpuSave and FpuRestore directly;
; the live code in this file defines 'update_streams' and uses
; 'invoke FpuSave/FpuRestore', so this block would need updating before it
; could be enabled.
1172
 
1173
align 4
1174
proc new_mix stdcall, output:dword
1175
           locals
1176
             mixCounter  dd ?
1177
             mixIndex  dd ?
1178
             streamIndex dd ?
1179
             inputCount  dd ?
1180
             main_count  dd ?
1181
             blockCount  dd ?
1182
             mix_out  dd ?
1183
           endl
1184
 
1185
        call    prepare_playlist
1186
 
1187
        cmp     [play_count], 0
1188
        je      .exit
1189
        call    FpuSave
1190
        mov     [main_count], 32;
1191
.l00:
1192
        mov     [mix_buff_map], 0x0000FFFF;
1193
        xor     eax, eax
1194
        mov     [mixCounter], eax
1195
        mov     [mixIndex], eax
1196
        mov     [streamIndex], eax;
1197
        mov     ebx, [play_count]
1198
        mov     [inputCount], ebx
1199
.l0:
1200
        mov     ecx, 4
1201
.l1:
1202
        mov     ebx, [streamIndex]
1203
        mov     esi, [play_list+ebx*4]
1204
        mov     eax, [esi+STREAM.work_read]
1205
        add     [esi+STREAM.work_read], 512
1206
 
1207
        mov     ebx, [mixIndex]
1208
        mov     [mix_input+ebx*4], eax
1209
        inc     [mixCounter]
1210
        inc     [mixIndex]
1211
        inc     [streamIndex]
1212
        dec     [inputCount]
1213
        jz      .m2
1214
 
1215
        dec     ecx
1216
        jnz     .l1
1217
 
1218
        cmp     [mixCounter], 4
1219
        jnz     .m2
1220
 
1221
        stdcall mix_4_1, [mix_input], [mix_input+4], [mix_input+8], [mix_input+12]
1222
        sub     [mixIndex], 4
1223
        mov     ebx, [mixIndex]
1224
        mov     [mix_input+ebx*4], eax
1225
        inc     [mixIndex]
1226
        mov     [mixCounter], 0
1227
 
1228
        cmp     [inputCount], 0
1229
        jnz     .l0
1230
.m2:
1231
        cmp     [mixIndex], 1
1232
        jne     @f
1233
        stdcall copy_mem, [output], [mix_input]
1234
        jmp     .m3
1235
@@:
1236
        cmp     [mixIndex], 2
1237
        jne     @f
1238
        stdcall mix_2_1, [output], [mix_input], [mix_input+4]
1239
        jmp     .m3
1240
@@:
1241
        cmp     [mixIndex], 3
1242
        jne     @f
1243
        stdcall mix_3_1, [output], [mix_input], [mix_input+4], [mix_input+8]
1244
        jmp     .m3
1245
@@:
1246
        stdcall final_mix, [output], [mix_input], [mix_input+4], [mix_input+8], [mix_input+12]
1247
.m3:
1248
        add     [output], 512
1249
 
1250
        dec     [main_count]
1251
        jnz     .l00
1252
 
1253
        call    update_stream
1254
        emms
1255
        call    FpuRestore
1256
        ret
1257
.exit:
1258
        mov     edi, [output]
1259
        mov     ecx, 0x1000
1260
        xor     eax, eax
1261
        cld
1262
        rep stosd
1263
        ret
1264
endp
1265
 
1266
end if