Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
168 serge 1
;
2
;   This file is part of the Infinity sound library.
3
;   (C) copyright Serge 2006
4
;   email: infinity_sound@mail.ru
5
;
6
;   This program is free software; you can redistribute it and/or modify
7
;   it under the terms of the GNU General Public License as published by
8
;   the Free Software Foundation; either version 2 of the License, or
9
;   (at your option) any later version.
10
;
11
;   This program is distributed in the hope that it will be useful,
12
;   but WITHOUT ANY WARRANTY; without even the implied warranty of
13
;   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
;   GNU General Public License for more details.
15
 
16
align 4
17
;-----------------------------------------------------------------------
; new_mix: mix one period of every stream in play_list into [output].
;
; In:    output = destination buffer; 32 blocks of 512 bytes are
;                 written (or 0x1000 zero dwords when nothing plays).
; Out:   no defined return value.
; Regs:  eax, ebx, ecx, esi, edi are not preserved.
;
; For each 512-byte block the current work_read pointer of every stream
; is appended to mix_input[] and the stream's work_read is advanced by
; 512.  Whenever four pointers have been collected, the four NEWEST
; entries are folded by mix_4_1 into one temporary buffer which
; replaces them, so that no more than four entries are left for the
; final mix into [output].
;-----------------------------------------------------------------------
proc new_mix stdcall, output:dword
           locals
             mixCounter  dd ?           ; inputs collected in the current group
             mixIndex    dd ?           ; number of valid mix_input[] entries
             streamIndex dd ?           ; next play_list[] entry to read
             inputCount  dd ?           ; streams still to be collected
             main_count  dd ?           ; 512-byte blocks still to be produced
           endl

           call prepare_playlist

           cmp [play_count], 0
           je .exit                     ; nothing to play: emit silence

           call [FpuSave]
           emms
           mov [main_count], 32

.l00:                                   ; ---- one 512-byte output block ----
           mov [mix_buff_map], 0x0000FFFF   ; mark all 16 temp buffers free
           xor eax, eax
           mov [mixCounter], eax
           mov [mixIndex], eax
           mov [streamIndex], eax
           mov ebx, [play_count]
           mov [inputCount], ebx
.l0:
           mov ecx, 4                   ; collect up to four streams
.l1:
           mov ebx, [streamIndex]
           mov esi, [play_list+ebx*4]
           mov eax, [esi+STREAM.work_read]
           add [esi+STREAM.work_read], 512

           mov ebx, [mixIndex]
           mov [mix_input+ebx*4], eax
           inc [mixCounter]
           inc [mixIndex]
           inc [streamIndex]
           dec [inputCount]
           jz .m2                       ; that was the last stream

           dec ecx
           jnz .l1

           cmp [mixCounter], 4
           jnz .m2
.fold:
           ; Fold the four most recent entries, mix_input[mixIndex-4 ..
           ; mixIndex-1], into one temporary buffer.  (The previous code
           ; always passed mix_input[0..3], which is only right for the
           ; very first group.)
           ; NOTE(review): mix_4_1 returns 0 when no temp buffer is free;
           ; that value would be stored as an input pointer - confirm the
           ; stream limit keeps this from happening.
           mov ebx, [mixIndex]
           stdcall mix_4_1, [mix_input+ebx*4-16],[mix_input+ebx*4-12],\
                            [mix_input+ebx*4-8],[mix_input+ebx*4-4]
           sub [mixIndex], 4
           mov ebx, [mixIndex]
           mov [mix_input+ebx*4], eax   ; the result replaces the four inputs
           inc [mixIndex]
           mov [mixCounter], 0

           cmp [inputCount], 0
           jnz .l0
.m2:
           ; The final mixers take at most four inputs; keep folding while
           ; more than four entries are pending so no stream is dropped.
           cmp [mixIndex], 4
           ja .fold

           cmp [mixIndex], 1
           jne @f
           stdcall copy_mem, [output], [mix_input]
           jmp .m3
@@:
           cmp [mixIndex], 2
           jne @f
           stdcall mix_2_1, [output], [mix_input], [mix_input+4]
           jmp .m3
@@:
           cmp [mixIndex], 3
           jne @f
           stdcall mix_3_1, [output],[mix_input],[mix_input+4],[mix_input+8]
           jmp .m3
@@:
           stdcall final_mix, [output],[mix_input],[mix_input+4],[mix_input+8], [mix_input+12]
.m3:
           add [output],512

           sub [main_count], 1
           jnz .l00

           call update_stream
           call [FpuRestore]
           ret
.exit:
           ; no active streams: clear 0x1000 dwords (16384 bytes) of output
           mov edi, [output]
           mov ecx, 0x1000
           xor eax, eax
           cld
           rep stosd
           ret
endp
111
 
112
 
113
align 4
114
;-----------------------------------------------------------------------
; update_stream: post-mix bookkeeping for every entry of play_list.
;
; Walks play_list[0..], decrementing the global play_count once per
; stream until it reaches zero.  For each stream:
;   * work_read is wrapped back to work_buff once it reaches work_top;
;   * PCM_2_16_48 streams get 16384 bytes copied from curr_seg to
;     work_write, followed by the notification check;
;   * other formats have 16384 subtracted from work_count and are
;     refilled when work_count is not above 32768.
; Regs: eax, ebx, ecx, edx, esi, edi are not preserved.
;-----------------------------------------------------------------------
proc update_stream
           locals
             idx  dd 0
           endl

           mov [idx], 0
.next_stream:
           mov edx, [idx]
           mov esi, [play_list+edx*4]

           ; wrap the read cursor
           mov eax, [esi+STREAM.work_read]
           cmp eax, [esi+STREAM.work_top]
           jb .read_ok
           mov eax, [esi+STREAM.work_buff]
.read_ok:
           mov [esi+STREAM.work_read], eax

           cmp [esi+STREAM.format], PCM_2_16_48
           je .copy

           sub [esi+STREAM.work_count], 16384

           cmp [esi+STREAM.work_count], 32768
           ja .advance

           stdcall refill, esi
.advance:
           inc [idx]
           dec [play_count]
           jnz .next_stream

           ret

.copy:                                  ; stream is copied without conversion
           mov ebx, esi
           mov edi, [ebx+STREAM.work_write]
           cmp edi, [ebx+STREAM.work_top]
           jb .write_ok
           mov edi, [ebx+STREAM.work_buff]
           mov [ebx+STREAM.work_write], edi
.write_ok:
           mov esi, [ebx+STREAM.curr_seg]
           mov ecx, 16384/4
           cld
           rep movsd

           mov [ebx+STREAM.work_write], edi

           ; advance curr_seg, wrapping from limit back to base
           cmp esi, [ebx+STREAM.limit]
           jb .seg_ok
           mov esi, [ebx+STREAM.base]
.seg_ok:
           mov [ebx+STREAM.curr_seg], esi

           ; ecx = value handed to the client: 0 at notify_off2,
           ; 0x8000 at notify_off1; anything else means no notification
           xor ecx, ecx
           cmp esi, [ebx+STREAM.notify_off2]
           je .notify

           mov ecx,0x8000
           cmp esi, [ebx+STREAM.notify_off1]
           jne .advance
.notify:
           mov eax, [ebx+STREAM.notify_task]
           call pid_to_slot
           test eax, eax
           jnz .post
           not eax
           mov [ebx+STREAM.notify_task], eax      ; no slot found: store -1
           jmp .advance
.post:
           shl eax, 8                             ; eax = slot*256
           mov [eax+PROC_BASE+32],ecx
           or dword [eax+PROC_BASE+0xA8],EVENT_NOTIFY
           jmp .advance
endp
199
 
200
align 4
201
;-----------------------------------------------------------------------
; refill: convert one source segment of a stream into its work buffer.
;
; In:   str = pointer to the STREAM structure
; Regs: eax, ebx, ecx, edx, esi, edi are not preserved.
;
; Steps: move any data written past work_top back to work_buff, call
; the stream's resample routine (which returns the number of bytes
; produced in eax), advance work_count/work_write by that amount, step
; curr_seg by r_size with wrap-around, and post EVENT_NOTIFY when
; curr_seg hits notify_off1 or notify_off2.
;-----------------------------------------------------------------------
proc refill stdcall, str:dword

;    if DEBUG
;           mov    esi, msgUser
;           call   [SysMsgBoardStr]
;     end if

           mov ebx, [str]

           ; bytes that spilled over work_top go back to the buffer start
           mov ecx, [ebx+STREAM.work_write]
           cmp ecx, [ebx+STREAM.work_top]
           jbe .convert
           mov esi, [ebx+STREAM.work_top]
           mov edi, [ebx+STREAM.work_buff]
           sub ecx, esi
           shr ecx, 2
           rep movsd

           mov [ebx+STREAM.work_write], edi
.convert:
           mov edi, [ebx+STREAM.work_write]
           mov esi, [ebx+STREAM.curr_seg]
           mov edx, [ebx+STREAM.r_buff]

           stdcall [ebx+STREAM.resample], edi, esi, edx,\
              [ebx+STREAM.r_dt],[ebx+STREAM.r_size],[ebx+STREAM.r_end]

           mov ebx, [str]               ; the converter may have changed ebx

           add [ebx+STREAM.work_count], eax
           add [ebx+STREAM.work_write], eax

           ; next source segment, wrapping from limit back to base
           mov eax, [ebx+STREAM.curr_seg]
           add eax, [ebx+STREAM.r_size]
           cmp eax, [ebx+STREAM.limit]
           jb .seg_ok
           mov eax, [ebx+STREAM.base]
.seg_ok:
           mov [ebx+STREAM.curr_seg], eax

           ; ecx = value handed to the client: 0 at notify_off2,
           ; 0x8000 at notify_off1
           xor ecx, ecx
           cmp eax, [ebx+STREAM.notify_off2]
           je .notify

           mov ecx,0x8000
           cmp eax, [ebx+STREAM.notify_off1]
           je .notify

           ret
.notify:
           mov eax, [ebx+STREAM.notify_task]
           call pid_to_slot
           test eax, eax
           jnz .post
           not eax
           mov [ebx+STREAM.notify_task], eax      ; no slot found: store -1
           ret
.post:
           shl eax, 8                             ; eax = slot*256
           mov [eax+PROC_BASE+32],ecx
           or dword [eax+PROC_BASE+0xA8],EVENT_NOTIFY
           ret
endp
264
 
265
align 4
266
;-----------------------------------------------------------------------
; resample_1: linear-interpolating resampler, 16-bit mono source to
;             16-bit output with the sample duplicated into both halves
;             of each output dword.
;
; In:   dest   = output buffer
;       src    = source samples (r_size bytes)
;       r_buff = scratch buffer; its first 64 bytes hold the tail of the
;                previous call, new data is appended behind them
;       r_dt   = position step, 15 fractional bits
;       r_size = source size in bytes
;       r_end  = position at which conversion stops
; Out:  eax = number of bytes written to dest
; Regs: ebx, ecx, edx, esi, edi are not preserved.
;
; Unlike the earlier version this routine leaves ebp alone, so the
; arguments can be addressed by name instead of fixed [esp+N] offsets.
; The interpolation  s0*(32768-f) + s1*f  is evaluated in the
; algebraically identical form  (s0 shl 15) + (s1-s0)*f.
;-----------------------------------------------------------------------
proc resample_1 stdcall, dest:dword,src:dword,r_buff:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

           ; append the source behind the 64-byte history
           mov edi, [r_buff]
           add edi, 32*2
           mov esi, [src]
           mov ecx, [r_size]
           shr ecx, 2
           rep movsd

           mov edi, [dest]
           mov edx, [r_buff]
           mov eax, 16                  ; starting position (fixed point)

align 16
.next_sample:
           mov ecx, eax
           mov esi, eax
           and ecx, 0x7FFF              ; ecx = fraction f
           shr esi, 15                  ; esi = sample index

           movsx ebx, word [edx+esi*2]      ; s0
           movsx esi, word [edx+esi*2+2]    ; s1
           sub esi, ebx                 ; s1 - s0
           imul esi, ecx                ; (s1 - s0) * f
           shl ebx, 15                  ; s0 * 32768
           lea ecx, [ebx+esi+16384]     ; + 0.5 for rounding
           sar ecx, 15

           ; clamp to the signed 16-bit range
           cmp ecx, 32767
           jle .check_low
           mov ecx, 32767
           jmp .store
.check_low:
           cmp ecx, -32768
           jge .store
           mov ecx, -32768
.store:
           mov ebx, ecx
           shl ebx, 16
           mov bx, cx                   ; same sample in both 16-bit halves
           mov [edi], ebx
           add edi, 4

           add eax, [r_dt]
           cmp eax, [r_end]
           jb .next_sample

           ; keep the last 64 source bytes as history for the next call
           mov esi, [src]
           add esi, [r_size]
           sub esi, 32*2
           mov edx, [r_buff]
           mov ecx, 16
.save_tail:
           mov ebx, [esi]
           mov [edx], ebx
           add esi, 4
           add edx, 4
           dec ecx
           jnz .save_tail

           sub edi, [dest]
           mov eax, edi                 ; bytes produced
           ret
endp
334
 
335
align 4
336
;-----------------------------------------------------------------------
; resample_18: linear-interpolating resampler, unsigned 8-bit mono
;              source to 16-bit output with the sample duplicated into
;              both halves of each output dword.
;
; In:   dest   = output buffer
;       src    = source samples (r_size bytes)
;       r_buff = scratch buffer; its first 32 bytes hold the tail of the
;                previous call, new data is appended behind them
;       r_dt   = position step, 15 fractional bits
;       r_size = source size in bytes
;       r_end  = position at which conversion stops
; Out:  eax = number of bytes written to dest
; Regs: ebx, ecx, edx, esi, edi are not preserved.
;
; Unlike the earlier version this routine leaves ebp alone, so the
; arguments can be addressed by name instead of fixed [esp+N] offsets.
; With S = (byte - 0x80) shl 8, the interpolation
; S0*(32768-f) + S1*f  is evaluated in the algebraically identical form
; (S0 shl 15) + (S1-S0)*f.
;-----------------------------------------------------------------------
proc resample_18 stdcall, dest:dword,src:dword,r_buff:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

           ; append the source behind the 32-byte history
           mov edi, [r_buff]
           add edi, 32
           mov esi, [src]
           mov ecx, [r_size]
           shr ecx, 2
           rep movsd

           mov edi, [dest]
           mov edx, [r_buff]
           mov esi, 16                  ; starting position (fixed point)

align 16
.next_sample:
           mov ecx, esi
           mov eax, esi
           and ecx, 0x7FFF              ; ecx = fraction f
           shr eax, 15                  ; eax = sample index

           mov bx, word [edx+eax]       ; bl = first byte, bh = second byte
           sub bh, 0x80                 ; unsigned -> signed
           sub bl, 0x80
           movsx eax, bh                ; second sample
           movsx ebx, bl                ; first sample
           sub eax, ebx
           shl eax, 8                   ; S1 - S0
           imul eax, ecx                ; (S1 - S0) * f
           shl ebx, 23                  ; S0 * 32768  (= sample shl 8 shl 15)
           lea ecx, [ebx+eax+16384]     ; + 0.5 for rounding
           sar ecx, 15

           ; clamp to the signed 16-bit range
           cmp ecx, 32767
           jle .check_low
           mov ecx, 32767
           jmp .store
.check_low:
           cmp ecx, -32768
           jge .store
           mov ecx, -32768
.store:
           mov ebx, ecx
           shl ebx, 16
           mov bx, cx                   ; same sample in both 16-bit halves
           mov [edi], ebx
           add edi, 4

           add esi, [r_dt]
           cmp esi, [r_end]
           jb .next_sample

           ; keep the last 32 source bytes as history for the next call
           mov esi, [src]
           add esi, [r_size]
           sub esi, 32
           mov edx, [r_buff]
           mov ecx, 8
.save_tail:
           mov ebx, [esi]
           mov [edx], ebx
           add esi, 4
           add edx, 4
           dec ecx
           jnz .save_tail

           sub edi, [dest]
           mov eax, edi                 ; bytes produced
           ret
endp
409
 
410
align 4
411
;-----------------------------------------------------------------------
; copy_stream: "resampler" that copies the source unchanged.
;
; In:   dest, src = buffers; r_size = byte count (copied as dwords)
;       r_buff, r_dt, r_end are not used.
; Out:  eax = 16384
; NOTE(review): the return value is the constant 16384, not r_size;
;               confirm that callers always pass r_size = 16384.
; Regs: ecx, esi, edi are not preserved.
;-----------------------------------------------------------------------
proc copy_stream stdcall, dest:dword,src:dword,r_buff:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

           mov esi, [src]
           mov edi, [dest]
           mov ecx, [r_size]
           shr ecx, 2                   ; bytes -> dwords
           rep movsd
           mov eax, 16384
           ret
endp
423
 
424
align 4
425
;-----------------------------------------------------------------------
; resample_2: MMX linear-interpolating resampler for sources with
;             4-byte frames (two 16-bit words per frame).
;
; In:   dest   = output buffer (one dword per produced frame)
;       src    = source data (r_size bytes)
;       r_buff = scratch buffer; its first 128 bytes hold the tail of
;                the previous call, new data is appended behind them
;       r_dt   = position step, 15 fractional bits
;       r_size = source size in bytes
;       r_end  = position at which conversion stops
; Out:  eax = number of bytes written to dest
; Regs: ebx, ecx, edx, esi, edi, mm0-mm4 are not preserved.
;-----------------------------------------------------------------------
proc resample_2 stdcall, dest:dword,src:dword,r_buff:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

           ; append the source behind the 128-byte history
           mov esi, [src]
           mov edi, [r_buff]
           mov ecx, [r_size]
           add edi, 32*4
           shr ecx, 2
           rep movsd

           mov edi, [dest]
           mov edx, [r_buff]
           mov ebx, [r_dt]
           mov eax, 16                  ; starting position (fixed point)
           emms

align 16
.next_frame:
           mov esi, eax
           mov ecx, eax
           shr esi, 15                  ; frame index
           and ecx, 0x7FFF              ; fraction
           lea esi, [edx+esi*4]

           movq mm0, [esi]              ; current frame and the following one
           movq mm1, mm0

           movd mm2, ecx
           punpcklwd mm2, mm2           ; fraction in the two low words
           movq mm3, qword [m7]         ; 0x8000 per the original comment

           psubw mm3, mm2               ; 0x8000 - fraction
           punpckldq mm3, mm2           ; weights: (1-f),(1-f),f,f

           pmulhw mm0, mm3              ; high halves of the products
           pmullw mm1, mm3              ; low halves of the products

           movq mm4, mm1
           punpcklwd mm1, mm0           ; 32-bit products of the first frame
           punpckhwd mm4, mm0           ; 32-bit products of the second frame
           paddd mm1, mm4
           psrad  mm1, 15
           packssdw mm1, mm1
           movd [edi], mm1
           add edi, 4

           add eax, ebx
           cmp eax, [r_end]
           jb .next_frame
           emms

           ; keep the last 128 source bytes as history for the next call
           mov esi, [src]
           add esi, [r_size]
           sub esi, 32*4
           mov edx, [r_buff]
           mov ecx, 32
.save_tail:
           mov ebx, [esi]
           mov [edx], ebx
           add esi, 4
           add edx, 4
           dec ecx
           jnz .save_tail

           sub edi, [dest]
           mov eax, edi                 ; bytes produced
           ret
endp
493
 
494
align 4
495
;-----------------------------------------------------------------------
; resample_28: MMX linear-interpolating resampler for sources with
;              2-byte frames (two 8-bit samples per frame); every byte
;              is widened to a 16-bit word before interpolation.
;
; In:   dest   = output buffer (one dword per produced frame)
;       src    = source data (r_size bytes)
;       r_buff = scratch buffer; its first 64 bytes hold the tail of
;                the previous call, new data is appended behind them
;       r_dt   = position step, 15 fractional bits
;       r_size = source size in bytes
;       r_end  = position at which conversion stops
; Out:  eax = number of bytes written to dest
; Regs: ebx, ecx, edx, esi, edi, mm0-mm4, mm6, mm7 are not preserved.
;-----------------------------------------------------------------------
proc resample_28 stdcall, dest:dword,src:dword,r_buff:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

           ; append the source behind the 64-byte history
           mov esi, [src]
           mov edi, [r_buff]
           mov ecx, [r_size]
           add edi, 32*2
           shr ecx, 2
           rep movsd

           mov edi, [dest]
           mov edx, [r_buff]
           mov ebx, [r_dt]
           mov eax, 16                  ; starting position (fixed point)
           emms
           movq mm7,[mm80]              ; bias subtracted from every byte
           movq mm6,[mm_mask]           ; mask applied after widening

align 16
.next_frame:
           mov esi, eax
           mov ecx, eax
           shr esi, 15                  ; frame index
           and ecx, 0x7FFF              ; fraction
           lea esi, [edx+esi*2]

           movq mm0, [esi]
           psubb mm0,mm7
           punpcklbw mm0,mm0            ; low four bytes -> four words
           pand mm0,mm6

           movq mm1, mm0

           movd mm2, ecx
           punpcklwd mm2, mm2           ; fraction in the two low words
           movq mm3, qword [m7]         ; 0x8000 per the original comment

           psubw mm3, mm2               ; 0x8000 - fraction
           punpckldq mm3, mm2           ; weights: (1-f),(1-f),f,f

           pmulhw mm0, mm3              ; high halves of the products
           pmullw mm1, mm3              ; low halves of the products

           movq mm4, mm1
           punpcklwd mm1, mm0           ; 32-bit products of the first frame
           punpckhwd mm4, mm0           ; 32-bit products of the second frame
           paddd mm1, mm4
           psrad  mm1, 15
           packssdw mm1, mm1
           movd [edi], mm1
           add edi, 4

           add eax, ebx
           cmp eax, [r_end]
           jb .next_frame
           emms

           ; keep the last 64 source bytes as history for the next call
           mov esi, [src]
           add esi, [r_size]
           sub esi, 32*2
           mov edx, [r_buff]
           mov ecx, 16
.save_tail:
           mov ebx, [esi]
           mov [edx], ebx
           add esi, 4
           add edx, 4
           dec ecx
           jnz .save_tail

           sub edi, [dest]
           mov eax, edi                 ; bytes produced
           ret
endp
569
 
570
 
571
;-----------------------------------------------------------------------
; m16_stereo: duplicate every 16-bit word of src into two adjacent
;             words of dest (via m16_s_mmx).
;
; In:   dest, src = buffers; r_size = source bytes, consumed in
;       256-byte steps.  r_buff, r_dt, r_end are not used.
; Out:  eax = 2 * r_size
; Regs: ecx, esi, edi, mm0, mm1 are not preserved.
;-----------------------------------------------------------------------
proc m16_stereo stdcall, dest:dword,src:dword,r_buff:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

           mov edi, [dest]
           mov esi, [src]
           mov ecx, [r_size]
           shr ecx, 8                   ; number of 256-byte source chunks
.chunk:
repeat 4                                ; 4 x (64 bytes in, 128 bytes out)
           call m16_s_mmx
           add edi, 128
           add esi, 64
end repeat
           dec ecx
           jnz .chunk

           mov eax, [r_size]
           add eax, eax
           ret
endp
598
 
599
align 4
600
;-----------------------------------------------------------------------
; s8_stereo: widen every source byte to a 16-bit word (via s8_s_mmx).
;
; In:   dest, src = buffers; r_size = source bytes, consumed in
;       128-byte steps.  r_buff, r_dt, r_end are not used.
; Out:  eax = 2 * r_size
; Regs: ecx, esi, edi, mm0, mm1, mm6, mm7 are not preserved.
;-----------------------------------------------------------------------
proc s8_stereo stdcall, dest:dword,src:dword,r_buff:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

           mov edi, [dest]
           mov esi, [src]
           mov ecx, [r_size]
           shr ecx, 7                   ; number of 128-byte source chunks

           movq mm7, [mm80]             ; constants used by s8_s_mmx
           movq mm6, [mm_mask]
.chunk:
repeat 4                                ; 4 x (32 bytes in, 64 bytes out)
           call s8_s_mmx
           add edi, 64
           add esi, 32
end repeat
           dec ecx
           jnz .chunk

           mov eax, [r_size]
           add eax, eax
           ret
endp
631
 
632
;-----------------------------------------------------------------------
; m8_stereo: widen every source byte to a 16-bit word and write that
;            word twice (via m8_s_mmx).
;
; In:   dest, src = buffers; r_size = source bytes, consumed in
;       64-byte steps.  r_buff, r_dt, r_end are not used.
; Out:  eax = 4 * r_size
; Regs: ecx, esi, edi, mm0-mm3, mm6, mm7 are not preserved.
;-----------------------------------------------------------------------
proc m8_stereo stdcall, dest:dword,src:dword,r_buff:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

           mov edi, [dest]
           mov esi, [src]
           mov ecx, [r_size]
           shr ecx, 6                   ; number of 64-byte source chunks

           movq mm7, [mm80]             ; constants used by m8_s_mmx
           movq mm6, [mm_mask]
.chunk:
repeat 4                                ; 4 x (16 bytes in, 64 bytes out)
           call m8_s_mmx
           add edi, 64
           add esi, 16
end repeat
           dec ecx
           jnz .chunk

           mov eax, [r_size]
           add eax, eax
           add eax, eax
           ret
endp
663
 
664
align 4
665
;-----------------------------------------------------------------------
; alloc_mix_buff: take one free 512-byte temporary buffer.
;
; A set bit n in mix_buff_map marks buffer n as free.  The lowest set
; bit is cleared and the matching address is returned.
; Out:  eax = [mix_buff] + n*512, or 0 when mix_buff_map is zero.
;-----------------------------------------------------------------------
proc alloc_mix_buff

           bsf eax, [mix_buff_map]      ; ZF=1 when no bit is set
           jz .none

           btr [mix_buff_map], eax      ; mark the buffer as used
           shl eax, 9                   ; index -> byte offset (512 each)
           add eax, [mix_buff]
           ret
.none:
           xor eax, eax
           ret
endp
677
 
678
;-----------------------------------------------------------------------
; m16_s_mmx: read 64 bytes at [esi] and write 128 bytes at [edi], each
;            16-bit word being stored twice in a row.
;
; In:   esi = source, edi = destination (both left unchanged)
; Regs: mm0, mm1 are not preserved.
;-----------------------------------------------------------------------
proc m16_s_mmx

repeat 8                                ; 8 source bytes per step
           movq    mm0, [esi+(%-1)*8]
           movq    mm1, mm0
           punpcklwd mm0, mm0           ; words 0,1 doubled
           punpckhwd mm1, mm1           ; words 2,3 doubled
           movq    [edi+(%-1)*16], mm0
           movq    [edi+(%-1)*16+8], mm1
end repeat

           ret
endp
739
 
740
align 4
741
;-----------------------------------------------------------------------
; s8_s_mmx: read 32 bytes at [esi] and write 64 bytes at [edi]; every
;           byte has mm7 subtracted, is widened to a word by
;           duplicating it, and is then masked with mm6.
;
; In:   esi = source, edi = destination (both left unchanged)
;       mm7 = per-byte bias, mm6 = per-word mask (loaded by the caller
;             from mm80 / mm_mask)
; Regs: mm0, mm1 are not preserved.
;-----------------------------------------------------------------------
proc s8_s_mmx

repeat 4                                ; 8 source bytes per step
           movq    mm0, [esi+(%-1)*8]
           psubb   mm0, mm7
           movq    mm1, mm0
           punpcklbw mm0, mm0           ; bytes 0..3 -> words
           pand mm0, mm6
           punpckhbw mm1, mm1           ; bytes 4..7 -> words
           pand mm1, mm6
           movq    [edi+(%-1)*16], mm0
           movq    [edi+(%-1)*16+8], mm1
end repeat

           ret

endp
786
 
787
align 4
788
;-----------------------------------------------------------------------
; m8_s_mmx: read 16 bytes at [esi] and write 64 bytes at [edi]; every
;           byte has mm7 subtracted, is widened to a word, masked with
;           mm6, and the resulting word is stored twice in a row.
;
; In:   esi = source, edi = destination (both left unchanged)
;       mm7 = per-byte bias, mm6 = per-word mask (loaded by the caller
;             from mm80 / mm_mask)
; Regs: mm0-mm3 are not preserved.
;-----------------------------------------------------------------------
proc m8_s_mmx

repeat 2                                ; 8 source bytes per step
           movq    mm0, [esi+(%-1)*8]
           psubb   mm0, mm7
           movq    mm1, mm0
           punpcklbw mm0, mm0           ; bytes 0..3 -> words
           pand mm0, mm6
           punpckhbw mm1, mm1           ; bytes 4..7 -> words
           pand mm1, mm6

           movq mm2, mm0
           punpcklwd mm0, mm0           ; double each word
           punpckhwd mm2, mm2

           movq mm3, mm1
           punpcklwd mm1, mm1
           punpckhwd mm3, mm3

           movq    [edi+(%-1)*32], mm0
           movq    [edi+(%-1)*32+8], mm2
           movq    [edi+(%-1)*32+16], mm1
           movq    [edi+(%-1)*32+24], mm3
end repeat

           ret
endp
832
 
833
 
834
align 4
835
;-----------------------------------------------------------------------
; mix_2_1: mix two 512-byte inputs into output, 128 bytes per call of
;          mix_2_1_mmx.
;
; In:   output = destination, str0/str1 = sources
; Regs: eax, ecx, edx, edi, mm0 are not preserved.
;-----------------------------------------------------------------------
proc mix_2_1 stdcall, output:dword, str0:dword, str1:dword

           mov edi, [output]

           stdcall mix_2_1_mmx, edi, [str0],[str1]
repeat 3                                ; remaining three 128-byte quarters
           add edi, 128
           add [str0], 128
           add [str1], 128
           stdcall mix_2_1_mmx, edi, [str0],[str1]
end repeat

           ret
endp
855
 
856
 
857
align 4
858
;-----------------------------------------------------------------------
; mix_3_1: mix three 512-byte inputs into output, 128 bytes per call of
;          mix_3_1_mmx.
;
; In:   output = destination, str0..str2 = sources
; Regs: eax, ebx, ecx, edx, edi, mm0 are not preserved.
;-----------------------------------------------------------------------
proc mix_3_1 stdcall, output:dword, str0:dword, str1:dword, str2:dword

           mov edi, [output]

           stdcall mix_3_1_mmx, edi, [str0],[str1],[str2]
repeat 3                                ; remaining three 128-byte quarters
           add edi, 128
           add [str0], 128
           add [str1], 128
           add [str2], 128
           stdcall mix_3_1_mmx, edi, [str0],[str1],[str2]
end repeat

           ret
endp
881
 
882
align 4
883
;-----------------------------------------------------------------------
; mix_4_1: mix four 512-byte inputs into a freshly allocated temporary
;          buffer.
;
; In:   str0..str3 = sources
; Out:  eax = address of the temporary buffer holding the mix, or 0
;             when alloc_mix_buff has no free buffer.
; Regs: ebx, ecx, edx, esi, edi, mm0, mm1 are not preserved.
;-----------------------------------------------------------------------
proc mix_4_1 stdcall, str0:dword, str1:dword,\
                      str2:dword, str3:dword

           local output:DWORD

           call alloc_mix_buff
           test eax, eax
           jz .done                     ; no buffer: return the 0 in eax
           mov [output], eax

           mov edi, eax

           stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3]
repeat 3                                ; remaining three 128-byte quarters
           add edi, 128
           add [str0], 128
           add [str1], 128
           add [str2], 128
           add [str3], 128
           stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3]
end repeat
           mov eax, [output]
.done:
           ret
endp
920
 
921
 
922
align 4
923
;-----------------------------------------------------------------------
; final_mix: mix four 512-byte inputs straight into output, 128 bytes
;            per call of mix_4_1_mmx.
;
; In:   output = destination, str0..str3 = sources
; Regs: eax, ebx, ecx, edx, esi, edi, mm0, mm1 are not preserved.
;-----------------------------------------------------------------------
proc final_mix stdcall, output:dword, str0:dword, str1:dword,\
                        str2:dword, str3:dword

           mov edi, [output]

           stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3]
repeat 3                                ; remaining three 128-byte quarters
           add edi, 128
           add [str0], 128
           add [str1], 128
           add [str2], 128
           add [str3], 128
           stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3]
end repeat

           ret
endp
950
 
951
align 4
952
;-----------------------------------------------------------------------
; mix_2_1_mmx: output[i] = saturate(str0[i] + str1[i]) for 64 signed
;              16-bit words (128 bytes).
;
; In:   output = destination, str0/str1 = sources
; Regs: eax, ecx, edx, mm0 are not preserved.
;-----------------------------------------------------------------------
proc mix_2_1_mmx stdcall, output:dword, str0:dword, str1:dword

        mov edx, [output]
        mov eax, [str0]
        mov ecx, [str1]

repeat 16                               ; 16 qwords = 128 bytes
        movq    mm0, [eax+(%-1)*8]
        paddsw  mm0, [ecx+(%-1)*8]      ; signed saturating add
        movq    [edx+(%-1)*8], mm0
end repeat

        ret
endp
1045
 
1046
align 4
1047
;-----------------------------------------------------------------------
; mix_3_1_mmx: output[i] = saturate(saturate(str0[i] + str1[i]) + str2[i])
;              for 64 signed 16-bit words (128 bytes).
;
; In:   output = destination, str0..str2 = sources
; Regs: eax, ebx, ecx, edx, mm0 are not preserved.
;-----------------------------------------------------------------------
proc mix_3_1_mmx stdcall, output:dword, str0:dword, str1:dword, str2:dword

        mov edx, [output]
        mov eax, [str0]
        mov ebx, [str1]
        mov ecx, [str2]

repeat 16                               ; 16 qwords = 128 bytes
        movq    mm0, [eax+(%-1)*8]
        paddsw  mm0, [ebx+(%-1)*8]      ; signed saturating adds
        paddsw  mm0, [ecx+(%-1)*8]
        movq    [edx+(%-1)*8], mm0
end repeat

        ret
endp
1136
 
1137
align 4
1138
;-----------------------------------------------------------------------
; mix_4_1_mmx: output[i] = saturate( saturate(str0[i] + str2[i])
;                                  + saturate(str1[i] + str3[i]) )
;              for 64 signed 16-bit words (128 bytes).
;
; In:   output = destination, str0..str3 = sources
; Regs: eax, ebx, ecx, edx, esi, mm0, mm1 are not preserved.
;-----------------------------------------------------------------------
proc mix_4_1_mmx stdcall, output:dword, str0:dword, str1:dword,\
                           str2:dword, str3:dword

        mov edx, [output]
        mov esi, [str0]
        mov eax, [str1]
        mov ebx, [str2]
        mov ecx, [str3]

repeat 16                               ; 16 qwords = 128 bytes
        movq    mm0, [esi+(%-1)*8]
        movq    mm1, [eax+(%-1)*8]
        paddsw  mm0, [ebx+(%-1)*8]      ; str0 + str2
        paddsw  mm1, [ecx+(%-1)*8]      ; str1 + str3
        paddsw  mm0, mm1
        movq    [edx+(%-1)*8], mm0
end repeat

        ret
endp
1261
 
1262
align 4
1263
;-----------------------------------------------------------------------
; copy_mem: copy 0x80 dwords (512 bytes) from input to output.
;
; Regs: eax, ecx, esi, edi are not preserved.
;-----------------------------------------------------------------------
proc copy_mem stdcall, output:dword, input:dword

           mov esi, [input]
           mov edi, [output]
           mov ecx, 0x80                ; dword count
.next:
           mov eax, [esi]
           add esi, 4
           mov [edi], eax
           add edi, 4
           dec ecx
           jnz .next

           ret
endp
1277
 
1278
;-----------------------------------------------------------------------
; memcpy: register-based dword copy.
;
; In:   esi = source, edi = destination, ecx = dword count
;       (the count is decremented before it is tested, so it must be
;       non-zero on entry)
; Out:  esi/edi advanced past the copied data, ecx = 0
; Regs: eax is not preserved.
;-----------------------------------------------------------------------
proc memcpy
.next:
           mov eax, [esi]
           add esi, 4
           mov [edi], eax
           add edi, 4
           dec ecx
           jnz .next
           ret
endp
1288