Subversion Repositories Kolibri OS

Rev

Rev 203 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
168 serge 1
;
2
;   This file is part of the Infinity sound library.
3
;   (C) copyright Serge 2006
4
;   email: infinity_sound@mail.ru
5
;
6
;   This program is free software; you can redistribute it and/or modify
7
;   it under the terms of the GNU General Public License as published by
8
;   the Free Software Foundation; either version 2 of the License, or
9
;   (at your option) any later version.
10
;
11
;   This program is distributed in the hope that it will be useful,
12
;   but WITHOUT ANY WARRANTY; without even the implied warranty of
13
;   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
;   GNU General Public License for more details.
15
 
16
align 4
17
;-----------------------------------------------------------------------
; new_mix(output)                                          [stdcall]
;
; Fills the buffer at [output] with 32 consecutive 512-byte blocks
; (16384 bytes).  For every block the current work_read pointer of each
; stream in play_list is stored into mix_input (and that stream's
; work_read is advanced by 512); the gathered inputs are then summed
; into the output block by copy_mem / mix_2_1 / mix_3_1 / final_mix.
; Whenever four inputs have been gathered and more streams remain,
; they are pre-mixed by mix_4_1 into a temporary buffer.
;
; If play_count is zero after prepare_playlist, the buffer is filled
; with 0x1000 zero dwords (the same 16384 bytes) instead.
;
; In:    output - destination address
; Clobb: eax, ebx, ecx, esi, edi at least (callees may clobber more)
;-----------------------------------------------------------------------
proc new_mix stdcall, output:dword
        locals
          mixCounter  dd ?      ; inputs gathered since the last 4:1 pre-mix
          mixIndex    dd ?      ; next free slot in mix_input
          streamIndex dd ?      ; next play_list entry to read
          inputCount  dd ?      ; streams not yet gathered for this block
          main_count  dd ?      ; 512-byte blocks still to produce
          blockCount  dd ?      ; not referenced in this procedure
          mix_out     dd ?      ; not referenced in this procedure
        endl

        call    prepare_playlist

        cmp     [play_count], 0
        je      .silence                ; nothing to play

        call    FpuSave
        mov     [main_count], 32

.next_block:
        mov     [mix_buff_map], 0x0000FFFF      ; all 16 map bits set again
        xor     eax, eax
        mov     [mixCounter], eax
        mov     [mixIndex], eax
        mov     [streamIndex], eax
        mov     ebx, [play_count]
        mov     [inputCount], ebx

.next_group:
        mov     ecx, 4                  ; gather at most four inputs per pass

.gather:
        mov     ebx, [streamIndex]
        mov     esi, [play_list+ebx*4]
        mov     eax, [esi+STREAM.work_read]     ; eax = this block's input
        add     [esi+STREAM.work_read], 512     ; consume it

        mov     ebx, [mixIndex]
        mov     [mix_input+ebx*4], eax
        inc     [mixCounter]
        inc     [mixIndex]
        inc     [streamIndex]
        dec     [inputCount]
        jz      .emit                   ; last stream gathered

        dec     ecx
        jnz     .gather

        cmp     [mixCounter], 4
        jnz     .emit

        ; four inputs pending and more streams to come:
        ; collapse mix_input[0..3] into one temporary buffer
        stdcall mix_4_1, [mix_input], [mix_input+4], [mix_input+8], [mix_input+12]
        sub     [mixIndex], 4
        mov     ebx, [mixIndex]
        mov     [mix_input+ebx*4], eax  ; result replaces the group
        inc     [mixIndex]
        mov     [mixCounter], 0

        cmp     [inputCount], 0
        jnz     .next_group

        ; NOTE(review): when a second group of four is completed by the
        ; very last stream (e.g. 8 streams) control arrives at .emit with
        ; mixIndex = 5 and final_mix reads only slots 0..3, so slot 4
        ; looks unmixed - confirm against the maximum stream count.
.emit:
        cmp     [mixIndex], 1
        jne     .not_one
        stdcall copy_mem, [output], [mix_input]
        jmp     .advance

.not_one:
        cmp     [mixIndex], 2
        jne     .not_two
        stdcall mix_2_1, [output], [mix_input], [mix_input+4]
        jmp     .advance

.not_two:
        cmp     [mixIndex], 3
        jne     .not_three
        stdcall mix_3_1, [output], [mix_input], [mix_input+4], [mix_input+8]
        jmp     .advance

.not_three:
        stdcall final_mix, [output], [mix_input], [mix_input+4], [mix_input+8], [mix_input+12]

.advance:
        add     [output], 512

        sub     [main_count], 1
        jnz     .next_block

        call    update_stream
        emms
        call    FpuRestore
        ret

.silence:
        mov     edi, [output]
        mov     ecx, 0x1000             ; 0x1000 dwords = 16384 bytes
        xor     eax, eax
        cld
        rep     stosd
        ret
endp
108
 
109
 
110
align 4
111
;-----------------------------------------------------------------------
; update_stream
;
; Walks play_list, one entry per remaining [play_count] (which is
; counted down to zero here).  For each stream:
;   * work_read is wrapped to work_buff once it has reached work_top;
;   * format PCM_2_16_48: 16384 bytes are copied from curr_seg to
;     work_write, curr_seg is wrapped to base at limit, and an event is
;     sent to notify_task when curr_seg lands on notify_off2 (offset 0)
;     or notify_off1 (offset 0x8000);
;   * any other format: work_count is reduced by 16384 and refill is
;     called when the result is not above 32768.
; If SendEvent returns 0, notify_task is overwritten with -1.
;
; Clobb: eax, ebx, ecx, edx, esi, edi at least
;-----------------------------------------------------------------------
proc update_stream
        locals
          stream_index  dd ?
          ev_code       dd ?    ; EVENT: code
          ev_offs       dd ?    ; EVENT: offset reported to the client
                        rd 4    ; EVENT: remaining dwords, left unset here
        endl

        mov     [stream_index], 0

.stream:
        mov     edx, [stream_index]
        mov     esi, [play_list+edx*4]

        ; wrap the read pointer
        mov     eax, [esi+STREAM.work_read]
        cmp     eax, [esi+STREAM.work_top]
        jb      .read_ok
        mov     eax, [esi+STREAM.work_buff]
.read_ok:
        mov     [esi+STREAM.work_read], eax

        cmp     [esi+STREAM.format], PCM_2_16_48
        je      .copy

        sub     [esi+STREAM.work_count], 16384

        cmp     [esi+STREAM.work_count], 32768
        ja      .next                   ; still enough buffered

        stdcall refill, esi

.next:
        inc     [stream_index]
        dec     [play_count]
        jnz     .stream
        ret

.copy:
        mov     ebx, esi                ; esi is needed for the copy
        mov     edi, [ebx+STREAM.work_write]
        cmp     edi, [ebx+STREAM.work_top]
        jb      .write_ok
        mov     edi, [ebx+STREAM.work_buff]
        mov     [ebx+STREAM.work_write], edi
.write_ok:
        mov     esi, [ebx+STREAM.curr_seg]
        mov     ecx, 16384/4
        cld
        rep     movsd

        mov     [ebx+STREAM.work_write], edi

        cmp     esi, [ebx+STREAM.limit]
        jb      .seg_ok
        mov     esi, [ebx+STREAM.base]
.seg_ok:
        mov     [ebx+STREAM.curr_seg], esi

        xor     ecx, ecx                ; event offset 0
        cmp     esi, [ebx+STREAM.notify_off2]
        je      .notify

        mov     ecx, 0x8000             ; event offset 0x8000
        cmp     esi, [ebx+STREAM.notify_off1]
        jne     .next

.notify:
        mov     [ev_code], 0xFF000001
        mov     [ev_offs], ecx
        mov     eax, [ebx+STREAM.notify_task]

        lea     edx, [ev_code]
        push    ebx                     ; keep the stream across the call
        stdcall SendEvent, eax, edx
        pop     ebx
        test    eax, eax
        jnz     .next

        not     eax                     ; eax was 0 -> -1
        mov     [ebx+STREAM.notify_task], eax
        jmp     .next
endp
201
 
202
align 4
203
;-----------------------------------------------------------------------
; refill(str)                                              [stdcall]
;
; Tops up the work buffer of stream `str`:
;   1. if work_write has run past work_top, the overshoot
;      (work_top .. work_write) is copied to work_buff and work_write
;      continues from the end of that copy;
;   2. the stream's resample routine is called with
;      (work_write, curr_seg, r_buff, r_dt, r_size, r_end); its return
;      value is added to both work_count and work_write;
;   3. curr_seg advances by r_size and wraps to base at limit;
;   4. when curr_seg lands on notify_off2 / notify_off1 an event with
;      offset 0 / 0x8000 is sent to notify_task; if SendEvent returns 0,
;      notify_task is overwritten with -1.
;
; DF is cleared on entry so the copy below runs forward.
;
; In:    str - STREAM pointer
; Out:   nothing the caller in this file relies on
; Clobb: eax, ebx, ecx, edx, esi, edi at least
;-----------------------------------------------------------------------
proc refill stdcall, str:dword
        locals
          ev_code       dd ?    ; EVENT: code
          ev_offs       dd ?    ; EVENT: offset reported to the client
                        rd 4    ; EVENT: remaining dwords, left unset here
        endl

        cld                             ; string copies must run forward

        mov     ebx, [str]
        mov     ecx, [ebx+STREAM.work_write]
        cmp     ecx, [ebx+STREAM.work_top]
        jbe     .resample

        ; move the overshoot past work_top to the start of the buffer
        mov     esi, [ebx+STREAM.work_top]
        sub     ecx, esi
        mov     edi, [ebx+STREAM.work_buff]
        shr     ecx, 2
        rep     movsd

        mov     [ebx+STREAM.work_write], edi

.resample:
        mov     esi, [ebx+STREAM.curr_seg]
        mov     edi, [ebx+STREAM.work_write]
        mov     edx, [ebx+STREAM.r_buff]

        stdcall [ebx+STREAM.resample], edi, esi, edx,\
                [ebx+STREAM.r_dt], [ebx+STREAM.r_size], [ebx+STREAM.r_end]

        mov     ebx, [str]              ; the resamplers overwrite ebx

        add     [ebx+STREAM.work_count], eax
        add     [ebx+STREAM.work_write], eax

        mov     eax, [ebx+STREAM.curr_seg]
        add     eax, [ebx+STREAM.r_size]
        cmp     eax, [ebx+STREAM.limit]
        jb      .seg_ok
        mov     eax, [ebx+STREAM.base]
.seg_ok:
        mov     [ebx+STREAM.curr_seg], eax

        xor     ecx, ecx                ; event offset 0
        cmp     eax, [ebx+STREAM.notify_off2]
        je      .notify

        mov     ecx, 0x8000             ; event offset 0x8000
        cmp     eax, [ebx+STREAM.notify_off1]
        je      .notify

        ret

.notify:
        mov     [ev_code], 0xFF000001
        mov     [ev_offs], ecx
        mov     eax, [ebx+STREAM.notify_task]

        lea     edx, [ev_code]
        push    ebx                     ; keep the stream across the call
        stdcall SendEvent, eax, edx
        pop     ebx
        test    eax, eax
        jnz     .done

        not     eax                     ; eax was 0 -> -1
        mov     [ebx+STREAM.notify_task], eax
.done:
        ret
endp
267
 
268
align 4
269
;-----------------------------------------------------------------------
; resample_1(dest, src, r_buff, r_dt, r_size, r_end)       [stdcall]
;
; Linear-interpolating resampler for 16-bit single-channel input.
; r_size bytes from src are appended after the first 64 bytes of r_buff.
; A fixed-point position (low 15 bits = fraction, upper bits = sample
; index) then steps from 16 by r_dt until it reaches r_end; each step
; blends samples [i] and [i+1], clamps to -32768..32767 and stores the
; result in both halves of one output dword.  Finally the last 64 bytes
; of src are saved to the start of r_buff for the next call.
;
; ebp is used as a scratch register inside the loop, so r_dt and r_end
; are read relative to esp there (assumes the `proc` prologue leaves
; esp equal to the frame pointer; nothing is pushed in this procedure).
;
; Out:   eax = number of bytes written to dest
; Clobb: ebx, ecx, edx, esi, edi
;-----------------------------------------------------------------------
proc resample_1 stdcall, dest:dword,src:dword,r_buff:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

        ; append the new input behind the saved history
        mov     edi, [r_buff]
        add     edi, 32*2
        mov     esi, [src]
        mov     ecx, [r_size]
        shr     ecx, 2
        cld                             ; make sure the copy runs forward
        rep     movsd

        mov     edi, [dest]
        mov     edx, [r_buff]
        mov     eax, 16                 ; starting position

align 16
.sample:
        mov     ecx, eax
        mov     esi, eax
        and     ecx, 0x7FFF             ; ecx = fraction
        shr     esi, 15                 ; esi = sample index
        lea     esi, [edx+esi*2]

        movsx   ebp, word [esi]         ; ebp = s[i]   (frame pointer lost)
        movsx   esi, word [esi+2]       ; esi = s[i+1]
        mov     ebx, 32768
        imul    esi, ecx                ; s[i+1] * frac
        sub     ebx, ecx
        imul    ebx, ebp                ; s[i] * (32768 - frac)
        lea     ecx, [ebx+esi+16384]    ; sum + rounding term
        sar     ecx, 15
        cmp     ecx, 32767
        jle     .check_low
        mov     ecx, 32767
        jmp     .write
.check_low:
        cmp     ecx, -32768
        jge     .write
        mov     ecx, -32768
.write:
        mov     ebx, ecx
        shl     ebx, 16
        mov     bx, cx                  ; same sample in both halves
        mov     [edi], ebx
        add     edi, 4

        add     eax, [esp+20]           ; r_dt
        cmp     eax, [esp+28]           ; r_end
        jb      .sample

        mov     ebp, esp                ; restore the frame pointer

        ; keep the last 64 input bytes as history
        mov     esi, [src]
        add     esi, [r_size]
        sub     esi, 32*2
        mov     edx, [r_buff]
        mov     ecx, 16
.save_tail:
        mov     ebx, [esi]
        mov     [edx], ebx
        add     esi, 4
        add     edx, 4
        dec     ecx
        jnz     .save_tail

        sub     edi, [dest]
        mov     eax, edi
        ret
endp
337
 
338
align 4
339
;-----------------------------------------------------------------------
; resample_18(dest, src, r_buff, r_dt, r_size, r_end)      [stdcall]
;
; Linear-interpolating resampler for 8-bit single-channel input.
; r_size bytes from src are appended after the first 32 bytes of r_buff.
; A fixed-point position (low 15 bits = fraction, upper bits = byte
; index) steps from 16 by r_dt until it reaches r_end.  Each pair of
; neighbouring bytes has 0x80 subtracted, is sign-extended and shifted
; left by 8, blended, clamped to -32768..32767 and stored in both
; halves of one output dword.  The last 32 bytes of src are then saved
; to the start of r_buff for the next call.
;
; ebp is used as a scratch register inside the loop, so r_dt and r_end
; are read relative to esp there (assumes the `proc` prologue leaves
; esp equal to the frame pointer; nothing is pushed in this procedure).
;
; Out:   eax = number of bytes written to dest
; Clobb: ebx, ecx, edx, esi, edi
;-----------------------------------------------------------------------
proc resample_18 stdcall, dest:dword,src:dword,r_buff:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

        ; append the new input behind the saved history
        mov     edi, [r_buff]
        add     edi, 32
        mov     esi, [src]
        mov     ecx, [r_size]
        shr     ecx, 2
        cld                             ; make sure the copy runs forward
        rep     movsd

        mov     edi, [dest]
        mov     edx, [r_buff]
        mov     esi, 16                 ; starting position

align 16
.sample:
        mov     ecx, esi
        mov     eax, esi
        and     ecx, 0x7FFF             ; ecx = fraction
        shr     eax, 15                 ; eax = byte index
        lea     eax, [edx+eax]

        mov     bx, word [eax]          ; bl = s[i], bh = s[i+1]
        sub     bh, 0x80
        sub     bl, 0x80
        movsx   eax, bh
        shl     eax, 8                  ; eax = s[i+1] scaled to 16 bits
        movsx   ebp, bl                 ; (frame pointer lost)
        shl     ebp, 8                  ; ebp = s[i] scaled to 16 bits
        mov     ebx, 32768
        imul    eax, ecx                ; s[i+1] * frac
        sub     ebx, ecx
        imul    ebx, ebp                ; s[i] * (32768 - frac)
        lea     ecx, [ebx+eax+16384]    ; sum + rounding term
        sar     ecx, 15
        cmp     ecx, 32767
        jle     .check_low
        mov     ecx, 32767
        jmp     .write
.check_low:
        cmp     ecx, -32768
        jge     .write
        mov     ecx, -32768
.write:
        mov     ebx, ecx
        shl     ebx, 16
        mov     bx, cx                  ; same sample in both halves
        mov     [edi], ebx
        add     edi, 4

        add     esi, [esp+20]           ; r_dt
        cmp     esi, [esp+28]           ; r_end
        jb      .sample

        mov     ebp, esp                ; restore the frame pointer

        ; keep the last 32 input bytes as history
        mov     esi, [src]
        add     esi, [r_size]
        sub     esi, 32
        mov     edx, [r_buff]
        mov     ecx, 8
.save_tail:
        mov     ebx, [esi]
        mov     [edx], ebx
        add     esi, 4
        add     edx, 4
        dec     ecx
        jnz     .save_tail

        sub     edi, [dest]
        mov     eax, edi
        ret
endp
412
 
413
align 4
414
;-----------------------------------------------------------------------
; copy_stream(dest, src, r_buff, r_dt, r_size, r_end)      [stdcall]
;
; Same parameter list as the resample_* routines but performs a plain
; copy of r_size/4 dwords from src to dest.  r_buff, r_dt and r_end are
; not used.
;
; Out:   eax = 16384 (constant, independent of r_size)
; Clobb: ecx, esi, edi
;-----------------------------------------------------------------------
proc copy_stream stdcall, dest:dword,src:dword,r_buff:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

        mov     ecx, [r_size]
        shr     ecx, 2                  ; bytes -> dwords
        mov     esi, [src]
        mov     edi, [dest]
        cld                             ; make sure the copy runs forward
        rep     movsd
        mov     eax, 16384
        ret
endp
426
 
427
align 4
428
;-----------------------------------------------------------------------
; resample_2(dest, src, r_buff, r_dt, r_size, r_end)       [stdcall]
;
; MMX linear-interpolating resampler for input made of 4-byte frames
; (two 16-bit words each).  r_size bytes from src are appended after the
; first 128 bytes of r_buff.  A fixed-point position (low 15 bits =
; fraction, upper bits = frame index) steps from 16 by r_dt until it
; reaches r_end; each step blends frame [i] with frame [i+1] and writes
; one 4-byte frame.  The last 128 bytes of src are then saved to the
; start of r_buff for the next call.
;
; Uses the qword constant m7 (per the original comment, 0x8000).
; NOTE(review): if m7 really is 0x8000 per word, a fraction of exactly 0
; gives a first weight of 0x8000, which the signed multiply pair treats
; as -32768 - confirm this case cannot invert a sample.
;
; Out:   eax = number of bytes written to dest
; Clobb: ebx, ecx, edx, esi, edi, mm0-mm4 (emms is executed on exit)
;-----------------------------------------------------------------------
proc resample_2 stdcall, dest:dword,src:dword,r_buff:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

        ; append the new input behind the saved history
        mov     edi, [r_buff]
        add     edi, 32*4
        mov     esi, [src]
        mov     ecx, [r_size]
        shr     ecx, 2
        cld                             ; make sure the copy runs forward
        rep     movsd

        mov     edx, [r_buff]
        mov     edi, [dest]
        mov     ebx, [r_dt]
        mov     eax, 16                 ; starting position
        emms

align 16
.frame:
        mov     ecx, eax
        mov     esi, eax
        and     ecx, 0x7FFF             ; ecx = fraction
        shr     esi, 15                 ; esi = frame index
        lea     esi, [edx+esi*4]

        movq    mm0, [esi]              ; frame[i] | frame[i+1]
        movq    mm1, mm0

        movd    mm2, ecx
        punpcklwd mm2, mm2              ; fraction in the two low words
        movq    mm3, qword [m7]

        psubw   mm3, mm2                ; m7 - fraction
        punpckldq mm3, mm2              ; weights: low dword for [i], high for [i+1]

        pmulhw  mm0, mm3                ; high halves of the products
        pmullw  mm1, mm3                ; low halves of the products

        movq    mm4, mm1
        punpcklwd mm1, mm0              ; 32-bit products of frame[i]
        punpckhwd mm4, mm0              ; 32-bit products of frame[i+1]
        paddd   mm1, mm4
        psrad   mm1, 15
        packssdw mm1, mm1               ; back to saturated 16-bit words
        movd    [edi], mm1
        add     edi, 4

        add     eax, ebx
        cmp     eax, [r_end]
        jb      .frame
        emms

        ; keep the last 128 input bytes as history
        mov     esi, [src]
        add     esi, [r_size]
        sub     esi, 32*4
        mov     edx, [r_buff]
        mov     ecx, 32
.save_tail:
        mov     ebx, [esi]
        mov     [edx], ebx
        add     esi, 4
        add     edx, 4
        dec     ecx
        jnz     .save_tail

        sub     edi, [dest]
        mov     eax, edi
        ret
endp
496
 
497
align 4
498
;-----------------------------------------------------------------------
; resample_28(dest, src, r_buff, r_dt, r_size, r_end)      [stdcall]
;
; MMX linear-interpolating resampler for input made of 2-byte frames
; (two 8-bit samples each).  r_size bytes from src are appended after
; the first 64 bytes of r_buff.  A fixed-point position (low 15 bits =
; fraction, upper bits = frame index) steps from 16 by r_dt until it
; reaches r_end.  Each step loads the bytes at frame [i], subtracts
; mm80, widens every byte to a word masked with mm_mask, blends frame
; [i] with frame [i+1] and writes one 4-byte frame.  The last 64 bytes
; of src are then saved to the start of r_buff for the next call.
;
; Uses the qword constants mm80, mm_mask and m7 defined elsewhere
; (presumably the 0x80 bias, a high-byte mask and 0x8000 - confirm).
;
; Out:   eax = number of bytes written to dest
; Clobb: ebx, ecx, edx, esi, edi, mm0-mm4, mm6, mm7 (emms on exit)
;-----------------------------------------------------------------------
proc resample_28 stdcall, dest:dword,src:dword,r_buff:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

        ; append the new input behind the saved history
        mov     edi, [r_buff]
        add     edi, 32*2
        mov     esi, [src]
        mov     ecx, [r_size]
        shr     ecx, 2
        cld                             ; make sure the copy runs forward
        rep     movsd

        mov     edx, [r_buff]
        mov     edi, [dest]
        mov     ebx, [r_dt]
        mov     eax, 16                 ; starting position
        emms
        movq    mm7, [mm80]
        movq    mm6, [mm_mask]

align 16
.frame:
        mov     ecx, eax
        mov     esi, eax
        and     ecx, 0x7FFF             ; ecx = fraction
        shr     esi, 15                 ; esi = frame index
        lea     esi, [edx+esi*2]

        movq    mm0, [esi]
        psubb   mm0, mm7
        punpcklbw mm0, mm0              ; each low byte doubled into a word
        pand    mm0, mm6                ; frame[i] | frame[i+1] as words

        movq    mm1, mm0

        movd    mm2, ecx
        punpcklwd mm2, mm2              ; fraction in the two low words
        movq    mm3, qword [m7]

        psubw   mm3, mm2                ; m7 - fraction
        punpckldq mm3, mm2              ; weights: low dword for [i], high for [i+1]

        pmulhw  mm0, mm3                ; high halves of the products
        pmullw  mm1, mm3                ; low halves of the products

        movq    mm4, mm1
        punpcklwd mm1, mm0              ; 32-bit products of frame[i]
        punpckhwd mm4, mm0              ; 32-bit products of frame[i+1]
        paddd   mm1, mm4
        psrad   mm1, 15
        packssdw mm1, mm1               ; back to saturated 16-bit words
        movd    [edi], mm1
        add     edi, 4

        add     eax, ebx
        cmp     eax, [r_end]
        jb      .frame
        emms

        ; keep the last 64 input bytes as history
        mov     esi, [src]
        add     esi, [r_size]
        sub     esi, 32*2
        mov     edx, [r_buff]
        mov     ecx, 16
.save_tail:
        mov     ebx, [esi]
        mov     [edx], ebx
        add     esi, 4
        add     edx, 4
        dec     ecx
        jnz     .save_tail

        sub     edi, [dest]
        mov     eax, edi
        ret
endp
572
 
573
 
574
;-----------------------------------------------------------------------
; m16_stereo(dest, src, r_buff, r_dt, r_size, r_end)       [stdcall]
;
; Expands 16-bit input by duplicating every word (via m16_s_mmx).
; Runs r_size/256 iterations; each converts 256 source bytes into
; 512 destination bytes.  r_buff, r_dt and r_end are not used.
;
; Out:   eax = r_size * 2
; Clobb: ecx, esi, edi, mm0, mm1
;-----------------------------------------------------------------------
proc m16_stereo stdcall, dest:dword,src:dword,r_buff:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

        mov     esi, [src]
        mov     edi, [dest]
        mov     ecx, [r_size]
        shr     ecx, 8                  ; 256 source bytes per iteration

.chunk:
repeat 4                                ; assembled four times, unrolled
        call    m16_s_mmx               ; 64 bytes in -> 128 bytes out
        add     edi, 128
        add     esi, 64
end repeat
        dec     ecx
        jnz     .chunk

        mov     eax, [r_size]
        add     eax, eax
        ret
endp
601
 
602
align 4
603
;-----------------------------------------------------------------------
; s8_stereo(dest, src, r_buff, r_dt, r_size, r_end)        [stdcall]
;
; Widens 8-bit input to 16-bit words (via s8_s_mmx).  Runs r_size/128
; iterations; each converts 128 source bytes into 256 destination
; bytes.  mm7 and mm6 are loaded from mm80 and mm_mask for the helper.
; r_buff, r_dt and r_end are not used.
;
; Out:   eax = r_size * 2
; Clobb: ecx, esi, edi, mm0, mm1, mm6, mm7
;-----------------------------------------------------------------------
proc s8_stereo stdcall, dest:dword,src:dword,r_buff:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

        mov     esi, [src]
        mov     edi, [dest]
        mov     ecx, [r_size]
        shr     ecx, 7                  ; 128 source bytes per iteration

        movq    mm7, [mm80]
        movq    mm6, [mm_mask]

.chunk:
repeat 4                                ; assembled four times, unrolled
        call    s8_s_mmx                ; 32 bytes in -> 64 bytes out
        add     edi, 64
        add     esi, 32
end repeat
        dec     ecx
        jnz     .chunk

        mov     eax, [r_size]
        add     eax, eax
        ret
endp
634
 
635
;-----------------------------------------------------------------------
; m8_stereo(dest, src, r_buff, r_dt, r_size, r_end)        [stdcall]
;
; Widens 8-bit input to 16-bit words and duplicates every word (via
; m8_s_mmx).  Runs r_size/64 iterations; each converts 64 source bytes
; into 256 destination bytes.  mm7 and mm6 are loaded from mm80 and
; mm_mask for the helper.  r_buff, r_dt and r_end are not used.
;
; Out:   eax = r_size * 4
; Clobb: ecx, esi, edi, mm0-mm3, mm6, mm7
;-----------------------------------------------------------------------
proc m8_stereo stdcall, dest:dword,src:dword,r_buff:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

        mov     esi, [src]
        mov     edi, [dest]
        mov     ecx, [r_size]
        shr     ecx, 6                  ; 64 source bytes per iteration

        movq    mm7, [mm80]
        movq    mm6, [mm_mask]

.chunk:
repeat 4                                ; assembled four times, unrolled
        call    m8_s_mmx                ; 16 bytes in -> 64 bytes out
        add     edi, 64
        add     esi, 16
end repeat
        dec     ecx
        jnz     .chunk

        mov     eax, [r_size]
        add     eax, eax
        add     eax, eax
        ret
endp
666
 
667
align 4
668
;-----------------------------------------------------------------------
; alloc_mix_buff
;
; Takes the lowest set bit of mix_buff_map, clears it and returns the
; address of the matching 512-byte slot inside the area at [mix_buff].
;
; Out:   eax = mix_buff + index*512, or 0 when no bit is set
;-----------------------------------------------------------------------
proc alloc_mix_buff

        bsf     eax, [mix_buff_map]     ; ZF = 1 when the map is empty
        jz      .none

        btr     [mix_buff_map], eax     ; mark the slot as taken
        shl     eax, 9                  ; index -> byte offset (512 each)
        add     eax, [mix_buff]
        ret

.none:
        xor     eax, eax
        ret
endp
680
 
681
;-----------------------------------------------------------------------
; m16_s_mmx
;
; Reads 64 bytes at esi and writes 128 bytes at edi, storing every
; 16-bit source word twice in a row.
;
; In:    esi = source, edi = destination (both left unchanged)
; Clobb: mm0, mm1
;-----------------------------------------------------------------------
proc m16_s_mmx

repeat 8                                ; % = 1..8, one source qword each
        movq    mm0, [esi+(%-1)*8]
        movq    mm1, mm0
        punpcklwd mm0, mm0              ; words 0,1 doubled
        punpckhwd mm1, mm1              ; words 2,3 doubled
        movq    [edi+(%-1)*16], mm0
        movq    [edi+(%-1)*16+8], mm1
end repeat

        ret
endp
742
 
743
align 4
744
;-----------------------------------------------------------------------
; s8_s_mmx
;
; Reads 32 bytes at esi and writes 64 bytes at edi.  Every source byte
; has the matching byte of mm7 subtracted, is doubled into a 16-bit
; word and then masked with mm6.
;
; In:    esi = source, edi = destination (both left unchanged)
;        mm7, mm6 = constants set up by the caller (s8_stereo loads
;                   them from mm80 and mm_mask)
; Clobb: mm0, mm1
;-----------------------------------------------------------------------
proc s8_s_mmx

repeat 4                                ; % = 1..4, one source qword each
        movq    mm0, [esi+(%-1)*8]
        psubb   mm0, mm7
        movq    mm1, mm0
        punpcklbw mm0, mm0              ; bytes 0..3 -> words
        pand    mm0, mm6
        punpckhbw mm1, mm1              ; bytes 4..7 -> words
        pand    mm1, mm6
        movq    [edi+(%-1)*16], mm0
        movq    [edi+(%-1)*16+8], mm1
end repeat

        ret

endp
789
 
790
align 4
791
;-----------------------------------------------------------------------
; m8_s_mmx
;
; Reads 16 bytes at esi and writes 64 bytes at edi.  Every source byte
; has the matching byte of mm7 subtracted, is doubled into a 16-bit
; word, masked with mm6, and the resulting word is stored twice.
;
; In:    esi = source, edi = destination (both left unchanged)
;        mm7, mm6 = constants set up by the caller (m8_stereo loads
;                   them from mm80 and mm_mask)
; Clobb: mm0-mm3
;-----------------------------------------------------------------------
proc m8_s_mmx

repeat 2                                ; % = 1..2, one source qword each
        movq    mm0, [esi+(%-1)*8]
        psubb   mm0, mm7
        movq    mm1, mm0
        punpcklbw mm0, mm0              ; bytes 0..3 -> words
        pand    mm0, mm6
        punpckhbw mm1, mm1              ; bytes 4..7 -> words
        pand    mm1, mm6
        movq    mm2, mm0
        punpcklwd mm0, mm0              ; words 0,1 doubled
        punpckhwd mm2, mm2              ; words 2,3 doubled

        movq    mm3, mm1
        punpcklwd mm1, mm1              ; words 4,5 doubled
        punpckhwd mm3, mm3              ; words 6,7 doubled

        movq    [edi+(%-1)*32], mm0
        movq    [edi+(%-1)*32+8], mm2
        movq    [edi+(%-1)*32+16], mm1
        movq    [edi+(%-1)*32+24], mm3
end repeat

        ret
endp
835
 
836
 
837
align 4
838
;-----------------------------------------------------------------------
; mix_2_1(output, str0, str1)                              [stdcall]
;
; Mixes 512 bytes of two inputs into output as four 128-byte
; mix_2_1_mmx calls.  edi tracks the output position and must survive
; the helper (mix_2_1_mmx only writes eax, ecx, edx and MMX registers).
; The str0/str1 argument slots are advanced in place.
;
; Clobb: eax, ecx, edx, edi, mm0-mm3
;-----------------------------------------------------------------------
proc mix_2_1 stdcall, output:dword, str0:dword, str1:dword

        mov     edi, [output]

        stdcall mix_2_1_mmx, edi, [str0], [str1]
repeat 3                                ; three more 128-byte pieces, unrolled
        add     edi, 128
        add     [str0], 128
        add     [str1], 128
        stdcall mix_2_1_mmx, edi, [str0], [str1]
end repeat

        ret
endp
858
 
859
 
860
align 4
861
;-----------------------------------------------------------------------
; mix_3_1(output, str0, str1, str2)                        [stdcall]
;
; Mixes 512 bytes of three inputs into output as four 128-byte
; mix_3_1_mmx calls.  edi tracks the output position and must survive
; the helper (mix_3_1_mmx only writes eax, ebx, ecx, edx and MMX
; registers).  The str0..str2 argument slots are advanced in place.
;
; Clobb: eax, ebx, ecx, edx, edi, mm0-mm3
;-----------------------------------------------------------------------
proc mix_3_1 stdcall, output:dword, str0:dword, str1:dword, str2:dword

        mov     edi, [output]

        stdcall mix_3_1_mmx, edi, [str0], [str1], [str2]
repeat 3                                ; three more 128-byte pieces, unrolled
        add     edi, 128
        add     [str0], 128
        add     [str1], 128
        add     [str2], 128
        stdcall mix_3_1_mmx, edi, [str0], [str1], [str2]
end repeat

        ret
endp
884
 
885
align 4
886
;-----------------------------------------------------------------------
; mix_4_1(str0, str1, str2, str3)                          [stdcall]
;
; Obtains a 512-byte temporary buffer from alloc_mix_buff and mixes
; 512 bytes of the four inputs into it as four 128-byte mix_4_1_mmx
; calls.  edi tracks the output position and must survive the helper
; (mix_4_1_mmx only writes eax, ebx, ecx, edx, esi and MMX registers).
; The str0..str3 argument slots are advanced in place.
;
; Out:   eax = address of the mixed buffer, or 0 when no buffer is free
; Clobb: ebx, ecx, edx, esi, edi, mm0-mm3
;-----------------------------------------------------------------------
proc mix_4_1 stdcall, str0:dword, str1:dword,\
                      str2:dword, str3:dword

        local output:DWORD

        call    alloc_mix_buff
        test    eax, eax
        jz      .no_buffer
        mov     [output], eax           ; remembered for the return value

        mov     edi, eax

        stdcall mix_4_1_mmx, edi, [str0], [str1], [str2], [str3]
repeat 3                                ; three more 128-byte pieces, unrolled
        add     edi, 128
        add     [str0], 128
        add     [str1], 128
        add     [str2], 128
        add     [str3], 128
        stdcall mix_4_1_mmx, edi, [str0], [str1], [str2], [str3]
end repeat
        mov     eax, [output]
        ret

.no_buffer:
        xor     eax, eax
        ret
endp
923
 
924
 
925
align 4
926
;-----------------------------------------------------------------------
; final_mix(output, str0, str1, str2, str3)                [stdcall]
;
; Mixes 512 bytes of four inputs straight into output as four 128-byte
; mix_4_1_mmx calls.  edi tracks the output position and must survive
; the helper (mix_4_1_mmx only writes eax, ebx, ecx, edx, esi and MMX
; registers).  The str0..str3 argument slots are advanced in place.
;
; Clobb: eax, ebx, ecx, edx, esi, edi, mm0-mm3
;-----------------------------------------------------------------------
proc final_mix stdcall, output:dword, str0:dword, str1:dword,\
                        str2:dword, str3:dword

        mov     edi, [output]

        stdcall mix_4_1_mmx, edi, [str0], [str1], [str2], [str3]
repeat 3                                ; three more 128-byte pieces, unrolled
        add     edi, 128
        add     [str0], 128
        add     [str1], 128
        add     [str2], 128
        add     [str3], 128
        stdcall mix_4_1_mmx, edi, [str0], [str1], [str2], [str3]
end repeat

        ret
endp
953
 
954
align 4
955
;-----------------------------------------------------------------------
; mix_2_1_mmx(output, str0, str1)                          [stdcall]
;
; Adds 128 bytes of str0 and str1 as 16-bit words with signed
; saturation (paddsw) and stores the 128-byte result at output.
;
; Clobb: eax, ecx, edx, mm0-mm3
;-----------------------------------------------------------------------

; one qword: [edx+offs] = [eax+offs] +sat [ecx+offs], using `reg`
macro mix2_qword reg, offs
{
        movq    reg, [eax+offs]
        paddsw  reg, [ecx+offs]
        movq    [edx+offs], reg
}

proc mix_2_1_mmx stdcall, output:dword, str0:dword, str1:dword

        mov     edx, [output]
        mov     eax, [str0]
        mov     ecx, [str1]

repeat 4                                ; 4 x 32 bytes, registers rotate
        mix2_qword mm0, (%-1)*32
        mix2_qword mm1, (%-1)*32+8
        mix2_qword mm2, (%-1)*32+16
        mix2_qword mm3, (%-1)*32+24
end repeat

        ret
endp

purge mix2_qword
1048
 
1049
align 4
1050
;-----------------------------------------------------------------------
; mix_3_1_mmx(output, str0, str1, str2)                    [stdcall]
;
; Adds 128 bytes of str0, str1 and str2 as 16-bit words with signed
; saturation (paddsw) and stores the 128-byte result at output.
;
; Clobb: eax, ebx, ecx, edx, mm0-mm3
;-----------------------------------------------------------------------

; one qword: [edx+offs] = [eax+offs] +sat [ebx+offs] +sat [ecx+offs]
macro mix3_qword reg, offs
{
        movq    reg, [eax+offs]
        paddsw  reg, [ebx+offs]
        paddsw  reg, [ecx+offs]
        movq    [edx+offs], reg
}

proc mix_3_1_mmx stdcall, output:dword, str0:dword, str1:dword, str2:dword

        mov     edx, [output]
        mov     eax, [str0]
        mov     ebx, [str1]
        mov     ecx, [str2]

repeat 4                                ; 4 x 32 bytes, registers rotate
        mix3_qword mm0, (%-1)*32
        mix3_qword mm1, (%-1)*32+8
        mix3_qword mm2, (%-1)*32+16
        mix3_qword mm3, (%-1)*32+24
end repeat

        ret
endp

purge mix3_qword
1139
 
1140
align 4
1141
;-----------------------------------------------------------------------
; mix_4_1_mmx(output, str0, str1, str2, str3)              [stdcall]
;
; Adds 128 bytes of str0..str3 as 16-bit words with signed saturation
; (paddsw) and stores the 128-byte result at output.  Per qword the sum
; is formed as (str0 + str2) + (str1 + str3), saturating at each step.
;
; Clobb: eax, ebx, ecx, edx, esi, mm0-mm3
;-----------------------------------------------------------------------

; one qword: [edx+offs] = ([esi]+[ebx]) +sat ([eax]+[ecx]), via ra/rb
macro mix4_qword ra, rb, offs
{
        movq    ra, [esi+offs]
        movq    rb, [eax+offs]
        paddsw  ra, [ebx+offs]
        paddsw  rb, [ecx+offs]
        paddsw  ra, rb
        movq    [edx+offs], ra
}

proc mix_4_1_mmx stdcall, output:dword, str0:dword, str1:dword,\
                           str2:dword, str3:dword

        mov     edx, [output]
        mov     esi, [str0]
        mov     eax, [str1]
        mov     ebx, [str2]
        mov     ecx, [str3]

repeat 5                                ; offsets 0..72, register pairs alternate
        mix4_qword mm0, mm1, (%-1)*16
        mix4_qword mm2, mm3, (%-1)*16+8
end repeat

repeat 6                                ; offsets 80..120, mm2/mm3 only
        mix4_qword mm2, mm3, 80+(%-1)*8
end repeat

        ret
endp

purge mix4_qword
1264
 
1265
align 4
1266
;-----------------------------------------------------------------------
; copy_mem(output, input)                                  [stdcall]
;
; Copies 0x80 dwords (512 bytes) from input to output.
;
; Clobb: eax, ecx, esi, edi, flags
;-----------------------------------------------------------------------
proc copy_mem stdcall, output:dword, input:dword

        mov     edi, [output]
        mov     esi, [input]
        mov     ecx, 0x80               ; dword count
.copy:
        mov     eax, [esi]
        mov     [edi], eax
        add     esi, 4
        add     edi, 4
        dec     ecx                     ; dec/jnz instead of legacy `loop`
        jnz     .copy

        ret
endp
1280
 
1281
;-----------------------------------------------------------------------
; memcpy  (register arguments)
;
; Copies ecx dwords from [esi] to [edi].  A count of zero copies
; nothing.
;
; In:    esi = source, edi = destination, ecx = number of dwords
; Out:   esi, edi advanced past the copied data; ecx = 0
; Clobb: eax, flags
;-----------------------------------------------------------------------
proc memcpy
        test    ecx, ecx
        jz      .done                   ; avoid wrapping a zero count
.copy:
        mov     eax, [esi]
        mov     [edi], eax
        add     esi, 4
        add     edi, 4
        dec     ecx
        jnz     .copy
.done:
        ret
endp
1291