Subversion Repositories Kolibri OS

Rev

Rev 188 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
168 serge 1
;
2
;   This file is part of the Infinity sound library.
3
;   (C) copyright Serge 2006
4
;   email: infinity_sound@mail.ru
5
;
6
;   This program is free software; you can redistribute it and/or modify
7
;   it under the terms of the GNU General Public License as published by
8
;   the Free Software Foundation; either version 2 of the License, or
9
;   (at your option) any later version.
10
;
11
;   This program is distributed in the hope that it will be useful,
12
;   but WITHOUT ANY WARRANTY; without even the implied warranty of
13
;   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
;   GNU General Public License for more details.
15
 
16
align 4
17
proc new_mix stdcall, output:dword
;
; Builds 32 consecutive 512-byte blocks at [output] from the streams
; referenced by play_list (play_count entries, refreshed by
; prepare_playlist). When play_count is zero, 0x1000 dwords at [output]
; are zero-filled instead.
;
; NOTE(review): mix_4_1 is always called on slots 0..3 of mix_input, but
; after the first intermediate mix the freshly fetched inputs occupy
; slots 1..4. With 8 or more active streams slot 4 appears never to be
; mixed - confirm the intended upper limit on play_count.
;
        locals
          mixCounter  dd ?      ; inputs fetched since the last mix_4_1
          mixIndex    dd ?      ; next free slot in mix_input
          streamIndex dd ?      ; next play_list entry to fetch
          inputCount  dd ?      ; streams still to fetch for this block
          main_count  dd ?      ; 512-byte blocks still to produce
          blockCount  dd ?      ; not referenced in this procedure
          mix_out     dd ?      ; not referenced in this procedure
        endl

        call    prepare_playlist

        cmp     [play_count], 0
        je      .silence

        call    FpuSave
        mov     [main_count], 32

.next_block:
        mov     [mix_buff_map], 0x0000FFFF      ; 16 set bits: every mix buffer free
        xor     eax, eax
        mov     [mixCounter], eax
        mov     [mixIndex], eax
        mov     [streamIndex], eax
        mov     ebx, [play_count]
        mov     [inputCount], ebx

.gather_group:
        mov     ecx, 4                          ; at most four inputs per group
.gather_input:
        mov     ebx, [streamIndex]
        mov     esi, [play_list+ebx*4]
        mov     eax, [esi+STREAM.work_read]     ; eax = this stream's block
        add     [esi+STREAM.work_read], 512     ; consume it

        mov     ebx, [mixIndex]
        mov     [mix_input+ebx*4], eax
        inc     [mixCounter]
        inc     [mixIndex]
        inc     [streamIndex]
        dec     [inputCount]
        jz      .final_stage                    ; every stream fetched

        dec     ecx
        jnz     .gather_input

        cmp     [mixCounter], 4
        jnz     .final_stage

        ; fold four inputs into one intermediate buffer and put the
        ; result back into the input table
        stdcall mix_4_1, [mix_input], [mix_input+4], [mix_input+8], [mix_input+12]
        sub     [mixIndex], 4
        mov     ebx, [mixIndex]
        mov     [mix_input+ebx*4], eax
        inc     [mixIndex]
        mov     [mixCounter], 0

        cmp     [inputCount], 0
        jnz     .gather_group

.final_stage:
        ; pick the final mixer according to the number of pending inputs
        cmp     [mixIndex], 1
        jne     .not_single
        stdcall copy_mem, [output], [mix_input]
        jmp     .block_done
.not_single:
        cmp     [mixIndex], 2
        jne     .not_pair
        stdcall mix_2_1, [output], [mix_input], [mix_input+4]
        jmp     .block_done
.not_pair:
        cmp     [mixIndex], 3
        jne     .four_inputs
        stdcall mix_3_1, [output], [mix_input], [mix_input+4], [mix_input+8]
        jmp     .block_done
.four_inputs:
        stdcall final_mix, [output], [mix_input], [mix_input+4], [mix_input+8], [mix_input+12]
.block_done:
        add     [output], 512

        dec     [main_count]
        jnz     .next_block

        call    update_stream
        emms
        call    FpuRestore
        ret

.silence:
        ; nothing is playing: clear the whole output area
        mov     edi, [output]
        mov     ecx, 0x1000
        xor     eax, eax
        cld
        rep     stosd
        ret
endp
108
 
109
 
110
align 4
111
proc update_stream
;
; Post-mix bookkeeping for every entry of play_list. play_count is used
; as the loop counter and is zero when the procedure returns.
;
; For each stream: wrap work_read back to work_buff once it reaches
; work_top, then either
;   - format PCM_2_16_48: copy 16384 bytes from curr_seg into the work
;     buffer, advance curr_seg (wrapping at limit to base) and raise a
;     notification when curr_seg hits notify_off1 / notify_off2, or
;   - any other format: subtract 16384 from work_count and call refill
;     when work_count is not above 32768.
;
        locals
          stream_index  dd ?
        endl

        mov     [stream_index], 0

.next_stream:
        mov     edx, [stream_index]
        mov     esi, [play_list+edx*4]

        ; wrap the read pointer
        mov     eax, [esi+STREAM.work_read]
        cmp     eax, [esi+STREAM.work_top]
        jb      .store_read
        mov     eax, [esi+STREAM.work_buff]
.store_read:
        mov     [esi+STREAM.work_read], eax

        cmp     [esi+STREAM.format], PCM_2_16_48
        je      .copy

        sub     [esi+STREAM.work_count], 16384

        cmp     [esi+STREAM.work_count], 32768
        ja      .advance

        stdcall refill, esi

.advance:
        inc     [stream_index]
        dec     [play_count]
        jnz     .next_stream

        ret

.copy:
        mov     ebx, esi                        ; ebx = stream, esi/edi free for movsd

        ; wrap the write pointer
        mov     edi, [ebx+STREAM.work_write]
        cmp     edi, [ebx+STREAM.work_top]
        jb      .do_copy
        mov     edi, [ebx+STREAM.work_buff]
        mov     [ebx+STREAM.work_write], edi
.do_copy:
        mov     esi, [ebx+STREAM.curr_seg]
        mov     ecx, 16384/4
        cld
        rep     movsd

        mov     [ebx+STREAM.work_write], edi

        ; wrap the source segment pointer
        cmp     esi, [ebx+STREAM.limit]
        jb      .store_seg
        mov     esi, [ebx+STREAM.base]
.store_seg:
        mov     [ebx+STREAM.curr_seg], esi

        ; ecx = value handed to the client: 0 at notify_off2, 0x8000 at notify_off1
        xor     ecx, ecx
        cmp     esi, [ebx+STREAM.notify_off2]
        je      .notify

        mov     ecx, 0x8000
        cmp     esi, [ebx+STREAM.notify_off1]
        jne     .advance

.notify:
        mov     eax, [ebx+STREAM.notify_task]
        call    pid_to_slot
        test    eax, eax
        jnz     .post_event
        not     eax                             ; eax was 0, becomes -1
        mov     [ebx+STREAM.notify_task], eax   ; -1
        jmp     .advance
.post_event:
        shl     eax, 8                          ; slot * 256
        mov     [eax+PROC_BASE+32], ecx
        or      dword [eax+PROC_BASE+0xA8], EVENT_NOTIFY
        jmp     .advance
endp
196
 
197
align 4
198
proc refill stdcall, str:dword
;
; Tops up the work buffer of the stream at [str].
;
;   1. If work_write has run past work_top, the bytes between work_top
;      and work_write are copied to the start of the work buffer and
;      work_write is moved to just behind them.
;   2. The stream's resample routine is called with (work_write,
;      curr_seg, r_buff, r_dt, r_size, r_end); the value it returns in
;      eax is added to both work_count and work_write.
;   3. curr_seg advances by r_size, wrapping to base once it reaches
;      limit.
;   4. When the new curr_seg equals notify_off2 (ecx = 0) or
;      notify_off1 (ecx = 0x8000) the task in notify_task is notified;
;      if pid_to_slot returns 0, notify_task is set to -1 instead.
;
; Registers are not preserved.
;

;    if DEBUG
;           mov    esi, msgUser
;           call   [SysMsgBoardStr]
;     end if

        mov     ebx, [str]

        ; The rep movsd below, and the one inside the resample routines in
        ; this file, copy upwards. Clear DF explicitly, as the other string
        ; operations in this file (new_mix, update_stream) do, rather than
        ; relying on the caller's flag state.
        cld

        mov     ecx, [ebx+STREAM.work_write]
        cmp     ecx, [ebx+STREAM.work_top]
        jbe     .resample
        mov     esi, [ebx+STREAM.work_top]
        sub     ecx, esi                        ; bytes written past work_top
        mov     edi, [ebx+STREAM.work_buff]
        shr     ecx, 2                          ; bytes -> dwords
        rep     movsd

        mov     [ebx+STREAM.work_write], edi
.resample:
        mov     esi, [ebx+STREAM.curr_seg]
        mov     edi, [ebx+STREAM.work_write]
        mov     edx, [ebx+STREAM.r_buff]

        stdcall [ebx+STREAM.resample], edi, esi, edx,\
           [ebx+STREAM.r_dt],[ebx+STREAM.r_size],[ebx+STREAM.r_end]

        mov     ebx, [str]                      ; the callee does not preserve ebx

        add     [ebx+STREAM.work_count], eax
        add     [ebx+STREAM.work_write], eax

        mov     eax, [ebx+STREAM.curr_seg]
        add     eax, [ebx+STREAM.r_size]
        cmp     eax, [ebx+STREAM.limit]
        jb      .store_seg
        mov     eax, [ebx+STREAM.base]
.store_seg:
        mov     [ebx+STREAM.curr_seg], eax

        xor     ecx, ecx
        cmp     eax, [ebx+STREAM.notify_off2]
        je      .notify

        mov     ecx, 0x8000
        cmp     eax, [ebx+STREAM.notify_off1]
        je      .notify

        ret
.notify:
        mov     eax, [ebx+STREAM.notify_task]
        call    pid_to_slot
        test    eax, eax
        jnz     .post_event
        not     eax                             ; eax was 0, becomes -1
        mov     [ebx+STREAM.notify_task], eax   ; -1
        ret
.post_event:
        shl     eax, 8                          ; slot * 256
        mov     [eax+PROC_BASE+32], ecx
        or      dword [eax+PROC_BASE+0xA8], EVENT_NOTIFY
        ret
endp
261
 
262
align 4
263
proc resample_1 stdcall, dest:dword,src:dword,r_buff:dword,\
                       r_dt:dword, r_size:dword,r_end:dword
;
; Linear-interpolating resampler reading 16-bit samples and writing each
; result twice (low and high word of one dword) to [dest].
;
; The r_size source bytes are first appended to r_buff behind a 64-byte
; prefix; on exit the last 64 source bytes become the new prefix.
; The position in eax starts at 16 and advances by r_dt until it reaches
; r_end; bits 15 and up select the sample, the low 15 bits are the
; interpolation weight.
;
; Returns eax = number of bytes written to [dest].
; Registers are not preserved.
;
        mov     esi, [src]
        mov     edi, [r_buff]
        add     edi, 32*2
        mov     ecx, [r_size]
        shr     ecx, 2
        rep     movsd

        mov     edx, [r_buff]
        mov     edi, [dest]
        mov     eax, 16

align 16
.next_sample:
        mov     esi, eax
        mov     ecx, eax
        shr     esi, 15                         ; sample index
        and     ecx, 0x7FFF                     ; weight of the following sample
        lea     esi, [edx+esi*2]

        ; ebp serves as a scratch register from here on, so the arguments
        ; are addressed through esp until ebp is reloaded after the loop
        movsx   ebp, word [esi]                 ; current sample
        movsx   esi, word [esi+2]               ; following sample
        mov     ebx, 32768
        imul    esi, ecx
        sub     ebx, ecx                        ; weight of the current sample
        imul    ebx, ebp
        lea     ecx, [ebx+esi+16384]            ; +16384 rounds the shift below
        sar     ecx, 15

        ; clamp to the signed 16-bit range
        cmp     ecx, 32767                      ; 00007fffH
        jle     .check_low
        mov     ecx, 32767                      ; 00007fffH
        jmp     .store
.check_low:
        cmp     ecx, -32768                     ; ffff8000H
        jge     .store
        mov     ecx, -32768                     ; ffff8000H
.store:
        mov     ebx, ecx
        shl     ebx, 16
        mov     bx, cx                          ; same value in both halves
        mov     [edi], ebx
        add     edi, 4

        add     eax, [esp+20]                   ; r_dt
        cmp     eax, [esp+28]                   ; r_end
        jb      .next_sample

        ; reload the frame pointer; this relies on esp being equal to the
        ; frame base here (the procedure declares no locals)
        mov     ebp, esp

        ; keep the last 64 source bytes as the prefix for the next call
        mov     esi, [src]
        add     esi, [r_size]
        sub     esi, 32*2
        mov     edx, [r_buff]
        mov     ecx, 16
.save_tail:
        mov     ebx, [esi]
        mov     [edx], ebx
        add     esi, 4
        add     edx, 4
        dec     ecx
        jnz     .save_tail

        sub     edi, [dest]
        mov     eax, edi
        ret
endp
331
 
332
align 4
333
proc resample_18 stdcall, dest:dword,src:dword,r_buff:dword,\
                       r_dt:dword, r_size:dword,r_end:dword
;
; Linear-interpolating resampler reading 8-bit samples (0x80 is
; subtracted from each byte and the result is scaled by 256) and writing
; each result twice (low and high word of one dword) to [dest].
;
; The r_size source bytes are first appended to r_buff behind a 32-byte
; prefix; on exit the last 32 source bytes become the new prefix.
; The position in esi starts at 16 and advances by r_dt until it reaches
; r_end; bits 15 and up select the sample, the low 15 bits are the
; interpolation weight.
;
; Returns eax = number of bytes written to [dest].
; Registers are not preserved.
;
        mov     esi, [src]
        mov     edi, [r_buff]
        add     edi, 32
        mov     ecx, [r_size]
        shr     ecx, 2
        rep     movsd

        mov     edx, [r_buff]
        mov     edi, [dest]
        mov     esi, 16

align 16
.next_sample:
        mov     eax, esi
        mov     ecx, esi
        shr     eax, 15                         ; sample index
        and     ecx, 0x7FFF                     ; weight of the following sample
        lea     eax, [edx+eax]

        mov     bx, word [eax]                  ; bl = current, bh = following
        sub     bh, 0x80
        sub     bl, 0x80
        movsx   eax, bh
        shl     eax, 8
        ; ebp serves as a scratch register from here on, so the arguments
        ; are addressed through esp until ebp is reloaded after the loop
        movsx   ebp, bl
        shl     ebp, 8
        mov     ebx, 32768
        imul    eax, ecx
        sub     ebx, ecx                        ; weight of the current sample
        imul    ebx, ebp
        lea     ecx, [ebx+eax+16384]            ; +16384 rounds the shift below
        sar     ecx, 15

        ; clamp to the signed 16-bit range
        cmp     ecx, 32767                      ; 00007fffH
        jle     .check_low
        mov     ecx, 32767                      ; 00007fffH
        jmp     .store
.check_low:
        cmp     ecx, -32768                     ; ffff8000H
        jge     .store
        mov     ecx, -32768                     ; ffff8000H
.store:
        mov     ebx, ecx
        shl     ebx, 16
        mov     bx, cx                          ; same value in both halves
        mov     [edi], ebx
        add     edi, 4

        add     esi, [esp+20]                   ; r_dt
        cmp     esi, [esp+28]                   ; r_end
        jb      .next_sample

        ; reload the frame pointer; this relies on esp being equal to the
        ; frame base here (the procedure declares no locals)
        mov     ebp, esp

        ; keep the last 32 source bytes as the prefix for the next call
        mov     esi, [src]
        add     esi, [r_size]
        sub     esi, 32
        mov     edx, [r_buff]
        mov     ecx, 8
.save_tail:
        mov     ebx, [esi]
        mov     [edx], ebx
        add     esi, 4
        add     edx, 4
        dec     ecx
        jnz     .save_tail

        sub     edi, [dest]
        mov     eax, edi
        ret
endp
406
 
407
align 4
408
proc copy_stream stdcall, dest:dword,src:dword,r_buff:dword,\
                       r_dt:dword, r_size:dword,r_end:dword
;
; Pass-through variant with the same signature as the resample
; routines: copies r_size bytes (rounded down to whole dwords) from
; [src] to [dest]. r_buff, r_dt and r_end are not used.
;
; Returns eax = 16384 regardless of r_size.
; NOTE(review): the other routines with this signature return the number
; of bytes they produced; presumably r_size is always 16384 for streams
; that use this routine - confirm against the caller.
;
; The original also copied r_size into eax before the copy; that value
; was overwritten before any use, so the dead store has been removed.
;
        mov     esi, [src]
        mov     edi, [dest]
        mov     ecx, [r_size]
        shr     ecx, 2                          ; bytes -> dwords
        rep     movsd
        mov     eax, 16384
        ret
endp
420
 
421
align 4
422
proc resample_2 stdcall, dest:dword,src:dword,r_buff:dword,\
                       r_dt:dword, r_size:dword,r_end:dword
;
; MMX linear-interpolating resampler working on 4-byte frames (two
; 16-bit words per frame); one interpolated frame is written to [dest]
; per step.
;
; The r_size source bytes are first appended to r_buff behind a
; 128-byte prefix; on exit the last 128 source bytes become the new
; prefix. The position in eax starts at 16 and advances by r_dt until
; it reaches r_end; bits 15 and up select the frame, the low 15 bits
; are the interpolation weight.
;
; Returns eax = number of bytes written to [dest].
; Registers (including mm0-mm4) are not preserved.
;
        mov     esi, [src]
        mov     edi, [r_buff]
        add     edi, 32*4
        mov     ecx, [r_size]
        shr     ecx, 2
        rep     movsd                           ; call memcpy

        mov     edi, [dest]
        mov     edx, [r_buff]
        mov     ebx, [r_dt]
        mov     eax, 16
        emms

align 16
.next_frame:
        mov     esi, eax
        mov     ecx, eax
        shr     esi, 15                         ; frame index
        and     ecx, 0x7FFF                     ; weight of the following frame
        lea     esi, [edx+esi*4]

        movq    mm0, [esi]                      ; current and following frame
        movq    mm1, mm0

        movd    mm2, ecx
        punpcklwd mm2, mm2                      ; weight in the two low words
        movq    mm3, qword [m7]                 ; 0x8000

        psubw   mm3, mm2                        ; 0x8000 - weight
        punpckldq mm3, mm2                      ; low dword: 0x8000-weight, high dword: weight

        pmulhw  mm0, mm3                        ; high words of the products
        pmullw  mm1, mm3                        ; low words of the products

        ; rebuild the 32-bit products and add the two frames together
        movq    mm4, mm1
        punpcklwd mm1, mm0
        punpckhwd mm4, mm0
        paddd   mm1, mm4
        psrad   mm1, 15
        packssdw mm1, mm1                       ; saturate back to 16 bits
        movd    [edi], mm1
        add     edi, 4

        add     eax, ebx
        cmp     eax, [r_end]
        jb      .next_frame
        emms

        ; keep the last 128 source bytes as the prefix for the next call
        mov     esi, [src]
        add     esi, [r_size]
        sub     esi, 32*4
        mov     edx, [r_buff]
        mov     ecx, 32
.save_tail:
        mov     ebx, [esi]
        mov     [edx], ebx
        add     esi, 4
        add     edx, 4
        dec     ecx
        jnz     .save_tail

        sub     edi, [dest]
        mov     eax, edi
        ret
endp
490
 
491
align 4
492
proc resample_28 stdcall, dest:dword,src:dword,r_buff:dword,\
                       r_dt:dword, r_size:dword,r_end:dword
;
; MMX linear-interpolating resampler working on 2-byte frames (two
; 8-bit samples per frame); one interpolated 4-byte frame is written to
; [dest] per step.
;
; The r_size source bytes are first appended to r_buff behind a 64-byte
; prefix; on exit the last 64 source bytes become the new prefix.
; The position in eax starts at 16 and advances by r_dt until it reaches
; r_end; bits 15 and up select the frame, the low 15 bits are the
; interpolation weight.
;
; mm7 and mm6 are loaded from mm80 and mm_mask, which are defined
; outside this procedure (presumably a per-byte 0x80 bias and a mask
; keeping the high byte of every word - confirm at their definition).
;
; Returns eax = number of bytes written to [dest].
; Registers (including mm0-mm4, mm6, mm7) are not preserved.
;
        mov     esi, [src]
        mov     edi, [r_buff]
        add     edi, 32*2
        mov     ecx, [r_size]
        shr     ecx, 2
        rep     movsd                           ; call memcpy

        mov     edi, [dest]
        mov     edx, [r_buff]
        mov     ebx, [r_dt]
        mov     eax, 16
        emms
        movq    mm7, [mm80]
        movq    mm6, [mm_mask]

align 16
.next_frame:
        mov     esi, eax
        mov     ecx, eax
        shr     esi, 15                         ; frame index
        and     ecx, 0x7FFF                     ; weight of the following frame
        lea     esi, [edx+esi*2]

        ; widen the 8-bit samples to words
        movq    mm0, [esi]
        psubb   mm0, mm7
        punpcklbw mm0, mm0                      ; every byte duplicated into a word
        pand    mm0, mm6

        movq    mm1, mm0

        movd    mm2, ecx
        punpcklwd mm2, mm2                      ; weight in the two low words
        movq    mm3, qword [m7]                 ; 0x8000

        psubw   mm3, mm2                        ; 0x8000 - weight
        punpckldq mm3, mm2                      ; low dword: 0x8000-weight, high dword: weight

        pmulhw  mm0, mm3                        ; high words of the products
        pmullw  mm1, mm3                        ; low words of the products

        ; rebuild the 32-bit products and add the two frames together
        movq    mm4, mm1
        punpcklwd mm1, mm0
        punpckhwd mm4, mm0
        paddd   mm1, mm4
        psrad   mm1, 15
        packssdw mm1, mm1                       ; saturate back to 16 bits
        movd    [edi], mm1
        add     edi, 4

        add     eax, ebx
        cmp     eax, [r_end]
        jb      .next_frame
        emms

        ; keep the last 64 source bytes as the prefix for the next call
        mov     esi, [src]
        add     esi, [r_size]
        sub     esi, 32*2
        mov     edx, [r_buff]
        mov     ecx, 16
.save_tail:
        mov     ebx, [esi]
        mov     [edx], ebx
        add     esi, 4
        add     edx, 4
        dec     ecx
        jnz     .save_tail

        sub     edi, [dest]
        mov     eax, edi
        ret
endp
566
 
567
 
568
proc m16_stereo stdcall, dest:dword,src:dword,r_buff:dword,\
                       r_dt:dword, r_size:dword,r_end:dword
;
; Expands r_size bytes at [src] to twice as many bytes at [dest] by
; running m16_s_mmx (64 bytes in, 128 bytes out) over the input.
; r_buff, r_dt and r_end are not used.
;
; r_size is consumed in 256-byte steps; r_size / 256 must be non-zero
; because the loop counter is tested only after the first pass.
;
; Returns eax = r_size * 2.
; Registers are not preserved.
;
        mov     edi, [dest]
        mov     esi, [src]
        mov     ecx, [r_size]
        shr     ecx, 8                          ; 4 * 64 source bytes per pass
.next_chunk:
    repeat 4
        call    m16_s_mmx
        add     edi, 128
        add     esi, 64
    end repeat
        dec     ecx
        jnz     .next_chunk

        mov     eax, [r_size]
        add     eax, eax
        ret
endp
595
 
596
align 4
597
proc s8_stereo stdcall, dest:dword,src:dword,r_buff:dword,\
                       r_dt:dword, r_size:dword,r_end:dword
;
; Expands r_size bytes at [src] to twice as many bytes at [dest] by
; running s8_s_mmx (32 bytes in, 64 bytes out) over the input.
; r_buff, r_dt and r_end are not used.
;
; mm7 and mm6 are loaded from mm80 and mm_mask for s8_s_mmx.
; r_size is consumed in 128-byte steps; r_size / 128 must be non-zero
; because the loop counter is tested only after the first pass.
;
; Returns eax = r_size * 2.
; Registers are not preserved.
;
        mov     edi, [dest]
        mov     esi, [src]
        mov     ecx, [r_size]
        shr     ecx, 7                          ; 4 * 32 source bytes per pass

        movq    mm7, [mm80]
        movq    mm6, [mm_mask]
.next_chunk:
    repeat 4
        call    s8_s_mmx
        add     edi, 64
        add     esi, 32
    end repeat
        dec     ecx
        jnz     .next_chunk

        mov     eax, [r_size]
        add     eax, eax
        ret
endp
628
 
629
proc m8_stereo stdcall, dest:dword,src:dword,r_buff:dword,\
                       r_dt:dword, r_size:dword,r_end:dword
;
; Expands r_size bytes at [src] to four times as many bytes at [dest]
; by running m8_s_mmx (16 bytes in, 64 bytes out) over the input.
; r_buff, r_dt and r_end are not used.
;
; mm7 and mm6 are loaded from mm80 and mm_mask for m8_s_mmx.
; r_size is consumed in 64-byte steps; r_size / 64 must be non-zero
; because the loop counter is tested only after the first pass.
;
; Returns eax = r_size * 4.
; Registers are not preserved.
;
        mov     edi, [dest]
        mov     esi, [src]
        mov     ecx, [r_size]
        shr     ecx, 6                          ; 4 * 16 source bytes per pass

        movq    mm7, [mm80]
        movq    mm6, [mm_mask]
.next_chunk:
    repeat 4
        call    m8_s_mmx
        add     edi, 64
        add     esi, 16
    end repeat
        dec     ecx
        jnz     .next_chunk

        mov     eax, [r_size]
        shl     eax, 2
        ret
endp
660
 
661
align 4
662
proc alloc_mix_buff
;
; Takes the lowest-numbered free buffer out of mix_buff_map (a set bit
; means "free") and returns its address in eax:
;       eax = [mix_buff] + index * 512
; Returns eax = 0 when no bit is set.
;
        bsf     eax, [mix_buff_map]             ; ZF=1 when the map is empty
        jz      .none_free

        btr     [mix_buff_map], eax             ; mark the buffer as taken
        shl     eax, 9                          ; index * 512
        add     eax, [mix_buff]
        ret
.none_free:
        xor     eax, eax
        ret
endp
674
 
675
proc m16_s_mmx
;
; Duplicates every 16-bit word of the 64 bytes at [esi] into two
; adjacent words, writing 128 bytes at [edi].
;
; In:    esi = source, edi = destination
; Uses:  mm0, mm1; esi and edi are left unchanged
;
    repeat 8
        movq    mm0, [esi+(%-1)*8]
        movq    mm1, mm0
        punpcklwd mm0, mm0                      ; words 0,1 -> 0,0,1,1
        punpckhwd mm1, mm1                      ; words 2,3 -> 2,2,3,3
        movq    [edi+(%-1)*16], mm0
        movq    [edi+(%-1)*16+8], mm1
    end repeat

        ret
endp
736
 
737
align 4
738
proc s8_s_mmx
;
; Widens the 32 bytes at [esi] to 32 words (64 bytes) at [edi]: mm7 is
; subtracted from every byte, the byte is duplicated into both halves
; of a word and the word is ANDed with mm6.
;
; In:    esi = source, edi = destination,
;        mm7 = per-byte bias, mm6 = per-word mask (loaded by the caller)
; Uses:  mm0, mm1; esi and edi are left unchanged
;
    repeat 4
        movq    mm0, [esi+(%-1)*8]
        psubb   mm0, mm7
        movq    mm1, mm0
        punpcklbw mm0, mm0                      ; bytes 0..3
        pand    mm0, mm6
        punpckhbw mm1, mm1                      ; bytes 4..7
        pand    mm1, mm6
        movq    [edi+(%-1)*16], mm0
        movq    [edi+(%-1)*16+8], mm1
    end repeat

        ret

endp
783
 
784
align 4
785
proc m8_s_mmx
;
; Widens the 16 bytes at [esi] to 64 bytes at [edi]: mm7 is subtracted
; from every byte, the byte is duplicated into both halves of a word,
; the word is ANDed with mm6 and then written twice.
;
; In:    esi = source, edi = destination,
;        mm7 = per-byte bias, mm6 = per-word mask (loaded by the caller)
; Uses:  mm0-mm3; esi and edi are left unchanged
;
    repeat 2
        movq    mm0, [esi+(%-1)*8]
        psubb   mm0, mm7
        movq    mm1, mm0
        punpcklbw mm0, mm0                      ; bytes 0..3 as words
        pand    mm0, mm6
        punpckhbw mm1, mm1                      ; bytes 4..7 as words
        pand    mm1, mm6

        ; duplicate every word
        movq    mm2, mm0
        punpcklwd mm0, mm0
        punpckhwd mm2, mm2

        movq    mm3, mm1
        punpcklwd mm1, mm1
        punpckhwd mm3, mm3

        movq    [edi+(%-1)*32], mm0
        movq    [edi+(%-1)*32+8], mm2
        movq    [edi+(%-1)*32+16], mm1
        movq    [edi+(%-1)*32+24], mm3
    end repeat

        ret
endp
829
 
830
 
831
align 4
832
proc mix_2_1 stdcall, output:dword, str0:dword, str1:dword
;
; Mixes two 512-byte blocks into the 512 bytes at [output] by calling
; mix_2_1_mmx on four consecutive 128-byte pieces.
;
; The str0/str1 argument slots are advanced in place between calls.
; edi is used as the running output pointer and is not preserved.
;
        mov     edi, [output]

    repeat 4
        stdcall mix_2_1_mmx, edi, [str0], [str1]
      if % < 4
        ; step to the next 128-byte piece (not needed after the last one)
        add     edi, 128
        add     [str0], 128
        add     [str1], 128
      end if
    end repeat

        ret
endp
852
 
853
 
854
align 4
855
proc mix_3_1 stdcall, output:dword, str0:dword, str1:dword, str2:dword
;
; Mixes three 512-byte blocks into the 512 bytes at [output] by calling
; mix_3_1_mmx on four consecutive 128-byte pieces.
;
; The str0..str2 argument slots are advanced in place between calls.
; edi is used as the running output pointer and is not preserved.
;
        mov     edi, [output]

    repeat 4
        stdcall mix_3_1_mmx, edi, [str0], [str1], [str2]
      if % < 4
        ; step to the next 128-byte piece (not needed after the last one)
        add     edi, 128
        add     [str0], 128
        add     [str1], 128
        add     [str2], 128
      end if
    end repeat

        ret
endp
878
 
879
align 4
880
proc mix_4_1 stdcall, str0:dword, str1:dword,\
                      str2:dword, str3:dword
;
; Mixes four 512-byte blocks into a buffer obtained from
; alloc_mix_buff, calling mix_4_1_mmx on four consecutive 128-byte
; pieces.
;
; Returns eax = address of the mixed buffer, or 0 when alloc_mix_buff
; has no free buffer.
; The str0..str3 argument slots are advanced in place between calls.
; edi is used as the running output pointer and is not preserved.
;
        local output:DWORD

        call    alloc_mix_buff
        test    eax, eax
        jz      .no_buffer
        mov     [output], eax

        mov     edi, eax

    repeat 4
        stdcall mix_4_1_mmx, edi, [str0], [str1], [str2], [str3]
      if % < 4
        ; step to the next 128-byte piece (not needed after the last one)
        add     edi, 128
        add     [str0], 128
        add     [str1], 128
        add     [str2], 128
        add     [str3], 128
      end if
    end repeat

        mov     eax, [output]
        ret
.no_buffer:
        xor     eax, eax
        ret
endp
917
 
918
 
919
align 4
920
proc final_mix stdcall, output:dword, str0:dword, str1:dword,\
                        str2:dword, str3:dword
;
; Mixes four 512-byte blocks straight into the 512 bytes at [output]
; by calling mix_4_1_mmx on four consecutive 128-byte pieces.
;
; The str0..str3 argument slots are advanced in place between calls.
; edi is used as the running output pointer and is not preserved.
;
        mov     edi, [output]

    repeat 4
        stdcall mix_4_1_mmx, edi, [str0], [str1], [str2], [str3]
      if % < 4
        ; step to the next 128-byte piece (not needed after the last one)
        add     edi, 128
        add     [str0], 128
        add     [str1], 128
        add     [str2], 128
        add     [str3], 128
      end if
    end repeat

        ret
endp
947
 
948
align 4
949
; One quadword (four 16-bit samples) of mix_2_1_mmx:
;       [edx+offs] = saturating_add([eax+offs], [ecx+offs])
; (The original source carried a disabled "psraw reg, 1" after every
; addition; it stays disabled, so the sum is not halved.)
macro mix2_qword reg, offs
{
        movq    reg, [eax+offs]
        paddsw  reg, [ecx+offs]
        movq    [edx+offs], reg
}

proc mix_2_1_mmx stdcall, output:dword, str0:dword, str1:dword
;
; Adds 128 bytes (64 signed 16-bit samples) at [str0] and [str1] with
; signed saturation and stores the result at [output].
;
; Uses:  eax, ecx, edx, mm0-mm3
;
        mov     edx, [output]
        mov     eax, [str0]
        mov     ecx, [str1]

        mix2_qword mm0, 0
        mix2_qword mm1, 8
        mix2_qword mm2, 16
        mix2_qword mm3, 24
        mix2_qword mm0, 32
        mix2_qword mm1, 40
        mix2_qword mm2, 48
        mix2_qword mm3, 56
        mix2_qword mm0, 64
        mix2_qword mm1, 72
        mix2_qword mm2, 80
        mix2_qword mm3, 88
        mix2_qword mm0, 96
        mix2_qword mm1, 104
        mix2_qword mm2, 112
        mix2_qword mm3, 120

        ret
endp

purge mix2_qword
1042
 
1043
align 4
1044
; One quadword (four 16-bit samples) of mix_3_1_mmx:
;       [edx+offs] = sat_add(sat_add([eax+offs], [ebx+offs]), [ecx+offs])
macro mix3_qword reg, offs
{
        movq    reg, [eax+offs]
        paddsw  reg, [ebx+offs]
        paddsw  reg, [ecx+offs]
        movq    [edx+offs], reg
}

proc mix_3_1_mmx stdcall, output:dword, str0:dword, str1:dword, str2:dword
;
; Adds 128 bytes (64 signed 16-bit samples) at [str0], [str1] and
; [str2] with signed saturation and stores the result at [output].
;
; Uses:  eax, ebx, ecx, edx, mm0-mm3
;
        mov     edx, [output]
        mov     eax, [str0]
        mov     ebx, [str1]
        mov     ecx, [str2]

        mix3_qword mm0, 0
        mix3_qword mm1, 8
        mix3_qword mm2, 16
        mix3_qword mm3, 24
        mix3_qword mm0, 32
        mix3_qword mm1, 40
        mix3_qword mm2, 48
        mix3_qword mm3, 56
        mix3_qword mm0, 64
        mix3_qword mm1, 72
        mix3_qword mm2, 80
        mix3_qword mm3, 88
        mix3_qword mm0, 96
        mix3_qword mm1, 104
        mix3_qword mm2, 112
        mix3_qword mm3, 120

        ret
endp

purge mix3_qword
1133
 
1134
align 4
1135
; One quadword (four 16-bit samples) of mix_4_1_mmx:
;       ra = sat_add([esi+offs], [ebx+offs])
;       rb = sat_add([eax+offs], [ecx+offs])
;       [edx+offs] = sat_add(ra, rb)
macro mix4_qword ra, rb, offs
{
        movq    ra, [esi+offs]
        movq    rb, [eax+offs]
        paddsw  ra, [ebx+offs]
        paddsw  rb, [ecx+offs]
        paddsw  ra, rb
        movq    [edx+offs], ra
}

proc mix_4_1_mmx stdcall, output:dword, str0:dword, str1:dword,\
                           str2:dword, str3:dword
;
; Adds 128 bytes (64 signed 16-bit samples) at [str0]..[str3] with
; signed saturation and stores the result at [output]. The inputs are
; summed pairwise: (str0 + str2) + (str1 + str3).
;
; Uses:  eax, ebx, ecx, edx, esi, mm0-mm3
;
        mov     edx, [output]
        mov     esi, [str0]
        mov     eax, [str1]
        mov     ebx, [str2]
        mov     ecx, [str3]

        mix4_qword mm0, mm1, 0
        mix4_qword mm2, mm3, 8
        mix4_qword mm0, mm1, 16
        mix4_qword mm2, mm3, 24
        mix4_qword mm0, mm1, 32
        mix4_qword mm2, mm3, 40
        mix4_qword mm0, mm1, 48
        mix4_qword mm2, mm3, 56
        mix4_qword mm0, mm1, 64
        mix4_qword mm2, mm3, 72
        mix4_qword mm2, mm3, 80
        mix4_qword mm2, mm3, 88
        mix4_qword mm2, mm3, 96
        mix4_qword mm2, mm3, 104
        mix4_qword mm2, mm3, 112
        mix4_qword mm2, mm3, 120

        ret
endp

purge mix4_qword
1258
 
1259
align 4
1260
proc copy_mem stdcall, output:dword, input:dword
;
; Copies 0x80 dwords (512 bytes) from [input] to [output].
;
; Uses:  eax, ecx, esi, edi. On return ecx = 0 and esi/edi point just
;        past the copied data.
;
        mov     esi, [input]
        mov     edi, [output]
        mov     ecx, 0x80
.copy_dword:
        mov     eax, [esi]
        mov     [edi], eax
        lea     esi, [esi+4]
        lea     edi, [edi+4]
        dec     ecx
        jnz     .copy_dword

        ret
endp
1274
 
1275
proc memcpy
;
; Register-based dword copy.
;
; In:    esi = source, edi = destination, ecx = number of dwords
;        (must be non-zero: the counter is tested only after the
;        first dword has been copied)
; Out:   esi/edi advanced past the copied data, ecx = 0
; Uses:  eax
;
.copy_dword:
        mov     eax, [esi]
        mov     [edi], eax
        lea     esi, [esi+4]
        lea     edi, [edi+4]
        dec     ecx
        jnz     .copy_dword
        ret
endp
1285