Subversion Repositories Kolibri OS

Rev

Rev 227 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
168 serge 1
;
2
;   This file is part of the Infinity sound library.
3
;   (C) copyright Serge 2006
4
;   email: infinity_sound@mail.ru
5
;
6
;   This program is free software; you can redistribute it and/or modify
7
;   it under the terms of the GNU General Public License as published by
8
;   the Free Software Foundation; either version 2 of the License, or
9
;   (at your option) any later version.
10
;
11
;   This program is distributed in the hope that it will be useful,
12
;   but WITHOUT ANY WARRANTY; without even the implied warranty of
13
;   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
;   GNU General Public License for more details.
15
 
16
align 4
17
;-----------------------------------------------------------------------
; new_mix(output)
;
; Produces one 16384-byte block at [output] as 32 slices of 512 bytes.
; For every slice the current work_read pointer of each stream listed in
; play_list is collected into mix_input (and advanced by 512), the
; pointers are reduced with mix_4_1 whenever four are pending, and the
; last 1..4 pending inputs are written to the output slice.
;
; If play_count is zero the whole block is zero-filled instead
; (0x1000 dwords = 16384 bytes).
;
; Fix over the previous revision: the gather loop used to fetch four new
; streams per round even when mix_input[0] already held an intermediate
; mix, so from the 8th stream on an input pointer was dropped and a stale
; intermediate was mixed a second time. The loop now gathers until
; exactly four inputs are pending.
;
; NOTE(review): mix_4_1 returns 0 when no scratch buffer is free; that
; value is stored as an input pointer without a check (unchanged).
;-----------------------------------------------------------------------
proc new_mix stdcall, output:dword
           locals
             mixIndex    dd ?   ; number of pending pointers in mix_input
             streamIndex dd ?   ; next play_list entry to fetch
             inputCount  dd ?   ; streams still to fetch for this slice
             main_count  dd ?   ; slices left to produce
           endl

           call prepare_playlist

           cmp [play_count], 0
           je .exit
           call FpuSave
           mov [main_count], 32
.l00:
           mov [mix_buff_map], 0x0000FFFF   ; mark all 16 scratch buffers free
           xor eax, eax
           mov [mixIndex], eax
           mov [streamIndex], eax
           mov ebx, [play_count]
           mov [inputCount], ebx
.l1:
           ; fetch the next stream's read pointer and advance it one slice
           mov ebx, [streamIndex]
           mov esi, [play_list+ebx*4]
           mov eax, [esi+STREAM.work_read]
           add [esi+STREAM.work_read], 512

           mov ebx, [mixIndex]
           mov [mix_input+ebx*4], eax
           inc [mixIndex]
           inc [streamIndex]
           dec [inputCount]
           jz .m2                           ; no streams left: do the final mix

           cmp [mixIndex], 4
           jb .l1                           ; keep gathering until 4 are pending

           ; four inputs pending and more streams to come:
           ; collapse them into one intermediate buffer
           stdcall mix_4_1, [mix_input],[mix_input+4],[mix_input+8],[mix_input+12]
           mov [mix_input], eax
           mov [mixIndex], 1
           jmp .l1                          ; inputCount is non-zero here
.m2:
           ; 1..4 inputs are pending; write them to the output slice
           cmp [mixIndex], 1
           jne @f
           stdcall copy_mem, [output], [mix_input]
           jmp .m3
@@:
           cmp [mixIndex], 2
           jne @f
           stdcall mix_2_1, [output], [mix_input], [mix_input+4]
           jmp .m3
@@:
           cmp [mixIndex], 3
           jne @f
           stdcall mix_3_1, [output],[mix_input],[mix_input+4],[mix_input+8]
           jmp .m3
@@:
           stdcall final_mix, [output],[mix_input],[mix_input+4],[mix_input+8], [mix_input+12]
.m3:
           add [output], 512

           sub [main_count], 1
           jnz .l00

           call update_stream
           emms
           call FpuRestore
           ret
.exit:
           ; nothing to play: emit silence
           mov edi, [output]
           mov ecx, 0x1000
           xor eax, eax
           cld
           rep stosd
           ret
endp
108
 
109
align 4
110
;-----------------------------------------------------------------------
; update_stream
;
; Walks play_list once, decrementing play_count for every entry handled
; (play_count must be non-zero on entry; it is zero on return).
; For each stream:
;   * work_read is wrapped back to work_buff once it reaches work_top;
;   * streams whose format is PCM_2_16_48 get 16384 bytes copied straight
;     from curr_seg into the work buffer, the two segments are swapped
;     when the source pointer reaches lim_0, and an event is sent when the
;     new position equals notify_off2 (offset 0) or notify_off1
;     (offset 0x8000);
;   * every other format has work_count reduced by 16384 and refill is
;     called when work_count is not above 32768.
; If SendEvent returns 0 the stream's notify_task is set to -1.
;-----------------------------------------------------------------------
proc update_stream
           locals
             stream_index  dd ?
             ev_code       dd ?  ; start of the record passed to SendEvent
             ev_offs       dd ?
                           rd 4
           endl

           mov [stream_index], 0
.next_stream:
           mov edx, [stream_index]
           mov esi, [play_list+edx*4]

           ; wrap the read pointer at the top of the work buffer
           mov eax, [esi+STREAM.work_read]
           cmp eax, [esi+STREAM.work_top]
           jb .store_read
           mov eax, [esi+STREAM.work_buff]
.store_read:
           mov [esi+STREAM.work_read], eax

           cmp [esi+STREAM.format], PCM_2_16_48
           je .copy

           sub [esi+STREAM.work_count], 16384
           cmp [esi+STREAM.work_count], 32768
           ja .advance

           stdcall refill, esi
           jmp .advance

.copy:
           mov ebx, esi
           ; wrap the write pointer at the top of the work buffer
           mov edi, [ebx+STREAM.work_write]
           cmp edi, [ebx+STREAM.work_top]
           jb .do_copy
           mov edi, [ebx+STREAM.work_buff]
           mov [ebx+STREAM.work_write], edi
.do_copy:
           mov esi, [ebx+STREAM.curr_seg]
           mov ecx, 16384/4
           cld
           rep movsd

           mov [ebx+STREAM.work_write], edi

           cmp esi, [ebx+STREAM.lim_0]
           jb .store_seg

           ; end of segment 0 reached: exchange (seg_0, lim_0) with (seg_1, lim_1)
           mov esi, [ebx+STREAM.seg_0]
           mov eax, [ebx+STREAM.lim_0]
           xchg esi, [ebx+STREAM.seg_1]
           xchg eax, [ebx+STREAM.lim_1]
           mov [ebx+STREAM.seg_0], esi
           mov [ebx+STREAM.lim_0], eax
.store_seg:
           mov [ebx+STREAM.curr_seg], esi

           xor ecx, ecx
           cmp esi, [ebx+STREAM.notify_off2]
           je .notify

           mov ecx, 0x8000
           cmp esi, [ebx+STREAM.notify_off1]
           jne .advance
.notify:
           mov [ev_code], 0xFF000001
           mov [ev_offs], ecx
           mov eax, [ebx+STREAM.notify_task]

           lea edx, [ev_code]
           push ebx
           stdcall SendEvent, eax, edx
           pop ebx
           test eax, eax
           jnz .advance

           not eax                                ; eax = -1
           mov [ebx+STREAM.notify_task], eax
.advance:
           inc [stream_index]
           dec [play_count]
           jnz .next_stream
           ret
endp
203
 
204
align 4
205
;-----------------------------------------------------------------------
; refill(str)
;
; str = pointer to a STREAM record.
; 1. If work_write has run past work_top, the overshoot
;    (work_top .. work_write) is moved to the start of the work buffer
;    and work_write is set just after the moved data.
; 2. The stream's resample routine is called with
;    (work_write, curr_seg, r_buff, r_dt, r_size, r_end); its return
;    value is added to both work_count and work_write.
; 3. curr_seg is advanced by r_size; when it reaches lim_0 the pairs
;    (seg_0, lim_0) and (seg_1, lim_1) are exchanged and curr_seg
;    becomes the new seg_0.
; 4. If the new curr_seg equals notify_off2 (offset 0) or notify_off1
;    (offset 0x8000) an event is sent to notify_task; when SendEvent
;    returns 0, notify_task is set to -1.
;
; Change: the direction flag is now cleared before the string copy, as
; new_mix and update_stream already do, instead of relying on the
; caller's DF.
;-----------------------------------------------------------------------
proc refill stdcall, str:dword
           locals
             ev_code       dd ?  ; start of the record passed to SendEvent
             ev_offs       dd ?
                           rd 4
           endl

           cld                              ; rep movsd below must copy upwards

           mov ebx, [str]
           mov ecx, [ebx+STREAM.work_write]
           cmp ecx, [ebx+STREAM.work_top]
           jbe .m2

           ; move the bytes written beyond work_top back to work_buff
           mov esi, [ebx+STREAM.work_top]
           sub ecx, esi
           mov edi, [ebx+STREAM.work_buff]
           shr ecx, 2
           rep movsd

           mov [ebx+STREAM.work_write], edi
.m2:
           mov esi, [ebx+STREAM.curr_seg]
           mov edi, [ebx+STREAM.work_write]
           mov edx, [ebx+STREAM.r_buff]

           stdcall [ebx+STREAM.resample], edi, esi, edx,\
                   [ebx+STREAM.r_dt],[ebx+STREAM.r_size],[ebx+STREAM.r_end]

           mov ebx, [str]                   ; reload: ebx is not kept across the call

           add [ebx+STREAM.work_count], eax
           add [ebx+STREAM.work_write], eax

           mov eax, [ebx+STREAM.curr_seg]
           add eax, [ebx+STREAM.r_size]
           cmp eax, [ebx+STREAM.lim_0]
           jb .store_seg

           ; end of segment 0 reached: exchange the two segments
           mov eax, [ebx+STREAM.seg_0]
           mov ecx, [ebx+STREAM.lim_0]
           xchg eax, [ebx+STREAM.seg_1]
           xchg ecx, [ebx+STREAM.lim_1]
           mov [ebx+STREAM.seg_0], eax
           mov [ebx+STREAM.lim_0], ecx
.store_seg:
           mov [ebx+STREAM.curr_seg], eax

           xor ecx, ecx
           cmp eax, [ebx+STREAM.notify_off2]
           je .notify

           mov ecx, 0x8000
           cmp eax, [ebx+STREAM.notify_off1]
           je .notify
           ret
.notify:
           mov [ev_code], 0xFF000001
           mov [ev_offs], ecx
           mov eax, [ebx+STREAM.notify_task]

           lea edx, [ev_code]
           push ebx
           stdcall SendEvent, eax, edx
           pop ebx
           test eax, eax
           jnz .done
           not eax                                ; eax = -1
           mov [ebx+STREAM.notify_task], eax
.done:
           ret
endp
274
 
275
align 4
276
;-----------------------------------------------------------------------
; resample_1(dest, src, r_buff, r_dt, r_size, r_end)
;
; Linear-interpolating resampler over 16-bit samples. r_size bytes from
; src are appended to r_buff at offset 64; the first 64 bytes of r_buff
; hold the tail saved by the previous call. The read position is a
; fixed-point value with 15 fractional bits, starting at 16 and advanced
; by r_dt until it reaches r_end. Each interpolated value is clamped to
; -32768..32767 and stored twice (low and high word of one dword).
; Afterwards the last 64 bytes of src are saved to the start of r_buff.
;
; Out:   eax = number of bytes written to dest
; Clobb: ebx, ecx, edx, esi, edi
;
; ebp is used as a scratch register inside the loop, so r_dt and r_end
; are addressed through esp there; this relies on esp being equal to the
; frame pointer (no locals, nothing pushed). ebp is restored from esp
; after the loop.
;
; Change: cld added before the string copy, as new_mix and
; update_stream already do, instead of relying on the caller's DF.
;-----------------------------------------------------------------------
proc resample_1 stdcall, dest:dword,src:dword,r_buff:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

           mov edi, [r_buff]
           add edi, 32*2
           mov esi, [src]
           mov ecx, [r_size]
           shr ecx, 2
           cld                      ; copy upwards regardless of caller's DF
           rep movsd

           mov edi, [dest]
           mov edx, [r_buff]
           mov eax, 16              ; initial fixed-point position

align 16
.l1:
           mov ecx, eax
           mov esi, eax
           and ecx, 0x7FFF          ; ecx = fractional part
           shr esi, 15              ; esi = sample index
           lea esi, [edx+esi*2]

           movsx ebp, word [esi]    ; sample at index
           movsx esi, word [esi+2]  ; sample at index+1
           mov ebx, 32768
           imul esi, ecx            ; next * frac
           sub ebx, ecx
           imul ebx, ebp            ; current * (32768 - frac)
           lea ecx, [ebx+esi+16384] ; sum with rounding
           sar ecx, 15
           cmp ecx, 32767           ; 00007fffH
           jle @f
           mov ecx, 32767           ; 00007fffH
           jmp .write
@@:
           cmp ecx, -32768          ; ffff8000H
           jge .write
           mov ecx, -32768          ; ffff8000H
.write:
           mov ebx, ecx
           shl ebx, 16
           mov bx, cx               ; same value in both words
           mov [edi], ebx
           add edi, 4

           add eax, [esp+20]        ; r_dt  (ebp is busy, see header)
           cmp eax, [esp+28]        ; r_end
           jb .l1

           mov ebp, esp             ; restore the frame pointer

           ; save the last 64 source bytes for the next call
           mov esi, [src]
           add esi, [r_size]
           sub esi, 32*2
           mov edx, [r_buff]
           mov ecx, 16
@@:
           mov ebx, [esi]
           mov [edx], ebx
           add esi, 4
           add edx, 4
           dec ecx
           jnz @B

           sub edi, [dest]
           mov eax, edi
           ret
endp
344
 
345
align 4
346
;-----------------------------------------------------------------------
; resample_18(dest, src, r_buff, r_dt, r_size, r_end)
;
; Linear-interpolating resampler over 8-bit samples. r_size bytes from
; src are appended to r_buff at offset 32; the first 32 bytes of r_buff
; hold the tail saved by the previous call. Two neighbouring bytes are
; read, 0x80 is subtracted from each and the result is sign-extended and
; shifted left by 8 before interpolation. The read position is a
; fixed-point value with 15 fractional bits, starting at 16 and advanced
; by r_dt until it reaches r_end. Each result is clamped to
; -32768..32767 and stored twice (low and high word of one dword).
; Afterwards the last 32 bytes of src are saved to the start of r_buff.
;
; Out:   eax = number of bytes written to dest
; Clobb: ebx, ecx, edx, esi, edi
;
; ebp is used as a scratch register inside the loop, so r_dt and r_end
; are addressed through esp there; this relies on esp being equal to the
; frame pointer (no locals, nothing pushed). ebp is restored from esp
; after the loop.
;
; Change: cld added before the string copy, as new_mix and
; update_stream already do, instead of relying on the caller's DF.
;-----------------------------------------------------------------------
proc resample_18 stdcall, dest:dword,src:dword,r_buff:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

           mov edi, [r_buff]
           add edi, 32
           mov esi, [src]
           mov ecx, [r_size]
           shr ecx, 2
           cld                      ; copy upwards regardless of caller's DF
           rep movsd

           mov edi, [dest]
           mov edx, [r_buff]
           mov esi, 16              ; initial fixed-point position

align 16
.l1:
           mov ecx, esi
           mov eax, esi
           and ecx, 0x7FFF          ; ecx = fractional part
           shr eax, 15              ; eax = sample index
           lea eax, [edx+eax]

           mov bx, word [eax]       ; bl = sample at index, bh = sample at index+1
           sub bh, 0x80
           sub bl, 0x80
           movsx eax, bh
           shl eax, 8
           movsx ebp, bl
           shl ebp, 8
           mov ebx, 32768
           imul eax, ecx            ; next * frac
           sub ebx, ecx
           imul ebx, ebp            ; current * (32768 - frac)
           lea ecx, [ebx+eax+16384] ; sum with rounding
           sar ecx, 15
           cmp ecx, 32767           ; 00007fffH
           jle @f
           mov ecx, 32767           ; 00007fffH
           jmp .write
@@:
           cmp ecx, -32768          ; ffff8000H
           jge .write
           mov ecx, -32768          ; ffff8000H
.write:
           mov ebx, ecx
           shl ebx, 16
           mov bx, cx               ; same value in both words
           mov [edi], ebx
           add edi, 4

           add esi, [esp+20]        ; r_dt  (ebp is busy, see header)
           cmp esi, [esp+28]        ; r_end
           jb .l1

           mov ebp, esp             ; restore the frame pointer

           ; save the last 32 source bytes for the next call
           mov esi, [src]
           add esi, [r_size]
           sub esi, 32
           mov edx, [r_buff]
           mov ecx, 8
@@:
           mov ebx, [esi]
           mov [edx], ebx
           add esi, 4
           add edx, 4
           dec ecx
           jnz @B

           sub edi, [dest]
           mov eax, edi
           ret
endp
419
 
420
align 4
421
;-----------------------------------------------------------------------
; copy_stream(dest, src, r_buff, r_dt, r_size, r_end)
;
; Copies r_size/4 dwords from src to dest. r_buff, r_dt and r_end are
; not used; the parameter list matches the other resample routines.
;
; Out:   eax = 16384 (constant, independent of r_size)
; Clobb: ecx, esi, edi
;
; Changes: cld added before the string copy, as new_mix and
; update_stream already do; the dead "mov eax, ecx" (overwritten before
; use) is removed.
;-----------------------------------------------------------------------
proc copy_stream stdcall, dest:dword,src:dword,r_buff:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

           mov ecx, [r_size]
           shr ecx, 2
           mov esi, [src]
           mov edi, [dest]
           cld                      ; copy upwards regardless of caller's DF
           rep movsd
           mov eax, 16384
           ret
endp
433
 
434
align 4
435
;-----------------------------------------------------------------------
; resample_2(dest, src, r_buff, r_dt, r_size, r_end)
;
; MMX linear-interpolating resampler over 4-byte frames (two 16-bit
; words per frame). r_size bytes from src are appended to r_buff at
; offset 128; the first 128 bytes of r_buff hold the tail saved by the
; previous call. The read position is a fixed-point value with 15
; fractional bits, starting at 16 and advanced by r_dt until it reaches
; r_end. One dword is written per step. Afterwards the last 128 bytes of
; src are saved to the start of r_buff.
;
; Out:   eax = number of bytes written to dest
; Clobb: ebx, ecx, edx, esi, edi, mm0-mm4 (MMX state is cleared by emms)
;
; NOTE(review): pmulhw/pmullw are signed multiplies. If [m7] holds
; 0x8000 in each word, as the original comment says, a fraction of 0
; gives a weight of 0x8000, which is -32768 as a signed word - confirm
; whether that can invert a sample.
;
; Change: cld added before the string copy, as new_mix and
; update_stream already do, instead of relying on the caller's DF.
;-----------------------------------------------------------------------
proc resample_2 stdcall, dest:dword,src:dword,r_buff:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

           mov edi, [r_buff]
           add edi, 32*4
           mov esi, [src]
           mov ecx, [r_size]
           shr ecx, 2
           cld                      ; copy upwards regardless of caller's DF
           rep movsd

           mov edx, [r_buff]
           mov edi, [dest]
           mov ebx, [r_dt]
           mov eax, 16              ; initial fixed-point position
           emms

align 16
.l1:
           mov ecx, eax
           mov esi, eax
           and ecx, 0x7FFF          ; ecx = fractional part
           shr esi, 15              ; esi = frame index
           lea esi, [edx+esi*4]

           movq mm0, [esi]          ; current frame and next frame
           movq mm1, mm0

           movd mm2, ecx
           punpcklwd mm2, mm2       ; frac in words 0 and 1
           movq mm3, qword [m7]     ; 0x8000

           psubw mm3, mm2           ; 0x8000 - frac
           punpckldq mm3, mm2       ; weights: (0x8000-frac) x2, frac x2

           pmulhw mm0, mm3          ; high halves of the products
           pmullw mm1, mm3          ; low halves of the products

           movq mm4, mm1
           punpcklwd mm1, mm0       ; 32-bit products of the current frame
           punpckhwd mm4, mm0       ; 32-bit products of the next frame
           paddd mm1, mm4
           psrad mm1, 15
           packssdw mm1, mm1        ; saturate back to 16 bits
           movd [edi], mm1
           add edi, 4

           add eax, ebx
           cmp eax, [r_end]
           jb .l1
           emms

           ; save the last 128 source bytes for the next call
           mov esi, [src]
           add esi, [r_size]
           sub esi, 32*4
           mov edx, [r_buff]
           mov ecx, 32
@@:
           mov ebx, [esi]
           mov [edx], ebx
           add esi, 4
           add edx, 4
           dec ecx
           jnz @B

           sub edi, [dest]
           mov eax, edi
           ret
endp
503
 
504
align 4
505
;-----------------------------------------------------------------------
; resample_28(dest, src, r_buff, r_dt, r_size, r_end)
;
; MMX linear-interpolating resampler over 2-byte frames (two 8-bit
; samples per frame). r_size bytes from src are appended to r_buff at
; offset 64; the first 64 bytes of r_buff hold the tail saved by the
; previous call. Each step loads bytes, subtracts [mm80], widens every
; byte to a word and masks with [mm_mask] before interpolating
; (presumably 0x80 bias removal and keeping the high byte - the
; constants are defined outside this block, TODO confirm). The read
; position is a fixed-point value with 15 fractional bits, starting at
; 16 and advanced by r_dt until it reaches r_end. One dword is written
; per step. Afterwards the last 64 bytes of src are saved to the start
; of r_buff.
;
; Out:   eax = number of bytes written to dest
; Clobb: ebx, ecx, edx, esi, edi, mm0-mm4, mm6, mm7
;        (MMX state is cleared by emms)
;
; NOTE(review): pmulhw/pmullw are signed multiplies. If [m7] holds
; 0x8000 in each word, as the original comment says, a fraction of 0
; gives a weight of 0x8000, which is -32768 as a signed word - confirm
; whether that can invert a sample.
;
; Change: cld added before the string copy, as new_mix and
; update_stream already do, instead of relying on the caller's DF.
;-----------------------------------------------------------------------
proc resample_28 stdcall, dest:dword,src:dword,r_buff:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

           mov edi, [r_buff]
           add edi, 32*2
           mov esi, [src]
           mov ecx, [r_size]
           shr ecx, 2
           cld                      ; copy upwards regardless of caller's DF
           rep movsd

           mov edx, [r_buff]
           mov edi, [dest]
           mov ebx, [r_dt]
           mov eax, 16              ; initial fixed-point position
           emms
           movq mm7, [mm80]
           movq mm6, [mm_mask]

align 16
.l1:
           mov ecx, eax
           mov esi, eax
           and ecx, 0x7FFF          ; ecx = fractional part
           shr esi, 15              ; esi = frame index
           lea esi, [edx+esi*2]

           movq mm0, [esi]
           psubb mm0, mm7
           punpcklbw mm0, mm0       ; widen the low four bytes to words
           pand mm0, mm6

           movq mm1, mm0

           movd mm2, ecx
           punpcklwd mm2, mm2       ; frac in words 0 and 1
           movq mm3, qword [m7]     ; 0x8000

           psubw mm3, mm2           ; 0x8000 - frac
           punpckldq mm3, mm2       ; weights: (0x8000-frac) x2, frac x2

           pmulhw mm0, mm3          ; high halves of the products
           pmullw mm1, mm3          ; low halves of the products

           movq mm4, mm1
           punpcklwd mm1, mm0       ; 32-bit products of the current frame
           punpckhwd mm4, mm0       ; 32-bit products of the next frame
           paddd mm1, mm4
           psrad mm1, 15
           packssdw mm1, mm1        ; saturate back to 16 bits
           movd [edi], mm1
           add edi, 4

           add eax, ebx
           cmp eax, [r_end]
           jb .l1
           emms

           ; save the last 64 source bytes for the next call
           mov esi, [src]
           add esi, [r_size]
           sub esi, 32*2
           mov edx, [r_buff]
           mov ecx, 16
@@:
           mov ebx, [esi]
           mov [edx], ebx
           add esi, 4
           add edx, 4
           dec ecx
           jnz @B

           sub edi, [dest]
           mov eax, edi
           ret
endp
579
 
580
 
581
;-----------------------------------------------------------------------
; m16_stereo(dest, src, r_buff, r_dt, r_size, r_end)
;
; Expands src to dest by duplicating every 16-bit word (done by
; m16_s_mmx, 64 source bytes -> 128 destination bytes per call).
; Works in units of 256 source bytes: r_size/256 rounds of four
; m16_s_mmx calls. r_buff, r_dt and r_end are not used.
;
; Out:   eax = 2 * r_size
; Clobb: ecx, esi, edi, mm0, mm1
;
; Change: when r_size is below 256 the round count is zero; the loop is
; now skipped instead of letting "dec ecx / jnz" wrap around and run
; about 2^32 rounds.
;-----------------------------------------------------------------------
proc m16_stereo stdcall, dest:dword,src:dword,r_buff:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

           mov esi, [src]
           mov edi, [dest]
           mov ecx, [r_size]
           shr ecx, 8               ; rounds of 256 source bytes
           jz .done                 ; nothing to convert
.round:
repeat 4
           call m16_s_mmx
           add edi, 128
           add esi, 64
end repeat
           dec ecx
           jnz .round
.done:
           mov eax, [r_size]
           add eax, eax
           ret
endp
608
 
609
align 4
610
;-----------------------------------------------------------------------
; s8_stereo(dest, src, r_buff, r_dt, r_size, r_end)
;
; Widens 8-bit source data to 16 bits (done by s8_s_mmx, 32 source
; bytes -> 64 destination bytes per call). Works in units of 128 source
; bytes: r_size/128 rounds of four s8_s_mmx calls. mm7 and mm6 are
; loaded from [mm80] and [mm_mask] for s8_s_mmx. r_buff, r_dt and r_end
; are not used.
;
; Out:   eax = 2 * r_size
; Clobb: ecx, esi, edi, mm0, mm1, mm6, mm7
;
; Change: when r_size is below 128 the round count is zero; the loop is
; now skipped instead of letting "dec ecx / jnz" wrap around and run
; about 2^32 rounds.
;-----------------------------------------------------------------------
proc s8_stereo stdcall, dest:dword,src:dword,r_buff:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

           mov esi, [src]
           mov edi, [dest]
           mov ecx, [r_size]
           shr ecx, 7               ; rounds of 128 source bytes

           movq mm7, [mm80]
           movq mm6, [mm_mask]

           test ecx, ecx
           jz .done                 ; nothing to convert
.round:
repeat 4
           call s8_s_mmx
           add edi, 64
           add esi, 32
end repeat
           dec ecx
           jnz .round
.done:
           mov eax, [r_size]
           add eax, eax
           ret
endp
640
 
641
;-----------------------------------------------------------------------
; m8_stereo(dest, src, r_buff, r_dt, r_size, r_end)
;
; Widens 8-bit source data to 16 bits and stores every resulting word
; twice (done by m8_s_mmx, 16 source bytes -> 64 destination bytes per
; call). Works in units of 64 source bytes: r_size/64 rounds of four
; m8_s_mmx calls. mm7 and mm6 are loaded from [mm80] and [mm_mask] for
; m8_s_mmx. r_buff, r_dt and r_end are not used.
;
; Out:   eax = 4 * r_size
; Clobb: ecx, esi, edi, mm0-mm3, mm6, mm7
;
; Change: when r_size is below 64 the round count is zero; the loop is
; now skipped instead of letting "dec ecx / jnz" wrap around and run
; about 2^32 rounds.
;-----------------------------------------------------------------------
proc m8_stereo stdcall, dest:dword,src:dword,r_buff:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

           mov esi, [src]
           mov edi, [dest]
           mov ecx, [r_size]
           shr ecx, 6               ; rounds of 64 source bytes

           movq mm7, [mm80]
           movq mm6, [mm_mask]

           test ecx, ecx
           jz .done                 ; nothing to convert
.round:
repeat 4
           call m8_s_mmx
           add edi, 64
           add esi, 16
end repeat
           dec ecx
           jnz .round
.done:
           mov eax, [r_size]
           add eax, eax
           add eax, eax
           ret
endp
672
 
673
align 4
674
;-----------------------------------------------------------------------
; alloc_mix_buff
;
; Takes the lowest set bit of mix_buff_map, clears it and returns the
; address of the matching 512-byte slot inside the area pointed to by
; [mix_buff].
;
; Out:   eax = [mix_buff] + index*512, or 0 when mix_buff_map is zero
;-----------------------------------------------------------------------
proc alloc_mix_buff

           bsf eax, [mix_buff_map]
           jz .none                     ; map is empty: no buffer left

           btr [mix_buff_map], eax      ; mark the slot as taken
           shl eax, 9                   ; index * 512
           add eax, [mix_buff]
           ret
.none:
           xor eax, eax
           ret
endp
686
 
687
;-----------------------------------------------------------------------
; m16_s_mmx
;
; In:    esi = source (64 bytes read), edi = destination (128 bytes written)
; Every 16-bit source word is written twice in a row to the destination.
; esi and edi are left unchanged.
; Clobb: mm0, mm1
;-----------------------------------------------------------------------
proc m16_s_mmx

repeat 8                                    ; 8 source bytes per pass
           movq    mm0, [esi+(%-1)*8]
           movq    mm1, mm0
           punpcklwd mm0, mm0               ; words 0,1 doubled
           punpckhwd mm1, mm1               ; words 2,3 doubled
           movq    [edi+(%-1)*16], mm0
           movq    [edi+(%-1)*16+8], mm1
end repeat

           ret
endp
748
 
749
align 4
750
;-----------------------------------------------------------------------
; s8_s_mmx
;
; In:    esi = source (32 bytes read), edi = destination (64 bytes written)
;        mm7 = value subtracted from every source byte
;        mm6 = mask applied to every widened word
; Each source byte has mm7 subtracted, is widened to a word by pairing
; it with itself, and is then masked with mm6.
; esi and edi are left unchanged.
; Clobb: mm0, mm1
;-----------------------------------------------------------------------
proc s8_s_mmx

repeat 4                                    ; 8 source bytes per pass
           movq    mm0, [esi+(%-1)*8]
           psubb   mm0, mm7
           movq    mm1, mm0
           punpcklbw mm0, mm0               ; bytes 0..3 -> words
           pand    mm0, mm6
           punpckhbw mm1, mm1               ; bytes 4..7 -> words
           pand    mm1, mm6
           movq    [edi+(%-1)*16], mm0
           movq    [edi+(%-1)*16+8], mm1
end repeat

           ret

endp
795
 
796
align 4
797
;-----------------------------------------------------------------------
; m8_s_mmx
;
; In:    esi = source (16 bytes read), edi = destination (64 bytes written)
;        mm7 = value subtracted from every source byte
;        mm6 = mask applied to every widened word
; Each source byte has mm7 subtracted, is widened to a word by pairing
; it with itself, is masked with mm6, and the resulting word is written
; twice in a row.
; esi and edi are left unchanged.
; Clobb: mm0, mm1, mm2, mm3
;-----------------------------------------------------------------------
proc m8_s_mmx

repeat 2                                    ; 8 source bytes per pass
           movq    mm0, [esi+(%-1)*8]
           psubb   mm0, mm7
           movq    mm1, mm0
           punpcklbw mm0, mm0               ; bytes 0..3 -> words
           pand    mm0, mm6
           punpckhbw mm1, mm1               ; bytes 4..7 -> words
           pand    mm1, mm6
           movq    mm2, mm0
           punpcklwd mm0, mm0               ; words 0,1 doubled
           punpckhwd mm2, mm2               ; words 2,3 doubled

           movq    mm3, mm1
           punpcklwd mm1, mm1               ; words 4,5 doubled
           punpckhwd mm3, mm3               ; words 6,7 doubled

           movq    [edi+(%-1)*32], mm0
           movq    [edi+(%-1)*32+8], mm2
           movq    [edi+(%-1)*32+16], mm1
           movq    [edi+(%-1)*32+24], mm3
end repeat

           ret
endp
841
 
842
 
843
align 4
844
;-----------------------------------------------------------------------
; mix_2_1(output, str0, str1)
;
; Mixes 512 bytes of str0 and str1 into output as four 128-byte chunks,
; each handled by mix_2_1_mmx.
; Clobb: edi and whatever mix_2_1_mmx clobbers
;-----------------------------------------------------------------------
proc mix_2_1 stdcall, output:dword, str0:dword, str1:dword

           mov edi, [output]

repeat 3                                    ; chunks 1..3, then step to the next
           stdcall mix_2_1_mmx, edi, [str0],[str1]
           add edi, 128
           add [str0], 128
           add [str1], 128
end repeat
           stdcall mix_2_1_mmx, edi, [str0],[str1]   ; chunk 4

           ret
endp
864
 
865
 
866
align 4
867
;-----------------------------------------------------------------------
; mix_3_1(output, str0, str1, str2)
;
; Mixes 512 bytes of str0, str1 and str2 into output as four 128-byte
; chunks, each handled by mix_3_1_mmx.
; Clobb: edi and whatever mix_3_1_mmx clobbers
;-----------------------------------------------------------------------
proc mix_3_1 stdcall, output:dword, str0:dword, str1:dword, str2:dword

           mov edi, [output]

repeat 3                                    ; chunks 1..3, then step to the next
           stdcall mix_3_1_mmx, edi, [str0],[str1],[str2]
           add edi, 128
           add [str0], 128
           add [str1], 128
           add [str2], 128
end repeat
           stdcall mix_3_1_mmx, edi, [str0],[str1],[str2]   ; chunk 4

           ret
endp
890
 
891
align 4
892
;-----------------------------------------------------------------------
; mix_4_1(str0, str1, str2, str3)
;
; Obtains a scratch buffer from alloc_mix_buff and mixes 512 bytes of
; the four inputs into it as four 128-byte chunks, each handled by
; mix_4_1_mmx.
;
; Out:   eax = address of the scratch buffer, or 0 when none was free
; Clobb: edi and whatever mix_4_1_mmx clobbers
;-----------------------------------------------------------------------
proc mix_4_1 stdcall, str0:dword, str1:dword,\
                      str2:dword, str3:dword

           local output:DWORD

           call alloc_mix_buff
           test eax, eax
           jz .done                         ; eax is already 0: report failure
           mov [output], eax

           mov edi, eax

repeat 3                                    ; chunks 1..3, then step to the next
           stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3]
           add edi, 128
           add [str0], 128
           add [str1], 128
           add [str2], 128
           add [str3], 128
end repeat
           stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3]   ; chunk 4
           mov eax, [output]
.done:
           ret
endp
929
 
930
 
931
align 4
932
;-----------------------------------------------------------------------
; final_mix(output, str0, str1, str2, str3)
;
; Mixes 512 bytes of the four inputs into output as four 128-byte
; chunks, each handled by mix_4_1_mmx.
; Clobb: edi and whatever mix_4_1_mmx clobbers
;-----------------------------------------------------------------------
proc final_mix stdcall, output:dword, str0:dword, str1:dword,\
                        str2:dword, str3:dword

           mov edi, [output]

repeat 3                                    ; chunks 1..3, then step to the next
           stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3]
           add edi, 128
           add [str0], 128
           add [str1], 128
           add [str2], 128
           add [str3], 128
end repeat
           stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3]   ; chunk 4

           ret
endp
959
 
960
align 4
961
;-----------------------------------------------------------------------
; mix_2_1_mmx(output, str0, str1)
;
; Adds 128 bytes of str0 and str1 as signed 16-bit words with
; saturation (paddsw) and stores the sums at output.
; No scaling is applied to the sum (the original carried a disabled
; "psraw mmN, 1" after each add).
; Clobb: eax, ecx, edx, mm0-mm3
;-----------------------------------------------------------------------
proc mix_2_1_mmx stdcall, output:dword, str0:dword, str1:dword

           mov edx, [output]
           mov eax, [str0]
           mov ecx, [str1]

repeat 4                                    ; 32 bytes per pass
           movq mm0, [eax+(%-1)*32]
           paddsw mm0, [ecx+(%-1)*32]
           movq [edx+(%-1)*32], mm0

           movq mm1, [eax+(%-1)*32+8]
           paddsw mm1, [ecx+(%-1)*32+8]
           movq [edx+(%-1)*32+8], mm1

           movq mm2, [eax+(%-1)*32+16]
           paddsw mm2, [ecx+(%-1)*32+16]
           movq [edx+(%-1)*32+16], mm2

           movq mm3, [eax+(%-1)*32+24]
           paddsw mm3, [ecx+(%-1)*32+24]
           movq [edx+(%-1)*32+24], mm3
end repeat

           ret
endp
1049
 
1050
align 4
1051
;-----------------------------------------------------------------------
; mix_3_1_mmx(output, str0, str1, str2)
;
; Adds 128 bytes of str0, str1 and str2 as signed 16-bit words with
; saturation (paddsw) and stores the sums at output.
; Clobb: eax, ebx, ecx, edx, mm0-mm3
;-----------------------------------------------------------------------
proc mix_3_1_mmx stdcall, output:dword, str0:dword, str1:dword, str2:dword

           mov edx, [output]
           mov eax, [str0]
           mov ebx, [str1]
           mov ecx, [str2]

repeat 4                                    ; 32 bytes per pass
           movq mm0, [eax+(%-1)*32]
           paddsw mm0, [ebx+(%-1)*32]
           paddsw mm0, [ecx+(%-1)*32]
           movq [edx+(%-1)*32], mm0

           movq mm1, [eax+(%-1)*32+8]
           paddsw mm1, [ebx+(%-1)*32+8]
           paddsw mm1, [ecx+(%-1)*32+8]
           movq [edx+(%-1)*32+8], mm1

           movq mm2, [eax+(%-1)*32+16]
           paddsw mm2, [ebx+(%-1)*32+16]
           paddsw mm2, [ecx+(%-1)*32+16]
           movq [edx+(%-1)*32+16], mm2

           movq mm3, [eax+(%-1)*32+24]
           paddsw mm3, [ebx+(%-1)*32+24]
           paddsw mm3, [ecx+(%-1)*32+24]
           movq [edx+(%-1)*32+24], mm3
end repeat

           ret
endp
1140
 
1141
align 4
1142
; mix_4_1_mmx - saturating mix of four input blocks into one output block.
;
; Each of the 16 quadwords at byte offsets 0..120 (128 bytes per block) is
; processed as packed signed 16-bit words:
;   output = sat( sat(str0 + str2) + sat(str1 + str3) )
;
; In:     output          - destination block
;         str0 .. str3    - the four source blocks
; Clobb:  eax, ebx, ecx, edx, esi, mm0-mm3 (nothing is saved or restored here)
; Note:   no EMMS is executed in this routine.
proc mix_4_1_mmx stdcall, output:dword, str0:dword, str1:dword,\
                          str2:dword, str3:dword

           mov esi, [str0]
           mov eax, [str1]
           mov ebx, [str2]
           mov ecx, [str3]
           mov edx, [output]

           ; Byte offsets 0..72: five pairs of quadwords. The first quadword
           ; of every pair goes through mm0/mm1, the second through mm2/mm3.
           ; `%` is the 1-based repeat counter, so (%-1)*16 = 0,16,32,48,64.
           repeat 5
             movq mm0, [esi+(%-1)*16]
             movq mm1, [eax+(%-1)*16]
             paddsw mm0, [ebx+(%-1)*16]
             paddsw mm1, [ecx+(%-1)*16]
             paddsw mm0, mm1
             movq [edx+(%-1)*16], mm0

             movq mm2, [esi+(%-1)*16+8]
             movq mm3, [eax+(%-1)*16+8]
             paddsw mm2, [ebx+(%-1)*16+8]
             paddsw mm3, [ecx+(%-1)*16+8]
             paddsw mm2, mm3
             movq [edx+(%-1)*16+8], mm2
           end repeat

           ; Byte offsets 80..120: six quadwords, all through mm2/mm3.
           repeat 6
             movq mm2, [esi+80+(%-1)*8]
             movq mm3, [eax+80+(%-1)*8]
             paddsw mm2, [ebx+80+(%-1)*8]
             paddsw mm3, [ecx+80+(%-1)*8]
             paddsw mm2, mm3
             movq [edx+80+(%-1)*8], mm2
           end repeat

           ret
endp
1265
 
1266
align 4
1267
; copy_mem - copy one fixed-size block of 0x80 dwords (512 bytes).
;
; In:     output - destination address
;         input  - source address
; Out:    esi = input + 512, edi = output + 512, ecx = 0,
;         eax = last dword copied
; Clobb:  eax, ecx, esi, edi, flags (nothing is saved or restored here)
; Note:   dwords are copied in ascending address order.
proc copy_mem stdcall, output:dword, input:dword

           mov edi, [output]
           mov esi, [input]
           mov ecx, 0x80                ; dword count: 0x80 * 4 = 512 bytes
.next:
           mov eax, [esi]
           mov [edi], eax
           add esi, 4
           add edi, 4
           dec ecx                      ; dec/jnz instead of the legacy LOOP
           jnz .next                    ; instruction, same form as memcpy

           ret
endp
1281
 
1282
; memcpy - register-call dword copy (no stack arguments).
;
; In:     esi = source address
;         edi = destination address
;         ecx = number of DWORDS to copy (not bytes)
; Out:    esi and edi advanced by 4 * count, ecx = 0,
;         eax = last dword copied (eax is untouched when count is 0)
; Clobb:  eax, ecx, esi, edi, flags
;
; A zero count returns immediately. Without the guard, `dec ecx` would wrap
; to 0xFFFFFFFF and the loop would run 2^32 times, overrunning both buffers.
proc memcpy
           test ecx, ecx
           jz .done                     ; nothing to copy
@@:
           mov eax, [esi]
           mov [edi], eax
           add esi, 4
           add edi, 4
           dec ecx
           jnz @B
.done:
           ret
endp
1292