Subversion Repositories Kolibri OS

Rev

Rev 281 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
168 serge 1
;
2
;   This file is part of the Infinity sound library.
3
;   (C) copyright Serge 2006
4
;   email: infinity_sound@mail.ru
5
;
6
;   This program is free software; you can redistribute it and/or modify
7
;   it under the terms of the GNU General Public License as published by
8
;   the Free Software Foundation; either version 2 of the License, or
9
;   (at your option) any later version.
10
;
11
;   This program is distributed in the hope that it will be useful,
12
;   but WITHOUT ANY WARRANTY; without even the implied warranty of
13
;   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
;   GNU General Public License for more details.
15
 
16
align 4
17
;-----------------------------------------------------------------------
; new_mix - mix all streams of the current play list into [output].
;
; In:   output = destination buffer (stdcall stack argument)
; Out:  16384 bytes written to [output]:
;         - zeros when play_count is 0 after prepare_playlist,
;         - otherwise 32 blocks of 512 bytes, each block being the mix
;           of 512 bytes taken from every stream's work_read position.
; Uses: play_list, play_count, mix_input, mix_buff_map (globals),
;       prepare_playlist, FpuSave/FpuRestore, update_stream.
; Clobbers eax, ebx, ecx, esi, edi and whatever the callees clobber.
;
; Per block the streams are gathered into mix_input[0..3].  When all
; four slots are used and streams are still pending, the four slots are
; collapsed by mix_4_1 into one scratch buffer which then occupies
; slot 0, and only the three remaining slots are refilled.  (The
; previous code refilled four slots after a collapse, which wrote
; mix_input[4], lost that stream and re-mixed the stale slot 0.)
;
; NOTE(review): mix_4_1 returns 0 when no scratch buffer is free; that
; case is not handled here.
;-----------------------------------------------------------------------
proc new_mix stdcall, output:dword
        locals
          mixIndex    dd ?              ; occupied slots in mix_input
          streamIndex dd ?              ; next play_list entry to fetch
          inputCount  dd ?              ; streams not yet fetched
          main_count  dd ?              ; 512-byte blocks left to produce
        endl

        call    prepare_playlist

        cmp     [play_count], 0
        je      .silence
        call    FpuSave
        mov     [main_count], 32        ; 32 * 512 = 16384 bytes

.next_block:
        mov     [mix_buff_map], 0x0000FFFF ; scratch buffers 0..15 free
        xor     eax, eax
        mov     [mixIndex], eax
        mov     [streamIndex], eax
        mov     ebx, [play_count]
        mov     [inputCount], ebx

.fill_slots:
        mov     ecx, 4
        sub     ecx, [mixIndex]         ; ecx = free slots (4, or 3 after a collapse)
.fetch:
        mov     ebx, [streamIndex]
        mov     esi, [play_list+ebx*4]
        mov     eax, [esi+STREAM.work_read]
        add     [esi+STREAM.work_read], 512 ; consume one block of this stream

        mov     ebx, [mixIndex]
        mov     [mix_input+ebx*4], eax
        inc     [mixIndex]
        inc     [streamIndex]
        dec     [inputCount]
        jz      .final                  ; last stream fetched: mix into output

        dec     ecx
        jnz     .fetch

        ; four slots full and more streams pending: collapse into slot 0
        stdcall mix_4_1, [mix_input],[mix_input+4],[mix_input+8],[mix_input+12]
        mov     [mix_input], eax
        mov     [mixIndex], 1
        jmp     .fill_slots

.final:
        cmp     [mixIndex], 1
        jne     @f
        stdcall copy_mem, [output], [mix_input]
        jmp     .block_done
@@:
        cmp     [mixIndex], 2
        jne     @f
        stdcall mix_2_1, [output], [mix_input], [mix_input+4]
        jmp     .block_done
@@:
        cmp     [mixIndex], 3
        jne     @f
        stdcall mix_3_1, [output],[mix_input],[mix_input+4],[mix_input+8]
        jmp     .block_done
@@:
        stdcall final_mix, [output],[mix_input],[mix_input+4],[mix_input+8], [mix_input+12]
.block_done:
        add     [output], 512

        sub     [main_count], 1
        jnz     .next_block

        call    update_stream
        emms
        call    FpuRestore
        ret

.silence:
        ; nothing to play: clear 0x1000 dwords (16384 bytes) of output
        mov     edi, [output]
        mov     ecx, 0x1000
        xor     eax, eax
        cld
        rep stosd
        ret
endp
108
 
109
align 4
110
;-----------------------------------------------------------------------
; update_stream - per-stream bookkeeping after a mixing pass.
;
; Walks play_list[0 .. play_count-1].  The global play_count is used as
; the loop counter and is therefore 0 on return.
; For every stream:
;   - work_read is wrapped back to work_buff once it reaches work_top;
;   - PCM_2_16_48 streams get 16384 bytes copied from curr_seg to
;     work_write, after which the segment and notification state is
;     advanced;
;   - all other formats have work_count reduced by 16384 and refill is
;     called when work_count is not above 32768.
; Clobbers eax, ebx, ecx, edx, esi, edi.
;-----------------------------------------------------------------------
proc update_stream
        locals
          stream_index  dd ?
          ev_code       dd ?  ;EVENT
          ev_offs       dd ?
                        rd 4
        endl

        mov     [stream_index], 0

.next_stream:
        mov     edx, [stream_index]
        mov     esi, [play_list+edx*4]

        ; wrap the read pointer of the work buffer
        mov     eax, [esi+STREAM.work_read]
        cmp     eax, [esi+STREAM.work_top]
        jb      .read_ok
        mov     eax, [esi+STREAM.work_buff]
.read_ok:
        mov     [esi+STREAM.work_read], eax

        cmp     [esi+STREAM.format], PCM_2_16_48
        je      .copy

        sub     [esi+STREAM.work_count], 16384

        cmp     [esi+STREAM.work_count], 32768
        ja      .advance

        stdcall refill, esi
        jmp     .advance

.copy:
        mov     ebx, esi                ; ebx = stream, esi is needed for movsd
        mov     edi, [ebx+STREAM.work_write]
        cmp     edi, [ebx+STREAM.work_top]
        jb      .write_ok
        mov     edi, [ebx+STREAM.work_buff]
        mov     [ebx+STREAM.work_write], edi
.write_ok:
        mov     esi, [ebx+STREAM.curr_seg]
        mov     ecx, 16384/4
        cld
        rep movsd

        mov     [ebx+STREAM.work_write], edi

        ; esi now points just past the copied data
        cmp     esi, [ebx+STREAM.lim_0]
        jb      .seg_ok

        ; end of segment 0 reached: exchange seg_0/lim_0 with seg_1/lim_1
        mov     esi, [ebx+STREAM.seg_0]
        mov     eax, [ebx+STREAM.lim_0]
        xchg    esi, [ebx+STREAM.seg_1]
        xchg    eax, [ebx+STREAM.lim_1]
        mov     [ebx+STREAM.seg_0], esi
        mov     [ebx+STREAM.lim_0], eax
.seg_ok:
        mov     [ebx+STREAM.curr_seg], esi

        ; event offset: 0 when at notify_off2, 0x8000 when at notify_off1
        xor     ecx, ecx
        cmp     esi, [ebx+STREAM.notify_off2]
        je      .notify

        mov     ecx, 0x8000
        cmp     esi, [ebx+STREAM.notify_off1]
        jne     .advance

.notify:
        mov     [ev_code], 0xFF000001
        mov     [ev_offs], ecx
        mov     eax, [ebx+STREAM.notify_task]

        lea     edx, [ev_code]
        push    ebx
        stdcall SendEvent, eax, edx
        pop     ebx
        test    eax, eax
        jnz     .advance

        ; SendEvent returned 0: store -1 as notify_task
        not     eax
        mov     [ebx+STREAM.notify_task], eax      ;-1

.advance:
        inc     [stream_index]
        dec     [play_count]
        jnz     .next_stream
        ret
endp
203
 
204
align 4
205
;-----------------------------------------------------------------------
; refill - convert the next piece of a stream's source data into its
;          work buffer.
;
; In:   str = pointer to STREAM (stdcall stack argument)
; Steps:
;   1. If work_write has run past work_top, the excess bytes are moved
;      to work_buff and work_write continues behind them.
;   2. [STREAM.resample] is called with (work_write, curr_seg, r_dt,
;      r_size, r_end); its return value in eax is added to work_count
;      and work_write.
;   3. curr_seg is advanced by r_size; on reaching lim_0 the last 128
;      bytes of the 0x8000-byte segment are copied to just below seg_0
;      and seg_0/lim_0 are exchanged with seg_1/lim_1.
;   4. When the new curr_seg equals notify_off2 / notify_off1 an event
;      (offset 0 / 0x8000) is sent to notify_task; notify_task is set
;      to -1 when SendEvent returns 0.
; Clobbers eax, ebx, ecx, edx, esi, edi.
;
; Change: cld added in front of the first rep movsd so the copy no
; longer depends on the caller's direction flag (the other string
; copies in this file already clear it).
;-----------------------------------------------------------------------
proc refill stdcall, str:dword
        locals
          ev_code       dd ?  ;EVENT
          ev_offs       dd ?
                        rd 4
        endl

        mov     ebx, [str]
        mov     ecx, [ebx+STREAM.work_write]
        cmp     ecx, [ebx+STREAM.work_top]
        jbe     .resample

        ; move the bytes written beyond work_top to the buffer start
        mov     esi, [ebx+STREAM.work_top]
        sub     ecx, esi
        mov     edi, [ebx+STREAM.work_buff]
        shr     ecx, 2
        cld
        rep movsd

        mov     [ebx+STREAM.work_write], edi
.resample:
        mov     esi, [ebx+STREAM.curr_seg]
        mov     edi, [ebx+STREAM.work_write]

        stdcall [ebx+STREAM.resample], edi, esi, \
                [ebx+STREAM.r_dt],[ebx+STREAM.r_size],[ebx+STREAM.r_end]

        mov     ebx, [str]              ; the resampler does not preserve ebx

        add     [ebx+STREAM.work_count], eax
        add     [ebx+STREAM.work_write], eax

        mov     eax, [ebx+STREAM.curr_seg]
        add     eax, [ebx+STREAM.r_size]
        cmp     eax, [ebx+STREAM.lim_0]
        jb      .seg_ok

        ; keep the last 128 bytes of the segment just below seg_0
        ; NOTE(review): the tail always goes below the current seg_0,
        ; before the exchange - confirm this is the intended location
        ; once seg_0 and seg_1 have been swapped.
        mov     esi, [ebx+STREAM.seg_0]
        lea     edi, [esi-128]
        add     esi, 0x7F80
        mov     ecx, 128/4
        cld
        rep movsd

        ; exchange seg_0/lim_0 with seg_1/lim_1; eax = new seg_0
        mov     eax, [ebx+STREAM.seg_0]
        mov     ecx, [ebx+STREAM.lim_0]
        xchg    eax, [ebx+STREAM.seg_1]
        xchg    ecx, [ebx+STREAM.lim_1]
        mov     [ebx+STREAM.seg_0], eax
        mov     [ebx+STREAM.lim_0], ecx
.seg_ok:
        mov     [ebx+STREAM.curr_seg], eax

        xor     ecx, ecx                ; event offset 0 for notify_off2
        cmp     eax, [ebx+STREAM.notify_off2]
        je      .notify

        mov     ecx, 0x8000             ; event offset for notify_off1
        cmp     eax, [ebx+STREAM.notify_off1]
        je      .notify
        ret

.notify:
        mov     [ev_code], 0xFF000001
        mov     [ev_offs], ecx
        mov     eax, [ebx+STREAM.notify_task]

        lea     edx, [ev_code]
        push    ebx
        stdcall SendEvent, eax, edx
        pop     ebx
        test    eax, eax
        jnz     .done
        not     eax
        mov     [ebx+STREAM.notify_task], eax      ;-1
.done:
        ret
endp
280
 
281
align 4
285 serge 282
;-----------------------------------------------------------------------
; resample_1 - linear-interpolating resampler, 16-bit single-channel
;              source words.
;
; In (stdcall): dest, src, r_dt, r_size, r_end
; Out: eax = number of bytes written to dest
;
; The source position is kept in eax with 15 fractional bits; it starts
; at 16, advances by r_dt and the loop ends when it reaches r_end.
; Source words are addressed relative to src - 64 bytes.  Each result is
; clamped to -32768..32767 and stored in both halves of an output dword.
;
; ebp is used as a data register inside the loop, so r_dt and r_end are
; addressed through esp there.  Stack layout after the prologue:
;   [esp+8]=dest [esp+12]=src [esp+16]=r_dt [esp+20]=r_size [esp+24]=r_end
; ebp is re-derived from esp before the named arguments are used again.
; Clobbers ebx, ecx, edx, esi, edi.
;-----------------------------------------------------------------------
proc resample_1 stdcall, dest:dword,src:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

        mov     edi, [dest]
        mov     edx, [src]
        sub     edx, 32*2
        mov     eax, 16

align 16
.sample:
        mov     esi, eax
        mov     ecx, eax
        shr     esi, 15                 ; integer part: word index
        and     ecx, 0x7FFF             ; fractional part
        lea     esi, [edx+esi*2]

        movsx   ebp, word [esi]         ; s0
        movsx   esi, word [esi+2]       ; s1
        mov     ebx, 32768
        sub     ebx, ecx
        imul    ebx, ebp                ; (32768 - frac) * s0
        imul    esi, ecx                ; frac * s1
        lea     ecx, [ebx+esi+16384]    ; sum plus rounding term
        sar     ecx, 15

        ; clamp to the signed 16-bit range
        cmp     ecx, -32768             ; ffff8000H
        jge     .low_ok
        mov     ecx, -32768
.low_ok:
        cmp     ecx, 32767              ; 00007fffH
        jle     .store
        mov     ecx, 32767
.store:
        mov     ebx, ecx
        shl     ebx, 16
        mov     bx, cx                  ; same word in both halves
        mov     [edi], ebx
        add     edi, 4

        add     eax, [esp+16]           ; r_dt
        cmp     eax, [esp+24]           ; r_end
        jb      .sample

        mov     ebp, esp                ; restore the frame pointer

        sub     edi, [dest]
        mov     eax, edi
        ret
endp
337
 
338
align 4
285 serge 339
;-----------------------------------------------------------------------
; resample_18 - linear-interpolating resampler, 8-bit single-channel
;               source bytes.
;
; In (stdcall): dest, src, r_dt, r_size, r_end
; Out: eax = number of bytes written to dest
;
; The source position is kept in esi with 15 fractional bits; it starts
; at 16, advances by r_dt and the loop ends when it reaches r_end.
; Source bytes are addressed relative to src - 32.  Each byte has 0x80
; subtracted and is scaled by 256 before interpolation; the clamped
; 16-bit result is stored in both halves of an output dword.
;
; ebp is used as a data register inside the loop, so r_dt and r_end are
; read through esp ([esp+16] and [esp+24]); ebp is re-derived from esp
; before [dest] is used again.
; Clobbers ebx, ecx, edx, esi, edi.
;-----------------------------------------------------------------------
proc resample_18 stdcall, dest:dword,src:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

        mov     edi, [dest]
        mov     edx, [src]
        sub     edx, 32

        mov     esi, 16

align 16
.sample:
        mov     eax, esi
        mov     ecx, esi
        shr     eax, 15                 ; integer part: byte index
        and     ecx, 0x7FFF             ; fractional part
        add     eax, edx

        mov     bx, word [eax]          ; bl = s0, bh = s1
        sub     bh, 0x80
        sub     bl, 0x80
        movsx   eax, bh
        shl     eax, 8                  ; s1 * 256
        movsx   ebp, bl
        shl     ebp, 8                  ; s0 * 256
        mov     ebx, 32768
        sub     ebx, ecx
        imul    ebx, ebp                ; (32768 - frac) * s0
        imul    eax, ecx                ; frac * s1
        lea     ecx, [ebx+eax+16384]    ; sum plus rounding term
        sar     ecx, 15

        ; clamp to the signed 16-bit range
        cmp     ecx, -32768             ; ffff8000H
        jge     .low_ok
        mov     ecx, -32768
.low_ok:
        cmp     ecx, 32767              ; 00007fffH
        jle     .store
        mov     ecx, 32767
.store:
        mov     ebx, ecx
        shl     ebx, 16
        mov     bx, cx                  ; same word in both halves
        mov     [edi], ebx
        add     edi, 4

        add     esi, [esp+16]           ; r_dt
        cmp     esi, [esp+24]           ; r_end
        jb      .sample

        mov     ebp, esp                ; restore the frame pointer
        sub     edi, [dest]
        mov     eax, edi
        ret
endp
394
 
395
align 4
285 serge 396
;-----------------------------------------------------------------------
; copy_stream - "resampler" that copies the source unchanged.
;
; In (stdcall): dest, src, r_dt (unused), r_size, r_end (unused)
; Out: eax = 16384
; Copies r_size/4 dwords from src to dest.
; Clobbers ecx, esi, edi.
;
; NOTE(review): the return value is the constant 16384, not r_size -
; presumably r_size is always 16384 for streams using this routine;
; confirm against the code that sets STREAM.r_size.
;
; Changes: cld added so the copy does not depend on the caller's
; direction flag; a dead "mov eax, ecx" was removed (eax was overwritten
; before being read).
;-----------------------------------------------------------------------
proc copy_stream stdcall, dest:dword,src:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

        mov     ecx, [r_size]
        shr     ecx, 2                  ; byte count -> dword count
        mov     esi, [src]
        mov     edi, [dest]
        cld
        rep movsd
        mov     eax, 16384
        ret
endp
408
 
409
align 4
285 serge 410
;-----------------------------------------------------------------------
; resample_2 - MMX linear-interpolating resampler for source frames of
;              two 16-bit words (4 bytes per frame).
;
; In (stdcall): dest, src, r_dt, r_size, r_end
; Out: eax = number of bytes written to dest
;
; The source position is kept in eax with 15 fractional bits; it starts
; at 16, advances by r_dt and the loop ends when it reaches r_end.
; Frames are addressed relative to src - 128 bytes.  For every output
; frame two neighbouring source frames are loaded, weighted and summed
; per word, shifted right by 15 and packed with signed saturation.
; m7 is defined elsewhere; the original comment gives its value as
; 0x8000.
; Clobbers ebx, ecx, edx, esi, edi, mm0-mm4; executes emms on entry and
; on exit.
;-----------------------------------------------------------------------
proc resample_2 stdcall, dest:dword,src:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

        mov     edi, [dest]
        mov     edx, [src]
        sub     edx, 32*4
        mov     ebx, [r_dt]
        mov     eax, 16
        emms

align 16
.frame:
        mov     esi, eax
        mov     ecx, eax
        shr     esi, 15                 ; integer part: frame index
        and     ecx, 0x7FFF             ; fractional part
        lea     esi, [edx+esi*4]

        movq    mm0, [esi]              ; two consecutive frames
        movq    mm1, mm0

        movd    mm2, ecx
        punpcklwd mm2, mm2              ; frac in the two low words
        movq    mm3, qword [m7]         ;0x8000

        psubw   mm3, mm2                ;0x8000 - frac
        punpckldq mm3, mm2              ; low dword: 0x8000-frac, high dword: frac

        pmulhw  mm0, mm3                ; high halves of the products
        pmullw  mm1, mm3                ; low halves of the products

        movq    mm4, mm1
        punpcklwd mm1, mm0              ; 32-bit products of the first frame
        punpckhwd mm4, mm0              ; 32-bit products of the second frame
        paddd   mm1, mm4
        psrad   mm1, 15
        packssdw mm1, mm1
        movd    [edi], mm1
        add     edi, 4

        add     eax, ebx
        cmp     eax, [r_end]
        jb      .frame
        emms

        sub     edi, [dest]
        mov     eax, edi
        ret
endp
459
 
460
align 4
285 serge 461
;-----------------------------------------------------------------------
; resample_28 - MMX linear-interpolating resampler for source frames of
;               two 8-bit samples (2 bytes per frame).
;
; In (stdcall): dest, src, r_dt, r_size, r_end
; Out: eax = number of bytes written to dest
;
; The source position is kept in eax with 15 fractional bits; it starts
; at 16, advances by r_dt and the loop ends when it reaches r_end.
; Frames are addressed relative to src - 64 bytes.  The loaded bytes
; have mm7 (loaded from mm80) subtracted, are widened to words by
; duplicating each byte and masked with mm6 (loaded from mm_mask); from
; there the weighting is the same as in resample_2.
; mm80, mm_mask and m7 are defined elsewhere; the original comment
; gives the value of m7 as 0x8000.
; Clobbers ebx, ecx, edx, esi, edi, mm0-mm4, mm6, mm7; executes emms on
; entry and on exit.
;-----------------------------------------------------------------------
proc resample_28 stdcall, dest:dword,src:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

        mov     edi, [dest]
        mov     edx, [src]
        sub     edx, 32*2
        mov     ebx, [r_dt]
        mov     eax, 16
        emms
        movq    mm7, [mm80]
        movq    mm6, [mm_mask]

align 16
.frame:
        mov     esi, eax
        mov     ecx, eax
        shr     esi, 15                 ; integer part: frame index
        and     ecx, 0x7FFF             ; fractional part
        lea     esi, [edx+esi*2]

        movq    mm0, [esi]
        psubb   mm0, mm7
        punpcklbw mm0, mm0              ; bytes -> words
        pand    mm0, mm6

        movq    mm1, mm0

        movd    mm2, ecx
        punpcklwd mm2, mm2              ; frac in the two low words
        movq    mm3, qword [m7]         ; 0x8000

        psubw   mm3, mm2                ; 0x8000 - frac
        punpckldq mm3, mm2              ; low dword: 0x8000-frac, high dword: frac

        pmulhw  mm0, mm3                ; high halves of the products
        pmullw  mm1, mm3                ; low halves of the products

        movq    mm4, mm1
        punpcklwd mm1, mm0              ; 32-bit products of the first frame
        punpckhwd mm4, mm0              ; 32-bit products of the second frame
        paddd   mm1, mm4
        psrad   mm1, 15
        packssdw mm1, mm1
        movd    [edi], mm1
        add     edi, 4

        add     eax, ebx
        cmp     eax, [r_end]
        jb      .frame
        emms

        sub     edi, [dest]
        mov     eax, edi
        ret
endp
517
 
518
 
285 serge 519
;-----------------------------------------------------------------------
; m16_stereo - expand 16-bit words by duplicating every word
;              (see m16_s_mmx).
;
; In (stdcall): dest, src, r_dt (unused), r_size, r_end (unused)
; Out: eax = r_size * 2
; Every loop pass handles 256 source bytes (4 calls of m16_s_mmx, 64
; bytes each) and produces 512 bytes; the pass count is r_size / 256.
; NOTE(review): an r_size below 256 yields a count of 0, which the
; dec/jnz loop does not guard against.
; Clobbers ecx, esi, edi, mm0, mm1.
;-----------------------------------------------------------------------
proc m16_stereo stdcall, dest:dword,src:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

        mov     esi, [src]
        mov     edi, [dest]
        mov     ecx, [r_size]
        shr     ecx, 8
.chunk:
        repeat 4
          call    m16_s_mmx
          add     edi, 128
          add     esi, 64
        end repeat
        dec     ecx
        jnz     .chunk

        mov     eax, [r_size]
        add     eax, eax
        ret
endp
546
 
547
align 4
285 serge 548
;-----------------------------------------------------------------------
; s8_stereo - widen 8-bit samples to 16-bit words (see s8_s_mmx).
;
; In (stdcall): dest, src, r_dt (unused), r_size, r_end (unused)
; Out: eax = r_size * 2
; Every loop pass handles 128 source bytes (4 calls of s8_s_mmx, 32
; bytes each) and produces 256 bytes; the pass count is r_size / 128.
; mm7 and mm6 are loaded from mm80 and mm_mask for s8_s_mmx.
; NOTE(review): an r_size below 128 yields a count of 0, which the
; dec/jnz loop does not guard against.
; Clobbers ecx, esi, edi, mm0, mm1, mm6, mm7.
;-----------------------------------------------------------------------
proc s8_stereo stdcall, dest:dword,src:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

        mov     esi, [src]
        mov     edi, [dest]
        mov     ecx, [r_size]
        shr     ecx, 7

        movq    mm7, [mm80]
        movq    mm6, [mm_mask]
.chunk:
        repeat 4
          call    s8_s_mmx
          add     edi, 64
          add     esi, 32
        end repeat
        dec     ecx
        jnz     .chunk

        mov     eax, [r_size]
        add     eax, eax
        ret
endp
578
 
285 serge 579
;-----------------------------------------------------------------------
; m8_stereo - widen 8-bit samples to 16-bit words and duplicate every
;             word (see m8_s_mmx).
;
; In (stdcall): dest, src, r_dt (unused), r_size, r_end (unused)
; Out: eax = r_size * 4
; Every loop pass handles 64 source bytes (4 calls of m8_s_mmx, 16
; bytes each) and produces 256 bytes; the pass count is r_size / 64.
; mm7 and mm6 are loaded from mm80 and mm_mask for m8_s_mmx.
; NOTE(review): an r_size below 64 yields a count of 0, which the
; dec/jnz loop does not guard against.
; Clobbers ecx, esi, edi, mm0-mm3, mm6, mm7.
;-----------------------------------------------------------------------
proc m8_stereo stdcall, dest:dword,src:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

        mov     esi, [src]
        mov     edi, [dest]
        mov     ecx, [r_size]
        shr     ecx, 6

        movq    mm7, [mm80]
        movq    mm6, [mm_mask]
.chunk:
        repeat 4
          call    m8_s_mmx
          add     edi, 64
          add     esi, 16
        end repeat
        dec     ecx
        jnz     .chunk

        mov     eax, [r_size]
        add     eax, eax
        add     eax, eax
        ret
endp
610
 
611
align 4
612
;-----------------------------------------------------------------------
; alloc_mix_buff - take one 512-byte scratch buffer from the pool.
;
; Out: eax = address of the buffer, or 0 when mix_buff_map has no bit
;      set.
; The lowest set bit of mix_buff_map is located and cleared; the buffer
; address is [mix_buff] + bit index * 512.
; Clobbers flags only (besides eax).
;-----------------------------------------------------------------------
proc alloc_mix_buff

        bsf     eax, [mix_buff_map]
        jz      .none                   ; ZF set: the map is empty

        btr     [mix_buff_map], eax     ; mark the buffer as taken
        shl     eax, 9                  ; index * 512
        add     eax, [mix_buff]
        ret
.none:
        xor     eax, eax
        ret
endp
624
 
625
;-----------------------------------------------------------------------
; m16_s_mmx - duplicate every 16-bit word of a 64-byte source chunk.
;
; In:  esi = source (64 bytes are read)
;      edi = destination (128 bytes are written)
; Each source word w is stored as the pair w,w.
; esi and edi are not advanced.  Clobbers mm0, mm1.
;-----------------------------------------------------------------------
proc m16_s_mmx

        ; 8 groups of 4 source words -> 8 output words each
        repeat 8
          movq    mm0, [esi+(%-1)*8]
          movq    mm1, mm0
          punpcklwd mm0, mm0            ; words 0,1 doubled
          punpckhwd mm1, mm1            ; words 2,3 doubled
          movq    [edi+(%-1)*16], mm0
          movq    [edi+(%-1)*16+8], mm1
        end repeat

        ret
endp
686
 
687
align 4
688
;-----------------------------------------------------------------------
; s8_s_mmx - widen a 32-byte chunk of 8-bit samples to 16-bit words.
;
; In:  esi = source (32 bytes are read)
;      edi = destination (64 bytes are written)
;      mm7 = value subtracted from every source byte
;      mm6 = mask applied to every widened word
;      (the callers in this file load them from mm80 and mm_mask)
; Each byte has mm7 subtracted, is duplicated into both bytes of a word
; and the word is ANDed with mm6.
; esi and edi are not advanced.  Clobbers mm0, mm1.
;-----------------------------------------------------------------------
proc s8_s_mmx

        ; 4 groups of 8 source bytes -> 8 output words each
        repeat 4
          movq    mm0, [esi+(%-1)*8]
          psubb   mm0, mm7
          movq    mm1, mm0
          punpcklbw mm0, mm0            ; bytes 0..3 -> words
          pand    mm0, mm6
          punpckhbw mm1, mm1            ; bytes 4..7 -> words
          pand    mm1, mm6
          movq    [edi+(%-1)*16], mm0
          movq    [edi+(%-1)*16+8], mm1
        end repeat

        ret

endp
733
 
734
align 4
735
;-----------------------------------------------------------------------
; m8_s_mmx - widen a 16-byte chunk of 8-bit samples to 16-bit words and
;            duplicate every word.
;
; In:  esi = source (16 bytes are read)
;      edi = destination (64 bytes are written)
;      mm7 = value subtracted from every source byte
;      mm6 = mask applied to every widened word
;      (the caller in this file loads them from mm80 and mm_mask)
; Each byte has mm7 subtracted, is duplicated into both bytes of a word,
; the word is ANDed with mm6 and then stored twice in a row.
; esi and edi are not advanced.  Clobbers mm0-mm3.
;-----------------------------------------------------------------------
proc m8_s_mmx

        ; 2 groups of 8 source bytes -> 16 output words each
        repeat 2
          movq    mm0, [esi+(%-1)*8]
          psubb   mm0, mm7
          movq    mm1, mm0
          punpcklbw mm0, mm0            ; bytes 0..3 -> words
          pand    mm0, mm6
          punpckhbw mm1, mm1            ; bytes 4..7 -> words
          pand    mm1, mm6
          movq    mm2, mm0
          punpcklwd mm0, mm0            ; words 0,1 doubled
          punpckhwd mm2, mm2            ; words 2,3 doubled

          movq    mm3, mm1
          punpcklwd mm1, mm1            ; words 4,5 doubled
          punpckhwd mm3, mm3            ; words 6,7 doubled

          movq    [edi+(%-1)*32], mm0
          movq    [edi+(%-1)*32+8], mm2
          movq    [edi+(%-1)*32+16], mm1
          movq    [edi+(%-1)*32+24], mm3
        end repeat

        ret
endp
779
 
780
 
781
align 4
782
;-----------------------------------------------------------------------
; mix_2_1 - mix 512 bytes of two inputs into output.
;
; In (stdcall): output, str0, str1 = buffer addresses
; The work is done by four calls of mix_2_1_mmx, 128 bytes each; edi
; carries the output address across the calls and the str0/str1
; arguments are advanced in place.
; Clobbers eax, ecx, edx, edi, mm0-mm3.
;-----------------------------------------------------------------------
proc mix_2_1 stdcall, output:dword, str0:dword, str1:dword

        mov     edi, [output]

        ; first three 128-byte quarters, advancing all pointers
        repeat 3
          stdcall mix_2_1_mmx, edi, [str0],[str1]
          add     edi, 128
          add     [str0], 128
          add     [str1], 128
        end repeat
        ; last quarter
        stdcall mix_2_1_mmx, edi, [str0],[str1]

        ret
endp
802
 
803
 
804
align 4
805
;-----------------------------------------------------------------------
; mix_3_1 - mix 512 bytes of three inputs into output.
;
; In (stdcall): output, str0, str1, str2 = buffer addresses
; The work is done by four calls of mix_3_1_mmx, 128 bytes each; edi
; carries the output address across the calls and the str0..str2
; arguments are advanced in place.
; Clobbers eax, ebx, ecx, edx, edi, mm0-mm3.
;-----------------------------------------------------------------------
proc mix_3_1 stdcall, output:dword, str0:dword, str1:dword, str2:dword

        mov     edi, [output]

        ; first three 128-byte quarters, advancing all pointers
        repeat 3
          stdcall mix_3_1_mmx, edi, [str0],[str1],[str2]
          add     edi, 128
          add     [str0], 128
          add     [str1], 128
          add     [str2], 128
        end repeat
        ; last quarter
        stdcall mix_3_1_mmx, edi, [str0],[str1],[str2]

        ret
endp
828
 
829
align 4
830
;-----------------------------------------------------------------------
; mix_4_1 - mix 512 bytes of four inputs into a freshly allocated
;           scratch buffer.
;
; In (stdcall): str0, str1, str2, str3 = input buffer addresses
; Out: eax = address of the scratch buffer holding the mix, or 0 when
;      alloc_mix_buff has no buffer left.
; The work is done by four calls of mix_4_1_mmx, 128 bytes each; edi
; carries the output address across the calls and the str0..str3
; arguments are advanced in place.
; Clobbers ebx, ecx, edx, esi, edi, mm0-mm3.
;-----------------------------------------------------------------------
proc mix_4_1 stdcall, str0:dword, str1:dword,\
                      str2:dword, str3:dword

        local output:DWORD

        call    alloc_mix_buff
        test    eax, eax
        jz      .err
        mov     [output], eax           ; remembered for the return value

        mov     edi, eax

        ; first three 128-byte quarters, advancing all pointers
        repeat 3
          stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3]
          add     edi, 128
          add     [str0], 128
          add     [str1], 128
          add     [str2], 128
          add     [str3], 128
        end repeat
        ; last quarter
        stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3]
        mov     eax, [output]
        ret
.err:
        xor     eax, eax
        ret
endp
867
 
868
 
869
align 4
870
;-----------------------------------------------------------------------
; final_mix - mix 512 bytes of four inputs straight into output.
;
; In (stdcall): output, str0, str1, str2, str3 = buffer addresses
; Same work as mix_4_1, except that the destination is supplied by the
; caller instead of being taken from the scratch pool.
; The work is done by four calls of mix_4_1_mmx, 128 bytes each; edi
; carries the output address across the calls and the str0..str3
; arguments are advanced in place.
; Clobbers eax, ebx, ecx, edx, esi, edi, mm0-mm3.
;-----------------------------------------------------------------------
proc final_mix stdcall, output:dword, str0:dword, str1:dword,\
                        str2:dword, str3:dword

        mov     edi, [output]

        ; first three 128-byte quarters, advancing all pointers
        repeat 3
          stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3]
          add     edi, 128
          add     [str0], 128
          add     [str1], 128
          add     [str2], 128
          add     [str3], 128
        end repeat
        ; last quarter
        stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3]

        ret
endp
897
 
898
align 4
899
;-----------------------------------------------------------------------
; mix_2_1_mmx - add 128 bytes of two inputs, word by word, with signed
;               saturation (paddsw).
;
; In (stdcall): output, str0, str1 = buffer addresses
; output[i] = saturate(str0[i] + str1[i]) for 64 16-bit words.
; The sum is stored as is; the halving step (psraw 1) present in the
; original source is disabled.
; Clobbers eax, ecx, edx, mm0-mm3.
;-----------------------------------------------------------------------
proc mix_2_1_mmx stdcall, output:dword, str0:dword, str1:dword

        mov     edx, [output]
        mov     eax, [str0]
        mov     ecx, [str1]

        ; 4 groups of 32 bytes, one MMX register per quadword
        repeat 4
          movq    mm0, [eax+(%-1)*32]
          paddsw  mm0, [ecx+(%-1)*32]
          movq    [edx+(%-1)*32], mm0

          movq    mm1, [eax+(%-1)*32+8]
          paddsw  mm1, [ecx+(%-1)*32+8]
          movq    [edx+(%-1)*32+8], mm1

          movq    mm2, [eax+(%-1)*32+16]
          paddsw  mm2, [ecx+(%-1)*32+16]
          movq    [edx+(%-1)*32+16], mm2

          movq    mm3, [eax+(%-1)*32+24]
          paddsw  mm3, [ecx+(%-1)*32+24]
          movq    [edx+(%-1)*32+24], mm3
        end repeat

        ret
endp
987
 
988
align 4
989
;-----------------------------------------------------------------------
; mix_3_1_mmx - add 128 bytes of three inputs, word by word, with
;               signed saturation (paddsw).
;
; In (stdcall): output, str0, str1, str2 = buffer addresses
; output[i] = saturate(saturate(str0[i] + str1[i]) + str2[i]) for 64
; 16-bit words.
; Clobbers eax, ebx, ecx, edx, mm0-mm3.
;-----------------------------------------------------------------------
proc mix_3_1_mmx stdcall, output:dword, str0:dword, str1:dword, str2:dword

        mov     edx, [output]
        mov     eax, [str0]
        mov     ebx, [str1]
        mov     ecx, [str2]

        ; 4 groups of 32 bytes, one MMX register per quadword
        repeat 4
          movq    mm0, [eax+(%-1)*32]
          paddsw  mm0, [ebx+(%-1)*32]
          paddsw  mm0, [ecx+(%-1)*32]
          movq    [edx+(%-1)*32], mm0

          movq    mm1, [eax+(%-1)*32+8]
          paddsw  mm1, [ebx+(%-1)*32+8]
          paddsw  mm1, [ecx+(%-1)*32+8]
          movq    [edx+(%-1)*32+8], mm1

          movq    mm2, [eax+(%-1)*32+16]
          paddsw  mm2, [ebx+(%-1)*32+16]
          paddsw  mm2, [ecx+(%-1)*32+16]
          movq    [edx+(%-1)*32+16], mm2

          movq    mm3, [eax+(%-1)*32+24]
          paddsw  mm3, [ebx+(%-1)*32+24]
          paddsw  mm3, [ecx+(%-1)*32+24]
          movq    [edx+(%-1)*32+24], mm3
        end repeat

        ret
endp
1078
 
1079
align 4
1080
;-----------------------------------------------------------------------
; mix_4_1_mmx - add 128 bytes of four inputs, word by word, with signed
;               saturation (paddsw).
;
; In (stdcall): output, str0, str1, str2, str3 = buffer addresses
; For 64 16-bit words:
;   output[i] = saturate(saturate(str0[i] + str2[i]) +
;                        saturate(str1[i] + str3[i]))
; Clobbers eax, ebx, ecx, edx, esi, mm0-mm3.
;-----------------------------------------------------------------------
proc mix_4_1_mmx stdcall, output:dword, str0:dword, str1:dword,\
                          str2:dword, str3:dword

        mov     edx, [output]
        mov     esi, [str0]
        mov     eax, [str1]
        mov     ebx, [str2]
        mov     ecx, [str3]

        ; bytes 0..79: quadword pairs alternating mm0/mm1 and mm2/mm3
        repeat 5
          movq    mm0, [esi+(%-1)*16]
          movq    mm1, [eax+(%-1)*16]
          paddsw  mm0, [ebx+(%-1)*16]
          paddsw  mm1, [ecx+(%-1)*16]
          paddsw  mm0, mm1
          movq    [edx+(%-1)*16], mm0

          movq    mm2, [esi+(%-1)*16+8]
          movq    mm3, [eax+(%-1)*16+8]
          paddsw  mm2, [ebx+(%-1)*16+8]
          paddsw  mm3, [ecx+(%-1)*16+8]
          paddsw  mm2, mm3
          movq    [edx+(%-1)*16+8], mm2
        end repeat

        ; bytes 80..127: the remaining quadwords all use mm2/mm3
        repeat 6
          movq    mm2, [esi+80+(%-1)*8]
          movq    mm3, [eax+80+(%-1)*8]
          paddsw  mm2, [ebx+80+(%-1)*8]
          paddsw  mm3, [ecx+80+(%-1)*8]
          paddsw  mm2, mm3
          movq    [edx+80+(%-1)*8], mm2
        end repeat

        ret
endp
1203
 
1204
align 4
1205
;-----------------------------------------------------------------------
; copy_mem - copy 0x80 dwords (512 bytes) from input to output.
;
; In (stdcall): output = destination address, input = source address
; Clobbers eax, ecx, esi, edi.
;-----------------------------------------------------------------------
proc copy_mem stdcall, output:dword, input:dword

        mov     esi, [input]
        mov     edi, [output]
        mov     ecx, 0x80               ; dword count
.next:
        mov     eax, [esi]
        add     esi, 4
        mov     [edi], eax
        add     edi, 4
        dec     ecx
        jnz     .next

        ret
endp
1219
 
1220
;-----------------------------------------------------------------------
; memcpy - dword copy loop with register arguments.
;
; In:  esi = source, edi = destination, ecx = number of dwords
; Out: esi and edi advanced past the copied data, ecx = 0
; The count is tested after the first copy, so ecx must not be 0 on
; entry.
; Clobbers eax.
;-----------------------------------------------------------------------
proc memcpy
.next:
        mov     eax, [esi]
        add     esi, 4
        mov     [edi], eax
        add     edi, 4
        dec     ecx
        jnz     .next
        ret
endp
1230