Subversion Repositories Kolibri OS

Rev

Rev 281 | Go to most recent revision | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 281 Rev 285
1
;
1
;
2
;   This file is part of the Infinity sound library.
2
;   This file is part of the Infinity sound library.
3
;   (C) copyright Serge 2006
3
;   (C) copyright Serge 2006
4
;   email: infinity_sound@mail.ru
4
;   email: infinity_sound@mail.ru
5
;
5
;
6
;   This program is free software; you can redistribute it and/or modify
6
;   This program is free software; you can redistribute it and/or modify
7
;   it under the terms of the GNU General Public License as published by
7
;   it under the terms of the GNU General Public License as published by
8
;   the Free Software Foundation; either version 2 of the License, or
8
;   the Free Software Foundation; either version 2 of the License, or
9
;   (at your option) any later version.
9
;   (at your option) any later version.
10
;
10
;
11
;   This program is distributed in the hope that it will be useful,
11
;   This program is distributed in the hope that it will be useful,
12
;   but WITHOUT ANY WARRANTY; without even the implied warranty of
12
;   but WITHOUT ANY WARRANTY; without even the implied warranty of
13
;   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
;   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
;   GNU General Public License for more details.
14
;   GNU General Public License for more details.
15
 
15
 
16
align 4
16
align 4
17
proc new_mix stdcall, output:dword
17
proc new_mix stdcall, output:dword
18
           locals
18
           locals
19
             mixCounter  dd ?
19
             mixCounter  dd ?
20
             mixIndex  dd ?
20
             mixIndex  dd ?
21
             streamIndex dd ?
21
             streamIndex dd ?
22
             inputCount  dd ?
22
             inputCount  dd ?
23
             main_count  dd ?
23
             main_count  dd ?
24
             blockCount  dd ?
24
             blockCount  dd ?
25
             mix_out  dd ?
25
             mix_out  dd ?
26
           endl
26
           endl
27
 
27
 
28
           call prepare_playlist
28
           call prepare_playlist
29
 
29
 
30
           cmp [play_count], 0
30
           cmp [play_count], 0
31
           je .exit
31
           je .exit
32
           call FpuSave
32
           call FpuSave
33
           mov [main_count], 32;
33
           mov [main_count], 32;
34
.l00:
34
.l00:
35
           mov [mix_buff_map], 0x0000FFFF;
35
           mov [mix_buff_map], 0x0000FFFF;
36
           xor eax, eax
36
           xor eax, eax
37
           mov [mixCounter], eax
37
           mov [mixCounter], eax
38
           mov [mixIndex],eax
38
           mov [mixIndex],eax
39
           mov [streamIndex], eax;
39
           mov [streamIndex], eax;
40
           mov ebx, [play_count]
40
           mov ebx, [play_count]
41
           mov [inputCount], ebx
41
           mov [inputCount], ebx
42
.l0:
42
.l0:
43
           mov ecx, 4
43
           mov ecx, 4
44
.l1:
44
.l1:
45
           mov ebx, [streamIndex]
45
           mov ebx, [streamIndex]
46
           mov esi, [play_list+ebx*4]
46
           mov esi, [play_list+ebx*4]
47
           mov eax, [esi+STREAM.work_read]
47
           mov eax, [esi+STREAM.work_read]
48
           add [esi+STREAM.work_read], 512
48
           add [esi+STREAM.work_read], 512
49
 
49
 
50
           mov ebx, [mixIndex]
50
           mov ebx, [mixIndex]
51
           mov [mix_input+ebx*4], eax
51
           mov [mix_input+ebx*4], eax
52
           inc [mixCounter]
52
           inc [mixCounter]
53
           inc [mixIndex]
53
           inc [mixIndex]
54
           inc [streamIndex]
54
           inc [streamIndex]
55
           dec [inputCount]
55
           dec [inputCount]
56
           jz .m2
56
           jz .m2
57
 
57
 
58
           dec ecx
58
           dec ecx
59
           jnz .l1
59
           jnz .l1
60
 
60
 
61
           cmp [mixCounter], 4
61
           cmp [mixCounter], 4
62
           jnz .m2
62
           jnz .m2
63
 
63
 
64
           stdcall mix_4_1, [mix_input],[mix_input+4],[mix_input+8],[mix_input+12]
64
           stdcall mix_4_1, [mix_input],[mix_input+4],[mix_input+8],[mix_input+12]
65
           sub [mixIndex],4
65
           sub [mixIndex],4
66
           mov ebx, [mixIndex]
66
           mov ebx, [mixIndex]
67
           mov [mix_input+ebx*4], eax
67
           mov [mix_input+ebx*4], eax
68
           inc [mixIndex]
68
           inc [mixIndex]
69
           mov [mixCounter], 0
69
           mov [mixCounter], 0
70
 
70
 
71
           cmp [inputCount], 0
71
           cmp [inputCount], 0
72
           jnz .l0
72
           jnz .l0
73
.m2:
73
.m2:
74
           cmp [mixIndex], 1
74
           cmp [mixIndex], 1
75
           jne @f
75
           jne @f
76
           stdcall copy_mem, [output], [mix_input]
76
           stdcall copy_mem, [output], [mix_input]
77
           jmp .m3
77
           jmp .m3
78
@@:
78
@@:
79
           cmp [mixIndex], 2
79
           cmp [mixIndex], 2
80
           jne @f
80
           jne @f
81
           stdcall mix_2_1, [output], [mix_input], [mix_input+4]
81
           stdcall mix_2_1, [output], [mix_input], [mix_input+4]
82
           jmp .m3
82
           jmp .m3
83
@@:
83
@@:
84
           cmp [mixIndex], 3
84
           cmp [mixIndex], 3
85
           jne @f
85
           jne @f
86
           stdcall mix_3_1, [output],[mix_input],[mix_input+4],[mix_input+8]
86
           stdcall mix_3_1, [output],[mix_input],[mix_input+4],[mix_input+8]
87
           jmp .m3
87
           jmp .m3
88
@@:
88
@@:
89
           stdcall final_mix, [output],[mix_input],[mix_input+4],[mix_input+8], [mix_input+12]
89
           stdcall final_mix, [output],[mix_input],[mix_input+4],[mix_input+8], [mix_input+12]
90
.m3:
90
.m3:
91
           add [output],512
91
           add [output],512
92
 
92
 
93
           sub [main_count], 1
93
           sub [main_count], 1
94
           jnz .l00
94
           jnz .l00
95
 
95
 
96
           call update_stream
96
           call update_stream
97
           emms
97
           emms
98
           call FpuRestore
98
           call FpuRestore
99
           ret
99
           ret
100
.exit:
100
.exit:
101
           mov edi, [output]
101
           mov edi, [output]
102
           mov ecx, 0x1000
102
           mov ecx, 0x1000
103
           xor eax, eax
103
           xor eax, eax
104
           cld
104
           cld
105
           rep stosd
105
           rep stosd
106
           ret
106
           ret
107
endp
107
endp
108
 
108
 
109
align 4
109
align 4
110
proc update_stream
110
proc update_stream
111
           locals
111
           locals
112
             stream_index  dd ?
112
             stream_index  dd ?
113
             ev_code       dd ?  ;EVENT
113
             ev_code       dd ?  ;EVENT
114
             ev_offs       dd ?
114
             ev_offs       dd ?
115
                           rd 4
115
                           rd 4
116
           endl
116
           endl
117
 
117
 
118
           mov [stream_index], 0
118
           mov [stream_index], 0
119
.l1:
119
.l1:
120
           mov edx, [stream_index]
120
           mov edx, [stream_index]
121
           mov esi, [play_list+edx*4]
121
           mov esi, [play_list+edx*4]
122
 
122
 
123
           mov eax, [esi+STREAM.work_read]
123
           mov eax, [esi+STREAM.work_read]
124
           cmp eax, [esi+STREAM.work_top]
124
           cmp eax, [esi+STREAM.work_top]
125
           jb @f
125
           jb @f
126
           mov eax, [esi+STREAM.work_buff]
126
           mov eax, [esi+STREAM.work_buff]
127
@@:
127
@@:
128
           mov [esi+STREAM.work_read], eax
128
           mov [esi+STREAM.work_read], eax
129
 
129
 
130
           cmp [esi+STREAM.format], PCM_2_16_48
130
           cmp [esi+STREAM.format], PCM_2_16_48
131
           je .copy
131
           je .copy
132
 
132
 
133
           sub [esi+STREAM.work_count], 16384
133
           sub [esi+STREAM.work_count], 16384
134
 
134
 
135
           cmp [esi+STREAM.work_count], 32768
135
           cmp [esi+STREAM.work_count], 32768
136
           ja @f
136
           ja @f
137
 
137
 
138
           stdcall refill, esi
138
           stdcall refill, esi
139
@@:
139
@@:
140
           inc [stream_index]
140
           inc [stream_index]
141
           dec [play_count]
141
           dec [play_count]
142
           jnz .l1
142
           jnz .l1
143
           ret
143
           ret
144
.copy:
144
.copy:
145
           mov ebx, esi
145
           mov ebx, esi
146
           mov edi, [ebx+STREAM.work_write]
146
           mov edi, [ebx+STREAM.work_write]
147
           cmp edi, [ebx+STREAM.work_top]
147
           cmp edi, [ebx+STREAM.work_top]
148
           jb @f
148
           jb @f
149
           mov edi, [ebx+STREAM.work_buff]
149
           mov edi, [ebx+STREAM.work_buff]
150
           mov [ebx+STREAM.work_write], edi
150
           mov [ebx+STREAM.work_write], edi
151
@@:
151
@@:
152
           mov esi, [ebx+STREAM.curr_seg]
152
           mov esi, [ebx+STREAM.curr_seg]
153
           mov ecx, 16384/4
153
           mov ecx, 16384/4
154
           cld
154
           cld
155
           rep movsd
155
           rep movsd
156
 
156
 
157
           mov [ebx+STREAM.work_write], edi
157
           mov [ebx+STREAM.work_write], edi
158
 
158
 
159
           cmp esi, [ebx+STREAM.lim_0]
159
           cmp esi, [ebx+STREAM.lim_0]
160
           jb @f
160
           jb @f
161
 
161
 
162
           mov esi, [ebx+STREAM.seg_0]
162
           mov esi, [ebx+STREAM.seg_0]
163
           mov eax, [ebx+STREAM.lim_0]
163
           mov eax, [ebx+STREAM.lim_0]
164
           xchg esi, [ebx+STREAM.seg_1]
164
           xchg esi, [ebx+STREAM.seg_1]
165
           xchg eax, [ebx+STREAM.lim_1]
165
           xchg eax, [ebx+STREAM.lim_1]
166
           mov [ebx+STREAM.seg_0], esi
166
           mov [ebx+STREAM.seg_0], esi
167
           mov [ebx+STREAM.lim_0], eax
167
           mov [ebx+STREAM.lim_0], eax
168
@@:
168
@@:
169
           mov [ebx+STREAM.curr_seg], esi
169
           mov [ebx+STREAM.curr_seg], esi
170
 
170
 
171
           xor ecx, ecx
171
           xor ecx, ecx
172
           cmp esi, [ebx+STREAM.notify_off2]
172
           cmp esi, [ebx+STREAM.notify_off2]
173
           je @f
173
           je @f
174
 
174
 
175
           mov ecx,0x8000
175
           mov ecx,0x8000
176
           cmp esi, [ebx+STREAM.notify_off1]
176
           cmp esi, [ebx+STREAM.notify_off1]
177
           je @f
177
           je @f
178
 
178
 
179
           inc [stream_index]
179
           inc [stream_index]
180
           dec [play_count]
180
           dec [play_count]
181
           jnz .l1
181
           jnz .l1
182
           ret
182
           ret
183
@@:
183
@@:
184
           mov [ev_code], 0xFF000001
184
           mov [ev_code], 0xFF000001
185
           mov [ev_offs], ecx
185
           mov [ev_offs], ecx
186
           mov eax, [ebx+STREAM.notify_task]
186
           mov eax, [ebx+STREAM.notify_task]
187
 
187
 
188
           lea edx, [ev_code]
188
           lea edx, [ev_code]
189
           push ebx
189
           push ebx
190
           stdcall SendEvent, eax, edx
190
           stdcall SendEvent, eax, edx
191
           pop ebx
191
           pop ebx
192
           test eax, eax
192
           test eax, eax
193
           jnz .l_end
193
           jnz .l_end
194
 
194
 
195
           not eax
195
           not eax
196
           mov [ebx+STREAM.notify_task], eax      ;-1
196
           mov [ebx+STREAM.notify_task], eax      ;-1
197
.l_end:
197
.l_end:
198
           inc [stream_index]
198
           inc [stream_index]
199
           dec [play_count]
199
           dec [play_count]
200
           jnz .l1
200
           jnz .l1
201
           ret
201
           ret
202
endp
202
endp
203
 
203
 
204
align 4
204
align 4
205
proc refill stdcall, str:dword
205
proc refill stdcall, str:dword
206
           locals
206
           locals
207
             ev_code       dd ?  ;EVENT
207
             ev_code       dd ?  ;EVENT
208
             ev_offs       dd ?
208
             ev_offs       dd ?
209
                           rd 4
209
                           rd 4
210
           endl
210
           endl
211
 
211
 
212
           mov ebx, [str]
212
           mov ebx, [str]
213
           mov ecx, [ebx+STREAM.work_write]
213
           mov ecx, [ebx+STREAM.work_write]
214
           cmp ecx, [ebx+STREAM.work_top]
214
           cmp ecx, [ebx+STREAM.work_top]
215
           jbe .m2
215
           jbe .m2
216
           mov esi, [ebx+STREAM.work_top]
216
           mov esi, [ebx+STREAM.work_top]
217
           sub ecx, esi
217
           sub ecx, esi
218
           mov edi, [ebx+STREAM.work_buff]
218
           mov edi, [ebx+STREAM.work_buff]
219
           shr ecx, 2
219
           shr ecx, 2
220
           rep movsd    ;call memcpy
220
           rep movsd    ;call memcpy
221
 
221
 
222
           mov [ebx+STREAM.work_write], edi
222
           mov [ebx+STREAM.work_write], edi
223
.m2:
223
.m2:
224
           mov esi, [ebx+STREAM.curr_seg]
224
           mov esi, [ebx+STREAM.curr_seg]
225
           mov edi, [ebx+STREAM.work_write]
225
           mov edi, [ebx+STREAM.work_write]
226
           mov edx, [ebx+STREAM.r_buff]
-
 
227
 
226
 
228
           stdcall [ebx+STREAM.resample], edi, esi, edx,\
227
           stdcall [ebx+STREAM.resample], edi, esi, \
229
           [ebx+STREAM.r_dt],[ebx+STREAM.r_size],[ebx+STREAM.r_end]
228
           [ebx+STREAM.r_dt],[ebx+STREAM.r_size],[ebx+STREAM.r_end]
230
 
229
 
231
           mov ebx, [str]
230
           mov ebx, [str]
232
 
231
 
233
           add [ebx+STREAM.work_count], eax;
232
           add [ebx+STREAM.work_count], eax;
234
           add [ebx+STREAM.work_write], eax;
233
           add [ebx+STREAM.work_write], eax;
235
 
234
 
236
           mov eax, [ebx+STREAM.curr_seg]
235
           mov eax, [ebx+STREAM.curr_seg]
237
           add eax, [ebx+STREAM.r_size]
236
           add eax, [ebx+STREAM.r_size]
238
           cmp eax, [ebx+STREAM.lim_0]
237
           cmp eax, [ebx+STREAM.lim_0]
239
           jb @f
238
           jb @f
-
 
239
 
-
 
240
           mov esi, [ebx+STREAM.seg_0]
-
 
241
           lea edi, [esi-128]
-
 
242
           add esi, 0x7F80
-
 
243
           mov ecx, 128/4
-
 
244
           cld
-
 
245
           rep movsd
240
 
246
 
241
           mov eax, [ebx+STREAM.seg_0]
247
           mov eax, [ebx+STREAM.seg_0]
242
           mov ecx, [ebx+STREAM.lim_0]
248
           mov ecx, [ebx+STREAM.lim_0]
243
           xchg eax, [ebx+STREAM.seg_1]
249
           xchg eax, [ebx+STREAM.seg_1]
244
           xchg ecx, [ebx+STREAM.lim_1]
250
           xchg ecx, [ebx+STREAM.lim_1]
245
           mov [ebx+STREAM.seg_0], eax
251
           mov [ebx+STREAM.seg_0], eax
246
           mov [ebx+STREAM.lim_0], ecx
252
           mov [ebx+STREAM.lim_0], ecx
247
@@:
253
@@:
248
           mov [ebx+STREAM.curr_seg], eax
254
           mov [ebx+STREAM.curr_seg], eax
249
 
255
 
250
           xor ecx, ecx
256
           xor ecx, ecx
251
           cmp eax, [ebx+STREAM.notify_off2]
257
           cmp eax, [ebx+STREAM.notify_off2]
252
           je @f
258
           je @f
253
 
259
 
254
           mov ecx,0x8000
260
           mov ecx,0x8000
255
           cmp eax, [ebx+STREAM.notify_off1]
261
           cmp eax, [ebx+STREAM.notify_off1]
256
           je @f
262
           je @f
257
           ret
263
           ret
258
@@:
264
@@:
259
           mov [ev_code], 0xFF000001
265
           mov [ev_code], 0xFF000001
260
           mov [ev_offs], ecx
266
           mov [ev_offs], ecx
261
           mov eax, [ebx+STREAM.notify_task]
267
           mov eax, [ebx+STREAM.notify_task]
262
 
268
 
263
           lea edx, [ev_code]
269
           lea edx, [ev_code]
264
           push ebx
270
           push ebx
265
           stdcall SendEvent, eax, edx
271
           stdcall SendEvent, eax, edx
266
           pop ebx
272
           pop ebx
267
           test eax, eax
273
           test eax, eax
268
           jnz @F
274
           jnz @F
269
           not eax
275
           not eax
270
           mov [ebx+STREAM.notify_task], eax      ;-1
276
           mov [ebx+STREAM.notify_task], eax      ;-1
271
@@:
277
@@:
272
	   ret
278
	   ret
273
endp
279
endp
274
 
280
 
275
align 4
281
align 4
276
proc resample_1 stdcall, dest:dword,src:dword,r_buff:dword,\
282
proc resample_1 stdcall, dest:dword,src:dword,\
277
		       r_dt:dword, r_size:dword,r_end:dword
283
		       r_dt:dword, r_size:dword,r_end:dword
278
 
284
 
279
	   mov edi, [r_buff]
285
; dest equ esp+8
280
	   add edi, 32*2
286
; src  equ esp+12
281
	   mov esi, [src]
287
; r_dt equ esp+16
282
	   mov ecx, [r_size]
288
; r_size equ esp+20
283
	   shr ecx, 2
-
 
284
	   rep movsd
289
;r_end equ esp+24
285
 
290
 
-
 
291
           mov edi, [dest]
286
	   mov edi, [dest]
292
           mov edx, [src]
287
	   mov edx, [r_buff]
293
           sub edx, 32*2
288
	   mov eax, 16
294
           mov eax, 16
289
 
295
 
290
align 16
296
align 16
291
.l1:
297
.l1:
292
	   mov ecx, eax
298
	   mov ecx, eax
293
	   mov esi, eax
299
	   mov esi, eax
294
	   and ecx, 0x7FFF
300
	   and ecx, 0x7FFF
295
	   shr esi, 15
301
	   shr esi, 15
296
	   lea esi, [edx+esi*2]
302
	   lea esi, [edx+esi*2]
297
 
303
 
298
	   movsx ebp, word [esi]
304
	   movsx ebp, word [esi]
299
	   movsx esi, word [esi+2]
305
	   movsx esi, word [esi+2]
300
	   mov ebx, 32768
306
	   mov ebx, 32768
301
	   imul esi, ecx
307
	   imul esi, ecx
302
	   sub ebx, ecx
308
	   sub ebx, ecx
303
	   imul ebx, ebp
309
	   imul ebx, ebp
304
	   lea ecx, [ebx+esi+16384]
310
	   lea ecx, [ebx+esi+16384]
305
	   sar ecx, 15
311
	   sar ecx, 15
306
	   cmp ecx, 32767	  ; 00007fffH
312
	   cmp ecx, 32767	  ; 00007fffH
307
	   jle @f
313
	   jle @f
308
	   mov ecx, 32767	  ; 00007fffH
314
	   mov ecx, 32767	  ; 00007fffH
309
	   jmp .write
315
	   jmp .write
310
@@:
316
@@:
311
	   cmp ecx, -32768	  ; ffff8000H
317
	   cmp ecx, -32768	  ; ffff8000H
312
	   jge .write
318
	   jge .write
313
	   mov ecx, -32768	  ; ffff8000H
319
	   mov ecx, -32768	  ; ffff8000H
314
.write:
320
.write:
315
	   mov ebx, ecx
321
	   mov ebx, ecx
316
	   shl ebx, 16
322
	   shl ebx, 16
317
	   mov bx, cx
323
	   mov bx, cx
318
	   mov [edi], ebx
324
	   mov [edi], ebx
319
	   add edi, 4
325
	   add edi, 4
320
 
326
 
321
	   add eax, [esp+20]  ;rdt
327
    add eax, [esp+16]
322
	   cmp eax, [esp+28]  ;r_end
328
    cmp eax, [esp+24]
323
	   jb .l1
329
	   jb .l1
324
 
330
 
325
	   mov ebp, esp
331
	   mov ebp, esp
326
 
-
 
327
	   mov esi, [src]
-
 
328
	   add esi, [r_size]
-
 
329
	   sub esi, 32*2
-
 
330
	   mov edx, [r_buff]
-
 
331
	   mov ecx, 16
-
 
332
@@:
-
 
333
	   mov ebx, [esi]
-
 
334
	   mov [edx], ebx
-
 
335
	   add esi, 4
-
 
336
	   add edx, 4
-
 
337
	   dec ecx
-
 
338
	   jnz @B
-
 
339
 
332
 
340
	   sub edi, [dest]
333
	   sub edi, [dest]
341
	   mov eax, edi
334
	   mov eax, edi
342
	   ret
335
	   ret
343
endp
336
endp
344
 
337
 
345
align 4
338
align 4
346
proc resample_18 stdcall, dest:dword,src:dword,r_buff:dword,\
339
proc resample_18 stdcall, dest:dword,src:dword,\
347
		       r_dt:dword, r_size:dword,r_end:dword
340
		       r_dt:dword, r_size:dword,r_end:dword
348
 
-
 
349
	   mov edi, [r_buff]
-
 
350
	   add edi, 32
-
 
351
	   mov esi, [src]
-
 
352
	   mov ecx, [r_size]
-
 
353
	   shr ecx, 2
-
 
354
	   rep movsd
341
 
355
 
342
 
-
 
343
	   mov edi, [dest]
-
 
344
           mov edx, [src]
356
	   mov edi, [dest]
345
           sub edx, 32
357
	   mov edx, [r_buff]
346
 
358
	   mov esi, 16
347
	   mov esi, 16
359
 
348
 
360
align 16
349
align 16
361
.l1:
350
.l1:
362
	   mov ecx, esi
351
	   mov ecx, esi
363
	   mov eax, esi
352
	   mov eax, esi
364
	   and ecx, 0x7FFF
353
	   and ecx, 0x7FFF
365
	   shr eax, 15
354
	   shr eax, 15
366
	   lea eax, [edx+eax]
355
	   lea eax, [edx+eax]
367
 
356
 
368
	   mov bx, word [eax]
357
	   mov bx, word [eax]
369
	   sub bh, 0x80
358
	   sub bh, 0x80
370
	   sub bl, 0x80
359
	   sub bl, 0x80
371
	   movsx eax, bh
360
	   movsx eax, bh
372
	   shl eax,8
361
	   shl eax,8
373
	   movsx ebp, bl
362
	   movsx ebp, bl
374
	   shl ebp,8
363
	   shl ebp,8
375
	   mov ebx, 32768
364
	   mov ebx, 32768
376
	   imul eax, ecx
365
	   imul eax, ecx
377
	   sub ebx, ecx
366
	   sub ebx, ecx
378
	   imul ebx, ebp
367
	   imul ebx, ebp
379
	   lea ecx, [ebx+eax+16384]
368
	   lea ecx, [ebx+eax+16384]
380
	   sar ecx, 15
369
	   sar ecx, 15
381
	   cmp ecx, 32767	  ; 00007fffH
370
	   cmp ecx, 32767	  ; 00007fffH
382
	   jle @f
371
	   jle @f
383
	   mov ecx, 32767	  ; 00007fffH
372
	   mov ecx, 32767	  ; 00007fffH
384
	   jmp .write
373
	   jmp .write
385
@@:
374
@@:
386
	   cmp ecx, -32768	  ; ffff8000H
375
	   cmp ecx, -32768	  ; ffff8000H
387
	   jge .write
376
	   jge .write
388
	   mov ecx, -32768	  ; ffff8000H
377
	   mov ecx, -32768	  ; ffff8000H
389
.write:
378
.write:
390
	   mov ebx, ecx
379
	   mov ebx, ecx
391
	   shl ebx, 16
380
	   shl ebx, 16
392
	   mov bx, cx
381
	   mov bx, cx
393
	   mov [edi], ebx
382
	   mov [edi], ebx
394
	   add edi, 4
383
	   add edi, 4
395
 
384
 
396
	   add esi, [esp+20]  ;rdt
385
    add esi, [esp+16]
397
	   cmp esi, [esp+28]  ;r_end
386
    cmp esi, [esp+24]
398
	   jb .l1
387
	   jb .l1
399
 
388
 
400
	   mov ebp, esp
389
	   mov ebp, esp
401
 
-
 
402
	   mov esi, [src]
-
 
403
	   add esi, [r_size]
-
 
404
	   sub esi, 32
-
 
405
	   mov edx, [r_buff]
-
 
406
	   mov ecx, 8
-
 
407
@@:
-
 
408
	   mov ebx, [esi]
-
 
409
	   mov [edx], ebx
-
 
410
	   add esi, 4
-
 
411
	   add edx, 4
-
 
412
	   dec ecx
-
 
413
	   jnz @B
-
 
414
 
-
 
415
	   sub edi, [dest]
390
	   sub edi, [dest]
416
	   mov eax, edi
391
	   mov eax, edi
417
	   ret
392
	   ret
418
endp
393
endp
419
 
394
 
420
align 4
395
align 4
421
proc copy_stream stdcall, dest:dword,src:dword,r_buff:dword,\
396
proc copy_stream stdcall, dest:dword,src:dword,\
422
		       r_dt:dword, r_size:dword,r_end:dword
397
		       r_dt:dword, r_size:dword,r_end:dword
423
 
398
 
424
           mov ecx, [r_size]
399
           mov ecx, [r_size]
425
           mov eax, ecx
400
           mov eax, ecx
426
           shr ecx, 2
401
           shr ecx, 2
427
           mov esi, [src]
402
           mov esi, [src]
428
           mov edi, [dest]
403
           mov edi, [dest]
429
           rep movsd
404
           rep movsd
430
           mov eax, 16384
405
           mov eax, 16384
431
           ret
406
           ret
432
endp
407
endp
433
 
408
 
434
align 4
409
align 4
435
proc resample_2 stdcall, dest:dword,src:dword,r_buff:dword,\
410
proc resample_2 stdcall, dest:dword,src:dword,\
436
		       r_dt:dword, r_size:dword,r_end:dword
411
		       r_dt:dword, r_size:dword,r_end:dword
437
 
-
 
438
	   mov edi, [r_buff]
-
 
439
	   add edi, 32*4
412
 
440
	   mov esi, [src]
-
 
441
	   mov ecx, [r_size]
413
           mov edx, [src]
442
	   shr ecx, 2
-
 
443
	   rep movsd	  ;call memcpy
-
 
444
 
-
 
445
	   mov edx, [r_buff]
414
           sub edx, 32*4
446
	   mov edi, [dest]
415
           mov edi, [dest]
447
	   mov ebx, [r_dt]
416
           mov ebx, [r_dt]
448
	   mov eax, 16
417
           mov eax, 16
449
	   emms
418
           emms
450
 
419
 
451
align 16
420
align 16
452
.l1:
421
.l1:
453
	   mov ecx, eax
422
           mov ecx, eax
454
	   mov esi, eax
423
           mov esi, eax
455
	   and ecx, 0x7FFF
424
           and ecx, 0x7FFF
456
	   shr esi, 15
425
           shr esi, 15
457
	   lea esi, [edx+esi*4]
426
           lea esi, [edx+esi*4]
458
 
427
 
459
	   movq mm0, [esi]
428
           movq mm0, [esi]
460
	   movq mm1, mm0
429
           movq mm1, mm0
461
 
430
 
462
	   movd mm2, ecx
431
           movd mm2, ecx
463
	   punpcklwd mm2, mm2
432
           punpcklwd mm2, mm2
464
	   movq mm3, qword [m7] ;                  // 0x8000
433
           movq mm3, qword [m7]    ;0x8000
465
 
434
 
466
	   psubw mm3, mm2	;         // 0x8000 - iconst
435
           psubw mm3, mm2 ;        ;0x8000 - iconst
467
	   punpckldq mm3, mm2
436
           punpckldq mm3, mm2
468
 
437
 
469
	   pmulhw mm0, mm3
438
           pmulhw mm0, mm3
470
	   pmullw mm1, mm3
439
           pmullw mm1, mm3
471
 
440
 
472
	   movq mm4, mm1
441
           movq mm4, mm1
473
	   punpcklwd mm1, mm0
442
           punpcklwd mm1, mm0
474
	   punpckhwd mm4, mm0
443
           punpckhwd mm4, mm0
475
	   paddd mm1, mm4
444
           paddd mm1, mm4
476
	   psrad  mm1, 15
445
           psrad  mm1, 15
477
	   packssdw mm1, mm1
446
           packssdw mm1, mm1
478
	   movd [edi], mm1
447
           movd [edi], mm1
479
	   add edi, 4
448
           add edi, 4
480
 
449
 
481
	   add eax, ebx
450
           add eax, ebx
482
	   cmp eax, [r_end]
451
           cmp eax, [r_end]
483
	   jb .l1
452
           jb .l1
484
	   emms
453
           emms
485
 
-
 
486
	   mov esi, [src]
-
 
487
	   add esi, [r_size]
-
 
488
	   sub esi, 32*4
-
 
489
	   mov edx, [r_buff]
-
 
490
	   mov ecx, 32
-
 
491
@@:
-
 
492
	   mov ebx, [esi]
-
 
493
	   mov [edx], ebx
-
 
494
	   add esi, 4
-
 
495
	   add edx, 4
-
 
496
	   dec ecx
-
 
497
	   jnz @B
-
 
498
 
454
 
499
	   sub edi, [dest]
455
           sub edi, [dest]
500
	   mov eax, edi
456
           mov eax, edi
501
	   ret
457
           ret
502
endp
458
endp
503
 
459
 
504
align 4
460
align 4
505
proc resample_28 stdcall, dest:dword,src:dword,r_buff:dword,\
461
proc resample_28 stdcall, dest:dword,src:dword,\
506
		       r_dt:dword, r_size:dword,r_end:dword
462
		       r_dt:dword, r_size:dword,r_end:dword
507
 
-
 
508
	   mov edi, [r_buff]
-
 
509
	   add edi, 32*2
463
 
510
	   mov esi, [src]
-
 
511
	   mov ecx, [r_size]
464
           mov edx, [src]
512
	   shr ecx, 2
-
 
513
	   rep movsd	  ;call memcpy
-
 
514
 
-
 
515
	   mov edx, [r_buff]
465
           sub edx, 32*2
516
	   mov edi, [dest]
466
           mov edi, [dest]
517
	   mov ebx, [r_dt]
467
           mov ebx, [r_dt]
518
	   mov eax, 16
468
           mov eax, 16
519
	   emms
469
           emms
520
	   movq mm7,[mm80]
470
           movq mm7,[mm80]
521
	   movq mm6,[mm_mask]
471
           movq mm6,[mm_mask]
522
 
472
 
523
align 16
473
align 16
524
.l1:
474
.l1:
525
	   mov ecx, eax
475
	   mov ecx, eax
526
	   mov esi, eax
476
	   mov esi, eax
527
	   and ecx, 0x7FFF
477
	   and ecx, 0x7FFF
528
	   shr esi, 15
478
	   shr esi, 15
529
	   lea esi, [edx+esi*2]
479
	   lea esi, [edx+esi*2]
530
 
480
 
531
	   movq mm0, [esi]
481
	   movq mm0, [esi]
532
	   psubb mm0,mm7
482
	   psubb mm0,mm7
533
	   punpcklbw mm0,mm0
483
	   punpcklbw mm0,mm0
534
	   pand mm0,mm6
484
	   pand mm0,mm6
535
 
485
 
536
	   movq mm1, mm0
486
	   movq mm1, mm0
537
 
487
 
538
	   movd mm2, ecx
488
	   movd mm2, ecx
539
	   punpcklwd mm2, mm2
489
	   punpcklwd mm2, mm2
540
	   movq mm3, qword [m7] ;                  // 0x8000
490
	   movq mm3, qword [m7] ;                  // 0x8000
541
 
491
 
542
	   psubw mm3, mm2	;         // 0x8000 - iconst
492
	   psubw mm3, mm2	;         // 0x8000 - iconst
543
	   punpckldq mm3, mm2
493
	   punpckldq mm3, mm2
544
 
494
 
545
	   pmulhw mm0, mm3
495
	   pmulhw mm0, mm3
546
	   pmullw mm1, mm3
496
	   pmullw mm1, mm3
547
 
497
 
548
	   movq mm4, mm1
498
	   movq mm4, mm1
549
	   punpcklwd mm1, mm0
499
	   punpcklwd mm1, mm0
550
	   punpckhwd mm4, mm0
500
	   punpckhwd mm4, mm0
551
	   paddd mm1, mm4
501
	   paddd mm1, mm4
552
	   psrad  mm1, 15
502
	   psrad  mm1, 15
553
	   packssdw mm1, mm1
503
	   packssdw mm1, mm1
554
	   movd [edi], mm1
504
	   movd [edi], mm1
555
	   add edi, 4
505
	   add edi, 4
556
 
506
 
557
	   add eax, ebx
507
	   add eax, ebx
558
	   cmp eax, [r_end]
508
	   cmp eax, [r_end]
559
	   jb .l1
509
	   jb .l1
560
	   emms
510
	   emms
561
 
-
 
562
	   mov esi, [src]
-
 
563
	   add esi, [r_size]
-
 
564
	   sub esi, 32*2
-
 
565
	   mov edx, [r_buff]
-
 
566
	   mov ecx, 16
-
 
567
@@:
-
 
568
	   mov ebx, [esi]
-
 
569
	   mov [edx], ebx
-
 
570
	   add esi, 4
-
 
571
	   add edx, 4
-
 
572
	   dec ecx
-
 
573
	   jnz @B
511
 
574
 
512
 
575
	   sub edi, [dest]
513
	   sub edi, [dest]
576
	   mov eax, edi
514
	   mov eax, edi
577
	   ret
515
	   ret
578
endp
516
endp
579
 
517
 
580
 
518
 
581
proc m16_stereo stdcall, dest:dword,src:dword,r_buff:dword,\
519
proc m16_stereo stdcall, dest:dword,src:dword,\
582
		       r_dt:dword, r_size:dword,r_end:dword
520
		       r_dt:dword, r_size:dword,r_end:dword
583
 
521
 
584
	   mov esi, [src]
522
	   mov esi, [src]
585
	   mov edi, [dest]
523
	   mov edi, [dest]
586
	   mov ecx, [r_size]
524
	   mov ecx, [r_size]
587
	   shr ecx,8
525
	   shr ecx,8
588
@@:
526
@@:
589
	   call m16_s_mmx
527
	   call m16_s_mmx
590
	   add edi, 128
528
	   add edi, 128
591
	   add esi, 64
529
	   add esi, 64
592
	   call m16_s_mmx
530
	   call m16_s_mmx
593
	   add edi, 128
531
	   add edi, 128
594
	   add esi, 64
532
	   add esi, 64
595
	   call m16_s_mmx
533
	   call m16_s_mmx
596
	   add edi, 128
534
	   add edi, 128
597
	   add esi, 64
535
	   add esi, 64
598
	   call m16_s_mmx
536
	   call m16_s_mmx
599
	   add edi, 128
537
	   add edi, 128
600
	   add esi, 64
538
	   add esi, 64
601
	   dec ecx
539
	   dec ecx
602
	   jnz @b
540
	   jnz @b
603
 
541
 
604
	   mov eax, [r_size]
542
	   mov eax, [r_size]
605
	   add eax, eax
543
	   add eax, eax
606
	   ret
544
	   ret
607
endp
545
endp
608
 
546
 
609
align 4
547
align 4
610
proc s8_stereo stdcall, dest:dword,src:dword,r_buff:dword,\
548
proc s8_stereo stdcall, dest:dword,src:dword,\
611
		       r_dt:dword, r_size:dword,r_end:dword
549
		       r_dt:dword, r_size:dword,r_end:dword
612
 
550
 
613
           mov esi, [src]
551
           mov esi, [src]
614
           mov edi, [dest]
552
           mov edi, [dest]
615
           mov ecx, [r_size]
553
           mov ecx, [r_size]
616
           shr ecx, 7
554
           shr ecx, 7
617
 
555
 
618
           movq mm7, [mm80]
556
           movq mm7, [mm80]
619
           movq mm6, [mm_mask]
557
           movq mm6, [mm_mask]
620
@@:
558
@@:
621
           call s8_s_mmx
559
           call s8_s_mmx
622
           add edi, 64
560
           add edi, 64
623
           add esi, 32
561
           add esi, 32
624
           call s8_s_mmx
562
           call s8_s_mmx
625
           add edi, 64
563
           add edi, 64
626
           add esi, 32
564
           add esi, 32
627
           call s8_s_mmx
565
           call s8_s_mmx
628
           add edi, 64
566
           add edi, 64
629
           add esi, 32
567
           add esi, 32
630
           call s8_s_mmx
568
           call s8_s_mmx
631
           add edi, 64
569
           add edi, 64
632
           add esi, 32
570
           add esi, 32
633
           dec ecx
571
           dec ecx
634
           jnz @b
572
           jnz @b
635
 
573
 
636
           mov eax, [r_size]
574
           mov eax, [r_size]
637
           add eax, eax
575
           add eax, eax
638
           ret
576
           ret
639
endp
577
endp
640
 
578
 
641
;-----------------------------------------------------------------------
; m8_stereo - expand a block of 8-bit samples through m8_s_mmx.
;
; stdcall arguments:
;   dest   - output buffer
;   src    - input buffer
;   r_size - input length in bytes; processed in whole 64-byte chunks
;   r_dt, r_end - not referenced by this routine
; Returns:  eax = r_size * 4
; Clobbers: ecx, esi, edi, mm6, mm7 (plus mm0-mm3 inside m8_s_mmx)
;
; Each m8_s_mmx call reads 16 bytes at esi and writes 64 bytes at edi,
; so one pass of the loop consumes 64 input bytes and emits 256.
; NOTE(review): mm80 / mm_mask are defined elsewhere; presumably the
; 0x80 bias and the high-byte word mask used for unsigned 8-bit to
; signed 16-bit conversion - confirm against their definitions.
;-----------------------------------------------------------------------
proc m8_stereo stdcall, dest:dword,src:dword,\
                       r_dt:dword, r_size:dword,r_end:dword

           mov esi, [src]
           mov edi, [dest]

           movq mm7, [mm80]            ; constants expected by m8_s_mmx
           movq mm6, [mm_mask]

           mov ecx, [r_size]
           shr ecx, 6                  ; ecx = number of 64-byte input chunks
           jz .done                    ; no full chunk: without this guard
                                       ; dec/jnz would wrap and run 2^32 times
.chunk:
     repeat 4
           call m8_s_mmx
           add edi, 64
           add esi, 16
     end repeat
           dec ecx
           jnz .chunk
.done:
           mov eax, [r_size]
           shl eax, 2                  ; reported size is four times the input
           ret
endp
672
 
610
 
673
align 4
611
align 4
674
;-----------------------------------------------------------------------
; alloc_mix_buff - take one 512-byte mixing buffer from the pool.
;
; Finds the lowest set bit in [mix_buff_map], clears it, and converts
; the bit index into an address: [mix_buff] + index * 512.
; Returns:  eax = buffer address, or 0 when no bit is set in the map
; Clobbers: eax, flags
;-----------------------------------------------------------------------
proc alloc_mix_buff

           bsf eax, [mix_buff_map]     ; ZF = 1 when the map has no set bit
           jz .none

           btr [mix_buff_map], eax     ; clear the bit: slot is now taken
           shl eax, 9                  ; slot index * 512 bytes
           add eax, [mix_buff]
           ret
.none:
           xor eax, eax
           ret
endp
686
 
624
 
687
;-----------------------------------------------------------------------
; m16_s_mmx - duplicate every 16-bit word of a 64-byte block.
;
; In:       esi = source (64 bytes read), edi = destination (128 bytes
;           written)
; Out:      each source word w is stored as the pair w, w (per the
;           routine name: 16-bit mono samples become stereo pairs)
; Clobbers: mm0, mm1. esi and edi are not advanced.
;-----------------------------------------------------------------------
proc m16_s_mmx

     repeat 8                          ; 8 quadwords in, 16 quadwords out
           movq    mm0, [esi+(%-1)*8]
           movq    mm1, mm0
           punpcklwd mm0, mm0          ; words 0,1 -> 0,0,1,1
           punpckhwd mm1, mm1          ; words 2,3 -> 2,2,3,3
           movq    [edi+(%-1)*16], mm0
           movq    [edi+(%-1)*16+8], mm1
     end repeat

           ret
endp
748
 
686
 
749
align 4
687
align 4
750
;-----------------------------------------------------------------------
; s8_s_mmx - widen 32 bytes of 8-bit samples to 32 16-bit words.
;
; In:       esi = source (32 bytes read), edi = destination (64 bytes
;           written), mm7 = per-byte bias subtracted from the input,
;           mm6 = mask applied to every widened word
; Clobbers: mm0, mm1. esi and edi are not advanced.
;
; Each byte has mm7 subtracted, is duplicated into both halves of a
; word and is then ANDed with mm6.
; NOTE(review): callers load mm7 from mm80 and mm6 from mm_mask;
; presumably 0x80 bytes and a 0xFF00 word mask - confirm.
;-----------------------------------------------------------------------
proc s8_s_mmx

     repeat 4                          ; 4 quadwords in, 8 quadwords out
           movq    mm0, [esi+(%-1)*8]
           psubb   mm0, mm7
           movq    mm1, mm0
           punpcklbw mm0, mm0          ; bytes 0..3 -> words
           punpckhbw mm1, mm1          ; bytes 4..7 -> words
           pand    mm0, mm6
           pand    mm1, mm6
           movq    [edi+(%-1)*16], mm0
           movq    [edi+(%-1)*16+8], mm1
     end repeat

           ret

endp
795
 
733
 
796
align 4
734
align 4
797
;-----------------------------------------------------------------------
; m8_s_mmx - widen 16 bytes of 8-bit samples to 16-bit words and
;            duplicate every resulting word.
;
; In:       esi = source (16 bytes read), edi = destination (64 bytes
;           written), mm7 = per-byte bias subtracted from the input,
;           mm6 = mask applied to every widened word
; Clobbers: mm0-mm3. esi and edi are not advanced.
;
; Per the routine name this turns 8-bit mono into 16-bit stereo: each
; input byte yields two identical output words.
;-----------------------------------------------------------------------
proc m8_s_mmx

     repeat 2                          ; 8 input bytes -> 32 output bytes each
           movq    mm0, [esi+(%-1)*8]
           psubb   mm0, mm7
           movq    mm1, mm0
           punpcklbw mm0, mm0          ; bytes 0..3 -> words
           punpckhbw mm1, mm1          ; bytes 4..7 -> words
           pand    mm0, mm6
           pand    mm1, mm6

           movq    mm2, mm0
           movq    mm3, mm1
           punpcklwd mm0, mm0          ; samples 0,1 doubled
           punpckhwd mm2, mm2          ; samples 2,3 doubled
           punpcklwd mm1, mm1          ; samples 4,5 doubled
           punpckhwd mm3, mm3          ; samples 6,7 doubled

           movq    [edi+(%-1)*32], mm0
           movq    [edi+(%-1)*32+8], mm2
           movq    [edi+(%-1)*32+16], mm1
           movq    [edi+(%-1)*32+24], mm3
     end repeat

           ret
endp
841
 
779
 
842
 
780
 
843
align 4
781
align 4
844
;-----------------------------------------------------------------------
; mix_2_1 - mix two 512-byte streams into output.
;
; stdcall arguments: output, str0, str1 (buffer addresses)
; Work is done as four 128-byte slices, each handled by mix_2_1_mmx.
; The str0/str1 argument slots are advanced in place between slices.
; Clobbers: edi, plus everything mix_2_1_mmx clobbers
;-----------------------------------------------------------------------
proc mix_2_1 stdcall, output:dword, str0:dword, str1:dword

           mov edi, [output]

     repeat 3                          ; first three slices, then step on
           stdcall mix_2_1_mmx, edi, [str0],[str1]
           add edi, 128
           add [str0], 128
           add [str1], 128
     end repeat
           stdcall mix_2_1_mmx, edi, [str0],[str1]   ; fourth slice

           ret
endp
864
 
802
 
865
 
803
 
866
align 4
804
align 4
867
;-----------------------------------------------------------------------
; mix_3_1 - mix three 512-byte streams into output.
;
; stdcall arguments: output, str0, str1, str2 (buffer addresses)
; Work is done as four 128-byte slices, each handled by mix_3_1_mmx.
; The str0..str2 argument slots are advanced in place between slices.
; Clobbers: edi, plus everything mix_3_1_mmx clobbers
;-----------------------------------------------------------------------
proc mix_3_1 stdcall, output:dword, str0:dword, str1:dword, str2:dword

           mov edi, [output]

     repeat 3                          ; first three slices, then step on
           stdcall mix_3_1_mmx, edi, [str0],[str1],[str2]
           add edi, 128
           add [str0], 128
           add [str1], 128
           add [str2], 128
     end repeat
           stdcall mix_3_1_mmx, edi, [str0],[str1],[str2]   ; fourth slice

           ret
endp
890
 
828
 
891
align 4
829
align 4
892
;-----------------------------------------------------------------------
; mix_4_1 - mix four 512-byte streams into a freshly allocated buffer.
;
; stdcall arguments: str0, str1, str2, str3 (buffer addresses)
; Returns:  eax = address of the buffer obtained from alloc_mix_buff
;           holding the mix, or 0 if no buffer was available
; Work is done as four 128-byte slices, each handled by mix_4_1_mmx.
; The str0..str3 argument slots are advanced in place between slices.
; Clobbers: edi, plus everything mix_4_1_mmx clobbers
;-----------------------------------------------------------------------
proc mix_4_1 stdcall, str0:dword, str1:dword,\
                      str2:dword, str3:dword

           local output:DWORD

           call alloc_mix_buff
           test eax, eax
           jz .err                     ; pool exhausted
           mov [output], eax           ; remembered for the return value

           mov edi, eax

     repeat 3                          ; first three slices, then step on
           stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3]
           add edi, 128
           add [str0], 128
           add [str1], 128
           add [str2], 128
           add [str3], 128
     end repeat
           stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3]   ; fourth slice

           mov eax, [output]
           ret
.err:
           xor eax, eax
           ret
endp
929
 
867
 
930
 
868
 
931
align 4
869
align 4
932
;-----------------------------------------------------------------------
; final_mix - mix four 512-byte streams into a caller-supplied buffer.
;
; stdcall arguments: output, str0, str1, str2, str3 (buffer addresses)
; Same slicing as mix_4_1 (four 128-byte calls to mix_4_1_mmx), but
; writes to [output] instead of allocating a buffer.
; The str0..str3 argument slots are advanced in place between slices.
; Clobbers: edi, plus everything mix_4_1_mmx clobbers
;-----------------------------------------------------------------------
proc final_mix stdcall, output:dword, str0:dword, str1:dword,\
                        str2:dword, str3:dword

           mov edi, [output]

     repeat 3                          ; first three slices, then step on
           stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3]
           add edi, 128
           add [str0], 128
           add [str1], 128
           add [str2], 128
           add [str3], 128
     end repeat
           stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3]   ; fourth slice

           ret
endp
959
 
897
 
960
align 4
898
align 4
961
;-----------------------------------------------------------------------
; mix_2_1_mmx - saturating sum of two 128-byte blocks of 16-bit words.
;
; stdcall arguments: output, str0, str1 (buffer addresses)
; For each of the 64 words: output = sat16(str0 + str1).
; The sum is stored as is; the original code has no active scaling
; step after the add.
; Clobbers: eax, ecx, edx, mm0-mm3
;-----------------------------------------------------------------------
proc mix_2_1_mmx stdcall, output:dword, str0:dword, str1:dword

           mov eax, [str0]
           mov ecx, [str1]
           mov edx, [output]

     repeat 4                          ; 32 bytes per pass, 4 passes
           movq mm0, [eax+(%-1)*32]
           paddsw mm0, [ecx+(%-1)*32]
           movq [edx+(%-1)*32], mm0

           movq mm1, [eax+(%-1)*32+8]
           paddsw mm1, [ecx+(%-1)*32+8]
           movq [edx+(%-1)*32+8], mm1

           movq mm2, [eax+(%-1)*32+16]
           paddsw mm2, [ecx+(%-1)*32+16]
           movq [edx+(%-1)*32+16], mm2

           movq mm3, [eax+(%-1)*32+24]
           paddsw mm3, [ecx+(%-1)*32+24]
           movq [edx+(%-1)*32+24], mm3
     end repeat

           ret
endp
1049
 
987
 
1050
align 4
988
align 4
1051
;-----------------------------------------------------------------------
; mix_3_1_mmx - saturating sum of three 128-byte blocks of 16-bit words.
;
; stdcall arguments: output, str0, str1, str2 (buffer addresses)
; For each of the 64 words: output = sat16(sat16(str0 + str1) + str2).
; Clobbers: eax, ebx, ecx, edx, mm0-mm3 (ebx is not preserved)
;-----------------------------------------------------------------------
proc mix_3_1_mmx stdcall, output:dword, str0:dword, str1:dword, str2:dword

           mov eax, [str0]
           mov ebx, [str1]
           mov ecx, [str2]
           mov edx, [output]

     repeat 4                          ; 32 bytes per pass, 4 passes
           movq mm0, [eax+(%-1)*32]
           paddsw mm0, [ebx+(%-1)*32]
           paddsw mm0, [ecx+(%-1)*32]
           movq [edx+(%-1)*32], mm0

           movq mm1, [eax+(%-1)*32+8]
           paddsw mm1, [ebx+(%-1)*32+8]
           paddsw mm1, [ecx+(%-1)*32+8]
           movq [edx+(%-1)*32+8], mm1

           movq mm2, [eax+(%-1)*32+16]
           paddsw mm2, [ebx+(%-1)*32+16]
           paddsw mm2, [ecx+(%-1)*32+16]
           movq [edx+(%-1)*32+16], mm2

           movq mm3, [eax+(%-1)*32+24]
           paddsw mm3, [ebx+(%-1)*32+24]
           paddsw mm3, [ecx+(%-1)*32+24]
           movq [edx+(%-1)*32+24], mm3
     end repeat

           ret
endp
1140
 
1078
 
1141
align 4
1079
align 4
1142
;-----------------------------------------------------------------------
; mix_4_1_mmx - saturating sum of four 128-byte blocks of 16-bit words.
;
; stdcall arguments: output, str0, str1, str2, str3 (buffer addresses)
; For each of the 64 words:
;   output = sat16( sat16(str0 + str2) + sat16(str1 + str3) )
; The pairing matters because every add saturates, so it is kept
; exactly as written.
; Clobbers: eax, ebx, ecx, edx, esi, mm0-mm3 (ebx and esi are not
;           preserved)
;-----------------------------------------------------------------------
proc mix_4_1_mmx stdcall, output:dword, str0:dword, str1:dword,\
                          str2:dword, str3:dword

           mov esi, [str0]
           mov eax, [str1]
           mov ebx, [str2]
           mov ecx, [str3]
           mov edx, [output]

     repeat 8                          ; 16 bytes per pass, 8 passes
           movq mm0, [esi+(%-1)*16]
           movq mm1, [eax+(%-1)*16]
           paddsw mm0, [ebx+(%-1)*16]      ; str0 + str2
           paddsw mm1, [ecx+(%-1)*16]      ; str1 + str3
           paddsw mm0, mm1
           movq [edx+(%-1)*16], mm0

           movq mm2, [esi+(%-1)*16+8]
           movq mm3, [eax+(%-1)*16+8]
           paddsw mm2, [ebx+(%-1)*16+8]    ; str0 + str2
           paddsw mm3, [ecx+(%-1)*16+8]    ; str1 + str3
           paddsw mm2, mm3
           movq [edx+(%-1)*16+8], mm2
     end repeat

           ret
endp
1265
 
1203
 
1266
align 4
1204
align 4
1267
;-----------------------------------------------------------------------
; copy_mem - copy one fixed-size 512-byte block.
;
; stdcall arguments:
;   output - destination address
;   input  - source address
; Copies 0x80 dwords (512 bytes) from input to output, lowest address
; first.
; Clobbers: eax, ecx, esi, edi, flags
;-----------------------------------------------------------------------
proc copy_mem stdcall, output:dword, input:dword

           mov edi, [output]
           mov esi, [input]
           mov ecx, 0x80               ; dword count: 0x80 * 4 = 512 bytes
.next:
           mov eax, [esi]
           mov [edi], eax
           add esi, 4
           add edi, 4
           dec ecx                     ; dec/jnz instead of the legacy LOOP
           jnz .next                   ; instruction, matching memcpy

           ret
endp
1281
 
1219
 
1282
;-----------------------------------------------------------------------
; memcpy - register-based dword copy.
;
; In:       esi = source, edi = destination, ecx = number of dwords
; Out:      esi and edi advanced past the copied data, ecx = 0
; Clobbers: eax, flags
;
; A count of zero copies nothing. Without the guard the first
; dec ecx would wrap and the loop would run 2^32 times.
;-----------------------------------------------------------------------
proc memcpy
           test ecx, ecx
           jz .done                    ; nothing to copy
.copy:
           mov eax, [esi]
           mov [edi], eax
           add esi, 4
           add edi, 4
           dec ecx
           jnz .copy
.done:
           ret
endp