Subversion Repositories Kolibri OS

Rev

Rev 387 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
444 serge 1
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
;;                                                              ;;
3
;; Copyright (C) KolibriOS team 2004-2007. All rights reserved. ;;
4
;; Distributed under terms of the GNU General Public License    ;;
5
;;                                                              ;;
6
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
293 serge 7
 
8
if used mmx128_mix_2
9
 
10
align 4
11
;-----------------------------------------------------------------------
; mmx128_mix_2 - saturating sum of two 128-byte blocks of signed words
;
; In:    eax = first source block
;        ebx = second source block
;        edi = destination block
; Out:   [edi .. edi+127] = [eax] + [ebx], per 16-bit word (paddsw)
; Clobb: xmm0-xmm7; general registers and flags are not written
; Note:  movaps and the memory form of paddsw require all three
;        pointers to be 16-byte aligned. Despite the "mmx" in the name
;        the routine works on XMM registers (paddsw xmm needs SSE2).
;        Presumably one step of an audio mixer - confirm with callers.
;-----------------------------------------------------------------------
mmx128_mix_2:
           ; hint the cache line just past each source block
           prefetcht1 [eax+128]
           prefetcht1 [ebx+128]

           ; xmm0..xmm7 = the complete first source block
rept 8 r:0 {
           movaps  xmm#r, [eax+16*r]
}

           ; add the second block one register at a time and store each
           ; sum immediately (same add/store interleaving as before)
rept 8 r:0 {
           paddsw  xmm#r, [ebx+16*r]
           movaps  [edi+16*r], xmm#r
}
           ret
41
 
42
align 4
43
;-----------------------------------------------------------------------
; mmx128_mix_3 - saturating sum of three 128-byte blocks of signed words
;
; In:    eax, ebx, ecx = source blocks
;        edi           = destination block
; Out:   [edi .. edi+127] = ([eax] + [ebx]) + [ecx], per 16-bit word;
;        each of the two additions saturates on its own (paddsw)
; Clobb: xmm0-xmm7; general registers and flags are not written
; Note:  movaps and the memory form of paddsw require all four
;        pointers to be 16-byte aligned.
;-----------------------------------------------------------------------
mmx128_mix_3:
           ; hint the cache line just past each source block
           prefetcht1 [eax+128]
           prefetcht1 [ebx+128]
           prefetcht1 [ecx+128]

           ; phase 1: xmm0..xmm7 = first source block
rept 8 r:0 {
           movaps  xmm#r, [eax+16*r]
}

           ; phase 2: accumulate the second source block
rept 8 r:0 {
           paddsw  xmm#r, [ebx+16*r]
}

           ; phase 3: accumulate the third source block and store each
           ; finished register right away
rept 8 r:0 {
           paddsw  xmm#r, [ecx+16*r]
           movaps  [edi+16*r], xmm#r
}
           ret
83
 
84
align 4
85
;-----------------------------------------------------------------------
; mmx128_mix_4 - saturating sum of four 128-byte blocks of signed words
;
; In:    eax, ebx, ecx, edx = source blocks
;        edi                = destination block
; Out:   [edi .. edi+127] = ([eax] + [ecx]) + ([ebx] + [edx]), per
;        16-bit word; every one of the three additions saturates on
;        its own (paddsw), so the pairing is part of the result
; Clobb: xmm0-xmm7; general registers and flags are not written
; Note:  movaps and the memory form of paddsw require all five
;        pointers to be 16-byte aligned.
;-----------------------------------------------------------------------

; Mixes one 64-byte half of the block, starting at byte offset `off`.
; Even registers carry the eax/ecx partial sums, odd registers the
; ebx/edx partial sums.
macro mix4_half off {
           ; load 64 bytes from the first and from the second source
           movaps  xmm0, [eax+off]
           movaps  xmm2, [eax+off+16]
           movaps  xmm4, [eax+off+32]
           movaps  xmm6, [eax+off+48]
           movaps  xmm1, [ebx+off]
           movaps  xmm3, [ebx+off+16]
           movaps  xmm5, [ebx+off+32]
           movaps  xmm7, [ebx+off+48]

           ; even regs += third source, odd regs += fourth source
           paddsw  xmm0, [ecx+off]
           paddsw  xmm2, [ecx+off+16]
           paddsw  xmm4, [ecx+off+32]
           paddsw  xmm6, [ecx+off+48]
           paddsw  xmm1, [edx+off]
           paddsw  xmm3, [edx+off+16]
           paddsw  xmm5, [edx+off+32]
           paddsw  xmm7, [edx+off+48]

           ; fold each odd partial sum into its even partner and store
           paddsw  xmm0, xmm1
           movaps  [edi+off], xmm0
           paddsw  xmm2, xmm3
           movaps  [edi+off+16], xmm2
           paddsw  xmm4, xmm5
           movaps  [edi+off+32], xmm4
           paddsw  xmm6, xmm7
           movaps  [edi+off+48], xmm6
}

mmx128_mix_4:
           ; hint the cache line just past each source block
           prefetcht1 [eax+128]
           prefetcht1 [ebx+128]
           prefetcht1 [ecx+128]
           prefetcht1 [edx+128]

           mix4_half 0                  ; bytes   0..63
           mix4_half 64                 ; bytes  64..127
           ret

purge mix4_half                         ; helper is private to this routine
145
end if