Rev 2288 | Go to most recent revision | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 2288 | Rev 2455 | ||
---|---|---|---|
1 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
1 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
2 | ;; ;; |
2 | ;; ;; |
3 | ;; Copyright (C) KolibriOS team 2004-2007. All rights reserved. ;; |
3 | ;; Copyright (C) KolibriOS team 2004-2011. All rights reserved. ;; |
4 | ;; Distributed under terms of the GNU General Public License ;; |
4 | ;; Distributed under terms of the GNU General Public License ;; |
5 | ;; ;; |
5 | ;; ;; |
6 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
6 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
7 | 7 | ||
8 | if used mmx128_mix_2 |
8 | if used mmx128_mix_2 |
9 | 9 | ||
;-----------------------------------------------------------------------
; mmx128_mix_2 - sum two 128-byte blocks into a third.
;
; Each block is processed as eight 16-byte vectors of packed signed
; 16-bit words; paddsw saturates every word on overflow.
;
; In:    eax = first source block   (bytes 0..127 are read)
;        ebx = second source block  (bytes 0..127 are read)
;        edi = destination block    (bytes 0..127 are written)
; Out:   [edi+i] = sat16([eax+i] + [ebx+i]) for each word i
; Clobb: xmm0-xmm7; eax, ebx and edi are left unchanged
; Note:  movaps and paddsw with a memory operand require 16-byte
;        aligned addresses, so all three pointers must be aligned.
;        Despite "mmx" in the name, the routine works on XMM registers.
;-----------------------------------------------------------------------
align 4
mmx128_mix_2:
        ; Hint the cache about the 128 bytes that follow each source
        ; block (presumably the blocks a caller will mix next - confirm).
        prefetcht1 [eax+16*8]
        prefetcht1 [ebx+16*8]

        ; Pull the whole first source block into xmm0..xmm7.
rept 8 n:0 {
        movaps  xmm#n, [eax+16*n]
}

        ; Add the matching vector of the second block and store it
        ; straight away, one vector at a time.
rept 8 n:0 {
        paddsw  xmm#n, [ebx+16*n]
        movaps  [edi+16*n], xmm#n
}
        ret

41 | 41 | ||
;-----------------------------------------------------------------------
; mmx128_mix_3 - sum three 128-byte blocks into a fourth.
;
; Each block is processed as eight 16-byte vectors of packed signed
; 16-bit words. The additions are applied one after another, each with
; saturation, so the result is sat16(sat16(a + b) + c).
;
; In:    eax = source block a       (bytes 0..127 are read)
;        ebx = source block b       (bytes 0..127 are read)
;        ecx = source block c       (bytes 0..127 are read)
;        edi = destination block    (bytes 0..127 are written)
; Clobb: xmm0-xmm7; eax, ebx, ecx and edi are left unchanged
; Note:  movaps and paddsw with a memory operand require 16-byte
;        aligned addresses, so all four pointers must be aligned.
;-----------------------------------------------------------------------
align 4
mmx128_mix_3:
        ; Hint the cache about the 128 bytes that follow each source
        ; block (presumably the blocks a caller will mix next - confirm).
        prefetcht1 [eax+16*8]
        prefetcht1 [ebx+16*8]
        prefetcht1 [ecx+16*8]

        ; xmm0..xmm7 = block a
rept 8 n:0 {
        movaps  xmm#n, [eax+16*n]
}

        ; xmm0..xmm7 += block b (saturating)
rept 8 n:0 {
        paddsw  xmm#n, [ebx+16*n]
}

        ; xmm0..xmm7 += block c (saturating); each vector is written
        ; out as soon as it is complete.
rept 8 n:0 {
        paddsw  xmm#n, [ecx+16*n]
        movaps  [edi+16*n], xmm#n
}
        ret

83 | 83 | ||
;-----------------------------------------------------------------------
; mmx128_mix_4 - sum four 128-byte blocks into a fifth.
;
; Each block is processed as 16-byte vectors of packed signed 16-bit
; words. The sources are combined pairwise, every step saturating:
;       result = sat16( sat16(a + c) + sat16(b + d) )
; Only eight XMM registers are available, so the 128 bytes are handled
; as two 64-byte halves; the even registers carry a+c, the odd ones b+d.
;
; In:    eax = source block a       (bytes 0..127 are read)
;        ebx = source block b       (bytes 0..127 are read)
;        ecx = source block c       (bytes 0..127 are read)
;        edx = source block d       (bytes 0..127 are read)
;        edi = destination block    (bytes 0..127 are written)
; Clobb: xmm0-xmm7; eax, ebx, ecx, edx and edi are left unchanged
; Note:  movaps and paddsw with a memory operand require 16-byte
;        aligned addresses, so all five pointers must be aligned.
;-----------------------------------------------------------------------
align 4
mmx128_mix_4:
        ; Hint the cache about the 128 bytes that follow each source
        ; block (presumably the blocks a caller will mix next - confirm).
        prefetcht1 [eax+16*8]
        prefetcht1 [ebx+16*8]
        prefetcht1 [ecx+16*8]
        prefetcht1 [edx+16*8]

        ; h = 0 handles bytes 0..63, h = 1 handles bytes 64..127.
rept 2 h:0 {
        ; even registers = a
        movaps  xmm0, [eax+64*h]
        movaps  xmm2, [eax+64*h+16]
        movaps  xmm4, [eax+64*h+32]
        movaps  xmm6, [eax+64*h+48]

        ; odd registers = b
        movaps  xmm1, [ebx+64*h]
        movaps  xmm3, [ebx+64*h+16]
        movaps  xmm5, [ebx+64*h+32]
        movaps  xmm7, [ebx+64*h+48]

        ; even registers = sat(a + c)
        paddsw  xmm0, [ecx+64*h]
        paddsw  xmm2, [ecx+64*h+16]
        paddsw  xmm4, [ecx+64*h+32]
        paddsw  xmm6, [ecx+64*h+48]

        ; odd registers = sat(b + d)
        paddsw  xmm1, [edx+64*h]
        paddsw  xmm3, [edx+64*h+16]
        paddsw  xmm5, [edx+64*h+32]
        paddsw  xmm7, [edx+64*h+48]

        ; fold each odd register into its even partner and store
        paddsw  xmm0, xmm1
        movaps  [edi+64*h], xmm0
        paddsw  xmm2, xmm3
        movaps  [edi+64*h+16], xmm2
        paddsw  xmm4, xmm5
        movaps  [edi+64*h+32], xmm4
        paddsw  xmm6, xmm7
        movaps  [edi+64*h+48], xmm6
}
        ret
145 | end if |
145 | end if |