; Subversion Repositories: Kolibri OS
; Comparison of Rev 2288 and Rev 2455.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;                                                              ;;
;; Copyright (C) KolibriOS team 2004-2011. All rights reserved. ;;
;; Distributed under terms of the GNU General Public License    ;;
;;                                                              ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Block mixers: each routine sums 128 bytes (64 packed signed 16-bit
; words) from 2, 3 or 4 source blocks into one destination block,
; using signed saturating addition (paddsw).
;
; Common contract for all three routines:
;   - Every memory operand is accessed with movaps or with a paddsw
;     memory operand, so all source and destination pointers must be
;     16-byte aligned; otherwise the CPU raises #GP.
;   - Clobbers xmm0-xmm7. No general-purpose register is written.
;   - Despite the "mmx" prefix in the names, the code works on xmm
;     registers; paddsw on xmm registers needs SSE2.
;   - prefetcht1 hints the 128 bytes that follow each source block.
;
; NOTE(review): the whole group is assembled only when mmx128_mix_2
; itself is referenced; mmx128_mix_3 / mmx128_mix_4 are not checked
; by the condition below - confirm callers always use mix_2 as well.

if used mmx128_mix_2

;------------------------------------------------------------------
; mmx128_mix_2
; In:   eax = source block A
;       ebx = source block B
;       edi = destination block
; Out:  [edi .. edi+127] = sat16(A + B), word by word
;------------------------------------------------------------------
align 4
mmx128_mix_2:
        prefetcht1 [eax+128]
        prefetcht1 [ebx+128]

        ; load all 128 bytes of A into xmm0..xmm7
        movaps  xmm0, [eax]
        movaps  xmm1, [eax+16]
        movaps  xmm2, [eax+32]
        movaps  xmm3, [eax+48]
        movaps  xmm4, [eax+64]
        movaps  xmm5, [eax+80]
        movaps  xmm6, [eax+96]
        movaps  xmm7, [eax+112]

        ; add B and store each 16-byte lane as soon as it is ready
        paddsw  xmm0, [ebx]
        movaps  [edi], xmm0
        paddsw  xmm1, [ebx+16]
        movaps  [edi+16], xmm1
        paddsw  xmm2, [ebx+32]
        movaps  [edi+32], xmm2
        paddsw  xmm3, [ebx+48]
        movaps  [edi+48], xmm3
        paddsw  xmm4, [ebx+64]
        movaps  [edi+64], xmm4
        paddsw  xmm5, [ebx+80]
        movaps  [edi+80], xmm5
        paddsw  xmm6, [ebx+96]
        movaps  [edi+96], xmm6
        paddsw  xmm7, [ebx+112]
        movaps  [edi+112], xmm7
        ret

;------------------------------------------------------------------
; mmx128_mix_3
; In:   eax = source block A
;       ebx = source block B
;       ecx = source block C
;       edi = destination block
; Out:  [edi .. edi+127] = sat16(sat16(A + B) + C), word by word
;------------------------------------------------------------------
align 4
mmx128_mix_3:
        prefetcht1 [eax+128]
        prefetcht1 [ebx+128]
        prefetcht1 [ecx+128]

        ; load all 128 bytes of A into xmm0..xmm7
        movaps  xmm0, [eax]
        movaps  xmm1, [eax+16]
        movaps  xmm2, [eax+32]
        movaps  xmm3, [eax+48]
        movaps  xmm4, [eax+64]
        movaps  xmm5, [eax+80]
        movaps  xmm6, [eax+96]
        movaps  xmm7, [eax+112]

        ; first saturating add: A + B
        paddsw  xmm0, [ebx]
        paddsw  xmm1, [ebx+16]
        paddsw  xmm2, [ebx+32]
        paddsw  xmm3, [ebx+48]
        paddsw  xmm4, [ebx+64]
        paddsw  xmm5, [ebx+80]
        paddsw  xmm6, [ebx+96]
        paddsw  xmm7, [ebx+112]

        ; second saturating add: (A + B) + C, then store
        paddsw  xmm0, [ecx]
        movaps  [edi], xmm0
        paddsw  xmm1, [ecx+16]
        movaps  [edi+16], xmm1
        paddsw  xmm2, [ecx+32]
        movaps  [edi+32], xmm2
        paddsw  xmm3, [ecx+48]
        movaps  [edi+48], xmm3
        paddsw  xmm4, [ecx+64]
        movaps  [edi+64], xmm4
        paddsw  xmm5, [ecx+80]
        movaps  [edi+80], xmm5
        paddsw  xmm6, [ecx+96]
        movaps  [edi+96], xmm6
        paddsw  xmm7, [ecx+112]
        movaps  [edi+112], xmm7
        ret

;------------------------------------------------------------------
; mmx128_mix_4
; In:   eax = source block A
;       ebx = source block B
;       ecx = source block C
;       edx = source block D
;       edi = destination block
; Out:  [edi .. edi+127] = sat16(sat16(A + C) + sat16(B + D))
;
; With only eight xmm registers the block is processed as two
; 64-byte halves. Even registers hold the A+C partial sums, odd
; registers hold the B+D partial sums. Saturation is applied at each
; step, so the pairing order is part of the result and must be kept.
;------------------------------------------------------------------
align 4
mmx128_mix_4:
        prefetcht1 [eax+128]
        prefetcht1 [ebx+128]
        prefetcht1 [ecx+128]
        prefetcht1 [edx+128]

        ; ---- first half: bytes 0..63 ----
        movaps  xmm0, [eax]
        movaps  xmm2, [eax+16]
        movaps  xmm4, [eax+32]
        movaps  xmm6, [eax+48]
        movaps  xmm1, [ebx]
        movaps  xmm3, [ebx+16]
        movaps  xmm5, [ebx+32]
        movaps  xmm7, [ebx+48]

        ; even regs: A + C, odd regs: B + D
        paddsw  xmm0, [ecx]
        paddsw  xmm2, [ecx+16]
        paddsw  xmm4, [ecx+32]
        paddsw  xmm6, [ecx+48]
        paddsw  xmm1, [edx]
        paddsw  xmm3, [edx+16]
        paddsw  xmm5, [edx+32]
        paddsw  xmm7, [edx+48]

        ; combine the two partial sums and store
        paddsw  xmm0, xmm1
        movaps  [edi], xmm0
        paddsw  xmm2, xmm3
        movaps  [edi+16], xmm2
        paddsw  xmm4, xmm5
        movaps  [edi+32], xmm4
        paddsw  xmm6, xmm7
        movaps  [edi+48], xmm6

        ; ---- second half: bytes 64..127 ----
        movaps  xmm0, [eax+64]
        movaps  xmm2, [eax+80]
        movaps  xmm4, [eax+96]
        movaps  xmm6, [eax+112]

        movaps  xmm1, [ebx+64]
        movaps  xmm3, [ebx+80]
        movaps  xmm5, [ebx+96]
        movaps  xmm7, [ebx+112]

        ; even regs: A + C
        paddsw  xmm0, [ecx+64]
        paddsw  xmm2, [ecx+80]
        paddsw  xmm4, [ecx+96]
        paddsw  xmm6, [ecx+112]

        ; odd regs: B + D
        paddsw  xmm1, [edx+64]
        paddsw  xmm3, [edx+80]
        paddsw  xmm5, [edx+96]
        paddsw  xmm7, [edx+112]

        ; combine the two partial sums and store
        paddsw  xmm0, xmm1
        movaps  [edi+64], xmm0
        paddsw  xmm2, xmm3
        movaps  [edi+80], xmm2
        paddsw  xmm4, xmm5
        movaps  [edi+96], xmm4
        paddsw  xmm6, xmm7
        movaps  [edi+112], xmm6
        ret
end if