Subversion Repositories Kolibri OS

Rev

Rev 2434 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
431 serge 1
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
;;                                                              ;;
2465 Serge 3
;; Copyright (C) KolibriOS team 2004-2011. All rights reserved. ;;
431 serge 4
;; Distributed under terms of the GNU General Public License    ;;
5
;;                                                              ;;
6
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
293 serge 7
 
8
if used mmx128_mix_2
9
 
10
align 4
11
;-----------------------------------------------------------------------
; mmx128_mix_2 - sum two 128-byte blocks of packed signed 16-bit words.
;
; In:    eax = first source block   (128 bytes)
;        ebx = second source block  (128 bytes)
;        edi = destination block    (128 bytes)
; Out:   for each of the 64 words: [edi] = saturate_s16([eax] + [ebx])
; Clobb: xmm0-xmm7; no general-purpose register is written
; Notes: movaps and paddsw with a memory operand fault on addresses that
;        are not 16-byte aligned, so all three pointers must be aligned.
;        Despite "mmx" in the name the routine works on XMM registers
;        (paddsw on xmm operands is an SSE2 instruction).
;-----------------------------------------------------------------------
mmx128_mix_2:
        ; cache hint for the 128 bytes that follow each source block
        ; (presumably the chunk the caller passes next - confirm at call site)
        prefetcht1 [eax+128]
        prefetcht1 [ebx+128]

        ; xmm0..xmm7 = entire first source block; it is read completely
        ; before the first store to [edi]
        rept 8 n:0 {
        movaps  xmm#n, [eax+n*16]
        }

        ; add the second source with signed saturation and store the
        ; result, one 16-byte lane group at a time
        rept 8 n:0 {
        paddsw  xmm#n, [ebx+n*16]
        movaps  [edi+n*16], xmm#n
        }
        ret
293 serge 41
 
42
align 4
43
;-----------------------------------------------------------------------
; mmx128_mix_3 - sum three 128-byte blocks of packed signed 16-bit words.
;
; In:    eax, ebx, ecx = source blocks (128 bytes each)
;        edi           = destination block (128 bytes)
; Out:   for each of the 64 words:
;            [edi] = saturate_s16(saturate_s16([eax] + [ebx]) + [ecx])
;        Saturation is applied after every addition, so the intermediate
;        eax+ebx sum may already be clipped before ecx is added.
; Clobb: xmm0-xmm7; no general-purpose register is written
; Notes: movaps and paddsw with a memory operand fault on addresses that
;        are not 16-byte aligned, so all four pointers must be aligned.
;        NOTE(review): in the file as visible this routine sits inside
;        "if used mmx128_mix_2", so it is assembled only when
;        mmx128_mix_2 is referenced somewhere.
;-----------------------------------------------------------------------
mmx128_mix_3:
        ; cache hint for the 128 bytes that follow each source block
        ; (presumably the chunk the caller passes next - confirm at call site)
        prefetcht1 [eax+128]
        prefetcht1 [ebx+128]
        prefetcht1 [ecx+128]

        ; xmm0..xmm7 = entire first source block
        rept 8 n:0 {
        movaps  xmm#n, [eax+n*16]
        }

        ; accumulate the second source (signed saturating add)
        rept 8 n:0 {
        paddsw  xmm#n, [ebx+n*16]
        }

        ; accumulate the third source and store each finished 16-byte group
        rept 8 n:0 {
        paddsw  xmm#n, [ecx+n*16]
        movaps  [edi+n*16], xmm#n
        }
        ret
293 serge 83
 
84
align 4
85
;-----------------------------------------------------------------------
; mmx128_mix_4 - sum four 128-byte blocks of packed signed 16-bit words.
;
; In:    eax, ebx, ecx, edx = source blocks (128 bytes each)
;        edi                = destination block (128 bytes)
; Out:   for each of the 64 words:
;            [edi] = saturate_s16( saturate_s16([eax] + [ecx])
;                                + saturate_s16([ebx] + [edx]) )
;        The sources are summed pairwise (eax with ecx, ebx with edx) and
;        every addition saturates, so the pairing affects clipped results.
; Clobb: xmm0-xmm7; no general-purpose register is written
; Notes: movaps and paddsw with a memory operand fault on addresses that
;        are not 16-byte aligned, so all five pointers must be aligned.
;        The 128 bytes are handled as two 64-byte halves because each
;        half already occupies all eight XMM registers (four per pair).
;        NOTE(review): in the file as visible this routine sits inside
;        "if used mmx128_mix_2", so it is assembled only when
;        mmx128_mix_2 is referenced somewhere.
;-----------------------------------------------------------------------
mmx128_mix_4:
        ; cache hint for the 128 bytes that follow each source block
        ; (presumably the chunk the caller passes next - confirm at call site)
        prefetcht1 [eax+128]
        prefetcht1 [ebx+128]
        prefetcht1 [ecx+128]
        prefetcht1 [edx+128]

        ; blk = 0 handles bytes 0..63, blk = 1 handles bytes 64..127;
        ; the first half is stored before the second half is loaded
        rept 2 blk:0 {
        movaps  xmm0, [eax+blk*64]
        movaps  xmm2, [eax+blk*64+16]
        movaps  xmm4, [eax+blk*64+32]
        movaps  xmm6, [eax+blk*64+48]
        movaps  xmm1, [ebx+blk*64]
        movaps  xmm3, [ebx+blk*64+16]
        movaps  xmm5, [ebx+blk*64+32]
        movaps  xmm7, [ebx+blk*64+48]

        paddsw  xmm0, [ecx+blk*64]
        paddsw  xmm2, [ecx+blk*64+16]
        paddsw  xmm4, [ecx+blk*64+32]
        paddsw  xmm6, [ecx+blk*64+48]
        paddsw  xmm1, [edx+blk*64]
        paddsw  xmm3, [edx+blk*64+16]
        paddsw  xmm5, [edx+blk*64+32]
        paddsw  xmm7, [edx+blk*64+48]

        paddsw  xmm0, xmm1
        movaps  [edi+blk*64], xmm0
        paddsw  xmm2, xmm3
        movaps  [edi+blk*64+16], xmm2
        paddsw  xmm4, xmm5
        movaps  [edi+blk*64+32], xmm4
        paddsw  xmm6, xmm7
        movaps  [edi+blk*64+48], xmm6
        }
        ret
293 serge 145
end if