Rev 5363 | Rev 7164 | Go to most recent revision | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 5363 | Rev 7124 | ||
---|---|---|---|
1 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
1 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
2 | ;; ;; |
2 | ;; ;; |
3 | ;; Copyright (C) KolibriOS team 2004-2015. All rights reserved. ;; |
3 | ;; Copyright (C) KolibriOS team 2004-2017. All rights reserved. ;; |
4 | ;; Distributed under terms of the GNU General Public License ;; |
4 | ;; Distributed under terms of the GNU General Public License ;; |
5 | ;; ;; |
5 | ;; ;; |
6 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
6 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
7 | 7 | ||
8 | $Revision: 5363 $ |
8 | $Revision: 7124 $ |
9 | 9 | ||
10 | 10 | ||
11 | init_fpu: |
11 | init_fpu: |
12 | clts |
12 | clts |
13 | fninit |
13 | fninit |
- | 14 | ||
- | 15 | bt [cpu_caps+(CAPS_XSAVE/8)], CAPS_XSAVE mod 8 |
|
- | 16 | jnc .no_xsave |
|
- | 17 | ||
- | 18 | mov ecx, cr4 |
|
- | 19 | or ecx, CR4_OSXSAVE |
|
- | 20 | mov cr4, ecx |
|
- | 21 | ||
- | 22 | mov eax, 0x0d |
|
- | 23 | xor ecx, ecx |
|
- | 24 | cpuid |
|
- | 25 | mov ebx, XCR0_FPU_MMX + XCR0_SSE + XCR0_AVX + XCR0_AVX512 |
|
- | 26 | and ebx, eax |
|
- | 27 | xor ecx, ecx |
|
- | 28 | xgetbv |
|
- | 29 | or eax, ebx |
|
- | 30 | xor ecx, ecx |
|
- | 31 | xsetbv |
|
- | 32 | ||
- | 33 | mov eax, 0x0d |
|
- | 34 | xor ecx, ecx |
|
- | 35 | cpuid |
|
- | 36 | mov [xsave_area_size], ebx |
|
- | 37 | ||
- | 38 | test eax, XCR0_AVX512 |
|
- | 39 | jz @f |
|
- | 40 | call init_avx512 |
|
- | 41 | ret |
|
- | 42 | @@: |
|
- | 43 | test eax, XCR0_AVX |
|
- | 44 | jz @f |
|
- | 45 | call init_avx |
|
- | 46 | ret |
|
- | 47 | @@: |
|
- | 48 | test eax, XCR0_SSE |
|
- | 49 | jz @f |
|
- | 50 | call init_sse |
|
- | 51 | ret |
|
- | 52 | @@: |
|
- | 53 | call init_fpu_mmx |
|
- | 54 | ret |
|
- | 55 | .no_xsave: |
|
14 | 56 | mov [xsave_area_size], 512 ; enough for FPU/MMX and SSE |
|
15 | bt [cpu_caps], CAPS_SSE |
57 | bt [cpu_caps], CAPS_SSE |
- | 58 | jnc @f |
|
- | 59 | call init_sse |
|
- | 60 | fxsave [fpu_data] |
|
- | 61 | ret |
|
- | 62 | @@: |
|
- | 63 | call init_fpu_mmx |
|
- | 64 | fnsave [fpu_data] |
|
- | 65 | ret |
|
- | 66 | ||
- | 67 | init_fpu_mmx: |
|
- | 68 | mov ecx, cr0 |
|
- | 69 | and ecx, not CR0_EM |
|
- | 70 | or ecx, CR0_MP + CR0_NE |
|
- | 71 | mov cr0, ecx |
|
- | 72 | ret |
|
16 | jnc .no_SSE |
73 | |
17 | 74 | init_sse: |
|
18 | mov ebx, cr4 |
75 | mov ebx, cr4 |
19 | mov ecx, cr0 |
76 | mov ecx, cr0 |
20 | or ebx, CR4_OSFXSR+CR4_OSXMMEXPT |
77 | or ebx, CR4_OSFXSR+CR4_OSXMMEXPT |
21 | mov cr4, ebx |
78 | mov cr4, ebx |
22 | 79 | ||
23 | and ecx, not (CR0_MP+CR0_EM) |
80 | and ecx, not (CR0_EM + CR0_MP) |
24 | or ecx, CR0_NE |
81 | or ecx, CR0_NE |
25 | mov cr0, ecx |
82 | mov cr0, ecx |
26 | 83 | ||
27 | mov dword [esp-4], SSE_INIT |
84 | mov dword [esp-4], MXCSR_INIT |
28 | ldmxcsr [esp-4] |
85 | ldmxcsr [esp-4] |
29 | 86 | ||
30 | xorps xmm0, xmm0 |
87 | xorps xmm0, xmm0 |
31 | xorps xmm1, xmm1 |
88 | xorps xmm1, xmm1 |
32 | xorps xmm2, xmm2 |
89 | xorps xmm2, xmm2 |
33 | xorps xmm3, xmm3 |
90 | xorps xmm3, xmm3 |
34 | xorps xmm4, xmm4 |
91 | xorps xmm4, xmm4 |
35 | xorps xmm5, xmm5 |
92 | xorps xmm5, xmm5 |
36 | xorps xmm6, xmm6 |
93 | xorps xmm6, xmm6 |
37 | xorps xmm7, xmm7 |
94 | xorps xmm7, xmm7 |
38 | fxsave [fpu_data] ;[eax] |
- | |
39 | ret |
95 | ret |
- | 96 | ||
40 | .no_SSE: |
97 | init_avx: |
- | 98 | mov ebx, cr4 |
|
- | 99 | or ebx, CR4_OSFXSR + CR4_OSXMMEXPT |
|
- | 100 | mov cr4, ebx |
|
- | 101 | ||
41 | mov ecx, cr0 |
102 | mov ecx, cr0 |
42 | and ecx, not CR0_EM |
103 | and ecx, not (CR0_EM + CR0_MP) |
43 | or ecx, CR0_MP+CR0_NE |
104 | or ecx, CR0_NE |
44 | mov cr0, ecx |
105 | mov cr0, ecx |
- | 106 | ||
- | 107 | mov dword [esp-4], MXCSR_INIT |
|
- | 108 | vldmxcsr [esp-4] |
|
- | 109 | ||
- | 110 | vzeroall |
|
- | 111 | ret |
|
- | 112 | ||
- | 113 | init_avx512: |
|
- | 114 | mov ebx, cr4 |
|
- | 115 | or ebx, CR4_OSFXSR + CR4_OSXMMEXPT |
|
- | 116 | mov cr4, ebx |
|
- | 117 | ||
45 | fnsave [fpu_data] |
118 | mov ecx, cr0 |
- | 119 | and ecx, not (CR0_EM + CR0_MP) |
|
- | 120 | or ecx, CR0_NE |
|
- | 121 | mov cr0, ecx |
|
- | 122 | ||
- | 123 | mov dword [esp-4], MXCSR_INIT |
|
- | 124 | vldmxcsr [esp-4] |
|
- | 125 | ||
- | 126 | vpxorq zmm0, zmm0, zmm0 |
|
- | 127 | vpxorq zmm1, zmm1, zmm1 |
|
- | 128 | vpxorq zmm2, zmm2, zmm2 |
|
- | 129 | vpxorq zmm3, zmm3, zmm3 |
|
- | 130 | vpxorq zmm4, zmm4, zmm4 |
|
- | 131 | vpxorq zmm5, zmm5, zmm5 |
|
- | 132 | vpxorq zmm6, zmm6, zmm6 |
|
- | 133 | vpxorq zmm7, zmm7, zmm7 |
|
- | 134 | ||
46 | ret |
135 | ret |
47 | 136 | ||
48 | ; param |
137 | ; param |
49 | ; eax= 512 bytes memory area |
138 | ; eax= 512 bytes memory area |
50 | 139 | ||
51 | align 4 |
140 | align 4 |
52 | fpu_save: |
141 | fpu_save: |
53 | push ecx |
142 | push ecx |
54 | push esi |
143 | push esi |
55 | push edi |
144 | push edi |
56 | 145 | ||
57 | pushfd |
146 | pushfd |
58 | cli |
147 | cli |
59 | 148 | ||
60 | clts |
149 | clts |
61 | mov edi, eax |
150 | mov edi, eax |
62 | 151 | ||
63 | mov ecx, [fpu_owner] |
152 | mov ecx, [fpu_owner] |
64 | mov esi, [CURRENT_TASK] |
153 | mov esi, [CURRENT_TASK] |
65 | cmp ecx, esi |
154 | cmp ecx, esi |
66 | jne .save |
155 | jne .save |
67 | 156 | ||
68 | call save_context |
157 | call save_context |
69 | jmp .exit |
158 | jmp .exit |
70 | .save: |
159 | .save: |
71 | mov [fpu_owner], esi |
160 | mov [fpu_owner], esi |
72 | 161 | ||
73 | shl ecx, 8 |
162 | shl ecx, 8 |
74 | mov eax, [ecx+SLOT_BASE+APPDATA.fpu_state] |
163 | mov eax, [ecx+SLOT_BASE+APPDATA.fpu_state] |
75 | 164 | ||
76 | call save_context |
165 | call save_context |
77 | 166 | ||
78 | shl esi, 8 |
167 | shl esi, 8 |
79 | mov esi, [esi+SLOT_BASE+APPDATA.fpu_state] |
168 | mov esi, [esi+SLOT_BASE+APPDATA.fpu_state] |
80 | mov ecx, 512/4 |
169 | mov ecx, 512/4 |
81 | cld |
170 | cld |
82 | rep movsd |
171 | rep movsd |
83 | fninit |
172 | fninit |
84 | .exit: |
173 | .exit: |
85 | popfd |
174 | popfd |
86 | pop edi |
175 | pop edi |
87 | pop esi |
176 | pop esi |
88 | pop ecx |
177 | pop ecx |
89 | ret |
178 | ret |
90 | 179 | ||
91 | align 4 |
180 | align 4 |
92 | save_context: |
181 | save_context: |
- | 182 | bt [cpu_caps+(CAPS_OSXSAVE/8)], CAPS_OSXSAVE mod 8 |
|
- | 183 | jnc .no_xsave |
|
- | 184 | xsave [eax] |
|
- | 185 | ret |
|
- | 186 | .no_xsave: |
|
93 | bt [cpu_caps], CAPS_SSE |
187 | bt [cpu_caps], CAPS_SSE |
94 | jnc .no_SSE |
188 | jnc .no_SSE |
95 | 189 | ||
96 | fxsave [eax] |
190 | fxsave [eax] |
97 | ret |
191 | ret |
98 | .no_SSE: |
192 | .no_SSE: |
99 | fnsave [eax] |
193 | fnsave [eax] |
100 | ret |
194 | ret |
101 | 195 | ||
102 | align 4 |
196 | align 4 |
103 | fpu_restore: |
197 | fpu_restore: |
104 | push ecx |
198 | push ecx |
105 | push esi |
199 | push esi |
106 | 200 | ||
107 | mov esi, eax |
201 | mov esi, eax |
108 | 202 | ||
109 | pushfd |
203 | pushfd |
110 | cli |
204 | cli |
111 | 205 | ||
112 | mov ecx, [fpu_owner] |
206 | mov ecx, [fpu_owner] |
113 | mov eax, [CURRENT_TASK] |
207 | mov eax, [CURRENT_TASK] |
114 | cmp ecx, eax |
208 | cmp ecx, eax |
115 | jne .copy |
209 | jne .copy |
116 | 210 | ||
117 | clts |
211 | clts |
- | 212 | bt [cpu_caps+(CAPS_OSXSAVE/8)], CAPS_OSXSAVE mod 8 |
|
- | 213 | jnc .no_xsave |
|
- | 214 | xrstor [esi] |
|
- | 215 | popfd |
|
- | 216 | pop esi |
|
- | 217 | pop ecx |
|
- | 218 | ret |
|
- | 219 | .no_xsave: |
|
118 | bt [cpu_caps], CAPS_SSE |
220 | bt [cpu_caps], CAPS_SSE |
119 | jnc .no_SSE |
221 | jnc .no_SSE |
120 | 222 | ||
121 | fxrstor [esi] |
223 | fxrstor [esi] |
122 | popfd |
224 | popfd |
123 | pop esi |
225 | pop esi |
124 | pop ecx |
226 | pop ecx |
125 | ret |
227 | ret |
126 | .no_SSE: |
228 | .no_SSE: |
127 | fnclex ;fix possible problems |
229 | fnclex ;fix possible problems |
128 | frstor [esi] |
230 | frstor [esi] |
129 | popfd |
231 | popfd |
130 | pop esi |
232 | pop esi |
131 | pop ecx |
233 | pop ecx |
132 | ret |
234 | ret |
133 | .copy: |
235 | .copy: |
134 | shl eax, 8 |
236 | shl eax, 8 |
135 | mov edi, [eax+SLOT_BASE+APPDATA.fpu_state] |
237 | mov edi, [eax+SLOT_BASE+APPDATA.fpu_state] |
136 | mov ecx, 512/4 |
238 | mov ecx, 512/4 |
137 | cld |
239 | cld |
138 | rep movsd |
240 | rep movsd |
139 | popfd |
241 | popfd |
140 | pop esi |
242 | pop esi |
141 | pop ecx |
243 | pop ecx |
142 | ret |
244 | ret |
143 | 245 | ||
144 | align 4 |
246 | align 4 |
145 | except_7: ;#NM exception handler |
247 | except_7: ;#NM exception handler |
146 | save_ring3_context |
248 | save_ring3_context |
147 | clts |
249 | clts |
148 | mov ax, app_data; |
250 | mov ax, app_data; |
149 | mov ds, ax |
251 | mov ds, ax |
150 | mov es, ax |
252 | mov es, ax |
151 | 253 | ||
152 | mov ebx, [fpu_owner] |
254 | mov ebx, [fpu_owner] |
153 | cmp ebx, [CURRENT_TASK] |
255 | cmp ebx, [CURRENT_TASK] |
154 | je .exit |
256 | je .exit |
155 | 257 | ||
156 | shl ebx, 8 |
258 | shl ebx, 8 |
157 | mov eax, [ebx+SLOT_BASE+APPDATA.fpu_state] |
259 | mov eax, [ebx+SLOT_BASE+APPDATA.fpu_state] |
158 | bt [cpu_caps], CAPS_SSE |
260 | bt [cpu_caps], CAPS_SSE |
159 | jnc .no_SSE |
261 | jnc .no_SSE |
160 | 262 | ||
161 | fxsave [eax] |
263 | fxsave [eax] |
162 | mov ebx, [CURRENT_TASK] |
264 | mov ebx, [CURRENT_TASK] |
163 | mov [fpu_owner], ebx |
265 | mov [fpu_owner], ebx |
164 | shl ebx, 8 |
266 | shl ebx, 8 |
165 | mov eax, [ebx+SLOT_BASE+APPDATA.fpu_state] |
267 | mov eax, [ebx+SLOT_BASE+APPDATA.fpu_state] |
166 | fxrstor [eax] |
268 | fxrstor [eax] |
167 | .exit: |
269 | .exit: |
168 | restore_ring3_context |
270 | restore_ring3_context |
169 | iret |
271 | iret |
170 | 272 | ||
171 | .no_SSE: |
273 | .no_SSE: |
172 | fnsave [eax] |
274 | fnsave [eax] |
173 | mov ebx, [CURRENT_TASK] |
275 | mov ebx, [CURRENT_TASK] |
174 | mov [fpu_owner], ebx |
276 | mov [fpu_owner], ebx |
175 | shl ebx, 8 |
277 | shl ebx, 8 |
176 | mov eax, [ebx+SLOT_BASE+APPDATA.fpu_state] |
278 | mov eax, [ebx+SLOT_BASE+APPDATA.fpu_state] |
177 | frstor [eax] |
279 | frstor [eax] |
178 | restore_ring3_context |
280 | restore_ring3_context |
179 | iret |
281 | iret |
180 | 282 | ||
181 | iglobal |
283 | iglobal |
182 | fpu_owner dd 2 |
284 | fpu_owner dd 2 |
183 | endg |
285 | endg |