Rev 7165 | Rev 7199 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
2288 | clevermous | 1 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
2 | ;; ;; |
||
7124 | dunkaist | 3 | ;; Copyright (C) KolibriOS team 2004-2017. All rights reserved. ;; |
2288 | clevermous | 4 | ;; Distributed under terms of the GNU General Public License ;; |
5 | ;; ;; |
||
6 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
||
7 | |||
8 | $Revision: 7168 $ |
||
9 | |||
10 | |||
;-----------------------------------------------------------------------
; init_fpu
; Bring up the floating-point / SIMD unit and store an image of the
; freshly initialised state into [fpu_data], using the richest save
; instruction available (XSAVE, FXSAVE or FNSAVE).
;
; Also sets [xsave_area_size]: 512 when XSAVE is not used, otherwise
; the size reported by CPUID leaf 0Dh after XCR0 has been programmed.
;
; Clobbers: eax, ebx, ecx, edx, flags (plus whatever the init_* helper
;           that gets called clobbers).
;-----------------------------------------------------------------------
init_fpu:
        clts
        fninit

        bt      [cpu_caps+(CAPS_XSAVE/32)], CAPS_XSAVE mod 32
        jc      .use_xsave

; --- CPU has no XSAVE: choose between FXSAVE and FNSAVE ---------------
        mov     [xsave_area_size], 512  ; enough for FPU/MMX and SSE
        bt      [cpu_caps], CAPS_SSE
        jc      .sse

.fpu_mmx:
        call    init_fpu_mmx
        fnsave  [fpu_data]
        ret

.sse:
        call    init_sse
        fxsave  [fpu_data]
        ret

; --- CPU has XSAVE: enable it and program XCR0 ------------------------
.use_xsave:
        mov     ecx, cr4
        or      ecx, CR4_OSXSAVE
        mov     cr4, ecx

        ; CPUID.0Dh/0: eax = XCR0 bits supported by the processor
        mov     eax, 0x0d
        xor     ecx, ecx
        cpuid
        ; ebx = features the kernel wants, limited to what is supported
        mov     ebx, XCR0_FPU_MMX + XCR0_SSE + XCR0_AVX + XCR0_AVX512
        and     ebx, eax

        ; XCR0 |= ebx (edx:eax is read by xgetbv and written back as is)
        xor     ecx, ecx
        xgetbv
        or      eax, ebx
        xor     ecx, ecx
        xsetbv

        ; query again: ebx now reports the save-area size that the
        ; enabled feature set requires
        mov     eax, 0x0d
        xor     ecx, ecx
        cpuid
        mov     [xsave_area_size], ebx
        cmp     ebx, fpu_data_size
        ja      $                       ; hang here: image does not fit

        ; eax/edx from CPUID are left untouched by init_avx512/init_avx
        ; and serve as the EDX:EAX feature mask for the xsave below
        test    eax, XCR0_AVX512
        jnz     .avx512
        test    eax, XCR0_AVX
        jnz     .avx
        test    eax, XCR0_SSE
        jnz     .sse
        jmp     .fpu_mmx

.avx512:
        call    init_avx512
        jmp     .store_xsave_image

.avx:
        call    init_avx

.store_xsave_image:
        xsave   [fpu_data]
        ret
||
2288 | clevermous | 67 | |
;-----------------------------------------------------------------------
; init_fpu_mmx
; Configure CR0 for a plain x87/MMX unit: set MP and NE, clear EM.
;
; Clobbers: ecx, flags
;-----------------------------------------------------------------------
init_fpu_mmx:
        mov     ecx, cr0
        or      ecx, CR0_MP + CR0_NE    ; set MP and NE
        and     ecx, not CR0_EM         ; clear EM
        mov     cr0, ecx
        ret
||
74 | |||
;-----------------------------------------------------------------------
; init_sse
; Enable SSE: set CR4.OSFXSR and CR4.OSXMMEXPT, clear CR0.EM and CR0.MP,
; set CR0.NE, load MXCSR with MXCSR_INIT and zero xmm0..xmm7.
;
; Clobbers: ebx, ecx, xmm0-xmm7, MXCSR, flags
;-----------------------------------------------------------------------
init_sse:
        mov     ebx, cr4
        mov     ecx, cr0
        or      ebx, CR4_OSFXSR+CR4_OSXMMEXPT
        mov     cr4, ebx

        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; The MXCSR image is kept in a real stack slot. Scratch space
        ; below ESP is not safe here: an interrupt or exception taken on
        ; this stack between the store and the load would overwrite it.
        push    dword MXCSR_INIT
        ldmxcsr [esp]
        pop     ecx                     ; drop the slot (ecx is scratch)

        xorps   xmm0, xmm0
        xorps   xmm1, xmm1
        xorps   xmm2, xmm2
        xorps   xmm3, xmm3
        xorps   xmm4, xmm4
        xorps   xmm5, xmm5
        xorps   xmm6, xmm6
        xorps   xmm7, xmm7
        ret
||
7124 | dunkaist | 97 | |
;-----------------------------------------------------------------------
; init_avx
; Enable SSE/AVX use: set CR4.OSFXSR and CR4.OSXMMEXPT, clear CR0.EM and
; CR0.MP, set CR0.NE, load MXCSR with MXCSR_INIT and clear the vector
; registers with vzeroall.
;
; Clobbers: ebx, ecx, vector registers, MXCSR, flags
;-----------------------------------------------------------------------
init_avx:
        mov     ebx, cr4
        or      ebx, CR4_OSFXSR + CR4_OSXMMEXPT
        mov     cr4, ebx

        mov     ecx, cr0
        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; The MXCSR image is kept in a real stack slot. Scratch space
        ; below ESP is not safe here: an interrupt or exception taken on
        ; this stack between the store and the load would overwrite it.
        push    dword MXCSR_INIT
        vldmxcsr [esp]
        pop     ecx                     ; drop the slot (ecx is scratch)

        vzeroall
        ret
113 | |||
;-----------------------------------------------------------------------
; init_avx512
; Enable SSE/AVX-512 use: set CR4.OSFXSR and CR4.OSXMMEXPT, clear CR0.EM
; and CR0.MP, set CR0.NE, load MXCSR with MXCSR_INIT and zero zmm0..zmm7.
;
; Clobbers: ebx, ecx, zmm0-zmm7, MXCSR, flags
;-----------------------------------------------------------------------
init_avx512:
        mov     ebx, cr4
        or      ebx, CR4_OSFXSR + CR4_OSXMMEXPT
        mov     cr4, ebx

        mov     ecx, cr0
        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; The MXCSR image is kept in a real stack slot. Scratch space
        ; below ESP is not safe here: an interrupt or exception taken on
        ; this stack between the store and the load would overwrite it.
        push    dword MXCSR_INIT
        vldmxcsr [esp]
        pop     ecx                     ; drop the slot (ecx is scratch)

        vpxorq  zmm0, zmm0, zmm0
        vpxorq  zmm1, zmm1, zmm1
        vpxorq  zmm2, zmm2, zmm2
        vpxorq  zmm3, zmm3, zmm3
        vpxorq  zmm4, zmm4, zmm4
        vpxorq  zmm5, zmm5, zmm5
        vpxorq  zmm6, zmm6, zmm6
        vpxorq  zmm7, zmm7, zmm7

        ret
||
137 | |||
;-----------------------------------------------------------------------
; fpu_save
; Store the current task's FPU/SSE state into a caller-supplied buffer
; and make the current task the FPU owner.
;
; param
;  eax= 512 bytes memory area aligned on a 16-byte boundary
;
; Runs with interrupts disabled; EFLAGS is restored before returning.
; ecx, esi and edi are preserved. eax is overwritten on the path where
; the current task is not the FPU owner.
;-----------------------------------------------------------------------
align 4
fpu_save:
        push    ecx
        push    esi
        push    edi
        pushfd
        cli
        clts

        mov     edi, eax                ; edi = destination buffer
        mov     esi, [CURRENT_TASK]
        mov     ecx, [fpu_owner]
        cmp     esi, ecx
        je      .is_owner

; Another slot owns the FPU: take ownership, park the live registers in
; the previous owner's save area, then hand the caller a copy of the
; current task's saved image.
        mov     [fpu_owner], esi

        shl     ecx, 8                  ; previous owner slot * 256
        mov     eax, [ecx+SLOT_BASE+APPDATA.fpu_state]
        call    save_context

; first 512 bytes of XSAVE area have the same format as FXSAVE
        shl     esi, 8                  ; current slot * 256
        mov     esi, [esi+SLOT_BASE+APPDATA.fpu_state]
        cld
        mov     ecx, 512/4
        rep movsd
        fninit
        jmp     .done

.is_owner:
; The live registers already belong to the current task: dump them
; straight into the caller's buffer (eax still points at it).
        call    save_fpu_context

.done:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret
||
181 | |||
;-----------------------------------------------------------------------
; avx_save_size
; Out: eax = current value of [xsave_area_size], i.e. the buffer size
;            that avx_save / avx_restore copy.
; No other register and no flag is modified.
;-----------------------------------------------------------------------
avx_save_size:
        mov     eax, dword [xsave_area_size]
        ret
||
185 | |||
;-----------------------------------------------------------------------
; avx_save
; Store the current task's extended (XSAVE-format when available) state
; into a caller-supplied buffer and make the current task the FPU owner.
;
; param
;  eax= avx_save_size() bytes memory area aligned on a 64-byte boundary
;
; Runs with interrupts disabled; EFLAGS is restored before returning.
; ecx, esi and edi are preserved. eax is overwritten on the path where
; the current task is not the FPU owner.
;-----------------------------------------------------------------------
avx_save:
        push    ecx
        push    esi
        push    edi

        pushfd
        cli

        clts
        mov     edi, eax                ; edi = destination buffer

        mov     ecx, [fpu_owner]
        mov     esi, [CURRENT_TASK]
        cmp     ecx, esi
        jne     .save

        ; current task owns the FPU: save live registers into the buffer
        call    save_context
        jmp     .exit
.save:
        mov     [fpu_owner], esi

        ; park the live registers in the previous owner's save area
        shl     ecx, 8                  ; previous owner slot * 256
        mov     eax, [ecx+SLOT_BASE+APPDATA.fpu_state]

        call    save_context

        ; copy the current task's saved image into the caller's buffer
        shl     esi, 8                  ; current slot * 256
        mov     esi, [esi+SLOT_BASE+APPDATA.fpu_state]
        mov     ecx, [xsave_area_size]
        add     ecx, 3                  ; round the byte count up to
        shr     ecx, 2                  ; whole dwords
        cld                             ; DF comes from the caller; force
                                        ; a forward copy as fpu_save does
        rep movsd
        fninit
.exit:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret
||
228 | |||
;-----------------------------------------------------------------------
; save_context
; Save the live FPU/SIMD register state to the area at [eax].
; Uses XSAVE when the OSXSAVE capability bit is set in cpu_caps,
; otherwise continues in save_fpu_context (FXSAVE / FNSAVE).
;
; In:  eax = destination save area
; On the XSAVE path eax, ecx and edx are preserved; flags are modified.
;-----------------------------------------------------------------------
align 4
save_context:
        bt      [cpu_caps+(CAPS_OSXSAVE/32)], CAPS_OSXSAVE mod 32
        jnc     save_fpu_context

        ; XSAVE takes its requested-feature bitmap from EDX:EAX, so the
        ; destination pointer must not stay in eax. An all-ones bitmap
        ; is ANDed with XCR0 by the CPU, i.e. every enabled component
        ; is saved.
        push    eax
        push    ecx
        push    edx
        mov     ecx, eax                ; ecx = destination save area
        mov     eax, -1
        mov     edx, eax
        xsave   [ecx]
        pop     edx
        pop     ecx
        pop     eax
        ret
||
;-----------------------------------------------------------------------
; save_fpu_context
; Save the legacy FPU state to [eax]: FXSAVE when the SSE capability bit
; is set in cpu_caps, FNSAVE otherwise.
;
; In:  eax = destination save area
; Modifies flags (bt).
;-----------------------------------------------------------------------
save_fpu_context:
        bt      [cpu_caps], CAPS_SSE
        jc      .with_sse

        fnsave  [eax]
        ret

.with_sse:
        fxsave  [eax]
        ret
||
243 | |||
7168 | clevermous | 244 | |
;-----------------------------------------------------------------------
; fpu_restore
; Counterpart of fpu_save: load a 512-byte FPU/SSE image back.
;
; In:  eax = source image (as filled by fpu_save)
;
; If the current task owns the FPU, the image is loaded into the
; registers (FXRSTOR, or FRSTOR without SSE). Otherwise it is copied
; into the current task's saved-state area.
;
; Runs with interrupts disabled; EFLAGS is restored before returning.
; ecx, esi and edi are preserved; eax is overwritten.
;-----------------------------------------------------------------------
align 4
fpu_restore:
        push    ecx
        push    esi
        push    edi                     ; the copy path uses edi

        mov     esi, eax                ; esi = source image

        pushfd
        cli

        mov     ecx, [fpu_owner]
        mov     eax, [CURRENT_TASK]
        cmp     ecx, eax
        jne     .copy

        clts
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE

        fxrstor [esi]
        jmp     .exit

.no_SSE:
        fnclex                          ;fix possible problems
        frstor  [esi]
        jmp     .exit

.copy:
        shl     eax, 8                  ; current slot * 256
        mov     edi, [eax+SLOT_BASE+APPDATA.fpu_state]
        mov     ecx, 512/4
        cld
        rep movsd

.exit:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret
||
286 | |||
;-----------------------------------------------------------------------
; avx_restore
; Counterpart of avx_save: load an extended-state image back.
;
; In:  eax = source image (as filled by avx_save)
;
; If the current task owns the FPU, the image is loaded into the
; registers (XRSTOR when the OSXSAVE capability bit is set, else FXRSTOR
; or FRSTOR). Otherwise [xsave_area_size] bytes (rounded up to dwords)
; are copied into the current task's saved-state area.
;
; Runs with interrupts disabled; EFLAGS is restored before returning.
; ecx, edx, esi and edi are preserved; eax is overwritten.
;-----------------------------------------------------------------------
avx_restore:
        push    ecx
        push    esi
        push    edi                     ; the copy path uses edi

        mov     esi, eax                ; esi = source image

        pushfd
        cli

        mov     ecx, [fpu_owner]
        mov     eax, [CURRENT_TASK]
        cmp     ecx, eax
        jne     .copy

        clts
        bt      [cpu_caps+(CAPS_OSXSAVE/32)], CAPS_OSXSAVE mod 32
        jnc     .no_xsave

        ; XRSTOR takes its requested-feature bitmap from EDX:EAX. An
        ; all-ones bitmap is ANDed with XCR0 by the CPU, i.e. every
        ; enabled component is restored.
        push    eax
        push    edx
        mov     eax, -1
        mov     edx, eax
        xrstor  [esi]
        pop     edx
        pop     eax
        jmp     .exit

.no_xsave:
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE

        fxrstor [esi]
        jmp     .exit

.no_SSE:
        fnclex                          ;fix possible problems
        frstor  [esi]
        jmp     .exit

.copy:
        shl     eax, 8                  ; current slot * 256
        mov     edi, [eax+SLOT_BASE+APPDATA.fpu_state]
        mov     ecx, [xsave_area_size]
        add     ecx, 3                  ; round the byte count up to
        shr     ecx, 2                  ; whole dwords
        cld
        rep movsd

.exit:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret
||
337 | |||
;-----------------------------------------------------------------------
; except_7 - #NM (device not available) exception handler
;
; If the current task is not the FPU owner, the live register state is
; saved into the owner's fpu_state area, ownership is handed to the
; current task and that task's saved state is loaded. The save/restore
; pair is XSAVE/XRSTOR, FXSAVE/FXRSTOR or FNSAVE/FRSTOR depending on the
; capability bits in cpu_caps.
;-----------------------------------------------------------------------
align 4
except_7:                               ;#NM exception handler
        save_ring3_context
        clts
        mov     ax, app_data
        mov     ds, ax
        mov     es, ax

        mov     ebx, [fpu_owner]
        cmp     ebx, [CURRENT_TASK]
        je      .exit                   ; already the owner: nothing to do

        shl     ebx, 8                  ; owner slot * 256
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
        bt      [cpu_caps+(CAPS_OSXSAVE/32)], CAPS_OSXSAVE mod 32
        jnc     .no_xsave

        ; XSAVE/XRSTOR take their requested-feature bitmap from EDX:EAX,
        ; so the area pointers are kept in ebx. An all-ones bitmap is
        ; ANDed with XCR0 by the CPU, i.e. every enabled component is
        ; saved and restored.
        ; NOTE(review): edx is saved explicitly because the register set
        ; covered by save_ring3_context is not visible here - confirm.
        push    edx
        mov     ebx, eax                ; ebx = previous owner's area
        mov     eax, -1
        mov     edx, eax
        xsave   [ebx]
        mov     ebx, [CURRENT_TASK]
        mov     [fpu_owner], ebx
        shl     ebx, 8                  ; current slot * 256
        mov     ebx, [ebx+SLOT_BASE+APPDATA.fpu_state]
        xrstor  [ebx]
        pop     edx
.exit:
        restore_ring3_context
        iret

.no_xsave:
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE

        fxsave  [eax]
        mov     ebx, [CURRENT_TASK]
        mov     [fpu_owner], ebx
        shl     ebx, 8                  ; current slot * 256
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
        fxrstor [eax]
        jmp     .exit

.no_SSE:
        fnsave  [eax]
        mov     ebx, [CURRENT_TASK]
        mov     [fpu_owner], ebx
        shl     ebx, 8                  ; current slot * 256
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
        frstor  [eax]
        jmp     .exit
||
385 | |||
iglobal
; Slot number of the task whose state currently lives in the FPU
; registers. Compared against [CURRENT_TASK] by the save/restore
; routines and by the #NM handler; initial value is 2.
fpu_owner dd 2
endg