Rev 7199 | Rev 8869 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
2288 | clevermous | 1 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
2 | ;; ;; |
||
7124 | dunkaist | 3 | ;; Copyright (C) KolibriOS team 2004-2017. All rights reserved. ;; |
2288 | clevermous | 4 | ;; Distributed under terms of the GNU General Public License ;; |
5 | ;; ;; |
||
6 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
||
7 | |||
8 | $Revision: 7276 $ |
||
9 | |||
10 | |||
; Initialise the FPU/SIMD unit and store an image of the freshly initialised
; state in fpu_data.
; The richest mechanism the CPU reports is used: XSAVE (with AVX-512, AVX or
; SSE state enabled in XCR0), otherwise FXSAVE (SSE), otherwise FNSAVE.
; out:
;  [xsave_area_size] = size in bytes of one saved context
;  XSAVE path only: [xsave_eax], [xsave_edx] = feature mask loaded into XCR0
; Hangs in place (ja $ / jz $) when the XSAVE area would not fit into
; fpu_data_size, or when XSAVE is reported but SSE state is not.
; Registers are not preserved.
init_fpu:
        clts
        fninit

        bt      [cpu_caps+(CAPS_XSAVE/32)*4], CAPS_XSAVE mod 32
        jnc     .no_xsave

        mov     ecx, cr4
        or      ecx, CR4_OSXSAVE
        mov     cr4, ecx
        ; don't call cpuid again
        bts     [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32

        ; zero xsave header
        mov     ecx, 64/4
        xor     eax, eax
        mov     edi, fpu_data + 512     ; skip legacy region
        cld                             ; do not rely on the caller's direction flag
        rep stosd

        mov     eax, 0x0d       ; extended state enumeration main leaf
        xor     ecx, ecx
        cpuid
        ; enable only the state components this kernel knows how to handle
        and     eax, XCR0_FPU_MMX + XCR0_SSE + XCR0_AVX + XCR0_AVX512
        xor     edx, edx
        mov     [xsave_eax], eax
        mov     [xsave_edx], edx
        xor     ecx, ecx        ; ecx = 0 selects XCR0
        xsetbv

        ; query the leaf again: ebx is now the area size for the components
        ; that have just been enabled in XCR0
        mov     eax, 0x0d
        xor     ecx, ecx
        cpuid
        add     ebx, 63         ; round the size up to a multiple of 64
        and     ebx, NOT 63
        mov     [xsave_area_size], ebx
        cmp     ebx, fpu_data_size
        ja      $               ; area does not fit into fpu_data: stop here

        test    eax, XCR0_AVX512
        jz      .try_avx
        call    init_avx512
        jmp     .store_xsave
.try_avx:
        test    eax, XCR0_AVX
        jz      .try_sse
        call    init_avx
        jmp     .store_xsave
.try_sse:
        test    eax, XCR0_SSE
        jz      $               ; XSAVE without SSE state: stop here
        call    init_sse
.store_xsave:
        ; common tail of all XSAVE variants: save the initial state image
        mov     eax, [xsave_eax]
        mov     edx, [xsave_edx]
        xsave   [fpu_data]
        ret

.no_xsave:
        mov     [xsave_area_size], 512  ; enough for FPU/MMX and SSE
        bt      [cpu_caps], CAPS_SSE
        jnc     .fpu_mmx
.sse:
        call    init_sse
        fxsave  [fpu_data]
        ret
.fpu_mmx:
        call    init_fpu_mmx
        fnsave  [fpu_data]
        ret
||
2288 | clevermous | 84 | |
; Configure CR0 for a CPU that has only x87/MMX:
; CR0.EM is cleared, CR0.MP and CR0.NE are set.
; clobbers: ecx
init_fpu_mmx:
        mov     ecx, cr0
        ; the set and cleared bits are distinct, so the order of the two
        ; masking steps does not change the resulting value
        or      ecx, CR0_MP or CR0_NE
        and     ecx, not CR0_EM
        mov     cr0, ecx
        ret
||
91 | |||
; Enable SSE and bring its state to a known value.
; out:
;  CR4: OSFXSR and OSXMMEXPT set
;  CR0: EM and MP cleared, NE set
;  MXCSR = MXCSR_INIT, xmm0..xmm7 = 0
; clobbers: ebx, ecx
init_sse:
        mov     ebx, cr4
        mov     ecx, cr0
        or      ebx, CR4_OSFXSR + CR4_OSXMMEXPT
        mov     cr4, ebx

        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; The MXCSR image is pushed instead of being written below esp:
        ; an interrupt or exception taken on this stack would overwrite
        ; anything stored under the stack pointer before ldmxcsr reads it.
        push    dword MXCSR_INIT
        ldmxcsr [esp]
        add     esp, 4

        xorps   xmm0, xmm0
        xorps   xmm1, xmm1
        xorps   xmm2, xmm2
        xorps   xmm3, xmm3
        xorps   xmm4, xmm4
        xorps   xmm5, xmm5
        xorps   xmm6, xmm6
        xorps   xmm7, xmm7
        ret
||
7124 | dunkaist | 114 | |
; Enable SSE/AVX and bring the vector state to a known value.
; out:
;  CR4: OSFXSR and OSXMMEXPT set
;  CR0: EM and MP cleared, NE set
;  MXCSR = MXCSR_INIT, all vector registers zeroed by vzeroall
; clobbers: ebx, ecx
init_avx:
        mov     ebx, cr4
        or      ebx, CR4_OSFXSR + CR4_OSXMMEXPT
        mov     cr4, ebx

        mov     ecx, cr0
        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; The MXCSR image is pushed instead of being written below esp:
        ; an interrupt or exception taken on this stack would overwrite
        ; anything stored under the stack pointer before vldmxcsr reads it.
        push    dword MXCSR_INIT
        vldmxcsr [esp]
        add     esp, 4

        vzeroall
        ret
130 | |||
; Enable SSE/AVX-512 and bring the vector state to a known value.
; out:
;  CR4: OSFXSR and OSXMMEXPT set
;  CR0: EM and MP cleared, NE set
;  MXCSR = MXCSR_INIT, zmm0..zmm7 = 0
; clobbers: ebx, ecx
init_avx512:
        mov     ebx, cr4
        or      ebx, CR4_OSFXSR + CR4_OSXMMEXPT
        mov     cr4, ebx

        mov     ecx, cr0
        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; The MXCSR image is pushed instead of being written below esp:
        ; an interrupt or exception taken on this stack would overwrite
        ; anything stored under the stack pointer before vldmxcsr reads it.
        push    dword MXCSR_INIT
        vldmxcsr [esp]
        add     esp, 4

        vpxorq  zmm0, zmm0, zmm0
        vpxorq  zmm1, zmm1, zmm1
        vpxorq  zmm2, zmm2, zmm2
        vpxorq  zmm3, zmm3, zmm3
        vpxorq  zmm4, zmm4, zmm4
        vpxorq  zmm5, zmm5, zmm5
        vpxorq  zmm6, zmm6, zmm6
        vpxorq  zmm7, zmm7, zmm7

        ret
||
154 | |||
; Store the current task's FPU/SSE context in a caller-supplied buffer.
; param
;  eax= 512 bytes memory area aligned on a 16-byte boundary
;
; If the current task owns the FPU, the live registers are written to the
; buffer. Otherwise the current task becomes the owner: the previous owner's
; live state is saved into its APPDATA.fpu_state, the current task's saved
; image (first 512 bytes) is copied to the buffer and the FPU is reset.
; ecx, esi, edi and eflags are preserved; interrupts are disabled while the
; routine runs. eax is overwritten on the ownership-change path.

align 4
fpu_save:
        push    ecx
        push    esi
        push    edi

        pushfd
        cli

        clts
        mov     edi, eax                ; edi = destination buffer

        mov     ecx, [fpu_owner]
        mov     esi, [CURRENT_TASK]
        cmp     ecx, esi
        je      .owner_is_current

        ; another slot owns the FPU: take it over
        mov     [fpu_owner], esi

        shl     ecx, 8
        mov     eax, [ecx+SLOT_BASE+APPDATA.fpu_state]
        call    save_context            ; park the previous owner's live state

        ; first 512 bytes of XSAVE area have the same format as FXSAVE
        shl     esi, 8
        mov     esi, [esi+SLOT_BASE+APPDATA.fpu_state]
        mov     ecx, 512/4
        cld
        rep movsd
        fninit
.done:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret

.owner_is_current:
        call    save_fpu_context        ; eax still points to the buffer
        jmp     .done
||
198 | |||
; Report how large a buffer avx_save / avx_restore operate on.
; out:
;  eax = [xsave_area_size], size in bytes of one saved context
avx_save_size:
        mov     eax, [xsave_area_size]
        ret
||
202 | |||
; Store the current task's extended FPU context in a caller-supplied buffer.
; param
;  eax= avx_save_size() bytes memory area aligned on a 64-byte boundary
;
; If the current task owns the FPU, the live state is written to the buffer
; by save_context. Otherwise the current task becomes the owner: the previous
; owner's live state is saved into its APPDATA.fpu_state, the current task's
; saved image ([xsave_area_size] bytes) is copied to the buffer and the FPU
; is reset.
; ecx, esi, edi and eflags are preserved; interrupts are disabled while the
; routine runs. eax is overwritten on the ownership-change path.

align 4
avx_save:
        push    ecx
        push    esi
        push    edi

        pushfd
        cli

        clts
        mov     edi, eax                ; edi = destination buffer

        mov     ecx, [fpu_owner]
        mov     esi, [CURRENT_TASK]
        cmp     ecx, esi
        jne     .save

        call    save_context            ; eax still points to the buffer
        jmp     .exit
.save:
        mov     [fpu_owner], esi

        shl     ecx, 8
        mov     eax, [ecx+SLOT_BASE+APPDATA.fpu_state]

        call    save_context            ; park the previous owner's live state

        shl     esi, 8
        mov     esi, [esi+SLOT_BASE+APPDATA.fpu_state]
        mov     ecx, [xsave_area_size]
        add     ecx, 3                  ; size in dwords, rounded up
        shr     ecx, 2
        cld                             ; copy must ascend; popfd below restores the caller's DF
        rep movsd
        fninit
.exit:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret
||
246 | |||
; Write the live FPU/SIMD state to memory with the best available instruction.
; in:
;  eax = destination area
; When the OSXSAVE capability bit is set, XSAVE is used with the feature mask
; [xsave_edx]:[xsave_eax]; otherwise control passes to save_fpu_context.
; XSAVE path: eax and edx are preserved, ecx is overwritten.
align 4
save_context:
        bt      [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
        jnc     save_fpu_context
        push    eax
        push    edx
        mov     ecx, eax                ; xsave needs eax:edx for the mask
        mov     eax, [xsave_eax]
        mov     edx, [xsave_edx]
        xsave   [ecx]
        pop     edx
        pop     eax
        ret
; Write the live FPU state to [eax] without XSAVE.
; in:
;  eax = destination area
; FXSAVE is used when the SSE capability bit is set, FNSAVE otherwise.
save_fpu_context:
        bt      [cpu_caps], CAPS_SSE
        jc      .have_sse
        fnsave  [eax]
        ret
.have_sse:
        fxsave  [eax]
        ret
||
266 | |||
7168 | clevermous | 267 | |
; Load an FPU/SSE context previously stored in a 512-byte buffer.
; param
;  eax= source memory area
;
; If the current task owns the FPU, the buffer is loaded straight into the
; registers (FXRSTOR with SSE, FRSTOR otherwise). If not, the buffer is only
; copied into the current task's APPDATA.fpu_state.
; ecx, esi, edi and eflags are preserved; eax is overwritten.
; Interrupts are disabled while ownership is examined and the state is moved.
align 4
fpu_restore:
        push    ecx
        push    esi
        push    edi                     ; .copy uses edi as the copy destination

        mov     esi, eax                ; esi = source buffer

        pushfd
        cli

        mov     ecx, [fpu_owner]
        mov     eax, [CURRENT_TASK]
        cmp     ecx, eax
        jne     .copy

        clts
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE

        fxrstor [esi]
        jmp     .exit
.no_SSE:
        fnclex                          ;fix possible problems
        frstor  [esi]
        jmp     .exit
.copy:
        shl     eax, 8
        mov     edi, [eax+SLOT_BASE+APPDATA.fpu_state]
        mov     ecx, 512/4
        cld
        rep movsd
.exit:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret
||
309 | |||
; Load an extended FPU context previously stored in a buffer.
; param
;  eax= source memory area
;
; If the current task owns the FPU, the buffer is loaded straight into the
; registers: XRSTOR with mask [xsave_edx]:[xsave_eax] when the OSXSAVE
; capability bit is set, else FXRSTOR with SSE, else FRSTOR. If not, the
; buffer ([xsave_area_size] bytes) is only copied into the current task's
; APPDATA.fpu_state.
; ecx, edx, esi, edi and eflags are preserved; eax is overwritten.
; Interrupts are disabled while ownership is examined and the state is moved.
align 4
avx_restore:
        push    ecx
        push    esi
        push    edi                     ; .copy uses edi as the copy destination

        mov     esi, eax                ; esi = source buffer

        pushfd
        cli

        mov     ecx, [fpu_owner]
        mov     eax, [CURRENT_TASK]
        cmp     ecx, eax
        jne     .copy

        clts
        bt      [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
        jnc     .no_xsave
        push    edx
        mov     eax, [xsave_eax]
        mov     edx, [xsave_edx]
        xrstor  [esi]
        pop     edx
        jmp     .exit
.no_xsave:
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE

        fxrstor [esi]
        jmp     .exit
.no_SSE:
        fnclex                          ;fix possible problems
        frstor  [esi]
        jmp     .exit
.copy:
        shl     eax, 8
        mov     edi, [eax+SLOT_BASE+APPDATA.fpu_state]
        mov     ecx, [xsave_area_size]
        add     ecx, 3                  ; size in dwords, rounded up
        shr     ecx, 2
        cld
        rep movsd
.exit:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret
||
365 | |||
; #NM (device not available) handler: lazy FPU context switch.
; If the faulting task already owns the FPU, only CR0.TS is cleared.
; Otherwise the owner's live state is saved into its APPDATA.fpu_state,
; fpu_owner becomes the current task, and that task's saved state is loaded.
; The save/restore pair is chosen from the capability bits:
; XSAVE/XRSTOR, else FXSAVE/FXRSTOR, else FNSAVE/FRSTOR.
; NOTE(review): register preservation relies on the save_ring3_context /
; restore_ring3_context macros, which are defined outside this file.
align 4
except_7:                ;#NM exception handler
        save_ring3_context
        clts
        mov     ax, app_data
        mov     ds, ax
        mov     es, ax

        mov     ebx, [fpu_owner]
        cmp     ebx, [CURRENT_TASK]
        je      .exit

        shl     ebx, 8
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state] ; eax = owner's save area
        bt      [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
        jc      .use_xsave
        bt      [cpu_caps], CAPS_SSE
        jc      .use_fxsave

        ; x87 only
        fnsave  [eax]
        mov     ebx, [CURRENT_TASK]
        mov     [fpu_owner], ebx
        shl     ebx, 8
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
        frstor  [eax]
        jmp     .exit

.use_fxsave:
        fxsave  [eax]
        mov     ebx, [CURRENT_TASK]
        mov     [fpu_owner], ebx
        shl     ebx, 8
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
        fxrstor [eax]
        jmp     .exit

.use_xsave:
        mov     ecx, eax                ; eax:edx must carry the feature mask
        mov     eax, [xsave_eax]
        mov     edx, [xsave_edx]
        xsave   [ecx]
        mov     ebx, [CURRENT_TASK]
        mov     [fpu_owner], ebx
        shl     ebx, 8
        mov     ecx, [ebx+SLOT_BASE+APPDATA.fpu_state]
        xrstor  [ecx]                   ; mask in eax:edx is still loaded
.exit:
        restore_ring3_context
        iret
||
416 | |||
iglobal
; Slot number of the task whose state is currently loaded in the FPU.
; The save/restore routines and the #NM handler compare it with
; [CURRENT_TASK]; the initial owner is slot 2.
fpu_owner       dd 2
endg