Rev 7199 | Rev 8869 | Go to most recent revision | Show entire file | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 7199 | Rev 7276 | ||
---|---|---|---|
Line 3... | Line 3... | ||
3 | ;; Copyright (C) KolibriOS team 2004-2017. All rights reserved. ;; |
3 | ;; Copyright (C) KolibriOS team 2004-2017. All rights reserved. ;; |
4 | ;; Distributed under terms of the GNU General Public License ;; |
4 | ;; Distributed under terms of the GNU General Public License ;; |
5 | ;; ;; |
5 | ;; ;; |
6 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
6 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
Line 7... | Line 7... | ||
7 | 7 | ||
Line 8... | Line 8... | ||
8 | $Revision: 7199 $ |
8 | $Revision: 7276 $ |
9 | 9 | ||
10 | 10 | ||
Line 11... | Line 11... | ||
11 | init_fpu: |
11 | init_fpu: |
12 | clts |
12 | clts |
Line 13... | Line 13... | ||
13 | fninit |
13 | fninit |
14 | 14 | ||
15 | bt [cpu_caps+(CAPS_XSAVE/32)*4], CAPS_XSAVE mod 32 |
15 | bt [cpu_caps+(CAPS_XSAVE/32)*4], CAPS_XSAVE mod 32 |
- | 16 | jnc .no_xsave |
|
- | 17 | ||
Line -... | Line 18... | ||
- | 18 | mov ecx, cr4 |
|
16 | jmp .no_xsave ; not ready to be jnc so far |
19 | or ecx, CR4_OSXSAVE |
- | 20 | mov cr4, ecx |
|
- | 21 | ; don't call cpuid again |
|
- | 22 | bts [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32 |
|
- | 23 | ||
- | 24 | ; zero xsave header |
|
17 | 25 | mov ecx, 64/4 |
|
18 | mov ecx, cr4 |
26 | xor eax, eax |
19 | or ecx, CR4_OSXSAVE |
27 | mov edi, fpu_data + 512 ; skip legacy region |
20 | mov cr4, ecx |
28 | rep stosd |
21 | 29 | ||
22 | mov eax, 0x0d |
- | |
23 | xor ecx, ecx |
30 | mov eax, 0x0d ; extended state enumeration main leaf |
24 | cpuid |
31 | xor ecx, ecx |
25 | mov ebx, XCR0_FPU_MMX + XCR0_SSE + XCR0_AVX + XCR0_AVX512 |
32 | cpuid |
Line 26... | Line 33... | ||
26 | and ebx, eax |
33 | and eax, XCR0_FPU_MMX + XCR0_SSE + XCR0_AVX + XCR0_AVX512 |
27 | xor ecx, ecx |
34 | xor edx, edx |
28 | xgetbv |
35 | mov [xsave_eax], eax |
- | 36 | mov [xsave_edx], edx |
|
- | 37 | xor ecx, ecx |
|
29 | or eax, ebx |
38 | xsetbv |
30 | xor ecx, ecx |
39 | |
31 | xsetbv |
40 | mov eax, 0x0d |
Line 32... | Line 41... | ||
32 | 41 | xor ecx, ecx |
|
33 | mov eax, 0x0d |
42 | cpuid |
34 | xor ecx, ecx |
43 | add ebx, 63 |
- | 44 | and ebx, NOT 63 |
|
- | 45 | mov [xsave_area_size], ebx |
|
35 | cpuid |
46 | cmp ebx, fpu_data_size |
36 | mov [xsave_area_size], ebx |
47 | ja $ |
37 | cmp ebx, fpu_data_size |
48 | |
38 | ja $ |
49 | test eax, XCR0_AVX512 |
39 | 50 | jz @f |
|
40 | test eax, XCR0_AVX512 |
51 | call init_avx512 |
- | 52 | mov eax, [xsave_eax] |
|
- | 53 | mov edx, [xsave_edx] |
|
41 | jz @f |
54 | xsave [fpu_data] |
42 | call init_avx512 |
55 | ret |
43 | xsave [fpu_data] |
56 | @@: |
44 | ret |
57 | test eax, XCR0_AVX |
45 | @@: |
58 | jz @f |
- | 59 | call init_avx |
|
- | 60 | mov eax, [xsave_eax] |
|
- | 61 | mov edx, [xsave_edx] |
|
46 | test eax, XCR0_AVX |
62 | xsave [fpu_data] |
- | 63 | ret |
|
47 | jz @f |
64 | @@: |
48 | call init_avx |
65 | test eax, XCR0_SSE |
49 | xsave [fpu_data] |
66 | jz $ |
50 | ret |
67 | call init_sse |
51 | @@: |
68 | mov eax, [xsave_eax] |
Line 184... | Line 201... | ||
184 | ret |
201 | ret |
Line 185... | Line 202... | ||
185 | 202 | ||
186 | ; param |
203 | ; param |
Line -... | Line 204... | ||
- | 204 | ; eax= avx_save_size() bytes memory area aligned on a 64-byte boundary |
|
187 | ; eax= avx_save_size() bytes memory area aligned on a 64-byte boundary |
205 | |
188 | 206 | align 4 |
|
189 | avx_save: |
207 | avx_save: |
190 | push ecx |
208 | push ecx |
Line 228... | Line 246... | ||
228 | 246 | ||
229 | align 4 |
247 | align 4 |
230 | save_context: |
248 | save_context: |
231 | bt [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32 |
249 | bt [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32 |
- | 250 | jnc save_fpu_context |
|
- | 251 | push eax edx |
|
- | 252 | mov ecx, eax |
|
- | 253 | mov eax, [xsave_eax] |
|
232 | jnc save_fpu_context |
254 | mov edx, [xsave_edx] |
- | 255 | xsave [ecx] |
|
233 | xsave [eax] |
256 | pop edx eax |
234 | ret |
257 | ret |
235 | save_fpu_context: |
258 | save_fpu_context: |
236 | bt [cpu_caps], CAPS_SSE |
259 | bt [cpu_caps], CAPS_SSE |
237 | jnc .no_SSE |
260 | jnc .no_SSE |
Line 282... | Line 305... | ||
282 | popfd |
305 | popfd |
283 | pop esi |
306 | pop esi |
284 | pop ecx |
307 | pop ecx |
285 | ret |
308 | ret |
Line -... | Line 309... | ||
- | 309 | ||
286 | 310 | align 4 |
|
287 | avx_restore: |
311 | avx_restore: |
288 | push ecx |
312 | push ecx |
Line 289... | Line 313... | ||
289 | push esi |
313 | push esi |
Line 299... | Line 323... | ||
299 | jne .copy |
323 | jne .copy |
Line 300... | Line 324... | ||
300 | 324 | ||
301 | clts |
325 | clts |
302 | bt [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32 |
326 | bt [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32 |
- | 327 | jnc .no_xsave |
|
- | 328 | push edx |
|
- | 329 | mov eax, [xsave_eax] |
|
303 | jnc .no_xsave |
330 | mov edx, [xsave_edx] |
- | 331 | xrstor [esi] |
|
304 | xrstor [esi] |
332 | pop edx |
305 | popfd |
333 | popfd |
306 | pop esi |
334 | pop esi |
307 | pop ecx |
335 | pop ecx |
308 | ret |
336 | ret |
Line 349... | Line 377... | ||
349 | 377 | ||
350 | shl ebx, 8 |
378 | shl ebx, 8 |
351 | mov eax, [ebx+SLOT_BASE+APPDATA.fpu_state] |
379 | mov eax, [ebx+SLOT_BASE+APPDATA.fpu_state] |
352 | bt [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32 |
380 | bt [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32 |
- | 381 | jnc .no_xsave |
|
- | 382 | mov ecx, eax |
|
- | 383 | mov eax, [xsave_eax] |
|
353 | jnc .no_xsave |
384 | mov edx, [xsave_edx] |
354 | xsave [eax] |
385 | xsave [ecx] |
355 | mov ebx, [CURRENT_TASK] |
386 | mov ebx, [CURRENT_TASK] |
356 | mov [fpu_owner], ebx |
387 | mov [fpu_owner], ebx |
357 | shl ebx, 8 |
388 | shl ebx, 8 |
358 | mov eax, [ebx+SLOT_BASE+APPDATA.fpu_state] |
389 | mov ecx, [ebx+SLOT_BASE+APPDATA.fpu_state] |
359 | xrstor [eax] |
390 | xrstor [ecx] |
360 | .exit: |
391 | .exit: |
361 | restore_ring3_context |
392 | restore_ring3_context |
362 | iret |
393 | iret |
363 | .no_xsave: |
394 | .no_xsave: |