13,35 → 13,26 |
fninit |
|
bt [cpu_caps+(CAPS_XSAVE/32)*4], CAPS_XSAVE mod 32 |
jnc .no_xsave |
jmp .no_xsave ; not ready to be jnc so far |
|
mov ecx, cr4 |
or ecx, CR4_OSXSAVE |
mov cr4, ecx |
; don't call cpuid again |
bts [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32 |
|
; zero xsave header |
mov ecx, 64/4 |
xor eax, eax |
mov edi, fpu_data + 512 ; skip legacy region |
rep stosd |
|
mov eax, 0x0d ; extended state enumeration main leaf |
mov eax, 0x0d |
xor ecx, ecx |
cpuid |
and eax, XCR0_FPU_MMX + XCR0_SSE + XCR0_AVX + XCR0_AVX512 |
xor edx, edx |
mov [xsave_eax], eax |
mov [xsave_edx], edx |
mov ebx, XCR0_FPU_MMX + XCR0_SSE + XCR0_AVX + XCR0_AVX512 |
and ebx, eax |
xor ecx, ecx |
xgetbv |
or eax, ebx |
xor ecx, ecx |
xsetbv |
|
mov eax, 0x0d |
xor ecx, ecx |
cpuid |
add ebx, 63 |
and ebx, NOT 63 |
mov [xsave_area_size], ebx |
cmp ebx, fpu_data_size |
ja $ |
49,8 → 40,6 |
test eax, XCR0_AVX512 |
jz @f |
call init_avx512 |
mov eax, [xsave_eax] |
mov edx, [xsave_edx] |
xsave [fpu_data] |
ret |
@@: |
57,18 → 46,12 |
test eax, XCR0_AVX |
jz @f |
call init_avx |
mov eax, [xsave_eax] |
mov edx, [xsave_edx] |
xsave [fpu_data] |
ret |
@@: |
test eax, XCR0_SSE |
jz $ |
call init_sse |
mov eax, [xsave_eax] |
mov edx, [xsave_edx] |
xsave [fpu_data] |
ret |
jnz .sse |
jmp .fpu_mmx |
.no_xsave: |
mov [xsave_area_size], 512 ; enough for FPU/MMX and SSE |
bt [cpu_caps], CAPS_SSE |
203,7 → 186,6 |
; param |
; eax= avx_save_size() bytes memory area aligned on a 64-byte boundary |
|
align 4 |
avx_save: |
push ecx |
push esi |
248,12 → 230,7 |
save_context: |
bt [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32 |
jnc save_fpu_context |
push eax edx |
mov ecx, eax |
mov eax, [xsave_eax] |
mov edx, [xsave_edx] |
xsave [ecx] |
pop edx eax |
xsave [eax] |
ret |
save_fpu_context: |
bt [cpu_caps], CAPS_SSE |
307,7 → 284,6 |
pop ecx |
ret |
|
align 4 |
avx_restore: |
push ecx |
push esi |
325,11 → 301,7 |
clts |
bt [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32 |
jnc .no_xsave |
push edx |
mov eax, [xsave_eax] |
mov edx, [xsave_edx] |
xrstor [esi] |
pop edx |
popfd |
pop esi |
pop ecx |
379,15 → 351,12 |
mov eax, [ebx+SLOT_BASE+APPDATA.fpu_state] |
bt [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32 |
jnc .no_xsave |
mov ecx, eax |
mov eax, [xsave_eax] |
mov edx, [xsave_edx] |
xsave [ecx] |
xsave [eax] |
mov ebx, [CURRENT_TASK] |
mov [fpu_owner], ebx |
shl ebx, 8 |
mov ecx, [ebx+SLOT_BASE+APPDATA.fpu_state] |
xrstor [ecx] |
mov eax, [ebx+SLOT_BASE+APPDATA.fpu_state] |
xrstor [eax] |
.exit: |
restore_ring3_context |
iret |