Subversion Repositories Kolibri OS

Rev

Rev 7199 | Rev 8869 | Go to most recent revision | Show entire file | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 7199 Rev 7276
Line 3... Line 3...
3
;; Copyright (C) KolibriOS team 2004-2017. All rights reserved. ;;
3
;; Copyright (C) KolibriOS team 2004-2017. All rights reserved. ;;
4
;; Distributed under terms of the GNU General Public License    ;;
4
;; Distributed under terms of the GNU General Public License    ;;
5
;;                                                              ;;
5
;;                                                              ;;
6
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
Line 7... Line 7...
7
 
7
 
Line 8... Line 8...
8
$Revision: 7199 $
8
$Revision: 7276 $
9
 
9
 
10
 
10
 
Line 11... Line 11...
11
init_fpu:
11
init_fpu:
12
        clts
12
        clts
Line 13... Line 13...
13
        fninit
13
        fninit
14
 
14
 
15
        bt      [cpu_caps+(CAPS_XSAVE/32)*4], CAPS_XSAVE mod 32
15
        bt      [cpu_caps+(CAPS_XSAVE/32)*4], CAPS_XSAVE mod 32
-
 
16
        jnc     .no_xsave
-
 
17
 
Line -... Line 18...
-
 
18
        mov     ecx, cr4
16
        jmp     .no_xsave       ; not ready to be jnc so far
19
        or      ecx, CR4_OSXSAVE
-
 
20
        mov     cr4, ecx
-
 
21
        ; don't call cpuid again
-
 
22
        bts     [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
-
 
23
 
-
 
24
        ; zero xsave header
17
 
25
        mov     ecx, 64/4
18
        mov     ecx, cr4
26
        xor     eax, eax
19
        or      ecx, CR4_OSXSAVE
27
        mov     edi, fpu_data + 512     ; skip legacy region
20
        mov     cr4, ecx
28
        rep stosd
21
 
29
 
22
        mov     eax, 0x0d
-
 
23
        xor     ecx, ecx
30
        mov     eax, 0x0d       ; extended state enumeration main leaf
24
        cpuid
31
        xor     ecx, ecx
25
        mov     ebx, XCR0_FPU_MMX + XCR0_SSE + XCR0_AVX + XCR0_AVX512
32
        cpuid
Line 26... Line 33...
26
        and     ebx, eax
33
        and     eax, XCR0_FPU_MMX + XCR0_SSE + XCR0_AVX + XCR0_AVX512
27
        xor     ecx, ecx
34
        xor     edx, edx
28
        xgetbv
35
        mov     [xsave_eax], eax
-
 
36
        mov     [xsave_edx], edx
-
 
37
        xor     ecx, ecx
29
        or      eax, ebx
38
        xsetbv
30
        xor     ecx, ecx
39
 
31
        xsetbv
40
        mov     eax, 0x0d
Line 32... Line 41...
32
 
41
        xor     ecx, ecx
33
        mov     eax, 0x0d
42
        cpuid
34
        xor     ecx, ecx
43
        add     ebx, 63
-
 
44
        and     ebx, NOT 63
-
 
45
        mov     [xsave_area_size], ebx
35
        cpuid
46
        cmp     ebx, fpu_data_size
36
        mov     [xsave_area_size], ebx
47
        ja      $
37
        cmp     ebx, fpu_data_size
48
 
38
        ja      $
49
        test    eax, XCR0_AVX512
39
 
50
        jz      @f
40
        test    eax, XCR0_AVX512
51
        call    init_avx512
-
 
52
        mov     eax, [xsave_eax]
-
 
53
        mov     edx, [xsave_edx]
41
        jz      @f
54
        xsave   [fpu_data]
42
        call    init_avx512
55
        ret
43
        xsave   [fpu_data]
56
@@:
44
        ret
57
        test    eax, XCR0_AVX
45
@@:
58
        jz      @f
-
 
59
        call    init_avx
-
 
60
        mov     eax, [xsave_eax]
-
 
61
        mov     edx, [xsave_edx]
46
        test    eax, XCR0_AVX
62
        xsave   [fpu_data]
-
 
63
        ret
47
        jz      @f
64
@@:
48
        call    init_avx
65
        test    eax, XCR0_SSE
49
        xsave   [fpu_data]
66
        jz      $
50
        ret
67
        call    init_sse
51
@@:
68
        mov     eax, [xsave_eax]
Line 184... Line 201...
184
        ret
201
        ret
Line 185... Line 202...
185
 
202
 
186
; param
203
; param
Line -... Line 204...
-
 
204
;  eax= avx_save_size() bytes memory area aligned on a 64-byte boundary
187
;  eax= avx_save_size() bytes memory area aligned on a 64-byte boundary
205
 
188
 
206
align 4
189
avx_save:
207
avx_save:
190
        push    ecx
208
        push    ecx
Line 228... Line 246...
228
 
246
 
229
align 4
247
align 4
230
save_context:
248
save_context:
231
        bt      [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
249
        bt      [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
-
 
250
        jnc     save_fpu_context
-
 
251
        push    eax edx
-
 
252
        mov     ecx, eax
-
 
253
        mov     eax, [xsave_eax]
232
        jnc     save_fpu_context
254
        mov     edx, [xsave_edx]
-
 
255
        xsave   [ecx]
233
        xsave   [eax]
256
        pop     edx eax
234
        ret
257
        ret
235
save_fpu_context:
258
save_fpu_context:
236
        bt      [cpu_caps], CAPS_SSE
259
        bt      [cpu_caps], CAPS_SSE
237
        jnc     .no_SSE
260
        jnc     .no_SSE
Line 282... Line 305...
282
        popfd
305
        popfd
283
        pop     esi
306
        pop     esi
284
        pop     ecx
307
        pop     ecx
285
        ret
308
        ret
Line -... Line 309...
-
 
309
 
286
 
310
align 4
287
avx_restore:
311
avx_restore:
288
        push    ecx
312
        push    ecx
Line 289... Line 313...
289
        push    esi
313
        push    esi
Line 299... Line 323...
299
        jne     .copy
323
        jne     .copy
Line 300... Line 324...
300
 
324
 
301
        clts
325
        clts
302
        bt      [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
326
        bt      [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
-
 
327
        jnc     .no_xsave
-
 
328
        push    edx
-
 
329
        mov     eax, [xsave_eax]
303
        jnc     .no_xsave
330
        mov     edx, [xsave_edx]
-
 
331
        xrstor  [esi]
304
        xrstor  [esi]
332
        pop     edx
305
        popfd
333
        popfd
306
        pop     esi
334
        pop     esi
307
        pop     ecx
335
        pop     ecx
308
        ret
336
        ret
Line 349... Line 377...
349
 
377
 
350
        shl     ebx, 8
378
        shl     ebx, 8
351
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
379
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
352
        bt      [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
380
        bt      [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
-
 
381
        jnc     .no_xsave
-
 
382
        mov     ecx, eax
-
 
383
        mov     eax, [xsave_eax]
353
        jnc     .no_xsave
384
        mov     edx, [xsave_edx]
354
        xsave   [eax]
385
        xsave   [ecx]
355
        mov     ebx, [CURRENT_TASK]
386
        mov     ebx, [CURRENT_TASK]
356
        mov     [fpu_owner], ebx
387
        mov     [fpu_owner], ebx
357
        shl     ebx, 8
388
        shl     ebx, 8
358
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
389
        mov     ecx, [ebx+SLOT_BASE+APPDATA.fpu_state]
359
        xrstor  [eax]
390
        xrstor  [ecx]
360
.exit:
391
.exit:
361
        restore_ring3_context
392
        restore_ring3_context
362
        iret
393
        iret
363
.no_xsave:
394
.no_xsave: