Subversion Repositories Kolibri OS

Rev

Rev 9715 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2288 clevermous 1
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
;;                                                              ;;
10051 ace_dent 3
;; Copyright (C) KolibriOS team 2004-2024. All rights reserved. ;;
2288 clevermous 4
;; Distributed under terms of the GNU General Public License    ;;
5
;;                                                              ;;
6
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7
 
8
 
9
;-----------------------------------------------------------------------
; init_fpu - one-time FPU/SIMD bring-up.
;
; Picks the richest state-saving mechanism the CPU advertises in
; cpu_caps, enables the matching control-register bits through the
; init_* helpers, and stores an initial register image in fpu_data.
;
; In:    nothing
; Out:   [xsave_area_size]        = size in bytes of one saved context
;        [xsave_eax]:[xsave_edx]  = component mask used for xsave/xrstor
;                                   (XSAVE path only)
;        fpu_data                 = initial FPU/SIMD image
; Clobb: eax, ebx, ecx, edx, edi, flags, DF (cleared on the XSAVE path)
; Note:  deliberately spins forever (jump to $) when the required XSAVE
;        area is larger than fpu_data_size, or when XSAVE is present
;        but the SSE component is not reported.
;-----------------------------------------------------------------------
init_fpu:
        clts
        fninit

        bt      [cpu_caps+(CAPS_XSAVE/32)*4], CAPS_XSAVE mod 32
        jnc     .no_xsave

        mov     ecx, cr4
        or      ecx, CR4_OSXSAVE
        mov     cr4, ecx
        ; don't call cpuid again: mirror CR4.OSXSAVE into the cached caps
        bts     [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32

        ; zero xsave header (64 bytes that follow the 512-byte legacy region)
        mov     ecx, 64/4
        xor     eax, eax
        mov     edi, fpu_data + 512     ; skip legacy region
        cld                             ; the fill must run upwards, like the
                                        ; other string copies in this file
        rep stosd

        mov     eax, 0x0d       ; extended state enumeration main leaf
        xor     ecx, ecx
        cpuid
        ; keep only the state components this kernel knows how to handle
        and     eax, XCR0_FPU_MMX + XCR0_SSE + XCR0_AVX + XCR0_AVX512
        xor     edx, edx
        mov     [xsave_eax], eax
        mov     [xsave_edx], edx
        xor     ecx, ecx                ; ecx = 0 selects XCR0
        xsetbv

        ; query the leaf again now that XCR0 is programmed and take the
        ; save-area size from ebx
        mov     eax, 0x0d
        xor     ecx, ecx
        cpuid
        add     ebx, 63                 ; round the size up to a multiple of 64
        and     ebx, NOT 63
        mov     [xsave_area_size], ebx
        cmp     ebx, fpu_data_size
        ja      $                       ; static buffer too small: halt here

        ; initialise the widest register file that is reported
        test    eax, XCR0_AVX512
        jz      .try_avx
        call    init_avx512
        jmp     .store_initial_image
.try_avx:
        test    eax, XCR0_AVX
        jz      .try_sse
        call    init_avx
        jmp     .store_initial_image
.try_sse:
        test    eax, XCR0_SSE
        jz      $                       ; XSAVE without SSE: halt here
        call    init_sse
.store_initial_image:
        mov     eax, [xsave_eax]
        mov     edx, [xsave_edx]
        xsave   [fpu_data]
        ret

.no_xsave:
        mov     [xsave_area_size], 512  ; enough for FPU/MMX and SSE
        bt      [cpu_caps], CAPS_SSE
        jnc     .fpu_mmx
.sse:
        call    init_sse
        fxsave  [fpu_data]
        ret
.fpu_mmx:
        call    init_fpu_mmx
        fnsave  [fpu_data]
        ret
2288 clevermous 82
 
7124 dunkaist 83
;-----------------------------------------------------------------------
; init_fpu_mmx - program CR0 for a CPU that has x87/MMX only.
;
; In:    nothing
; Out:   CR0 with EM cleared and MP, NE set
; Clobb: ecx, flags
;-----------------------------------------------------------------------
init_fpu_mmx:
        mov     ecx, cr0                ; ecx = current CR0
        and     ecx, not CR0_EM         ; drop the EM bit
        or      ecx, CR0_MP + CR0_NE    ; raise MP and NE
        mov     cr0, ecx                ; write the result back
        ret
89
 
90
;-----------------------------------------------------------------------
; init_sse - enable SSE and put MXCSR / xmm0-xmm7 into a known state.
;
; In:    nothing
; Out:   CR4 with OSFXSR and OSXMMEXPT set
;        CR0 with EM and MP cleared, NE set
;        MXCSR = MXCSR_INIT, xmm0..xmm7 = 0
; Clobb: ebx, ecx, flags
; Stack: uses one dword, pushed and released inside the routine
;-----------------------------------------------------------------------
init_sse:
        mov     ebx, cr4
        mov     ecx, cr0
        or      ebx, CR4_OSFXSR + CR4_OSXMMEXPT
        mov     cr4, ebx

        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; Stage the MXCSR value in a real stack slot. Memory below esp may
        ; be overwritten by an interrupt frame at any moment in ring 0, so
        ; it must not hold the value between the store and the load.
        push    dword MXCSR_INIT
        ldmxcsr [esp]
        lea     esp, [esp+4]            ; release the slot without touching flags

        xorps   xmm0, xmm0
        xorps   xmm1, xmm1
        xorps   xmm2, xmm2
        xorps   xmm3, xmm3
        xorps   xmm4, xmm4
        xorps   xmm5, xmm5
        xorps   xmm6, xmm6
        xorps   xmm7, xmm7
        ret
7124 dunkaist 112
 
113
;-----------------------------------------------------------------------
; init_avx - enable SSE/AVX use and reset MXCSR and the vector registers.
;
; In:    nothing
; Out:   CR4 with OSFXSR and OSXMMEXPT set
;        CR0 with EM and MP cleared, NE set
;        MXCSR = MXCSR_INIT, vector registers zeroed by vzeroall
; Clobb: ebx, ecx, flags
; Stack: uses one dword, pushed and released inside the routine
;-----------------------------------------------------------------------
init_avx:
        mov     ebx, cr4
        or      ebx, CR4_OSFXSR + CR4_OSXMMEXPT
        mov     cr4, ebx

        mov     ecx, cr0
        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; Stage the MXCSR value in a real stack slot. Memory below esp may
        ; be overwritten by an interrupt frame at any moment in ring 0, so
        ; it must not hold the value between the store and the load.
        push    dword MXCSR_INIT
        vldmxcsr [esp]
        lea     esp, [esp+4]            ; release the slot without touching flags

        vzeroall
        ret
128
 
7124 dunkaist 129
;-----------------------------------------------------------------------
; init_avx512 - enable SSE/AVX use, reset MXCSR and clear zmm0-zmm7.
;
; In:    nothing
; Out:   CR4 with OSFXSR and OSXMMEXPT set
;        CR0 with EM and MP cleared, NE set
;        MXCSR = MXCSR_INIT, zmm0..zmm7 = 0
; Clobb: ebx, ecx, flags
; Stack: uses one dword, pushed and released inside the routine
;-----------------------------------------------------------------------
init_avx512:
        mov     ebx, cr4
        or      ebx, CR4_OSFXSR + CR4_OSXMMEXPT
        mov     cr4, ebx

        mov     ecx, cr0
        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; Stage the MXCSR value in a real stack slot. Memory below esp may
        ; be overwritten by an interrupt frame at any moment in ring 0, so
        ; it must not hold the value between the store and the load.
        push    dword MXCSR_INIT
        vldmxcsr [esp]
        lea     esp, [esp+4]            ; release the slot without touching flags

        vpxorq  zmm0, zmm0, zmm0
        vpxorq  zmm1, zmm1, zmm1
        vpxorq  zmm2, zmm2, zmm2
        vpxorq  zmm3, zmm3, zmm3
        vpxorq  zmm4, zmm4, zmm4
        vpxorq  zmm5, zmm5, zmm5
        vpxorq  zmm6, zmm6, zmm6
        vpxorq  zmm7, zmm7, zmm7

        ret
152
 
2288 clevermous 153
;-----------------------------------------------------------------------
; fpu_save - store the current slot's legacy FPU/SSE state in a buffer.
;
; In:    eax = 512 bytes memory area aligned on a 16-byte boundary
; Out:   buffer filled with the state; afterwards [fpu_owner] equals
;        [current_slot_idx]
; Keeps: ecx, esi, edi, eflags (restored on exit)
; Clobb: eax when another slot owned the FPU (it is left holding that
;        slot's APPDATA.fpu_state pointer)
;
; Runs with interrupts disabled between pushfd/cli and popfd.
;-----------------------------------------------------------------------

align 4
fpu_save:
        push    ecx
        push    esi
        push    edi

        pushfd
        cli

        mov     edi, eax                ; edi = destination buffer
        clts

        mov     esi, [current_slot_idx]
        mov     ecx, [fpu_owner]
        cmp     esi, ecx
        je      .own_state              ; registers already belong to this slot

        ; Another slot owns the registers: take ownership, then flush the
        ; previous owner's registers into its own save area.
        mov     [fpu_owner], esi

        shl     ecx, BSF sizeof.APPDATA
        mov     eax, [SLOT_BASE + ecx + APPDATA.fpu_state]

        call    save_context

; first 512 bytes of XSAVE area have the same format as FXSAVE
        ; hand the caller a copy of this slot's saved image
        shl     esi, BSF sizeof.APPDATA
        mov     esi, [SLOT_BASE + esi + APPDATA.fpu_state]
        mov     ecx, 512/4
        cld
        rep movsd
        fninit
        jmp     .done

.own_state:
        call    save_fpu_context        ; eax still points at the buffer

.done:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret
196
 
7168 clevermous 197
;-----------------------------------------------------------------------
; avx_save_size - report how large a buffer avx_save/avx_restore need.
;
; In:    nothing
; Out:   eax = [xsave_area_size]
; Clobb: nothing else
;-----------------------------------------------------------------------
avx_save_size:
        mov     eax, [xsave_area_size]  ; eax = bytes per saved context
        ret
200
 
201
;-----------------------------------------------------------------------
; avx_save - store the current slot's full FPU/SIMD state in a buffer.
;
; In:    eax = avx_save_size() bytes memory area aligned on a 64-byte
;              boundary
; Out:   buffer filled with the state; afterwards [fpu_owner] equals
;        [current_slot_idx]
; Keeps: ecx, esi, edi, eflags (restored on exit)
; Clobb: eax when another slot owned the FPU (it is left holding that
;        slot's APPDATA.fpu_state pointer)
;
; Runs with interrupts disabled between pushfd/cli and popfd.
;-----------------------------------------------------------------------

align 4
avx_save:
        push    ecx
        push    esi
        push    edi

        pushfd
        cli

        clts
        mov     edi, eax                ; edi = destination buffer

        mov     ecx, [fpu_owner]
        mov     esi, [current_slot_idx]
        cmp     ecx, esi
        jne     .save

        ; registers already belong to this slot: dump them straight
        ; into the caller's buffer
        call    save_context
        jmp     .exit
.save:
        ; Another slot owns the registers: take ownership, then flush the
        ; previous owner's registers into its own save area.
        mov     [fpu_owner], esi

        shl     ecx, BSF sizeof.APPDATA
        mov     eax, [SLOT_BASE + ecx + APPDATA.fpu_state]

        call    save_context

        ; hand the caller a copy of this slot's saved image
        shl     esi, BSF sizeof.APPDATA
        mov     esi, [SLOT_BASE + esi + APPDATA.fpu_state]
        mov     ecx, [xsave_area_size]
        add     ecx, 3                  ; byte count -> dword count, rounded up
        shr     ecx, 2
        cld                             ; copy upwards, as fpu_save and
                                        ; avx_restore already do
        rep movsd
        fninit
.exit:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret
244
 
2288 clevermous 245
align 4
246
;-----------------------------------------------------------------------
; save_context - dump the live FPU/SIMD registers to memory, using
; xsave when the OSXSAVE capability bit is set and falling back to
; save_fpu_context otherwise.
;
; In:    eax = destination save area
; Out:   save area written
; Keeps: eax, edx
; Clobb: ecx (XSAVE path only), flags
;-----------------------------------------------------------------------
save_context:
        bt      [cpu_caps + (CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
        jnc     save_fpu_context        ; no XSAVE: use the legacy routine
        push    eax
        push    edx
        mov     ecx, eax                ; xsave needs edx:eax for the mask,
                                        ; so the pointer moves to ecx
        mov     eax, [xsave_eax]
        mov     edx, [xsave_edx]
        xsave   [ecx]
        pop     edx
        pop     eax
        ret

;-----------------------------------------------------------------------
; save_fpu_context - dump the legacy state with fxsave when the SSE
; capability bit is set, otherwise with fnsave.
;
; In:    eax = destination save area
; Clobb: flags
;-----------------------------------------------------------------------
save_fpu_context:
        bt      [cpu_caps], CAPS_SSE
        jc      .have_sse
        fnsave  [eax]                   ; x87-only CPU
        ret
.have_sse:
        fxsave  [eax]
        ret
264
 
7168 clevermous 265
 
2288 clevermous 266
align 4
267
;-----------------------------------------------------------------------
; fpu_restore - reload legacy FPU/SSE state from a 512-byte buffer.
;
; In:    eax = source buffer (same layout that fpu_save produces)
; Out:   if the current slot owns the FPU, the registers are reloaded
;        from the buffer; otherwise the buffer is copied into the
;        current slot's APPDATA.fpu_state area
; Keeps: ecx, esi, edi, eflags (restored on exit)
; Clobb: eax
;
; edi is saved and restored here because the copy path uses it as the
; destination pointer for rep movsd.
;-----------------------------------------------------------------------
fpu_restore:
        push    ecx
        push    esi
        push    edi

        mov     esi, eax                ; esi = source buffer

        pushfd
        cli

        mov     ecx, [fpu_owner]
        mov     eax, [current_slot_idx]
        cmp     ecx, eax
        jne     .copy

        ; the current slot owns the registers: load them directly
        clts
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE

        fxrstor [esi]
        jmp     .exit
.no_SSE:
        fnclex                  ;fix possible problems
        frstor  [esi]
        jmp     .exit
.copy:
        ; another slot owns the registers: only update this slot's image
        shl     eax, BSF sizeof.APPDATA
        mov     edi, [SLOT_BASE + eax + APPDATA.fpu_state]
        mov     ecx, 512/4
        cld
        rep movsd
.exit:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret
307
 
7276 dunkaist 308
align 4
7168 clevermous 309
;-----------------------------------------------------------------------
; avx_restore - reload full FPU/SIMD state from a buffer.
;
; In:    eax = source buffer of [xsave_area_size] bytes (same layout
;              that avx_save produces)
; Out:   if the current slot owns the FPU, the registers are reloaded
;        from the buffer (xrstor, fxrstor or frstor depending on the
;        capability bits); otherwise the buffer is copied into the
;        current slot's APPDATA.fpu_state area
; Keeps: ecx, edx, esi, edi, eflags (restored on exit)
; Clobb: eax
;
; edi is saved and restored here because the copy path uses it as the
; destination pointer for rep movsd.
;-----------------------------------------------------------------------
avx_restore:
        push    ecx
        push    esi
        push    edi

        mov     esi, eax                ; esi = source buffer

        pushfd
        cli

        mov     ecx, [fpu_owner]
        mov     eax, [current_slot_idx]
        cmp     ecx, eax
        jne     .copy

        ; the current slot owns the registers: load them directly
        clts
        bt      [cpu_caps + (CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
        jnc     .no_xsave
        push    edx
        mov     eax, [xsave_eax]        ; edx:eax = component mask
        mov     edx, [xsave_edx]
        xrstor  [esi]
        pop     edx
        jmp     .exit
.no_xsave:
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE

        fxrstor [esi]
        jmp     .exit
.no_SSE:
        fnclex                  ;fix possible problems
        frstor  [esi]
        jmp     .exit
.copy:
        ; another slot owns the registers: only update this slot's image
        shl     eax, BSF sizeof.APPDATA
        mov     edi, [SLOT_BASE + eax + APPDATA.fpu_state]
        mov     ecx, [xsave_area_size]
        add     ecx, 3                  ; byte count -> dword count, rounded up
        shr     ecx, 2
        cld
        rep movsd
.exit:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret
363
 
364
align 4
365
;-----------------------------------------------------------------------
; except_7 - #NM exception handler.
;
; If the FPU owner differs from the current slot, the live registers
; are written to the owner's APPDATA.fpu_state area, ownership passes
; to the current slot, and that slot's saved image is loaded. The
; save/restore instruction pair is chosen from the capability bits:
; xsave/xrstor, fxsave/fxrstor or fnsave/frstor.
;
; Register preservation is left to the save_ring3_context and
; restore_ring3_context macros, which are defined elsewhere.
;-----------------------------------------------------------------------
except_7:                  ;#NM exception handler
        save_ring3_context
        clts
        mov     ax, app_data
        mov     ds, ax
        mov     es, ax

        mov     ebx, [fpu_owner]
        cmp     ebx, [current_slot_idx]
        je      .exit                   ; nothing to switch

        ; eax = save area of the slot that owns the registers right now
        shl     ebx, BSF sizeof.APPDATA
        mov     eax, [SLOT_BASE + ebx + APPDATA.fpu_state]
        bt      [cpu_caps + (CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
        jnc     .no_xsave

        ; --- XSAVE path: edx:eax carries the mask for both instructions
        mov     ecx, eax
        mov     eax, [xsave_eax]
        mov     edx, [xsave_edx]
        xsave   [ecx]
        mov     ebx, [current_slot_idx]
        mov     [fpu_owner], ebx
        shl     ebx, BSF sizeof.APPDATA
        mov     ecx, [SLOT_BASE + ebx + APPDATA.fpu_state]
        xrstor  [ecx]

.exit:
        restore_ring3_context
        iret

.no_xsave:
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE

        ; --- FXSAVE path
        fxsave  [eax]
        mov     ebx, [current_slot_idx]
        mov     [fpu_owner], ebx
        shl     ebx, BSF sizeof.APPDATA
        mov     eax, [SLOT_BASE + ebx + APPDATA.fpu_state]
        fxrstor [eax]
        jmp     .exit

.no_SSE:
        ; --- x87-only path
        fnsave  [eax]
        mov     ebx, [current_slot_idx]
        mov     [fpu_owner], ebx
        shl     ebx, BSF sizeof.APPDATA
        mov     eax, [SLOT_BASE + ebx + APPDATA.fpu_state]
        frstor  [eax]
        jmp     .exit
414
 
415
iglobal
  ; Slot index whose state is currently held in the FPU/SIMD registers;
  ; compared against [current_slot_idx] by the save/restore routines and
  ; by the #NM handler.
  ; NOTE(review): the initial value 2 is presumably the first slot that
  ; runs after boot - confirm against the slot table setup.
  fpu_owner dd 2
endg