Subversion Repositories Kolibri OS

Rev

Rev 7199 | Rev 8869 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2288 clevermous 1
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
;;                                                              ;;
7124 dunkaist 3
;; Copyright (C) KolibriOS team 2004-2017. All rights reserved. ;;
2288 clevermous 4
;; Distributed under terms of the GNU General Public License    ;;
5
;;                                                              ;;
6
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7
 
8
$Revision: 7276 $
9
 
10
 
11
; Boot-time FPU/SIMD initialisation.
; Selects the richest state-management scheme that cpu_caps advertises
; (XSAVE -> FXSAVE -> FNSAVE), programs the control registers through the
; init_* helpers and stores the freshly initialised register image in
; fpu_data.
; Out:      [xsave_area_size] = size in bytes of one saved context
;           [xsave_eax], [xsave_edx] = component mask used with
;           xsave/xrstor (written on the XSAVE path only)
; Clobbers: eax, ebx, ecx, edx, edi, flags (plus whatever init_* clobber)
init_fpu:
        clts
        fninit

        bt      [cpu_caps+(CAPS_XSAVE/32)*4], CAPS_XSAVE mod 32
        jnc     .no_xsave

        ; let the CPU know the OS manages extended state
        mov     ecx, cr4
        or      ecx, CR4_OSXSAVE
        mov     cr4, ecx
        ; mirror the new CR4 bit in cpu_caps instead of re-running cpuid
        bts     [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32

        ; clear the 64-byte XSAVE header that follows the legacy region
        mov     ecx, 64/4
        xor     eax, eax
        mov     edi, fpu_data + 512
        rep stosd

        ; cpuid leaf 0x0d, subleaf 0: eax = supported state components
        mov     eax, 0x0d
        xor     ecx, ecx
        cpuid
        ; keep only the components the kernel knows how to handle
        and     eax, XCR0_FPU_MMX + XCR0_SSE + XCR0_AVX + XCR0_AVX512
        xor     edx, edx
        mov     [xsave_eax], eax
        mov     [xsave_edx], edx
        xor     ecx, ecx                ; XCR0
        xsetbv

        ; ask again: ebx is taken as the area size for the components
        ; that were just enabled
        mov     eax, 0x0d
        xor     ecx, ecx
        cpuid
        add     ebx, 63                 ; round the size up to a multiple
        and     ebx, NOT 63             ; of 64 bytes
        mov     [xsave_area_size], ebx
        cmp     ebx, fpu_data_size
        ja      $                       ; area does not fit: hang here

        ; initialise the widest vector extension that is present
        test    eax, XCR0_AVX512
        jz      .try_avx
        call    init_avx512
        jmp     .xsave_initial
.try_avx:
        test    eax, XCR0_AVX
        jz      .try_sse
        call    init_avx
        jmp     .xsave_initial
.try_sse:
        test    eax, XCR0_SSE
        jz      $                       ; XSAVE without SSE: hang here
        call    init_sse
.xsave_initial:
        ; store the initial state image using the enabled component mask
        mov     eax, [xsave_eax]
        mov     edx, [xsave_edx]
        xsave   [fpu_data]
        ret

.no_xsave:
        mov     [xsave_area_size], 512  ; enough for FPU/MMX and SSE
        bt      [cpu_caps], CAPS_SSE
        jnc     .fpu_mmx
.sse:
        call    init_sse
        fxsave  [fpu_data]
        ret
.fpu_mmx:
        call    init_fpu_mmx
        fnsave  [fpu_data]
        ret
2288 clevermous 84
 
7124 dunkaist 85
; Program CR0 for a CPU that only has x87/MMX state:
; sets CR0_MP and CR0_NE, clears CR0_EM.
; Clobbers: ecx, flags
init_fpu_mmx:
        mov     ecx, cr0
        or      ecx, CR0_MP + CR0_NE
        and     ecx, not CR0_EM
        mov     cr0, ecx
        ret
91
 
92
; Enable SSE and bring its state to a known value.
; Sets CR4_OSFXSR and CR4_OSXMMEXPT, clears CR0_EM and CR0_MP, sets CR0_NE,
; loads MXCSR with MXCSR_INIT and zeroes xmm0-xmm7.
; Clobbers: ebx, ecx, xmm0-xmm7, MXCSR, flags
init_sse:
        mov     ebx, cr4
        mov     ecx, cr0
        or      ebx, CR4_OSFXSR + CR4_OSXMMEXPT
        mov     cr4, ebx

        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; ldmxcsr needs a memory operand.  The value must live at or above
        ; esp: this is ring-0 code, so an interrupt or exception arriving
        ; between the store and the load pushes its frame on this very stack
        ; and would overwrite anything kept below esp.
        push    dword MXCSR_INIT
        ldmxcsr [esp]
        lea     esp, [esp+4]            ; drop the temporary, flags untouched

        xorps   xmm0, xmm0
        xorps   xmm1, xmm1
        xorps   xmm2, xmm2
        xorps   xmm3, xmm3
        xorps   xmm4, xmm4
        xorps   xmm5, xmm5
        xorps   xmm6, xmm6
        xorps   xmm7, xmm7
        ret
7124 dunkaist 114
 
115
; Enable SSE/AVX and bring the vector state to a known value.
; Sets CR4_OSFXSR and CR4_OSXMMEXPT, clears CR0_EM and CR0_MP, sets CR0_NE,
; loads MXCSR with MXCSR_INIT and zeroes the vector registers (vzeroall).
; Clobbers: ebx, ecx, vector registers, MXCSR, flags
init_avx:
        mov     ebx, cr4
        or      ebx, CR4_OSFXSR + CR4_OSXMMEXPT
        mov     cr4, ebx

        mov     ecx, cr0
        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; vldmxcsr needs a memory operand.  Keep it at or above esp: in
        ; ring 0 an interrupt or exception between the store and the load
        ; pushes its frame on the current stack and would overwrite a value
        ; parked below esp.
        push    dword MXCSR_INIT
        vldmxcsr [esp]
        lea     esp, [esp+4]            ; drop the temporary, flags untouched

        vzeroall
        ret
130
 
7124 dunkaist 131
; Enable SSE/AVX-512 and bring the vector state to a known value.
; Sets CR4_OSFXSR and CR4_OSXMMEXPT, clears CR0_EM and CR0_MP, sets CR0_NE,
; loads MXCSR with MXCSR_INIT and zeroes zmm0-zmm7.
; Clobbers: ebx, ecx, zmm0-zmm7, MXCSR, flags
init_avx512:
        mov     ebx, cr4
        or      ebx, CR4_OSFXSR + CR4_OSXMMEXPT
        mov     cr4, ebx

        mov     ecx, cr0
        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; vldmxcsr needs a memory operand.  Keep it at or above esp: in
        ; ring 0 an interrupt or exception between the store and the load
        ; pushes its frame on the current stack and would overwrite a value
        ; parked below esp.
        push    dword MXCSR_INIT
        vldmxcsr [esp]
        lea     esp, [esp+4]            ; drop the temporary, flags untouched

        vpxorq  zmm0, zmm0, zmm0
        vpxorq  zmm1, zmm1, zmm1
        vpxorq  zmm2, zmm2, zmm2
        vpxorq  zmm3, zmm3, zmm3
        vpxorq  zmm4, zmm4, zmm4
        vpxorq  zmm5, zmm5, zmm5
        vpxorq  zmm6, zmm6, zmm6
        vpxorq  zmm7, zmm7, zmm7

        ret
154
 
2288 clevermous 155
; Store the current task's legacy (FXSAVE-format) FPU context in a buffer.
; param
;  eax= 512 bytes memory area aligned on a 16-byte boundary
;
; If the current task owns the FPU, its live registers are written straight
; to the buffer.  Otherwise the previous owner's registers are flushed to
; that owner's APPDATA.fpu_state, the current task becomes the owner, the
; first 512 bytes of the current task's stored context are copied to the
; buffer and the FPU is reset with fninit.
; Runs with interrupts disabled; the caller's EFLAGS are restored on exit.
; Preserves ecx, esi, edi.  eax is not preserved on the ownership-change
; path.

align 4
fpu_save:
        push    ecx
        push    esi
        push    edi

        pushfd
        cli

        mov     edi, eax                ; edi = destination buffer
        clts

        mov     esi, [CURRENT_TASK]
        mov     ecx, [fpu_owner]
        cmp     ecx, esi
        je      .is_owner

        ; the registers belong to slot ecx: flush them to that slot first
        mov     [fpu_owner], esi
        shl     ecx, 8
        mov     eax, [ecx+SLOT_BASE+APPDATA.fpu_state]
        call    save_context

        ; first 512 bytes of XSAVE area have the same format as FXSAVE
        shl     esi, 8
        mov     esi, [esi+SLOT_BASE+APPDATA.fpu_state]
        cld
        mov     ecx, 512/4
        rep movsd
        fninit
.done:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret

.is_owner:
        call    save_fpu_context        ; eax still points at the buffer
        jmp     .done
198
 
7168 clevermous 199
; Report how large a buffer avx_save / avx_restore work with.
; Out: eax = [xsave_area_size], in bytes
avx_save_size:
        mov     eax, [xsave_area_size]
        ret
202
 
203
; Store the current task's full (extended) FPU/SIMD context in a buffer.
; param
;  eax= avx_save_size() bytes memory area aligned on a 64-byte boundary
;
; If the current task owns the FPU, its live registers are written straight
; to the buffer via save_context.  Otherwise the previous owner's registers
; are flushed to that owner's APPDATA.fpu_state, the current task becomes
; the owner, its stored context ([xsave_area_size] bytes, rounded up to
; whole dwords) is copied to the buffer and the FPU is reset with fninit.
; Runs with interrupts disabled; the caller's EFLAGS are restored on exit.
; Preserves ecx, esi, edi.  eax is not preserved on the ownership-change
; path.

align 4
avx_save:
        push    ecx
        push    esi
        push    edi

        pushfd
        cli

        clts
        mov     edi, eax                ; edi = destination buffer

        mov     ecx, [fpu_owner]
        mov     esi, [CURRENT_TASK]
        cmp     ecx, esi
        jne     .save

        call    save_context            ; eax still points at the buffer
        jmp     .exit
.save:
        mov     [fpu_owner], esi

        ; flush the live registers to the slot that owns them
        shl     ecx, 8
        mov     eax, [ecx+SLOT_BASE+APPDATA.fpu_state]

        call    save_context

        ; hand the caller a copy of the current task's stored context
        shl     esi, 8
        mov     esi, [esi+SLOT_BASE+APPDATA.fpu_state]
        mov     ecx, [xsave_area_size]
        add     ecx, 3
        shr     ecx, 2                  ; bytes -> dwords, rounded up
        cld                             ; copy upwards whatever DF the caller
                                        ; had; popfd below restores it
        rep movsd
        fninit
.exit:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret
246
 
2288 clevermous 247
; Write the live FPU/SIMD registers to memory with the best available
; instruction.
; In:  eax = destination area
; When CAPS_OSXSAVE is set in cpu_caps the area is written with xsave using
; the component mask in [xsave_edx]:[xsave_eax]; otherwise control passes to
; save_fpu_context.
; Preserves eax and edx.  ecx is overwritten on the xsave path.
align 4
save_context:
        bt      [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
        jnc     save_fpu_context
        push    eax
        push    edx
        mov     ecx, eax                ; xsave takes its mask in edx:eax,
                                        ; so the pointer moves to ecx
        mov     edx, [xsave_edx]
        mov     eax, [xsave_eax]
        xsave   [ecx]
        pop     edx
        pop     eax
        ret
7168 clevermous 258
; Write the live legacy FPU state to memory.
; In:  eax = destination area
; Uses fxsave when CAPS_SSE is set in cpu_caps, fnsave otherwise.
; No general-purpose register is modified; flags are changed by bt.
save_fpu_context:
        bt      [cpu_caps], CAPS_SSE
        jc      .with_sse
        fnsave  [eax]
        ret
.with_sse:
        fxsave  [eax]
        ret
266
 
7168 clevermous 267
 
2288 clevermous 268
; Load a legacy (FXSAVE/FNSAVE-format) FPU context from a buffer.
; In:  eax = source buffer (512 bytes are read on the copy path)
;
; If the current task owns the FPU, the buffer is loaded directly into the
; registers (fxrstor when CAPS_SSE is set, frstor otherwise).  If it does
; not, the buffer is copied into the task's APPDATA.fpu_state instead and
; the registers are left alone.
; Runs with interrupts disabled; the caller's EFLAGS are restored on exit.
; Preserves ecx, esi, edi.  eax is not preserved.
align 4
fpu_restore:
        push    ecx
        push    esi
        push    edi                     ; the copy path uses edi as the
                                        ; destination pointer

        mov     esi, eax                ; esi = source buffer

        pushfd
        cli

        mov     ecx, [fpu_owner]
        mov     eax, [CURRENT_TASK]
        cmp     ecx, eax
        jne     .copy

        clts
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE

        fxrstor [esi]
        jmp     .exit
.no_SSE:
        fnclex                  ;fix possible problems
        frstor  [esi]
        jmp     .exit
.copy:
        ; not the owner: update the stored context only
        shl     eax, 8
        mov     edi, [eax+SLOT_BASE+APPDATA.fpu_state]
        mov     ecx, 512/4
        cld
        rep movsd
.exit:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret
309
 
7276 dunkaist 310
; Load a full (extended) FPU/SIMD context from a buffer.
; In:  eax = source buffer, in the format written by avx_save
;
; If the current task owns the FPU, the buffer is loaded directly into the
; registers: xrstor with the mask in [xsave_edx]:[xsave_eax] when
; CAPS_OSXSAVE is set, else fxrstor when CAPS_SSE is set, else frstor.
; If it does not, [xsave_area_size] bytes (rounded up to whole dwords) are
; copied into the task's APPDATA.fpu_state and the registers are left alone.
; Runs with interrupts disabled; the caller's EFLAGS are restored on exit.
; Preserves ecx, edx, esi, edi.  eax is not preserved.
align 4
avx_restore:
        push    ecx
        push    esi
        push    edi                     ; the copy path uses edi as the
                                        ; destination pointer

        mov     esi, eax                ; esi = source buffer

        pushfd
        cli

        mov     ecx, [fpu_owner]
        mov     eax, [CURRENT_TASK]
        cmp     ecx, eax
        jne     .copy

        clts
        bt      [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
        jnc     .no_xsave
        push    edx
        mov     eax, [xsave_eax]
        mov     edx, [xsave_edx]
        xrstor  [esi]
        pop     edx
        jmp     .exit
.no_xsave:
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE

        fxrstor [esi]
        jmp     .exit
.no_SSE:
        fnclex                  ;fix possible problems
        frstor  [esi]
        jmp     .exit
.copy:
        ; not the owner: update the stored context only
        shl     eax, 8
        mov     edi, [eax+SLOT_BASE+APPDATA.fpu_state]
        mov     ecx, [xsave_area_size]
        add     ecx, 3
        shr     ecx, 2                  ; bytes -> dwords, rounded up
        cld
        rep movsd
.exit:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret
365
 
366
; #NM (device-not-available) handler: switches the FPU context on demand.
; When the faulting task is not the FPU owner, the registers are written to
; the owner's APPDATA.fpu_state, fpu_owner is set to CURRENT_TASK and that
; task's stored context is loaded.  The save/load pair is xsave/xrstor,
; fxsave/fxrstor or fnsave/frstor, chosen from cpu_caps.
; When the task already owns the FPU only TS is cleared (clts).
align 4
except_7:                  ;#NM exception handler
        save_ring3_context
        clts
        mov     ax, app_data
        mov     ds, ax
        mov     es, ax

        mov     ebx, [fpu_owner]
        cmp     ebx, [CURRENT_TASK]
        je      .return

        ; eax = save area of the slot that currently owns the registers
        shl     ebx, 8
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
        bt      [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
        jc      .xsave
        bt      [cpu_caps], CAPS_SSE
        jc      .fxsave

        ; x87-only CPU
        fnsave  [eax]
        mov     ebx, [CURRENT_TASK]
        mov     [fpu_owner], ebx
        shl     ebx, 8
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
        frstor  [eax]
        jmp     .return

.fxsave:
        fxsave  [eax]
        mov     ebx, [CURRENT_TASK]
        mov     [fpu_owner], ebx
        shl     ebx, 8
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
        fxrstor [eax]
        jmp     .return

.xsave:
        mov     ecx, eax                ; edx:eax carry the component mask
        mov     eax, [xsave_eax]
        mov     edx, [xsave_edx]
        xsave   [ecx]
        mov     ebx, [CURRENT_TASK]
        mov     [fpu_owner], ebx
        shl     ebx, 8
        mov     ecx, [ebx+SLOT_BASE+APPDATA.fpu_state]
        xrstor  [ecx]                   ; same mask still in edx:eax
.return:
        restore_ring3_context
        iret
416
 
417
iglobal
  ; Slot number of the task whose context is live in the FPU registers;
  ; compared against [CURRENT_TASK] and scaled by 256 to index SLOT_BASE.
  ; NOTE(review): the initial value 2 is presumably the first slot that
  ; uses the FPU after boot - confirm against the scheduler.
  fpu_owner dd 2
endg