Subversion Repositories Kolibri OS

Rev

Rev 7165 | Rev 7199 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2288 clevermous 1
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
;;                                                              ;;
7124 dunkaist 3
;; Copyright (C) KolibriOS team 2004-2017. All rights reserved. ;;
2288 clevermous 4
;; Distributed under terms of the GNU General Public License    ;;
5
;;                                                              ;;
6
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7
 
8
$Revision: 7168 $
9
 
10
 
11
;-----------------------------------------------------------------------
; init_fpu
; Detects the best available extended-state mechanism (XSAVE with
; AVX-512 / AVX / SSE, plain FXSAVE, or legacy FNSAVE), enables it,
; records the required save-area size in [xsave_area_size] and stores
; a clean initial image of the state in [fpu_data].
; In:    -
; Out:   [xsave_area_size], [fpu_data] filled
; Clobb: eax, ebx, ecx, edx, flags, CR0/CR4/XCR0, FPU/SIMD registers
;-----------------------------------------------------------------------
init_fpu:
        clts
        fninit

; cpu_caps is addressed in bytes, while CAPS_XSAVE/32 is a dword index,
; so it has to be scaled by 4 (bt with an immediate bit offset does not
; adjust the address by itself).
        bt      [cpu_caps+(CAPS_XSAVE/32)*4], CAPS_XSAVE mod 32
        jnc     .no_xsave

; let the CPU know that the OS uses XSAVE/XGETBV/XSETBV
        mov     ecx, cr4
        or      ecx, CR4_OSXSAVE
        mov     cr4, ecx

; CPUID.0Dh.0: eax = XCR0 bits supported by the processor
        mov     eax, 0x0d
        xor     ecx, ecx
        cpuid
; ebx = state components we want AND the processor supports
        mov     ebx, XCR0_FPU_MMX + XCR0_SSE + XCR0_AVX + XCR0_AVX512
        and     ebx, eax
; XCR0 |= ebx (upper half in edx is written back unchanged)
        xor     ecx, ecx
        xgetbv
        or      eax, ebx
        xor     ecx, ecx
        xsetbv

; query again: ebx now reports the area size for the enabled components
        mov     eax, 0x0d
        xor     ecx, ecx
        cpuid
        mov     [xsave_area_size], ebx
        cmp     ebx, fpu_data_size
        ja      $               ; statically reserved area too small: hang here

; The init helpers below only use ebx/ecx, therefore edx:eax still holds
; the CPUID.0Dh mask when xsave is executed and selects every enabled
; component.
        test    eax, XCR0_AVX512
        jz      @f
        call    init_avx512
        xsave   [fpu_data]
        ret
@@:
        test    eax, XCR0_AVX
        jz      @f
        call    init_avx
        xsave   [fpu_data]
        ret
@@:
        test    eax, XCR0_SSE
        jnz     .sse
        jmp     .fpu_mmx

.no_xsave:
        mov     [xsave_area_size], 512  ; enough for FPU/MMX and SSE
        bt      [cpu_caps], CAPS_SSE
        jnc     .fpu_mmx
.sse:
        call    init_sse
        fxsave  [fpu_data]
        ret
.fpu_mmx:
        call    init_fpu_mmx
        fnsave  [fpu_data]
        ret
2288 clevermous 67
 
7124 dunkaist 68
;-----------------------------------------------------------------------
; init_fpu_mmx
; Programs CR0 for a machine that only has x87/MMX state:
; clears CR0_EM, sets CR0_MP and CR0_NE.
; In:    -
; Out:   -
; Clobb: ecx, flags
;-----------------------------------------------------------------------
init_fpu_mmx:
        mov     ecx, cr0
        and     ecx, not CR0_EM         ; no emulation, real FPU is present
        or      ecx, CR0_MP + CR0_NE
        mov     cr0, ecx
        ret
74
 
75
;-----------------------------------------------------------------------
; init_sse
; Enables SSE: sets CR4_OSFXSR and CR4_OSXMMEXPT, clears CR0_EM and
; CR0_MP, sets CR0_NE, loads MXCSR with MXCSR_INIT and zeroes xmm0-xmm7.
; In:    -
; Out:   -
; Clobb: ebx, ecx, xmm0-xmm7, MXCSR, flags
;-----------------------------------------------------------------------
init_sse:
        mov     ebx, cr4
        mov     ecx, cr0
        or      ebx, CR4_OSFXSR+CR4_OSXMMEXPT
        mov     cr4, ebx

        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

; MXCSR can only be loaded from memory. Use a real stack slot: data
; stored below esp may be overwritten by any interrupt/exception frame
; pushed onto the same stack before ldmxcsr reads it.
        push    dword MXCSR_INIT
        ldmxcsr [esp]
        lea     esp, [esp+4]            ; drop the slot, flags untouched

        xorps   xmm0, xmm0
        xorps   xmm1, xmm1
        xorps   xmm2, xmm2
        xorps   xmm3, xmm3
        xorps   xmm4, xmm4
        xorps   xmm5, xmm5
        xorps   xmm6, xmm6
        xorps   xmm7, xmm7
        ret
7124 dunkaist 97
 
98
;-----------------------------------------------------------------------
; init_avx
; Same control-register setup as for SSE (CR4_OSFXSR, CR4_OSXMMEXPT,
; CR0_EM/CR0_MP cleared, CR0_NE set), then loads MXCSR with MXCSR_INIT
; and clears the vector registers with vzeroall.
; In:    -
; Out:   -
; Clobb: ebx, ecx, vector registers, MXCSR, flags
;        (eax/edx are left intact, init_fpu relies on that)
;-----------------------------------------------------------------------
init_avx:
        mov     ebx, cr4
        or      ebx, CR4_OSFXSR + CR4_OSXMMEXPT
        mov     cr4, ebx

        mov     ecx, cr0
        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

; Use a real stack slot for the MXCSR image: memory below esp may be
; overwritten by an interrupt/exception frame before it is read.
        push    dword MXCSR_INIT
        vldmxcsr [esp]
        lea     esp, [esp+4]            ; drop the slot, flags untouched

        vzeroall
        ret
113
 
7124 dunkaist 114
;-----------------------------------------------------------------------
; init_avx512
; Same control-register setup as for SSE/AVX (CR4_OSFXSR,
; CR4_OSXMMEXPT, CR0_EM/CR0_MP cleared, CR0_NE set), then loads MXCSR
; with MXCSR_INIT and zeroes zmm0-zmm7.
; In:    -
; Out:   -
; Clobb: ebx, ecx, zmm0-zmm7, MXCSR, flags
;        (eax/edx are left intact, init_fpu relies on that)
;-----------------------------------------------------------------------
init_avx512:
        mov     ebx, cr4
        or      ebx, CR4_OSFXSR + CR4_OSXMMEXPT
        mov     cr4, ebx

        mov     ecx, cr0
        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

; Use a real stack slot for the MXCSR image: memory below esp may be
; overwritten by an interrupt/exception frame before it is read.
        push    dword MXCSR_INIT
        vldmxcsr [esp]
        lea     esp, [esp+4]            ; drop the slot, flags untouched

        vpxorq  zmm0, zmm0, zmm0
        vpxorq  zmm1, zmm1, zmm1
        vpxorq  zmm2, zmm2, zmm2
        vpxorq  zmm3, zmm3, zmm3
        vpxorq  zmm4, zmm4, zmm4
        vpxorq  zmm5, zmm5, zmm5
        vpxorq  zmm6, zmm6, zmm6
        vpxorq  zmm7, zmm7, zmm7

        ret
137
 
2288 clevermous 138
;-----------------------------------------------------------------------
; fpu_save
; Stores the current task's FPU/SSE state (FXSAVE/FNSAVE layout) into a
; caller-supplied buffer.
; In:    eax = 512 bytes memory area aligned on a 16-byte boundary
; Out:   buffer filled; ecx, esi, edi and EFLAGS are preserved
;        eax is overwritten when the FPU belonged to another task
; Runs with interrupts disabled (EFLAGS restored on exit).
;-----------------------------------------------------------------------

align 4
fpu_save:
        push    ecx esi edi
        pushfd
        cli
        clts

        mov     edi, eax                ; edi = destination buffer
        mov     esi, [CURRENT_TASK]
        mov     ecx, [fpu_owner]
        cmp     esi, ecx
        je      .is_owner

; The hardware registers hold another task's state: flush it into that
; task's save area and make the current task the owner.
        mov     [fpu_owner], esi
        shl     ecx, 8                  ; slot number -> APPDATA offset
        mov     eax, [ecx+SLOT_BASE+APPDATA.fpu_state]
        call    save_context

; The current task's state lives in its own save area; hand out a copy.
; First 512 bytes of XSAVE area have the same format as FXSAVE.
        shl     esi, 8
        mov     esi, [esi+SLOT_BASE+APPDATA.fpu_state]
        mov     ecx, 512/4
        cld
        rep movsd
        fninit
        jmp     .done

.is_owner:
; The registers are ours already: dump them straight into the buffer.
        call    save_fpu_context
.done:
        popfd
        pop     edi esi ecx
        ret
181
 
7168 clevermous 182
;-----------------------------------------------------------------------
; avx_save_size
; Returns the size of the buffer that avx_save / avx_restore operate on.
; In:    -
; Out:   eax = [xsave_area_size] (bytes)
; Clobb: -
;-----------------------------------------------------------------------
avx_save_size:
        mov     eax, [xsave_area_size]
        ret
185
 
186
;-----------------------------------------------------------------------
; avx_save
; Stores the current task's complete extended state into a
; caller-supplied buffer.
; In:    eax = avx_save_size() bytes memory area aligned on a 64-byte
;              boundary
; Out:   buffer filled; ecx, esi, edi and EFLAGS are preserved
;        eax is overwritten when the FPU belonged to another task
; Runs with interrupts disabled (EFLAGS restored on exit).
;-----------------------------------------------------------------------

avx_save:
        push    ecx
        push    esi
        push    edi

        pushfd
        cli

        clts
        mov     edi, eax                ; edi = destination buffer

        mov     ecx, [fpu_owner]
        mov     esi, [CURRENT_TASK]
        cmp     ecx, esi
        jne     .save

; the registers belong to the current task: dump them into the buffer
        call    save_context
        jmp     .exit
.save:
; The registers hold another task's state: flush it into that task's
; save area and make the current task the owner.
        mov     [fpu_owner], esi

        shl     ecx, 8                  ; slot number -> APPDATA offset
        mov     eax, [ecx+SLOT_BASE+APPDATA.fpu_state]

        call    save_context

; hand out a copy of the current task's own save area
        shl     esi, 8
        mov     esi, [esi+SLOT_BASE+APPDATA.fpu_state]
        mov     ecx, [xsave_area_size]
        add     ecx, 3                  ; round the byte count up
        shr     ecx, 2                  ; to whole dwords
        cld                             ; copy forward, as the sibling routines do
        rep movsd
        fninit
.exit:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret
228
 
2288 clevermous 229
;-----------------------------------------------------------------------
; save_context
; Saves the live FPU/SIMD state with the best instruction available:
; XSAVE if enabled by the OS, otherwise falls into save_fpu_context.
; In:    eax = save area (64-byte aligned when XSAVE is used)
; Out:   area filled
; Clobb: flags; all general-purpose registers are preserved
;
; save_fpu_context
; Saves the legacy state only: FXSAVE when SSE is present, else FNSAVE.
; In:    eax = save area
; Clobb: flags
;-----------------------------------------------------------------------
align 4
save_context:
; CAPS_OSXSAVE/32 is a dword index and must be scaled to a byte offset.
; NOTE(review): assumes cpu_caps reflects CR4.OSXSAVE as set by init_fpu
; - confirm where cpu_caps is filled.
        bt      [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
        jnc     save_fpu_context
; XSAVE takes the set of components to store from edx:eax, so the
; pointer must not stay in eax: a 64-byte aligned address has its low
; bits clear and would deselect x87, SSE and AVX state.
        push    eax
        push    ecx
        push    edx
        mov     ecx, eax                ; ecx = save area
        or      eax, -1                 ; edx:eax = all ones ->
        or      edx, -1                 ; every component enabled in XCR0
        xsave   [ecx]
        pop     edx
        pop     ecx
        pop     eax
        ret
save_fpu_context:
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE
        fxsave  [eax]
        ret
.no_SSE:
        fnsave  [eax]
        ret
243
 
7168 clevermous 244
 
2288 clevermous 245
;-----------------------------------------------------------------------
; fpu_restore
; Loads an FPU/SSE state image previously produced by fpu_save.
; If the current task owns the FPU the image is loaded into the
; registers, otherwise it is copied into the task's save area.
; In:    eax = 512-byte state image
; Out:   ecx, esi, edi and EFLAGS are preserved; eax is destroyed
; Runs with interrupts disabled (EFLAGS restored on exit).
;-----------------------------------------------------------------------
align 4
fpu_restore:
        push    ecx
        push    esi
        push    edi                     ; used by the .copy path only, but
                                        ; keep the clobber list path-independent

        mov     esi, eax                ; esi = source image

        pushfd
        cli

        mov     ecx, [fpu_owner]
        mov     eax, [CURRENT_TASK]
        cmp     ecx, eax
        jne     .copy

        clts
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE

        fxrstor [esi]
        jmp     .exit
.no_SSE:
        fnclex                  ;fix possible problems
        frstor  [esi]
        jmp     .exit
.copy:
; another task's state is in the registers: just update our save area
        shl     eax, 8                  ; slot number -> APPDATA offset
        mov     edi, [eax+SLOT_BASE+APPDATA.fpu_state]
        mov     ecx, 512/4
        cld
        rep movsd
.exit:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret
286
 
287
;-----------------------------------------------------------------------
; avx_restore
; Loads an extended-state image previously produced by avx_save.
; If the current task owns the FPU the image is loaded into the
; registers, otherwise it is copied into the task's save area.
; In:    eax = avx_save_size() bytes state image (64-byte aligned)
; Out:   ecx, edx, esi, edi and EFLAGS are preserved; eax is destroyed
; Runs with interrupts disabled (EFLAGS restored on exit).
;-----------------------------------------------------------------------
avx_restore:
        push    ecx
        push    edx
        push    esi
        push    edi                     ; used by the .copy path only, but
                                        ; keep the clobber list path-independent

        mov     esi, eax                ; esi = source image

        pushfd
        cli

        mov     ecx, [fpu_owner]
        mov     eax, [CURRENT_TASK]
        cmp     ecx, eax
        jne     .copy

        clts
; CAPS_OSXSAVE/32 is a dword index and must be scaled to a byte offset
        bt      [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
        jnc     .no_xsave
; XRSTOR takes the set of components to load from edx:eax; request all
; of them instead of whatever the registers happen to contain.
        push    eax
        or      eax, -1
        or      edx, -1
        xrstor  [esi]
        pop     eax
        jmp     .exit
.no_xsave:
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE

        fxrstor [esi]
        jmp     .exit
.no_SSE:
        fnclex                  ;fix possible problems
        frstor  [esi]
        jmp     .exit
.copy:
; another task's state is in the registers: just update our save area
        shl     eax, 8                  ; slot number -> APPDATA offset
        mov     edi, [eax+SLOT_BASE+APPDATA.fpu_state]
        mov     ecx, [xsave_area_size]
        add     ecx, 3                  ; round the byte count up
        shr     ecx, 2                  ; to whole dwords
        cld
        rep movsd
.exit:
        popfd
        pop     edi
        pop     esi
        pop     edx
        pop     ecx
        ret
337
 
338
;-----------------------------------------------------------------------
; except_7 - #NM exception handler
; If the FPU currently holds another task's state, that state is saved
; into the previous owner's save area, the current task becomes the
; owner and its own saved state is loaded. Returns with iret.
;-----------------------------------------------------------------------
align 4
except_7:                  ;#NM exception handler
        save_ring3_context
        clts
        mov     ax, app_data
        mov     ds, ax
        mov     es, ax

        mov     ebx, [fpu_owner]
        cmp     ebx, [CURRENT_TASK]
        je      .exit                   ; state in the registers is already ours

        shl     ebx, 8                  ; slot number -> APPDATA offset
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
; CAPS_OSXSAVE/32 is a dword index and must be scaled to a byte offset
        bt      [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
        jnc     .no_xsave

; XSAVE/XRSTOR take the set of components from edx:eax, so the area
; pointer is kept in ebx and the mask is set to all ones.
        push    edx
        mov     ebx, eax                ; ebx = previous owner's save area
        or      eax, -1
        or      edx, -1
        xsave   [ebx]
        mov     ebx, [CURRENT_TASK]
        mov     [fpu_owner], ebx
        shl     ebx, 8
        mov     ebx, [ebx+SLOT_BASE+APPDATA.fpu_state]
        xrstor  [ebx]                   ; edx:eax still select everything
        pop     edx
.exit:
        restore_ring3_context
        iret
.no_xsave:
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE

        fxsave  [eax]
        mov     ebx, [CURRENT_TASK]
        mov     [fpu_owner], ebx
        shl     ebx, 8
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
        fxrstor [eax]
        restore_ring3_context
        iret

.no_SSE:
        fnsave  [eax]
        mov     ebx, [CURRENT_TASK]
        mov     [fpu_owner], ebx
        shl     ebx, 8
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
        frstor  [eax]
        restore_ring3_context
        iret
385
 
386
; fpu_owner: slot number of the task whose state is currently held in
; the FPU/SIMD registers. It is compared against [CURRENT_TASK] and,
; shifted left by 8, used as an offset from SLOT_BASE.
iglobal
  fpu_owner dd 2
endg