Subversion Repositories Kolibri OS

Rev

Rev 5363 | Rev 7164 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2288 clevermous 1
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
;;                                                              ;;
7124 dunkaist 3
;; Copyright (C) KolibriOS team 2004-2017. All rights reserved. ;;
2288 clevermous 4
;; Distributed under terms of the GNU General Public License    ;;
5
;;                                                              ;;
6
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7
 
8
$Revision: 7124 $
9
 
10
 
11
; init_fpu: bring up the FPU/SIMD units and record the save-area size.
;   XSAVE-capable CPUs (CAPS_XSAVE set in cpu_caps) get CR4.OSXSAVE and XCR0
;   programmed, then the widest supported unit is initialised. Other CPUs
;   use the 512-byte legacy layout and snapshot the initial state in fpu_data.
; out:      [xsave_area_size] = size in bytes of one state save area
; clobbers: eax, ebx, ecx, edx (cpuid), flags
; NOTE(review): the XSAVE path does not store an initial image in fpu_data,
;   unlike the legacy path - confirm this is intended.
; NOTE(review): this routine tests CAPS_XSAVE while save_context/fpu_restore
;   test CAPS_OSXSAVE - confirm where that cpu_caps bit gets set.
init_fpu:
        clts
        fninit

        bt      [cpu_caps+(CAPS_XSAVE/8)], CAPS_XSAVE mod 8
        jnc     .legacy

        ; allow XSAVE/XRSTOR/XGETBV/XSETBV
        mov     ecx, cr4
        or      ecx, CR4_OSXSAVE
        mov     cr4, ecx

        ; ebx = wanted state components, limited to what CPUID leaf 0Dh reports
        xor     ecx, ecx
        mov     eax, 0x0d
        cpuid
        mov     ebx, XCR0_FPU_MMX + XCR0_SSE + XCR0_AVX + XCR0_AVX512
        and     ebx, eax

        ; XCR0 |= ebx
        xor     ecx, ecx
        xgetbv
        or      eax, ebx
        xor     ecx, ecx
        xsetbv

        ; query leaf 0Dh again now that XCR0 is final; ebx is stored as the area size
        xor     ecx, ecx
        mov     eax, 0x0d
        cpuid
        mov     [xsave_area_size], ebx

        ; pick the widest unit reported in eax
        test    eax, XCR0_AVX512
        jnz     .have_avx512
        test    eax, XCR0_AVX
        jnz     .have_avx
        test    eax, XCR0_SSE
        jnz     .have_sse
        call    init_fpu_mmx
        ret
.have_sse:
        call    init_sse
        ret
.have_avx:
        call    init_avx
        ret
.have_avx512:
        call    init_avx512
        ret

.legacy:
        mov     [xsave_area_size], 512  ; enough for FPU/MMX and SSE
        bt      [cpu_caps], CAPS_SSE
        jc      .legacy_sse
        call    init_fpu_mmx
        fnsave  [fpu_data]
        ret
.legacy_sse:
        call    init_sse
        fxsave  [fpu_data]
        ret
2288 clevermous 66
 
7124 dunkaist 67
; init_fpu_mmx: program CR0 for x87/MMX use.
;   Sets CR0.MP and CR0.NE, clears CR0.EM.
; clobbers: ecx, flags
init_fpu_mmx:
        mov     ecx, cr0
        or      ecx, CR0_MP + CR0_NE
        and     ecx, not CR0_EM
        mov     cr0, ecx
        ret
73
 
74
; init_sse: enable SSE and put it into a known state.
;   Sets CR4.OSFXSR and CR4.OSXMMEXPT, clears CR0.EM and CR0.MP, sets CR0.NE,
;   loads MXCSR with MXCSR_INIT and zeroes xmm0-xmm7.
; clobbers: ebx, ecx, flags
init_sse:
        mov     ebx, cr4
        mov     ecx, cr0
        or      ebx, CR4_OSFXSR+CR4_OSXMMEXPT
        mov     cr4, ebx

        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; The MXCSR image lives in a real stack slot. Memory below esp is not
        ; protected in kernel mode: an interrupt taken on this stack between
        ; the store and the load would overwrite a value kept at [esp-4].
        push    dword MXCSR_INIT
        ldmxcsr [esp]
        lea     esp, [esp+4]            ; release the slot, flags untouched

        xorps   xmm0, xmm0
        xorps   xmm1, xmm1
        xorps   xmm2, xmm2
        xorps   xmm3, xmm3
        xorps   xmm4, xmm4
        xorps   xmm5, xmm5
        xorps   xmm6, xmm6
        xorps   xmm7, xmm7
        ret
7124 dunkaist 96
 
97
; init_avx: enable SSE/AVX and put the vector unit into a known state.
;   Sets CR4.OSFXSR and CR4.OSXMMEXPT, clears CR0.EM and CR0.MP, sets CR0.NE,
;   loads MXCSR with MXCSR_INIT and zeroes the vector registers (vzeroall).
; clobbers: ebx, ecx, flags
init_avx:
        mov     ebx, cr4
        or      ebx, CR4_OSFXSR + CR4_OSXMMEXPT
        mov     cr4, ebx

        mov     ecx, cr0
        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; The MXCSR image lives in a real stack slot. Memory below esp is not
        ; protected in kernel mode: an interrupt taken on this stack between
        ; the store and the load would overwrite a value kept at [esp-4].
        push    dword MXCSR_INIT
        vldmxcsr [esp]
        lea     esp, [esp+4]            ; release the slot, flags untouched

        vzeroall
        ret
112
 
7124 dunkaist 113
; init_avx512: enable SSE/AVX-512 and put the vector unit into a known state.
;   Sets CR4.OSFXSR and CR4.OSXMMEXPT, clears CR0.EM and CR0.MP, sets CR0.NE,
;   loads MXCSR with MXCSR_INIT and zeroes zmm0-zmm7.
; clobbers: ebx, ecx, flags
init_avx512:
        mov     ebx, cr4
        or      ebx, CR4_OSFXSR + CR4_OSXMMEXPT
        mov     cr4, ebx

        mov     ecx, cr0
        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; The MXCSR image lives in a real stack slot. Memory below esp is not
        ; protected in kernel mode: an interrupt taken on this stack between
        ; the store and the load would overwrite a value kept at [esp-4].
        push    dword MXCSR_INIT
        vldmxcsr [esp]
        lea     esp, [esp+4]            ; release the slot, flags untouched

        vpxorq  zmm0, zmm0, zmm0
        vpxorq  zmm1, zmm1, zmm1
        vpxorq  zmm2, zmm2, zmm2
        vpxorq  zmm3, zmm3, zmm3
        vpxorq  zmm4, zmm4, zmm4
        vpxorq  zmm5, zmm5, zmm5
        vpxorq  zmm6, zmm6, zmm6
        vpxorq  zmm7, zmm7, zmm7

        ret
136
 
2288 clevermous 137
; fpu_save: store the current task's FPU/SIMD state into a caller buffer.
; param
;  eax= destination memory area: 512 bytes, or [xsave_area_size] bytes when
;       CAPS_OSXSAVE is set in cpu_caps (save_context then uses XSAVE)
; Preserves ecx, esi, edi and eflags. When the current task is not the FPU
; owner, eax is overwritten with the previous owner's fpu_state pointer.
; NOTE(review): buffers handed to XSAVE must be 64-byte aligned - confirm
;   against callers.

align 4
fpu_save:
        push    ecx
        push    esi
        push    edi

        pushfd
        cli                             ; interrupts stay off until popfd

        clts
        mov     edi, eax                ; edi = caller buffer

        mov     ecx, [fpu_owner]
        mov     esi, [CURRENT_TASK]
        cmp     ecx, esi
        jne     .save

        ; fpu_owner == CURRENT_TASK: dump the registers straight into the buffer
        call    save_context
        jmp     .exit
.save:
        mov     [fpu_owner], esi

        ; save the registers into the previous owner's fpu_state area
        shl     ecx, 8
        mov     eax, [ecx+SLOT_BASE+APPDATA.fpu_state]

        call    save_context

        ; copy the current task's stored state into the caller buffer
        shl     esi, 8
        mov     esi, [esi+SLOT_BASE+APPDATA.fpu_state]
        mov     ecx, 512/4
        ; an XSAVE image is [xsave_area_size] bytes, not 512: copy all of it
        ; whenever save_context would have used XSAVE
        bt      [cpu_caps+(CAPS_OSXSAVE/8)], CAPS_OSXSAVE mod 8
        jnc     .copy
        mov     ecx, [xsave_area_size]
        add     ecx, 3                  ; round up to whole dwords
        shr     ecx, 2
.copy:
        cld
        rep movsd
        fninit
.exit:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret
179
 
180
align 4
181
; save_context: write the live FPU/SIMD registers to memory.
; in:  eax = destination save area
; Uses XSAVE when CAPS_OSXSAVE is set in cpu_caps, FXSAVE when CAPS_SSE is
; set, FNSAVE otherwise. All general-purpose registers are preserved.
; NOTE(review): XSAVE needs a 64-byte aligned area and FXSAVE a 16-byte
;   aligned one - confirm against callers.
save_context:
        bt      [cpu_caps+(CAPS_OSXSAVE/8)], CAPS_OSXSAVE mod 8
        jnc     .no_xsave
        ; XSAVE takes its component mask from edx:eax, so the pointer cannot
        ; stay in eax. Request every component; the CPU masks it with XCR0.
        push    eax
        push    ecx
        push    edx
        mov     ecx, eax
        mov     eax, -1
        mov     edx, eax
        xsave   [ecx]
        pop     edx
        pop     ecx
        pop     eax
        ret
.no_xsave:
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE

        fxsave  [eax]
        ret
.no_SSE:
        fnsave  [eax]
        ret
195
 
196
align 4
197
; fpu_restore: load the current task's FPU/SIMD state from a caller buffer.
; in:  eax = source memory area: 512 bytes, or [xsave_area_size] bytes when
;            CAPS_OSXSAVE is set in cpu_caps
; If the current task owns the FPU, the registers are loaded directly.
; Otherwise the buffer is copied into the task's fpu_state area.
; Preserves ecx, edx, esi, edi and eflags. eax is overwritten with
; [CURRENT_TASK], shifted left by 8 on the copy path.
fpu_restore:
        push    ecx
        push    esi
        push    edi                     ; the copy path uses edi as destination

        mov     esi, eax                ; esi = caller buffer

        pushfd
        cli                             ; interrupts stay off until popfd

        mov     ecx, [fpu_owner]
        mov     eax, [CURRENT_TASK]
        cmp     ecx, eax
        jne     .copy

        clts
        bt      [cpu_caps+(CAPS_OSXSAVE/8)], CAPS_OSXSAVE mod 8
        jnc     .no_xsave
        ; XRSTOR takes its component mask from edx:eax. Request every
        ; component; the CPU masks it with XCR0.
        push    eax
        push    edx
        mov     eax, -1
        mov     edx, eax
        xrstor  [esi]
        pop     edx
        pop     eax
        jmp     .exit
.no_xsave:
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE

        fxrstor [esi]
        jmp     .exit
.no_SSE:
        fnclex                  ;fix possible problems
        frstor  [esi]
        jmp     .exit
.copy:
        shl     eax, 8
        mov     edi, [eax+SLOT_BASE+APPDATA.fpu_state]
        mov     ecx, 512/4
        ; an XSAVE image is [xsave_area_size] bytes, not 512: copy all of it
        ; whenever the direct path would have used XRSTOR
        bt      [cpu_caps+(CAPS_OSXSAVE/8)], CAPS_OSXSAVE mod 8
        jnc     .do_copy
        mov     ecx, [xsave_area_size]
        add     ecx, 3                  ; round up to whole dwords
        shr     ecx, 2
.do_copy:
        cld
        rep movsd
.exit:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret
245
 
246
align 4
247
; except_7: #NM (device not available) handler - lazy FPU context switch.
;   If the current task is not the FPU owner, the live registers are saved
;   into the owner's fpu_state area, ownership moves to the current task and
;   its fpu_state is loaded. The save/restore instruction pair follows
;   cpu_caps: XSAVE/XRSTOR (CAPS_OSXSAVE), FXSAVE/FXRSTOR (CAPS_SSE) or
;   FNSAVE/FRSTOR.
; NOTE(review): the XSAVE branch assumes every fpu_state area is a valid,
;   64-byte aligned XSAVE area of [xsave_area_size] bytes - confirm against
;   the code that allocates and initialises them.
except_7:                  ;#NM exception handler
        save_ring3_context
        clts
        mov     ax, app_data;
        mov     ds, ax
        mov     es, ax

        mov     ebx, [fpu_owner]
        cmp     ebx, [CURRENT_TASK]
        je      .exit

        shl     ebx, 8
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
        ; FXSAVE alone would not switch the AVX/AVX-512 state that init_fpu
        ; enables, so use XSAVE whenever the rest of the file does
        bt      [cpu_caps+(CAPS_OSXSAVE/8)], CAPS_OSXSAVE mod 8
        jc      .xsave
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE

        fxsave  [eax]
        mov     ebx, [CURRENT_TASK]
        mov     [fpu_owner], ebx
        shl     ebx, 8
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
        fxrstor [eax]
.exit:
        restore_ring3_context
        iret

.xsave:
        push    edx
        mov     ebx, eax                ; edx:eax is the XSAVE/XRSTOR component mask
        mov     eax, -1                 ; request every component enabled in XCR0
        mov     edx, eax
        xsave   [ebx]
        mov     ebx, [CURRENT_TASK]
        mov     [fpu_owner], ebx
        shl     ebx, 8
        mov     ebx, [ebx+SLOT_BASE+APPDATA.fpu_state]
        xrstor  [ebx]
        pop     edx
        jmp     .exit

.no_SSE:
        fnsave  [eax]
        mov     ebx, [CURRENT_TASK]
        mov     [fpu_owner], ebx
        shl     ebx, 8
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
        frstor  [eax]
        restore_ring3_context
        iret
282
 
283
iglobal
  ; Slot number of the task whose state is held in the FPU registers.
  ; It is compared with [CURRENT_TASK] and, shifted left by 8, indexes
  ; SLOT_BASE to reach that task's APPDATA.fpu_state. Initial value: slot 2.
  fpu_owner dd 2
endg