Subversion Repositories Kolibri OS

Rev

Rev 7124 | Rev 7165 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2288 clevermous 1
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
;;                                                              ;;
7124 dunkaist 3
;; Copyright (C) KolibriOS team 2004-2017. All rights reserved. ;;
2288 clevermous 4
;; Distributed under terms of the GNU General Public License    ;;
5
;;                                                              ;;
6
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7
 
8
$Revision: 7164 $
9
 
10
 
11
;-----------------------------------------------------------------------
; init_fpu
;   Resets the x87 unit, enables the widest SIMD state management the
;   CPU reports, records the size of the state-save area in
;   [xsave_area_size] and runs the matching init_* routine.
; In:    nothing
; Out:   [xsave_area_size] set
;        [fpu_data] written on the SSE and FPU/MMX paths
; Clobb: eax, ebx, ecx, edx, flags, FPU/SIMD registers
;-----------------------------------------------------------------------
init_fpu:
        clts                            ; clear CR0.TS before touching the FPU
        fninit

        bt      [cpu_caps+(CAPS_XSAVE/8)], CAPS_XSAVE mod 8
        jnc     .no_xsave

        ; --- XSAVE reported: let the OS use XGETBV/XSETBV/XSAVE --------
        mov     ecx, cr4
        or      ecx, CR4_OSXSAVE
        mov     cr4, ecx

        ; CPUID.(EAX=0Dh,ECX=0): eax = XCR0 bits this CPU accepts
        xor     ecx, ecx
        mov     eax, 0x0d
        cpuid
        mov     ebx, XCR0_FPU_MMX + XCR0_SSE + XCR0_AVX + XCR0_AVX512
        and     ebx, eax                ; ebx = wanted AND supported

        ; XCR0 |= ebx
        xor     ecx, ecx
        xgetbv                          ; edx:eax = XCR0
        or      eax, ebx
        xor     ecx, ecx
        xsetbv

        ; query again: ebx now reflects the features just enabled in XCR0
        xor     ecx, ecx
        mov     eax, 0x0d
        cpuid
        mov     [xsave_area_size], ebx

        ; pick the init routine for the widest supported state (eax from cpuid)
        ; NOTE(review): the AVX-512 and AVX paths return without storing
        ; anything to [fpu_data], unlike .sse and .fpu_mmx - confirm intended.
        test    eax, XCR0_AVX512
        jz      .check_avx
        call    init_avx512
        ret
.check_avx:
        test    eax, XCR0_AVX
        jz      .check_sse
        call    init_avx
        ret
.check_sse:
        test    eax, XCR0_SSE
        jz      .fpu_mmx
        jmp     .sse

        ; --- no XSAVE: choose between FXSAVE and FNSAVE ----------------
.no_xsave:
        mov     [xsave_area_size], 512  ; enough for FPU/MMX and SSE
        bt      [cpu_caps], CAPS_SSE
        jnc     .fpu_mmx
.sse:
        call    init_sse
        fxsave  [fpu_data]              ; store the freshly initialised state
        ret
.fpu_mmx:
        call    init_fpu_mmx
        fnsave  [fpu_data]              ; store the freshly initialised state
        ret
2288 clevermous 63
 
7124 dunkaist 64
;-----------------------------------------------------------------------
; init_fpu_mmx
;   Programs CR0 for a CPU that has only x87/MMX: sets MP and NE,
;   clears EM.
; In:    nothing
; Out:   CR0 updated
; Clobb: ecx, flags
;-----------------------------------------------------------------------
init_fpu_mmx:
        mov     ecx, cr0
        or      ecx, CR0_MP + CR0_NE    ; bits to set
        and     ecx, not CR0_EM         ; bit to clear (disjoint from the above)
        mov     cr0, ecx
        ret
70
 
71
;-----------------------------------------------------------------------
; init_sse
;   Enables SSE: sets CR4.OSFXSR and CR4.OSXMMEXPT, clears CR0.EM and
;   CR0.MP, sets CR0.NE, loads MXCSR with MXCSR_INIT and zeroes
;   xmm0..xmm7.
; In:    nothing
; Out:   CR0, CR4, MXCSR and xmm0..xmm7 initialised
; Clobb: ebx, ecx, flags
; Stack: 4 bytes, used temporarily
;-----------------------------------------------------------------------
init_sse:
        mov     ebx, cr4
        mov     ecx, cr0
        or      ebx, CR4_OSFXSR+CR4_OSXMMEXPT
        mov     cr4, ebx

        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; LDMXCSR only takes a memory operand. Stage the value in a real
        ; stack slot: memory below esp can be overwritten by an interrupt
        ; frame at any moment when running on the kernel stack.
        push    dword MXCSR_INIT
        ldmxcsr [esp]
        lea     esp, [esp+4]            ; drop the slot without touching flags

        xorps   xmm0, xmm0
        xorps   xmm1, xmm1
        xorps   xmm2, xmm2
        xorps   xmm3, xmm3
        xorps   xmm4, xmm4
        xorps   xmm5, xmm5
        xorps   xmm6, xmm6
        xorps   xmm7, xmm7
        ret
7124 dunkaist 93
 
94
;-----------------------------------------------------------------------
; init_avx
;   Enables SSE/AVX use: sets CR4.OSFXSR and CR4.OSXMMEXPT, clears
;   CR0.EM and CR0.MP, sets CR0.NE, loads MXCSR with MXCSR_INIT and
;   zeroes the vector registers with VZEROALL.
; In:    nothing
; Out:   CR0, CR4, MXCSR and vector registers initialised
; Clobb: ebx, ecx, flags
; Stack: 4 bytes, used temporarily
;-----------------------------------------------------------------------
init_avx:
        mov     ebx, cr4
        or      ebx, CR4_OSFXSR + CR4_OSXMMEXPT
        mov     cr4, ebx

        mov     ecx, cr0
        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; Stage MXCSR_INIT in a real stack slot: memory below esp can be
        ; overwritten by an interrupt frame when running on the kernel stack.
        push    dword MXCSR_INIT
        vldmxcsr [esp]
        lea     esp, [esp+4]            ; drop the slot without touching flags

        vzeroall
        ret
109
 
7124 dunkaist 110
;-----------------------------------------------------------------------
; init_avx512
;   Enables SSE/AVX/AVX-512 use: sets CR4.OSFXSR and CR4.OSXMMEXPT,
;   clears CR0.EM and CR0.MP, sets CR0.NE, loads MXCSR with MXCSR_INIT
;   and zeroes zmm0..zmm7.
; In:    nothing
; Out:   CR0, CR4, MXCSR and zmm0..zmm7 initialised
; Clobb: ebx, ecx, flags
; Stack: 4 bytes, used temporarily
;-----------------------------------------------------------------------
init_avx512:
        mov     ebx, cr4
        or      ebx, CR4_OSFXSR + CR4_OSXMMEXPT
        mov     cr4, ebx

        mov     ecx, cr0
        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; Stage MXCSR_INIT in a real stack slot: memory below esp can be
        ; overwritten by an interrupt frame when running on the kernel stack.
        push    dword MXCSR_INIT
        vldmxcsr [esp]
        lea     esp, [esp+4]            ; drop the slot without touching flags

        vpxorq  zmm0, zmm0, zmm0
        vpxorq  zmm1, zmm1, zmm1
        vpxorq  zmm2, zmm2, zmm2
        vpxorq  zmm3, zmm3, zmm3
        vpxorq  zmm4, zmm4, zmm4
        vpxorq  zmm5, zmm5, zmm5
        vpxorq  zmm6, zmm6, zmm6
        vpxorq  zmm7, zmm7, zmm7

        ret
133
 
2288 clevermous 134
;-----------------------------------------------------------------------
; fpu_save
;   Stores the FPU/SIMD state of the current task into a caller buffer.
;   If the current task owns the FPU, the live registers are saved
;   straight into the buffer. Otherwise the owner's live state is saved
;   into the owner's APPDATA.fpu_state, ownership moves to the current
;   task, the current task's stored state is copied into the buffer and
;   the FPU is reinitialised with FNINIT.
; param
;  eax= memory area of [xsave_area_size] bytes (512 when XSAVE is not
;       in use); alignment must satisfy the save instruction that
;       save_context selects
; Out:   buffer filled; eax may be modified
; Saved: ecx, esi, edi, eflags (interrupts are disabled for the duration)
;-----------------------------------------------------------------------
align 4
fpu_save:
        push    ecx
        push    esi
        push    edi

        pushfd
        cli

        clts
        mov     edi, eax                ; edi = caller's buffer

        mov     ecx, [fpu_owner]
        mov     esi, [CURRENT_TASK]
        cmp     ecx, esi
        jne     .save

        ; current task owns the FPU: save live state directly (eax = buffer)
        call    save_context
        jmp     .exit
.save:
        mov     [fpu_owner], esi        ; current task becomes the owner

        ; flush the previous owner's live registers into its own slot
        shl     ecx, 8
        mov     eax, [ecx+SLOT_BASE+APPDATA.fpu_state]

        call    save_context

        ; copy the current task's stored state into the caller's buffer;
        ; the size follows the save format in use instead of a fixed 512
        shl     esi, 8
        mov     esi, [esi+SLOT_BASE+APPDATA.fpu_state]
        mov     ecx, [xsave_area_size]
        add     ecx, 3
        shr     ecx, 2                  ; bytes -> dwords, rounded up
        cld
        rep movsd
        fninit
.exit:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret
176
 
177
;-----------------------------------------------------------------------
; save_context
;   Stores the live FPU/SIMD registers at [eax] using the richest
;   instruction available: XSAVE if CAPS_OSXSAVE is set in cpu_caps,
;   else FXSAVE if CAPS_SSE is set, else FNSAVE.
; In:    eax = destination area
; Out:   area filled
; Saved: eax, ecx, edx
; Clobb: flags (CF from the capability tests)
;-----------------------------------------------------------------------
align 4
save_context:
        bt      [cpu_caps+(CAPS_OSXSAVE/8)], CAPS_OSXSAVE mod 8
        jnc     .no_xsave

        ; XSAVE takes its requested-feature bitmap from EDX:EAX, so the
        ; destination pointer must not stay in eax. Request every component
        ; enabled in XCR0 by passing an all-ones mask.
        push    edx
        push    ecx
        mov     ecx, eax                ; ecx = destination
        mov     eax, -1
        mov     edx, -1
        xsave   [ecx]
        mov     eax, ecx                ; give the pointer back to the caller
        pop     ecx
        pop     edx
        ret
.no_xsave:
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE

        fxsave  [eax]
        ret
.no_SSE:
        fnsave  [eax]
        ret
192
 
193
;-----------------------------------------------------------------------
; fpu_restore
;   Loads FPU/SIMD state for the current task from a caller buffer.
;   If the current task owns the FPU, the registers are loaded directly
;   (XRSTOR / FXRSTOR / FRSTOR, chosen the same way as save_context).
;   Otherwise the buffer is copied into the current task's
;   APPDATA.fpu_state.
; In:    eax = source area of [xsave_area_size] bytes (512 when XSAVE
;              is not in use)
; Out:   eax modified
; Saved: ecx, edx, esi, edi, eflags (interrupts are disabled for the duration)
;-----------------------------------------------------------------------
align 4
fpu_restore:
        push    ecx
        push    esi
        push    edi                     ; .copy uses edi as the destination

        mov     esi, eax                ; esi = caller's buffer

        pushfd
        cli

        mov     ecx, [fpu_owner]
        mov     eax, [CURRENT_TASK]
        cmp     ecx, eax
        jne     .copy

        clts
        bt      [cpu_caps+(CAPS_OSXSAVE/8)], CAPS_OSXSAVE mod 8
        jnc     .no_xsave

        ; XRSTOR takes its requested-feature bitmap from EDX:EAX; ask for
        ; every component enabled in XCR0 instead of whatever is in there.
        push    eax
        push    edx
        mov     eax, -1
        mov     edx, -1
        xrstor  [esi]
        pop     edx
        pop     eax
        jmp     .exit
.no_xsave:
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE

        fxrstor [esi]
        jmp     .exit
.no_SSE:
        fnclex                  ;fix possible problems
        frstor  [esi]
        jmp     .exit
.copy:
        ; not the owner: update the stored copy in the task's own slot
        shl     eax, 8
        mov     edi, [eax+SLOT_BASE+APPDATA.fpu_state]
        mov     ecx, [xsave_area_size]
        add     ecx, 3
        shr     ecx, 2                  ; bytes -> dwords, rounded up
        cld
        rep movsd
.exit:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret
242
 
243
;-----------------------------------------------------------------------
; except_7 - #NM (device not available) exception handler
;   If the current task does not own the FPU, saves the live state into
;   the owner's APPDATA.fpu_state, makes the current task the owner and
;   loads the current task's stored state. The save/restore pair follows
;   the same choice as save_context and fpu_restore (XSAVE, else FXSAVE,
;   else FNSAVE), so every per-slot area is kept in a single format.
;   eax and ebx are used as scratch, as in the original handler; edx is
;   saved explicitly because the body of save_ring3_context is not
;   visible from this file.
;-----------------------------------------------------------------------
align 4
except_7:                  ;#NM exception handler
        save_ring3_context
        clts
        mov     ax, app_data
        mov     ds, ax
        mov     es, ax

        mov     ebx, [fpu_owner]
        cmp     ebx, [CURRENT_TASK]
        je      .exit

        shl     ebx, 8
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]  ; eax -> owner's area
        bt      [cpu_caps+(CAPS_OSXSAVE/8)], CAPS_OSXSAVE mod 8
        jnc     .no_xsave

        ; XSAVE/XRSTOR take the feature bitmap from EDX:EAX, so keep the
        ; area pointer in ebx and request every component enabled in XCR0.
        mov     ebx, eax
        push    edx
        mov     eax, -1
        mov     edx, -1
        xsave   [ebx]
        mov     ebx, [CURRENT_TASK]
        mov     [fpu_owner], ebx
        shl     ebx, 8
        mov     ebx, [ebx+SLOT_BASE+APPDATA.fpu_state]
        xrstor  [ebx]
        pop     edx
        jmp     .exit

.no_xsave:
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE

        fxsave  [eax]
        mov     ebx, [CURRENT_TASK]
        mov     [fpu_owner], ebx
        shl     ebx, 8
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
        fxrstor [eax]
.exit:
        restore_ring3_context
        iret

.no_SSE:
        fnsave  [eax]
        mov     ebx, [CURRENT_TASK]
        mov     [fpu_owner], ebx
        shl     ebx, 8
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
        frstor  [eax]
        jmp     .exit
279
 
280
iglobal
3534 clevermous 281
  fpu_owner dd 2 ; value compared with [CURRENT_TASK] to tell whose state is live in the FPU; NOTE(review): why the initial value is 2 is not visible here - confirm
2288 clevermous 282
endg