Subversion Repositories Kolibri OS

Rev

Rev 7164 | Rev 7168 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2288 clevermous 1
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
;;                                                              ;;
7124 dunkaist 3
;; Copyright (C) KolibriOS team 2004-2017. All rights reserved. ;;
2288 clevermous 4
;; Distributed under terms of the GNU General Public License    ;;
5
;;                                                              ;;
6
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7
 
8
$Revision: 7165 $
9
 
10
 
11
; init_fpu: FPU/SIMD bring-up.
; Clears CR0.TS, resets the x87 unit and then takes one of two routes:
;   * CAPS_XSAVE set in cpu_caps: sets CR4_OSXSAVE, enables in XCR0 every
;     wanted component (FPU/MMX, SSE, AVX, AVX512) that CPUID leaf 0Dh
;     reports, stores the resulting save-area size in [xsave_area_size] and
;     initialises the widest register set that is available;
;   * otherwise: [xsave_area_size] = 512 and SSE or plain FPU/MMX setup.
; On every route the freshly initialised state is written to [fpu_data].
; Clobbers: eax, ebx, ecx, edx on the XSAVE route (cpuid/xgetbv);
;           ebx/ecx as used by the init_* helpers; EFLAGS.
init_fpu:
12
        clts                            ; clear CR0.TS before touching the FPU
13
        fninit                          ; reset the x87 unit
14
 
7124 dunkaist 15
        bt      [cpu_caps+(CAPS_XSAVE/8)], CAPS_XSAVE mod 8     ; CF = XSAVE capability bit
16
        jnc     .no_xsave
17
 
18
        mov     ecx, cr4
19
        or      ecx, CR4_OSXSAVE        ; required before xgetbv/xsetbv/xsave can be used
20
        mov     cr4, ecx
21
 
22
        mov     eax, 0x0d               ; CPUID leaf 0Dh, sub-leaf 0 (ecx = 0)
23
        xor     ecx, ecx
24
        cpuid
25
        mov     ebx, XCR0_FPU_MMX + XCR0_SSE + XCR0_AVX + XCR0_AVX512   ; components the kernel wants
26
        and     ebx, eax                ; ... limited to what CPUID returned in eax
27
        xor     ecx, ecx                ; ecx = 0 selects XCR0
28
        xgetbv                          ; edx:eax = current XCR0
29
        or      eax, ebx                ; add the wanted components, keep the ones already set
30
        xor     ecx, ecx
31
        xsetbv                          ; XCR0 = edx:eax
32
 
33
        mov     eax, 0x0d               ; query leaf 0Dh again, now that XCR0 has been updated
34
        xor     ecx, ecx
35
        cpuid
36
        mov     [xsave_area_size], ebx  ; size value returned by CPUID in ebx
7165 clevermous 37
        cmp     ebx, fpu_data_size
38
        ja      $                       ; jumps to itself (stops here) if the area exceeds fpu_data_size
7124 dunkaist 39
 
40
        test    eax, XCR0_AVX512        ; eax = bitmap returned by the CPUID call above
41
        jz      @f
42
        call    init_avx512             ; touches only ebx/ecx, so edx:eax survive the call
7165 clevermous 43
        xsave   [fpu_data]              ; edx:eax (CPUID bitmap) act as the component mask
7124 dunkaist 44
        ret
45
@@:
46
        test    eax, XCR0_AVX
47
        jz      @f
48
        call    init_avx                ; touches only ebx/ecx, so edx:eax survive the call
7165 clevermous 49
        xsave   [fpu_data]              ; edx:eax (CPUID bitmap) act as the component mask
7124 dunkaist 50
        ret
51
@@:
52
        test    eax, XCR0_SSE
7164 clevermous 53
        jnz     .sse                    ; share the SSE / FPU-only tails with the non-XSAVE route
54
        jmp     .fpu_mmx
7124 dunkaist 55
.no_xsave:
56
        mov     [xsave_area_size], 512  ; enough for FPU/MMX and SSE
2288 clevermous 57
        bt      [cpu_caps], CAPS_SSE
7164 clevermous 58
        jnc     .fpu_mmx
59
.sse:
7124 dunkaist 60
        call    init_sse
61
        fxsave  [fpu_data]              ; store the initial x87 + SSE state
62
        ret
7164 clevermous 63
.fpu_mmx:
7124 dunkaist 64
        call    init_fpu_mmx
65
        fnsave  [fpu_data]              ; store the initial x87 state (fnsave also re-initialises the FPU)
66
        ret
2288 clevermous 67
 
7124 dunkaist 68
;-----------------------------------------------------------------------
; init_fpu_mmx: CR0 setup for the x87/MMX-only case.
; In:    nothing
; Out:   CR0 with CR0_EM cleared and CR0_MP, CR0_NE set
; Clobb: ecx (left holding the value written to CR0), EFLAGS
;-----------------------------------------------------------------------
init_fpu_mmx:
        mov     ecx, cr0                        ; ecx = current CR0
        and     ecx, not CR0_EM                 ; drop the emulation bit
        or      ecx, CR0_MP + CR0_NE            ; set monitor-coprocessor and numeric-error bits
        mov     cr0, ecx                        ; commit
        ret
74
 
75
;-----------------------------------------------------------------------
; init_sse: enable SSE and put MXCSR / xmm0-xmm7 into a known state.
; In:    nothing
; Out:   CR4 |= CR4_OSFXSR + CR4_OSXMMEXPT
;        CR0 has CR0_EM and CR0_MP cleared, CR0_NE set
;        MXCSR = MXCSR_INIT, xmm0..xmm7 = 0
; Clobb: ebx (new CR4 value), ecx (new CR0 value), EFLAGS
; Stack: 4 bytes, released before return
;-----------------------------------------------------------------------
init_sse:
        mov     ebx, cr4
        mov     ecx, cr0
        or      ebx, CR4_OSFXSR+CR4_OSXMMEXPT
        mov     cr4, ebx                        ; must be set before ldmxcsr/xorps are legal

        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; The MXCSR image is pushed instead of being written to [esp-4]:
        ; memory below ESP is overwritten by any interrupt/exception frame
        ; that arrives on this stack between the store and the load.
        push    dword MXCSR_INIT
        ldmxcsr [esp]
        lea     esp, [esp+4]                    ; drop the temporary; lea leaves EFLAGS alone

        xorps   xmm0, xmm0
        xorps   xmm1, xmm1
        xorps   xmm2, xmm2
        xorps   xmm3, xmm3
        xorps   xmm4, xmm4
        xorps   xmm5, xmm5
        xorps   xmm6, xmm6
        xorps   xmm7, xmm7
        ret
7124 dunkaist 97
 
98
;-----------------------------------------------------------------------
; init_avx: enable SSE/AVX use and clear the vector registers.
; In:    nothing (only eax/edx-free code: the caller keeps a CPUID
;        bitmap in edx:eax across this call)
; Out:   CR4 |= CR4_OSFXSR + CR4_OSXMMEXPT
;        CR0 has CR0_EM and CR0_MP cleared, CR0_NE set
;        MXCSR = MXCSR_INIT, all vector registers zeroed by vzeroall
; Clobb: ebx (new CR4 value), ecx (new CR0 value), EFLAGS
; Stack: 4 bytes, released before return
;-----------------------------------------------------------------------
init_avx:
        mov     ebx, cr4
        or      ebx, CR4_OSFXSR + CR4_OSXMMEXPT
        mov     cr4, ebx

        mov     ecx, cr0
        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; The MXCSR image is pushed instead of being written to [esp-4]:
        ; memory below ESP is overwritten by any interrupt/exception frame
        ; that arrives on this stack between the store and the load.
        push    dword MXCSR_INIT
        vldmxcsr [esp]
        lea     esp, [esp+4]                    ; drop the temporary; lea leaves EFLAGS alone

        vzeroall
        ret
113
 
7124 dunkaist 114
;-----------------------------------------------------------------------
; init_avx512: enable SSE/AVX use and clear zmm0-zmm7.
; In:    nothing (only eax/edx-free code: the caller keeps a CPUID
;        bitmap in edx:eax across this call)
; Out:   CR4 |= CR4_OSFXSR + CR4_OSXMMEXPT
;        CR0 has CR0_EM and CR0_MP cleared, CR0_NE set
;        MXCSR = MXCSR_INIT, zmm0..zmm7 = 0
; Clobb: ebx (new CR4 value), ecx (new CR0 value), EFLAGS
; Stack: 4 bytes, released before return
;-----------------------------------------------------------------------
init_avx512:
        mov     ebx, cr4
        or      ebx, CR4_OSFXSR + CR4_OSXMMEXPT
        mov     cr4, ebx

        mov     ecx, cr0
        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; The MXCSR image is pushed instead of being written to [esp-4]:
        ; memory below ESP is overwritten by any interrupt/exception frame
        ; that arrives on this stack between the store and the load.
        push    dword MXCSR_INIT
        vldmxcsr [esp]
        lea     esp, [esp+4]                    ; drop the temporary; lea leaves EFLAGS alone

        vpxorq  zmm0, zmm0, zmm0
        vpxorq  zmm1, zmm1, zmm1
        vpxorq  zmm2, zmm2, zmm2
        vpxorq  zmm3, zmm3, zmm3
        vpxorq  zmm4, zmm4, zmm4
        vpxorq  zmm5, zmm5, zmm5
        vpxorq  zmm6, zmm6, zmm6
        vpxorq  zmm7, zmm7, zmm7

        ret
137
 
2288 clevermous 138
;-----------------------------------------------------------------------
; fpu_save: store the current task's FPU/SIMD state into a buffer.
; In:    eax = destination buffer. It must hold [xsave_area_size] bytes
;              (512 when XSAVE is not in use) and meet the alignment
;              demanded by the save instruction chosen in save_context.
; Out:   buffer filled with the current task's state.
;        If another slot owned the FPU, that slot's registers are first
;        saved to its APPDATA.fpu_state, [fpu_owner] becomes CURRENT_TASK,
;        the current task's stored image is copied to the buffer and the
;        x87 unit is reset with fninit.
; Clobb: eax when ownership changes; ecx, esi, edi and EFLAGS are preserved
;-----------------------------------------------------------------------
align 4
fpu_save:
        push    ecx
        push    esi
        push    edi

        pushfd
        cli                             ; interrupts stay off until popfd

        clts
        mov     edi, eax                ; edi = caller's buffer

        mov     ecx, [fpu_owner]
        mov     esi, [CURRENT_TASK]
        cmp     ecx, esi
        jne     .save

        call    save_context            ; registers belong to us: dump them straight to [eax]
        jmp     .exit
.save:
        mov     [fpu_owner], esi

        shl     ecx, 8                  ; previous owner's slot number -> offset from SLOT_BASE
        mov     eax, [ecx+SLOT_BASE+APPDATA.fpu_state]

        call    save_context            ; flush the previous owner's registers to its own area

        shl     esi, 8
        mov     esi, [esi+SLOT_BASE+APPDATA.fpu_state]
        ; Copy the whole state image, not just the 512-byte legacy part:
        ; with XSAVE active the image is [xsave_area_size] bytes long.
        mov     ecx, [xsave_area_size]
        add     ecx, 3
        shr     ecx, 2                  ; bytes -> dwords, rounded up
        cld
        rep movsd
        fninit
.exit:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret
180
 
181
;-----------------------------------------------------------------------
; save_context: dump the live FPU/SIMD registers to memory using the
; widest mechanism the kernel has enabled.
; In:    eax = save area (alignment/size as required by the instruction
;              that ends up being used: xsave, fxsave or fnsave)
; Out:   area filled; fnsave additionally re-initialises the x87 unit
; Clobb: EFLAGS (CF from bt); all general registers are preserved
;-----------------------------------------------------------------------
align 4
save_context:
        bt      [cpu_caps+(CAPS_OSXSAVE/8)], CAPS_OSXSAVE mod 8
        jnc     .no_xsave

        ; xsave takes its component mask from edx:eax, so the pointer has
        ; to live in another register. An all-ones mask is ANDed with XCR0
        ; by the CPU, i.e. every enabled component gets saved.
        push    eax
        push    ecx
        push    edx
        mov     ecx, eax                ; ecx -> save area
        mov     eax, -1
        mov     edx, eax                ; edx:eax = requested-feature mask
        xsave   [ecx]
        pop     edx
        pop     ecx
        pop     eax
        ret
.no_xsave:
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE

        fxsave  [eax]
        ret
.no_SSE:
        fnsave  [eax]
        ret
196
 
197
;-----------------------------------------------------------------------
; fpu_restore: load the current task's FPU/SIMD state from a buffer.
; In:    eax = source buffer previously filled by fpu_save
;              ([xsave_area_size] bytes; 512 when XSAVE is not in use)
; Out:   if the current task owns the FPU, the registers are reloaded
;        directly (xrstor / fxrstor / frstor);
;        otherwise the image is copied into the task's
;        APPDATA.fpu_state and no FPU instruction is executed here.
; Clobb: eax; ecx, edx, esi, edi and EFLAGS are preserved
;-----------------------------------------------------------------------
align 4
fpu_restore:
        push    ecx
        push    esi
        push    edi                     ; used as copy destination on the .copy path

        mov     esi, eax                ; esi = caller's buffer

        pushfd
        cli                             ; interrupts stay off until popfd

        mov     ecx, [fpu_owner]
        mov     eax, [CURRENT_TASK]
        cmp     ecx, eax
        jne     .copy

        clts
        bt      [cpu_caps+(CAPS_OSXSAVE/8)], CAPS_OSXSAVE mod 8
        jnc     .no_xsave

        ; xrstor takes its component mask from edx:eax; all ones is ANDed
        ; with XCR0 by the CPU, i.e. every enabled component is restored.
        push    edx
        mov     eax, -1
        mov     edx, eax
        xrstor  [esi]
        pop     edx
        jmp     .exit
.no_xsave:
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE

        fxrstor [esi]
        jmp     .exit
.no_SSE:
        fnclex                          ; fix possible problems
        frstor  [esi]
        jmp     .exit
.copy:
        shl     eax, 8                  ; current slot number -> offset from SLOT_BASE
        mov     edi, [eax+SLOT_BASE+APPDATA.fpu_state]
        ; Copy the whole state image, not just the 512-byte legacy part:
        ; with XSAVE active the image is [xsave_area_size] bytes long.
        mov     ecx, [xsave_area_size]
        add     ecx, 3
        shr     ecx, 2                  ; bytes -> dwords, rounded up
        cld
        rep movsd
.exit:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret
246
 
247
;-----------------------------------------------------------------------
; except_7: #NM (device not available) handler - lazy FPU context switch.
; If the faulting task is not the current FPU owner, the owner's
; registers are saved to its APPDATA.fpu_state, [fpu_owner] becomes
; CURRENT_TASK and the current task's stored state is loaded.
; The save/restore pair matches the mechanism used by save_context and
; fpu_restore: xsave/xrstor when CAPS_OSXSAVE is set, else
; fxsave/fxrstor when CAPS_SSE is set, else fnsave/frstor.
; Registers: eax and ebx are used as scratch between save_ring3_context
; and restore_ring3_context, exactly as before; edx is additionally
; needed for the XSAVE mask and is saved/restored explicitly here.
;-----------------------------------------------------------------------
align 4
except_7:                  ;#NM exception handler
        save_ring3_context
        clts
        mov     ax, app_data
        mov     ds, ax
        mov     es, ax

        mov     ebx, [fpu_owner]
        cmp     ebx, [CURRENT_TASK]
        je      .exit                   ; already the owner: nothing to switch

        shl     ebx, 8
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]  ; eax -> previous owner's area
        bt      [cpu_caps+(CAPS_OSXSAVE/8)], CAPS_OSXSAVE mod 8
        jc      .xsave
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE

        fxsave  [eax]
        mov     ebx, [CURRENT_TASK]
        mov     [fpu_owner], ebx
        shl     ebx, 8
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
        fxrstor [eax]
.exit:
        restore_ring3_context
        iret

.xsave:
        ; xsave/xrstor take their component mask from edx:eax, so the
        ; pointer is kept in ebx. All ones is ANDed with XCR0 by the CPU.
        push    edx
        mov     ebx, eax                ; ebx -> previous owner's area
        mov     eax, -1
        mov     edx, eax
        xsave   [ebx]
        mov     ebx, [CURRENT_TASK]
        mov     [fpu_owner], ebx
        shl     ebx, 8
        mov     ebx, [ebx+SLOT_BASE+APPDATA.fpu_state]
        xrstor  [ebx]                   ; edx:eax still hold the all-ones mask
        pop     edx
        jmp     .exit

.no_SSE:
        fnsave  [eax]
        mov     ebx, [CURRENT_TASK]
        mov     [fpu_owner], ebx
        shl     ebx, 8
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
        frstor  [eax]
        restore_ring3_context
        iret
283
 
284
; fpu_owner: slot number of the task whose state currently lives in the
; FPU registers. It is compared against CURRENT_TASK and, shifted left by
; 8, used as an offset from SLOT_BASE to reach APPDATA.fpu_state.
; NOTE(review): why the initial owner is slot 2 is not visible in this
; file - confirm against the task/slot setup code.
iglobal
3534 clevermous 285
  fpu_owner dd 2
2288 clevermous 286
endg