Rev 9715 | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;                                                              ;;
;; Copyright (C) KolibriOS team 2004-2024. All rights reserved. ;;
;; Distributed under terms of the GNU General Public License    ;;
;;                                                              ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
||
7 | |||
8 | |||
;-----------------------------------------------------------------------
; init_fpu
; Brings the FPU / SIMD units into a known initial state and stores an
; image of that state in fpu_data.
;
; Strategy, chosen from cpu_caps:
;   XSAVE present -> enable CR4.OSXSAVE, program XCR0, init the widest
;                    supported unit (AVX-512 > AVX > SSE), xsave
;   SSE only      -> init_sse, fxsave
;   neither       -> init_fpu_mmx, fnsave
;
; Out:   [xsave_area_size] = size of the state image in bytes
;        [xsave_eax]:[xsave_edx] = XCR0 mask in use (XSAVE path only)
; Clobb: general-purpose registers are not preserved (cpuid, rep stosd
;        and the init_* helpers overwrite eax, ebx, ecx, edx, edi);
;        DF is cleared on the XSAVE path.
;-----------------------------------------------------------------------
init_fpu:
        clts
        fninit

        bt      [cpu_caps+(CAPS_XSAVE/32)*4], CAPS_XSAVE mod 32
        jnc     .no_xsave

        mov     ecx, cr4
        or      ecx, CR4_OSXSAVE
        mov     cr4, ecx
        ; don't call cpuid again: record OSXSAVE directly in the cached caps
        bts     [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32

        ; zero xsave header (64 bytes right after the 512-byte legacy region)
        mov     ecx, 64/4
        xor     eax, eax
        mov     edi, fpu_data + 512     ; skip legacy region
        cld                             ; do not rely on the caller's DF
        rep stosd

        ; XCR0 = features reported by the CPU that the kernel handles
        mov     eax, 0x0d               ; extended state enumeration main leaf
        xor     ecx, ecx
        cpuid
        and     eax, XCR0_FPU_MMX + XCR0_SSE + XCR0_AVX + XCR0_AVX512
        xor     edx, edx
        mov     [xsave_eax], eax
        mov     [xsave_edx], edx
        xor     ecx, ecx                ; ecx = 0 selects XCR0
        xsetbv

        ; re-read the leaf: ebx now reflects the features just enabled
        mov     eax, 0x0d
        xor     ecx, ecx
        cpuid
        add     ebx, 63                 ; round up to a multiple of 64
        and     ebx, NOT 63
        mov     [xsave_area_size], ebx
        cmp     ebx, fpu_data_size
        ja      $                       ; state does not fit: hang here on purpose

        ; pick the widest register file available
        test    eax, XCR0_AVX512
        jnz     .avx512
        test    eax, XCR0_AVX
        jnz     .avx
        test    eax, XCR0_SSE
        jz      $                       ; XSAVE without SSE: hang here on purpose
        call    init_sse
        jmp     .xsave_image
.avx512:
        call    init_avx512
        jmp     .xsave_image
.avx:
        call    init_avx
.xsave_image:
        ; the init_* helpers and cpuid destroyed eax/edx: reload the mask
        mov     eax, [xsave_eax]
        mov     edx, [xsave_edx]
        xsave   [fpu_data]
        ret

.no_xsave:
        mov     [xsave_area_size], 512  ; enough for FPU/MMX and SSE
        bt      [cpu_caps], CAPS_SSE
        jnc     .fpu_mmx
.sse:
        call    init_sse
        fxsave  [fpu_data]
        ret
.fpu_mmx:
        call    init_fpu_mmx
        fnsave  [fpu_data]
        ret
||
2288 | clevermous | 82 | |
;-----------------------------------------------------------------------
; init_fpu_mmx
; Programs CR0 for x87/MMX use: clears EM, sets MP and NE.
; Clobb: ecx, flags
;-----------------------------------------------------------------------
init_fpu_mmx:
        mov     ecx, cr0
        and     ecx, not CR0_EM         ; drop the EM bit
        or      ecx, CR0_NE + CR0_MP    ; set the NE and MP bits
        mov     cr0, ecx
        ret
||
89 | |||
;-----------------------------------------------------------------------
; init_sse
; Enables SSE (CR4.OSFXSR, CR4.OSXMMEXPT), adjusts CR0 (EM and MP
; cleared, NE set), loads MXCSR with MXCSR_INIT and zeroes xmm0..xmm7.
; Clobb: ebx, ecx, flags
;-----------------------------------------------------------------------
init_sse:
        mov     ebx, cr4
        mov     ecx, cr0
        or      ebx, CR4_OSFXSR + CR4_OSXMMEXPT
        mov     cr4, ebx

        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; Stage the MXCSR value in a real stack slot. Memory below esp is
        ; not safe in ring 0: an interrupt or exception taken between the
        ; store and the load pushes its frame over it.
        push    MXCSR_INIT
        ldmxcsr [esp]
        lea     esp, [esp+4]            ; release the slot, flags untouched

        xorps   xmm0, xmm0
        xorps   xmm1, xmm1
        xorps   xmm2, xmm2
        xorps   xmm3, xmm3
        xorps   xmm4, xmm4
        xorps   xmm5, xmm5
        xorps   xmm6, xmm6
        xorps   xmm7, xmm7
        ret
||
7124 | dunkaist | 112 | |
;-----------------------------------------------------------------------
; init_avx
; Same control-register setup as init_sse (CR4.OSFXSR, CR4.OSXMMEXPT,
; CR0.EM and CR0.MP cleared, CR0.NE set), then loads MXCSR with
; MXCSR_INIT and clears the vector registers with vzeroall.
; Clobb: ebx, ecx, flags
;-----------------------------------------------------------------------
init_avx:
        mov     ebx, cr4
        or      ebx, CR4_OSFXSR + CR4_OSXMMEXPT
        mov     cr4, ebx

        mov     ecx, cr0
        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; Stage the MXCSR value in a real stack slot. Memory below esp is
        ; not safe in ring 0: an interrupt or exception taken between the
        ; store and the load pushes its frame over it.
        push    MXCSR_INIT
        vldmxcsr [esp]
        lea     esp, [esp+4]            ; release the slot, flags untouched

        vzeroall
        ret
128 | |||
;-----------------------------------------------------------------------
; init_avx512
; Same control-register setup as init_sse (CR4.OSFXSR, CR4.OSXMMEXPT,
; CR0.EM and CR0.MP cleared, CR0.NE set), then loads MXCSR with
; MXCSR_INIT and zeroes zmm0..zmm7.
; Clobb: ebx, ecx, flags
;-----------------------------------------------------------------------
init_avx512:
        mov     ebx, cr4
        or      ebx, CR4_OSFXSR + CR4_OSXMMEXPT
        mov     cr4, ebx

        mov     ecx, cr0
        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; Stage the MXCSR value in a real stack slot. Memory below esp is
        ; not safe in ring 0: an interrupt or exception taken between the
        ; store and the load pushes its frame over it.
        push    MXCSR_INIT
        vldmxcsr [esp]
        lea     esp, [esp+4]            ; release the slot, flags untouched

        vpxorq  zmm0, zmm0, zmm0
        vpxorq  zmm1, zmm1, zmm1
        vpxorq  zmm2, zmm2, zmm2
        vpxorq  zmm3, zmm3, zmm3
        vpxorq  zmm4, zmm4, zmm4
        vpxorq  zmm5, zmm5, zmm5
        vpxorq  zmm6, zmm6, zmm6
        vpxorq  zmm7, zmm7, zmm7

        ret
||
152 | |||
;-----------------------------------------------------------------------
; fpu_save
; Stores the current slot's FPU/SSE state (legacy 512-byte layout) into
; a caller-supplied buffer.
;
; In:    eax = 512 bytes memory area aligned on a 16-byte boundary
; Keeps: ecx, esi, edi, eflags (interrupts are disabled only inside)
; Note:  eax is overwritten when the current slot is not the FPU owner.
;-----------------------------------------------------------------------
align 4
fpu_save:
        push    ecx
        push    esi
        push    edi

        pushfd
        cli

        clts
        mov     edi, eax                ; edi = destination buffer

        mov     esi, [current_slot_idx]
        mov     ecx, [fpu_owner]
        cmp     ecx, esi
        je      .owner_is_current

        ; Another slot owns the registers: store its state in its own
        ; fpu_state area and make the current slot the owner.
        mov     [fpu_owner], esi

        shl     ecx, BSF sizeof.APPDATA ; slot index -> APPDATA offset
        mov     eax, [SLOT_BASE + ecx + APPDATA.fpu_state]
        call    save_context

        ; Hand the caller the image kept for the current slot.
        ; first 512 bytes of XSAVE area have the same format as FXSAVE
        shl     esi, BSF sizeof.APPDATA
        mov     esi, [SLOT_BASE + esi + APPDATA.fpu_state]
        mov     ecx, 512/4
        cld
        rep movsd
        fninit
.exit:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret

.owner_is_current:
        ; the registers already belong to the current slot: dump them
        ; straight into the caller's buffer
        call    save_fpu_context
        jmp     .exit
||
196 | |||
;-----------------------------------------------------------------------
; avx_save_size
; Out:   eax = [xsave_area_size], the buffer size in bytes expected by
;        avx_save / avx_restore
;-----------------------------------------------------------------------
avx_save_size:
        mov     eax, [xsave_area_size]
        ret
||
200 | |||
;-----------------------------------------------------------------------
; avx_save
; Stores the current slot's full extended state into a caller-supplied
; buffer.
;
; In:    eax = avx_save_size() bytes memory area aligned on a 64-byte
;              boundary
; Keeps: ecx, esi, edi, eflags (interrupts are disabled only inside)
; Note:  eax is overwritten when the current slot is not the FPU owner.
;-----------------------------------------------------------------------
align 4
avx_save:
        push    ecx
        push    esi
        push    edi

        pushfd
        cli

        clts
        mov     edi, eax                ; edi = destination buffer

        mov     ecx, [fpu_owner]
        mov     esi, [current_slot_idx]
        cmp     ecx, esi
        jne     .save

        ; the registers already belong to the current slot: dump them
        ; straight into the caller's buffer
        call    save_context
        jmp     .exit
.save:
        ; Another slot owns the registers: store its state in its own
        ; fpu_state area and make the current slot the owner.
        mov     [fpu_owner], esi

        shl     ecx, BSF sizeof.APPDATA ; slot index -> APPDATA offset
        mov     eax, [SLOT_BASE + ecx + APPDATA.fpu_state]
        call    save_context

        ; Hand the caller the image kept for the current slot.
        shl     esi, BSF sizeof.APPDATA
        mov     esi, [SLOT_BASE + esi + APPDATA.fpu_state]
        mov     ecx, [xsave_area_size]
        add     ecx, 3                  ; bytes -> dwords, rounded up
        shr     ecx, 2
        cld                             ; do not rely on the caller's DF
        rep movsd
        fninit
.exit:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret
||
244 | |||
;-----------------------------------------------------------------------
; save_context
; Writes the live FPU/SIMD register state to memory, using XSAVE when
; the OSXSAVE capability bit is set and deferring to save_fpu_context
; otherwise.
;
; In:    eax = destination area
; Keeps: eax, edx
; Clobb: ecx (XSAVE path only), flags
;-----------------------------------------------------------------------
align 4
save_context:
        bt      [cpu_caps + (CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
        jnc     save_fpu_context        ; no XSAVE: FXSAVE/FNSAVE path
        push    eax
        push    edx
        mov     ecx, eax                ; xsave needs edx:eax for the mask
        mov     edx, [xsave_edx]
        mov     eax, [xsave_eax]
        xsave   [ecx]
        pop     edx
        pop     eax
        ret
;-----------------------------------------------------------------------
; save_fpu_context
; Writes the live x87/SSE state to memory: FXSAVE when the SSE
; capability bit is set, FNSAVE otherwise.
;
; In:    eax = destination area
; Clobb: flags
;-----------------------------------------------------------------------
save_fpu_context:
        bt      [cpu_caps], CAPS_SSE
        jc      .have_sse
        fnsave  [eax]                   ; x87-only image
        ret
.have_sse:
        fxsave  [eax]
        ret
||
264 | |||
7168 | clevermous | 265 | |
;-----------------------------------------------------------------------
; fpu_restore
; Makes a saved 512-byte FPU/SSE image the state of the current slot.
; If the current slot owns the FPU, the image is loaded into the
; registers; otherwise it is copied into the slot's fpu_state area.
;
; In:    eax = source image (512 bytes)
; Keeps: ecx, esi, edi, eflags (interrupts are disabled only inside)
; Clobb: eax
;
; edi is saved here because the copy path uses it as the rep movsd
; destination; without that, the register was destroyed only when the
; caller was not the FPU owner.
;-----------------------------------------------------------------------
align 4
fpu_restore:
        push    ecx
        push    esi
        push    edi

        mov     esi, eax                ; esi = source image

        pushfd
        cli

        mov     ecx, [fpu_owner]
        mov     eax, [current_slot_idx]
        cmp     ecx, eax
        jne     .copy

        clts
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE

        fxrstor [esi]
        jmp     .done
.no_SSE:
        fnclex                          ;fix possible problems
        frstor  [esi]
        jmp     .done
.copy:
        ; not the owner: update the image kept in the slot's fpu_state area
        shl     eax, BSF sizeof.APPDATA ; slot index -> APPDATA offset
        mov     edi, [SLOT_BASE + eax + APPDATA.fpu_state]
        mov     ecx, 512/4
        cld
        rep movsd
.done:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret
||
307 | |||
;-----------------------------------------------------------------------
; avx_restore
; Makes a saved extended-state image the state of the current slot.
; If the current slot owns the FPU, the image is loaded into the
; registers (XRSTOR, FXRSTOR or FRSTOR depending on cpu_caps); otherwise
; it is copied into the slot's fpu_state area.
;
; In:    eax = source image ([xsave_area_size] bytes)
; Keeps: ecx, edx, esi, edi, eflags (interrupts are disabled only inside)
; Clobb: eax
;
; edi is saved here because the copy path uses it as the rep movsd
; destination; without that, the register was destroyed only when the
; caller was not the FPU owner.
;-----------------------------------------------------------------------
align 4
avx_restore:
        push    ecx
        push    esi
        push    edi

        mov     esi, eax                ; esi = source image

        pushfd
        cli

        mov     ecx, [fpu_owner]
        mov     eax, [current_slot_idx]
        cmp     ecx, eax
        jne     .copy

        clts
        bt      [cpu_caps + (CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
        jnc     .no_xsave
        push    edx
        mov     eax, [xsave_eax]        ; edx:eax = component mask
        mov     edx, [xsave_edx]
        xrstor  [esi]
        pop     edx
        jmp     .done
.no_xsave:
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE

        fxrstor [esi]
        jmp     .done
.no_SSE:
        fnclex                          ;fix possible problems
        frstor  [esi]
        jmp     .done
.copy:
        ; not the owner: update the image kept in the slot's fpu_state area
        shl     eax, BSF sizeof.APPDATA ; slot index -> APPDATA offset
        mov     edi, [SLOT_BASE + eax + APPDATA.fpu_state]
        mov     ecx, [xsave_area_size]
        add     ecx, 3                  ; bytes -> dwords, rounded up
        shr     ecx, 2
        cld
        rep movsd
.done:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret
||
363 | |||
;-----------------------------------------------------------------------
; except_7 - #NM exception handler
; Switches FPU ownership on demand: if the current slot is not the FPU
; owner, the register state is stored into the owner's fpu_state area,
; the current slot becomes the owner and its saved state is loaded.
; The save/restore instruction pair (XSAVE/XRSTOR, FXSAVE/FXRSTOR or
; FNSAVE/FRSTOR) is picked from cpu_caps.
; Register preservation is delegated to the save_ring3_context /
; restore_ring3_context macros (defined outside this file).
;-----------------------------------------------------------------------
align 4
except_7:                               ;#NM exception handler
        save_ring3_context
        clts
        mov     ax, app_data
        mov     ds, ax
        mov     es, ax

        mov     ebx, [fpu_owner]
        cmp     ebx, [current_slot_idx]
        je      .exit                   ; already the owner: nothing to switch

        shl     ebx, BSF sizeof.APPDATA ; slot index -> APPDATA offset
        mov     eax, [SLOT_BASE + ebx + APPDATA.fpu_state] ; eax = owner's area

        bt      [cpu_caps + (CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
        jc      .use_xsave
        bt      [cpu_caps], CAPS_SSE
        jc      .use_fxsave

        ; x87 only
        fnsave  [eax]
        mov     ebx, [current_slot_idx]
        mov     [fpu_owner], ebx
        shl     ebx, BSF sizeof.APPDATA
        mov     eax, [SLOT_BASE + ebx + APPDATA.fpu_state]
        frstor  [eax]
        jmp     .exit

.use_fxsave:
        fxsave  [eax]
        mov     ebx, [current_slot_idx]
        mov     [fpu_owner], ebx
        shl     ebx, BSF sizeof.APPDATA
        mov     eax, [SLOT_BASE + ebx + APPDATA.fpu_state]
        fxrstor [eax]
        jmp     .exit

.use_xsave:
        mov     ecx, eax                ; edx:eax is needed for the mask
        mov     eax, [xsave_eax]
        mov     edx, [xsave_edx]
        xsave   [ecx]
        mov     ebx, [current_slot_idx]
        mov     [fpu_owner], ebx
        shl     ebx, BSF sizeof.APPDATA
        mov     ecx, [SLOT_BASE + ebx + APPDATA.fpu_state]
        xrstor  [ecx]                   ; edx:eax still hold the mask

.exit:
        restore_ring3_context
        iret
||
414 | |||
iglobal
; Slot index of the task whose state currently lives in the FPU
; registers; compared against current_slot_idx by the save/restore
; routines and by the #NM handler.
; NOTE(review): the initial value 2 is presumably the first slot to run
; - confirm against the scheduler.
fpu_owner       dd 2
endg