Rev 8869 | Go to most recent revision | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 8869 | Rev 9715 | ||
---|---|---|---|
1 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
1 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
2 | ;; ;; |
2 | ;; ;; |
3 | ;; Copyright (C) KolibriOS team 2004-2017. All rights reserved. ;; |
3 | ;; Copyright (C) KolibriOS team 2004-2022. All rights reserved. ;; |
4 | ;; Distributed under terms of the GNU General Public License ;; |
4 | ;; Distributed under terms of the GNU General Public License ;; |
5 | ;; ;; |
5 | ;; ;; |
6 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
6 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
7 | 7 | ||
8 | $Revision: 8869 $ |
8 | $Revision: 9715 $ |
9 | 9 | ||
10 | 10 | ||
;-----------------------------------------------------------------------
; init_fpu
; Enables the widest FPU/SIMD state management the CPU offers
; (XSAVE -> FXSAVE -> FNSAVE), initialises the register file and stores
; the resulting clean image in fpu_data.
;
; Out:   [xsave_area_size] = size in bytes of one saved context
;        [xsave_eax]:[xsave_edx] = XSAVE/XRSTOR feature mask (XSAVE path)
;        CAPS_OSXSAVE set in cpu_caps when XSAVE has been enabled
; Clobb: eax, ebx, ecx, edx, edi, flags (DF is cleared on the XSAVE path)
; Note:  spins forever (ja $ / jz $) if the required area does not fit
;        in fpu_data_size or if XSAVE is present without SSE state.
;-----------------------------------------------------------------------
init_fpu:
        clts
        fninit

        bt      [cpu_caps+(CAPS_XSAVE/32)*4], CAPS_XSAVE mod 32
        jnc     .no_xsave

        mov     ecx, cr4
        or      ecx, CR4_OSXSAVE
        mov     cr4, ecx
        ; don't call cpuid again
        bts     [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32

        ; zero xsave header
        mov     edi, fpu_data + 512     ; skip legacy region
        mov     ecx, 64/4
        xor     eax, eax
        cld                             ; stosd must walk upwards
        rep stosd

        mov     eax, 0x0d               ; extended state enumeration main leaf
        xor     ecx, ecx
        cpuid
        ; keep only the state components this kernel knows how to handle
        and     eax, XCR0_FPU_MMX + XCR0_SSE + XCR0_AVX + XCR0_AVX512
        xor     edx, edx
        mov     [xsave_eax], eax
        mov     [xsave_edx], edx
        xor     ecx, ecx                ; XCR0
        xsetbv

        ; query the leaf again: ebx now reflects the components just enabled
        mov     eax, 0x0d
        xor     ecx, ecx
        cpuid
        add     ebx, 63                 ; round the size up to a multiple of 64
        and     ebx, NOT 63
        mov     [xsave_area_size], ebx
        cmp     ebx, fpu_data_size
        ja      $                       ; area would not fit: halt here

        ; initialise the widest available register set
        test    eax, XCR0_AVX512
        jz      .try_avx
        call    init_avx512
        jmp     .xsave_image
.try_avx:
        test    eax, XCR0_AVX
        jz      .try_sse
        call    init_avx
        jmp     .xsave_image
.try_sse:
        test    eax, XCR0_SSE
        jz      $                       ; XSAVE without SSE state: halt here
        call    init_sse
.xsave_image:
        ; store the freshly initialised state as the reference image
        mov     eax, [xsave_eax]
        mov     edx, [xsave_edx]
        xsave   [fpu_data]
        ret

.no_xsave:
        mov     [xsave_area_size], 512  ; enough for FPU/MMX and SSE
        bt      [cpu_caps], CAPS_SSE
        jnc     .fpu_mmx
.sse:
        call    init_sse
        fxsave  [fpu_data]
        ret
.fpu_mmx:
        call    init_fpu_mmx
        fnsave  [fpu_data]
        ret
84 | 84 | ||
;-----------------------------------------------------------------------
; init_fpu_mmx
; Programs CR0 for a plain x87/MMX unit: clears EM, sets MP and NE.
;
; Clobb: ecx, flags
;-----------------------------------------------------------------------
init_fpu_mmx:
        mov     ecx, cr0
        or      ecx, CR0_MP + CR0_NE
        and     ecx, not CR0_EM
        mov     cr0, ecx
        ret
91 | 91 | ||
;-----------------------------------------------------------------------
; init_sse
; Enables SSE in CR4/CR0, loads MXCSR with MXCSR_INIT and zeroes
; xmm0-xmm7.
;
; Clobb: ebx, ecx, flags, MXCSR, xmm0-xmm7
;-----------------------------------------------------------------------
init_sse:
        mov     ebx, cr4
        mov     ecx, cr0
        or      ebx, CR4_OSFXSR + CR4_OSXMMEXPT
        mov     cr4, ebx

        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; Stage MXCSR_INIT in a real stack slot. Memory below esp may be
        ; overwritten by an interrupt frame before ldmxcsr reads it.
        push    dword MXCSR_INIT
        ldmxcsr [esp]
        lea     esp, [esp + 4]          ; drop the slot without touching flags

        xorps   xmm0, xmm0
        xorps   xmm1, xmm1
        xorps   xmm2, xmm2
        xorps   xmm3, xmm3
        xorps   xmm4, xmm4
        xorps   xmm5, xmm5
        xorps   xmm6, xmm6
        xorps   xmm7, xmm7
        ret
114 | 114 | ||
;-----------------------------------------------------------------------
; init_avx
; Enables SSE/AVX in CR4/CR0, loads MXCSR with MXCSR_INIT and zeroes
; the vector registers with vzeroall.
;
; Clobb: ebx, ecx, flags, MXCSR, vector registers
;-----------------------------------------------------------------------
init_avx:
        mov     ebx, cr4
        or      ebx, CR4_OSFXSR + CR4_OSXMMEXPT
        mov     cr4, ebx

        mov     ecx, cr0
        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; Stage MXCSR_INIT in a real stack slot. Memory below esp may be
        ; overwritten by an interrupt frame before vldmxcsr reads it.
        push    dword MXCSR_INIT
        vldmxcsr [esp]
        lea     esp, [esp + 4]          ; drop the slot without touching flags

        vzeroall
        ret
130 | 130 | ||
;-----------------------------------------------------------------------
; init_avx512
; Enables SSE/AVX in CR4/CR0, loads MXCSR with MXCSR_INIT and zeroes
; zmm0-zmm7.
;
; Clobb: ebx, ecx, flags, MXCSR, zmm0-zmm7
;-----------------------------------------------------------------------
init_avx512:
        mov     ebx, cr4
        or      ebx, CR4_OSFXSR + CR4_OSXMMEXPT
        mov     cr4, ebx

        mov     ecx, cr0
        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; Stage MXCSR_INIT in a real stack slot. Memory below esp may be
        ; overwritten by an interrupt frame before vldmxcsr reads it.
        push    dword MXCSR_INIT
        vldmxcsr [esp]
        lea     esp, [esp + 4]          ; drop the slot without touching flags

        vpxorq  zmm0, zmm0, zmm0
        vpxorq  zmm1, zmm1, zmm1
        vpxorq  zmm2, zmm2, zmm2
        vpxorq  zmm3, zmm3, zmm3
        vpxorq  zmm4, zmm4, zmm4
        vpxorq  zmm5, zmm5, zmm5
        vpxorq  zmm6, zmm6, zmm6
        vpxorq  zmm7, zmm7, zmm7

        ret
154 | 154 | ||
;-----------------------------------------------------------------------
; fpu_save
; Stores the current slot's legacy (512-byte) FPU/SSE state into a
; caller-supplied buffer.
;
; In:    eax = 512 bytes memory area aligned on a 16-byte boundary
; Out:   buffer filled
; Keeps: ecx, esi, edi, eflags
; Note:  when the current slot does not own the FPU, the owner's live
;        registers are first flushed to its APPDATA.fpu_state, the
;        current slot becomes the owner, its saved image is copied to
;        the buffer and the x87 unit is reset with fninit. On that path
;        eax returns holding the previous owner's fpu_state pointer.
;-----------------------------------------------------------------------
align 4
fpu_save:
        push    ecx
        push    esi
        push    edi

        pushfd
        cli                             ; no task switch while juggling ownership

        mov     edi, eax                ; edi = destination buffer
        clts

        mov     esi, [current_slot_idx]
        mov     ecx, [fpu_owner]
        cmp     esi, ecx
        je      .owned

        ; registers belong to another slot: flush them to that slot first
        mov     [fpu_owner], esi
        shl     ecx, BSF sizeof.APPDATA
        mov     eax, [SLOT_BASE + ecx + APPDATA.fpu_state]
        call    save_context

        ; first 512 bytes of XSAVE area have the same format as FXSAVE
        shl     esi, BSF sizeof.APPDATA
        mov     esi, [SLOT_BASE + esi + APPDATA.fpu_state]
        mov     ecx, 512/4
        cld
        rep movsd
        fninit
.exit:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret

.owned:
        ; live registers are ours: dump them straight into the buffer
        call    save_fpu_context
        jmp     .exit
198 | 198 | ||
;-----------------------------------------------------------------------
; avx_save_size
; Out:   eax = [xsave_area_size], the number of bytes a buffer passed to
;        avx_save / avx_restore must provide
;-----------------------------------------------------------------------
avx_save_size:
        mov     eax, [xsave_area_size]
        ret
202 | 202 | ||
;-----------------------------------------------------------------------
; avx_save
; Stores the current slot's full extended state into a caller-supplied
; buffer.
;
; In:    eax = avx_save_size() bytes memory area aligned on a 64-byte
;              boundary
; Out:   buffer filled
; Keeps: esi, edi, eflags
; Note:  ecx is restored from the stack on exit. eax is overwritten with
;        the previous owner's fpu_state pointer when the current slot
;        did not own the FPU. In that case the owner's registers are
;        flushed to its APPDATA.fpu_state, the current slot becomes the
;        owner, its saved image is copied out and fninit is executed.
;-----------------------------------------------------------------------
align 4
avx_save:
        push    ecx
        push    esi
        push    edi

        pushfd
        cli

        clts
        mov     edi, eax                ; edi = destination buffer

        mov     ecx, [fpu_owner]
        mov     esi, [current_slot_idx]
        cmp     ecx, esi
        jne     .save

        ; live registers are ours: dump them straight into the buffer
        call    save_context
        jmp     .exit
.save:
        mov     [fpu_owner], esi

        ; flush the previous owner's registers to its own save area
        shl     ecx, BSF sizeof.APPDATA
        mov     eax, [SLOT_BASE + ecx + APPDATA.fpu_state]

        call    save_context

        ; hand the caller a copy of the current slot's stored image
        shl     esi, BSF sizeof.APPDATA
        mov     esi, [SLOT_BASE + esi + APPDATA.fpu_state]
        mov     ecx, [xsave_area_size]
        add     ecx, 3                  ; dword count, rounded up
        shr     ecx, 2
        cld                             ; copy must run upwards (as in fpu_save)
        rep movsd
        fninit
.exit:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret
246 | 246 | ||
;-----------------------------------------------------------------------
; save_context
; Writes the live FPU/SIMD registers to memory with the best available
; instruction. Uses XSAVE when CAPS_OSXSAVE is set, otherwise continues
; in save_fpu_context.
;
; In:    eax = destination save area
; Keeps: eax, edx
; Clobb: ecx (XSAVE path only), flags
;-----------------------------------------------------------------------
align 4
save_context:
        bt      [cpu_caps + (CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
        jnc     save_fpu_context        ; no XSAVE: legacy save, returns for us

        push    eax
        push    edx
        mov     ecx, eax                ; eax:edx are needed for the feature mask
        mov     edx, [xsave_edx]
        mov     eax, [xsave_eax]
        xsave   [ecx]
        pop     edx
        pop     eax
        ret
;-----------------------------------------------------------------------
; save_fpu_context
; Legacy state save: FXSAVE when the CPU reports SSE, FNSAVE otherwise.
;
; In:    eax = destination save area
; Clobb: flags
;-----------------------------------------------------------------------
save_fpu_context:
        bt      [cpu_caps], CAPS_SSE
        jc      .have_sse
        fnsave  [eax]
        ret
.have_sse:
        fxsave  [eax]
        ret
266 | 266 | ||
267 | 267 | ||
;-----------------------------------------------------------------------
; fpu_restore
; Loads a legacy (512-byte) FPU/SSE image for the current slot.
;
; In:    eax = source image (FXSAVE layout with SSE, FNSAVE layout
;              without)
; Keeps: ecx, esi, edi, eflags
; Clobb: eax
; Note:  if the current slot owns the FPU the image is loaded into the
;        registers. Otherwise it is copied into the slot's
;        APPDATA.fpu_state.
;-----------------------------------------------------------------------
align 4
fpu_restore:
        push    ecx
        push    esi
        push    edi                     ; .copy uses edi as destination pointer

        mov     esi, eax                ; esi = source image

        pushfd
        cli

        mov     ecx, [fpu_owner]
        mov     eax, [current_slot_idx]
        cmp     ecx, eax
        jne     .copy

        clts
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE

        fxrstor [esi]
        jmp     .exit
.no_SSE:
        fnclex                          ;fix possible problems
        frstor  [esi]
        jmp     .exit
.copy:
        ; not the owner: only update the stored image
        shl     eax, BSF sizeof.APPDATA
        mov     edi, [SLOT_BASE + eax + APPDATA.fpu_state]
        mov     ecx, 512/4
        cld
        rep movsd
.exit:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret
309 | 309 | ||
;-----------------------------------------------------------------------
; avx_restore
; Loads a full extended-state image for the current slot.
;
; In:    eax = source image of [xsave_area_size] bytes
; Keeps: ecx, edx, esi, edi, eflags
; Clobb: eax
; Note:  if the current slot owns the FPU the image is loaded into the
;        registers (XRSTOR, FXRSTOR or FRSTOR depending on CPU
;        capabilities). Otherwise it is copied into the slot's
;        APPDATA.fpu_state.
;-----------------------------------------------------------------------
align 4
avx_restore:
        push    ecx
        push    esi
        push    edi                     ; .copy uses edi as destination pointer

        mov     esi, eax                ; esi = source image

        pushfd
        cli

        mov     ecx, [fpu_owner]
        mov     eax, [current_slot_idx]
        cmp     ecx, eax
        jne     .copy

        clts
        bt      [cpu_caps + (CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
        jnc     .no_xsave
        push    edx
        mov     eax, [xsave_eax]
        mov     edx, [xsave_edx]
        xrstor  [esi]
        pop     edx
        jmp     .exit
.no_xsave:
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE

        fxrstor [esi]
        jmp     .exit
.no_SSE:
        fnclex                          ;fix possible problems
        frstor  [esi]
        jmp     .exit
.copy:
        ; not the owner: only update the stored image
        shl     eax, BSF sizeof.APPDATA
        mov     edi, [SLOT_BASE + eax + APPDATA.fpu_state]
        mov     ecx, [xsave_area_size]
        add     ecx, 3                  ; dword count, rounded up
        shr     ecx, 2
        cld
        rep movsd
.exit:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret
365 | 365 | ||
;-----------------------------------------------------------------------
; except_7 - #NM exception handler
; If the faulting slot is not the FPU owner, saves the live registers
; into the owner's APPDATA.fpu_state, records the current slot as the
; new owner and loads that slot's stored state. The save/restore pair is
; XSAVE/XRSTOR, FXSAVE/FXRSTOR or FNSAVE/FRSTOR, chosen from cpu_caps.
; Returns to the interrupted code with iret.
;-----------------------------------------------------------------------
align 4
except_7:                               ;#NM exception handler
        save_ring3_context
        clts
        mov     ax, app_data
        mov     ds, ax
        mov     es, ax

        mov     ebx, [fpu_owner]
        cmp     ebx, [current_slot_idx]
        je      .exit                   ; already ours: nothing to swap

        ; eax = save area of the slot whose state is in the registers
        shl     ebx, BSF sizeof.APPDATA
        mov     eax, [SLOT_BASE + ebx + APPDATA.fpu_state]
        bt      [cpu_caps + (CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
        jc      .swap_xsave
        bt      [cpu_caps], CAPS_SSE
        jc      .swap_fxsave

        ; x87 only
        fnsave  [eax]
        mov     ebx, [current_slot_idx]
        mov     [fpu_owner], ebx
        shl     ebx, BSF sizeof.APPDATA
        mov     eax, [SLOT_BASE + ebx + APPDATA.fpu_state]
        frstor  [eax]
        jmp     .exit

.swap_fxsave:
        fxsave  [eax]
        mov     ebx, [current_slot_idx]
        mov     [fpu_owner], ebx
        shl     ebx, BSF sizeof.APPDATA
        mov     eax, [SLOT_BASE + ebx + APPDATA.fpu_state]
        fxrstor [eax]
        jmp     .exit

.swap_xsave:
        mov     ecx, eax                ; eax:edx carry the feature mask
        mov     eax, [xsave_eax]
        mov     edx, [xsave_edx]
        xsave   [ecx]
        mov     ebx, [current_slot_idx]
        mov     [fpu_owner], ebx
        shl     ebx, BSF sizeof.APPDATA
        mov     ecx, [SLOT_BASE + ebx + APPDATA.fpu_state]
        xrstor  [ecx]                   ; same mask still in eax:edx
.exit:
        restore_ring3_context
        iret
416 | 416 | ||
; Slot index of the task whose state currently lives in the FPU/SIMD
; registers. It is compared against current_slot_idx by the save/restore
; routines and the #NM handler.
; NOTE(review): the initial value 2 presumably names the first slot
; that runs after boot - confirm.
iglobal
  fpu_owner dd 2
endg