Rev 7124 | Rev 7165 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
2288 | clevermous | 1 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
2 | ;; ;; |
||
7124 | dunkaist | 3 | ;; Copyright (C) KolibriOS team 2004-2017. All rights reserved. ;; |
2288 | clevermous | 4 | ;; Distributed under terms of the GNU General Public License ;; |
5 | ;; ;; |
||
6 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
||
7 | |||
8 | $Revision: 7164 $ |
||
9 | |||
10 | |||
;-----------------------------------------------------------------------
; init_fpu: one-time initialisation of the x87/MMX/SSE/AVX units.
;
; Syntax: FASM, 32-bit protected mode, ring 0.
; In:     nothing
; Out:    [xsave_area_size] = size of the state image
;                             (512 without XSAVE, CPUID.0Dh:EBX with it)
;         [fpu_data]        = image of the freshly initialised state
; Clobb:  eax, ebx, ecx, edx, flags (plus whatever the init_* helpers touch)
;
; NOTE(review): save_context and fpu_restore test CAPS_OSXSAVE in
; cpu_caps. This routine sets CR4.OSXSAVE but does not update cpu_caps;
; confirm that bit is refreshed elsewhere.
;-----------------------------------------------------------------------
init_fpu:
        clts
        fninit

        bt      [cpu_caps+(CAPS_XSAVE/8)], CAPS_XSAVE mod 8
        jnc     .no_xsave

        ; let the CPU accept XGETBV/XSETBV/XSAVE/XRSTOR
        mov     ecx, cr4
        or      ecx, CR4_OSXSAVE
        mov     cr4, ecx

        ; CPUID.0Dh, sub-leaf 0: eax = XCR0 bits this CPU supports
        mov     eax, 0x0d
        xor     ecx, ecx
        cpuid
        mov     ebx, XCR0_FPU_MMX + XCR0_SSE + XCR0_AVX + XCR0_AVX512
        and     ebx, eax                ; wanted AND supported
        xor     ecx, ecx                ; XCR0
        xgetbv
        or      eax, ebx                ; keep bits that are already enabled
        xor     ecx, ecx
        xsetbv

        ; query again: ebx now reflects the features enabled in XCR0
        mov     eax, 0x0d
        xor     ecx, ecx
        cpuid
        mov     [xsave_area_size], ebx

        ; pick the widest vector extension available
        test    eax, XCR0_AVX512
        jz      .try_avx
        call    init_avx512
        jmp     .store_fx
.try_avx:
        test    eax, XCR0_AVX
        jz      .try_sse
        call    init_avx
        jmp     .store_fx
.try_sse:
        test    eax, XCR0_SSE
        jnz     .sse
        jmp     .fpu_mmx

.no_xsave:
        mov     [xsave_area_size], 512  ; enough for FPU/MMX and SSE
        bt      [cpu_caps], CAPS_SSE
        jnc     .fpu_mmx
.sse:
        call    init_sse
.store_fx:
        ; The AVX paths used to return without writing fpu_data at all,
        ; unlike the SSE and x87 paths. FXSAVE captures the legacy
        ; x87/SSE region (control word, MXCSR, st/xmm registers).
        ; NOTE(review): if fpu_data must hold a complete XSAVE image,
        ; replace this with XSAVE plus an explicit EDX:EAX mask once the
        ; size and 64-byte alignment of fpu_data are confirmed.
        fxsave  [fpu_data]
        ret

.fpu_mmx:
        call    init_fpu_mmx
        fnsave  [fpu_data]
        ret
||
2288 | clevermous | 63 | |
;-----------------------------------------------------------------------
; init_fpu_mmx: program CR0 for a CPU with x87/MMX only.
; Clears CR0.EM, sets CR0.MP and CR0.NE.
; Clobb: ecx, flags
;-----------------------------------------------------------------------
init_fpu_mmx:
        mov     ecx, cr0
        or      ecx, CR0_MP + CR0_NE    ; monitor coprocessor, native x87 errors
        and     ecx, not CR0_EM         ; no emulation: execute FPU code natively
        mov     cr0, ecx
        ret
||
70 | |||
;-----------------------------------------------------------------------
; init_sse: enable SSE and put it into a known state.
; Sets CR4.OSFXSR and CR4.OSXMMEXPT, clears CR0.EM and CR0.MP, sets
; CR0.NE, loads MXCSR with MXCSR_INIT and zeroes xmm0..xmm7.
; Clobb: ebx, ecx, flags, MXCSR, xmm0-xmm7
;-----------------------------------------------------------------------
init_sse:
        mov     ebx, cr4
        mov     ecx, cr0
        or      ebx, CR4_OSFXSR+CR4_OSXMMEXPT
        mov     cr4, ebx

        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; Stage the MXCSR value in a real stack slot. Ring-0 code has no
        ; red zone: an NMI or exception taken on this stack would
        ; overwrite anything stored below esp before ldmxcsr reads it.
        push    dword MXCSR_INIT
        ldmxcsr [esp]
        lea     esp, [esp+4]            ; drop the slot without touching flags

        xorps   xmm0, xmm0
        xorps   xmm1, xmm1
        xorps   xmm2, xmm2
        xorps   xmm3, xmm3
        xorps   xmm4, xmm4
        xorps   xmm5, xmm5
        xorps   xmm6, xmm6
        xorps   xmm7, xmm7
        ret
||
7124 | dunkaist | 93 | |
;-----------------------------------------------------------------------
; init_avx: enable SSE/AVX execution and put it into a known state.
; Sets CR4.OSFXSR and CR4.OSXMMEXPT, clears CR0.EM and CR0.MP, sets
; CR0.NE, loads MXCSR with MXCSR_INIT and zeroes the vector registers
; with vzeroall.
; Clobb: ebx, ecx, flags, MXCSR, vector registers
;-----------------------------------------------------------------------
init_avx:
        mov     ebx, cr4
        or      ebx, CR4_OSFXSR + CR4_OSXMMEXPT
        mov     cr4, ebx

        mov     ecx, cr0
        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; Stage the MXCSR value in a real stack slot. Ring-0 code has no
        ; red zone: an NMI or exception taken on this stack would
        ; overwrite anything stored below esp before vldmxcsr reads it.
        push    dword MXCSR_INIT
        vldmxcsr [esp]
        lea     esp, [esp+4]            ; drop the slot without touching flags

        vzeroall
        ret
109 | |||
;-----------------------------------------------------------------------
; init_avx512: enable SSE/AVX execution and clear zmm0..zmm7.
; Sets CR4.OSFXSR and CR4.OSXMMEXPT, clears CR0.EM and CR0.MP, sets
; CR0.NE and loads MXCSR with MXCSR_INIT.
; Clobb: ebx, ecx, flags, MXCSR, zmm0-zmm7
;-----------------------------------------------------------------------
init_avx512:
        mov     ebx, cr4
        or      ebx, CR4_OSFXSR + CR4_OSXMMEXPT
        mov     cr4, ebx

        mov     ecx, cr0
        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; Stage the MXCSR value in a real stack slot. Ring-0 code has no
        ; red zone: an NMI or exception taken on this stack would
        ; overwrite anything stored below esp before vldmxcsr reads it.
        push    dword MXCSR_INIT
        vldmxcsr [esp]
        lea     esp, [esp+4]            ; drop the slot without touching flags

        vpxorq  zmm0, zmm0, zmm0
        vpxorq  zmm1, zmm1, zmm1
        vpxorq  zmm2, zmm2, zmm2
        vpxorq  zmm3, zmm3, zmm3
        vpxorq  zmm4, zmm4, zmm4
        vpxorq  zmm5, zmm5, zmm5
        vpxorq  zmm6, zmm6, zmm6
        vpxorq  zmm7, zmm7, zmm7

        ret
||
133 | |||
;-----------------------------------------------------------------------
; fpu_save: hand the current task's FPU state to the caller.
;
; In:    eax = 512 bytes memory area that receives the state
; Out:   buffer at the incoming eax filled
; Keeps: ecx, esi, edi, eflags (interrupts are off for the duration)
; Clobb: eax on the "other task owns the FPU" path
;
; If the current task owns the FPU, the live registers are stored
; straight into the buffer. Otherwise the live registers are first
; stored into the owner's APPDATA.fpu_state, the current task becomes
; the owner, 512 bytes of its stored state are copied to the buffer,
; and the x87 unit is reset with fninit.
;-----------------------------------------------------------------------

align 4
fpu_save:
        push    ecx esi edi
        pushfd
        cli

        clts
        mov     edi, eax                ; edi = caller's buffer

        mov     ecx, [fpu_owner]        ; ecx = slot that owns the FPU
        mov     esi, [CURRENT_TASK]     ; esi = slot of the running task
        cmp     ecx, esi
        je      .own_state

        ; somebody else owns the hardware state: park it in their slot
        mov     [fpu_owner], esi
        shl     ecx, 8                  ; slot number -> offset from SLOT_BASE
        mov     eax, [ecx+SLOT_BASE+APPDATA.fpu_state]
        call    save_context

        ; then hand out the current task's stored image
        shl     esi, 8
        mov     esi, [esi+SLOT_BASE+APPDATA.fpu_state]
        mov     ecx, 512/4
        cld
        rep movsd
        fninit
        jmp     .done

.own_state:
        call    save_context            ; eax still points at the buffer
.done:
        popfd
        pop     edi esi ecx
        ret
||
176 | |||
;-----------------------------------------------------------------------
; save_context: store the live FPU/SIMD registers to memory using the
; best instruction available (xsave, fxsave or fnsave).
;
; In:    eax = destination area
;              (xsave needs 64-byte alignment, fxsave 16-byte)
; Out:   area at [eax] filled
; Keeps: all general-purpose registers
; Clobb: flags (bt)
;
; NOTE(review): xsave updates only XSTATE_BV in the header; the rest of
; the 64-byte XSAVE header is assumed to have been zeroed by whoever
; allocated the area. Confirm against the allocator.
;-----------------------------------------------------------------------
align 4
save_context:
        bt      [cpu_caps+(CAPS_OSXSAVE/8)], CAPS_OSXSAVE mod 8
        jnc     .no_xsave

        ; XSAVE takes the component bitmap in EDX:EAX, so the pointer
        ; cannot stay in eax: a 64-byte-aligned address has its low
        ; bits clear and would deselect x87/SSE/AVX. Request every
        ; component; the CPU masks the request with XCR0.
        push    eax ecx edx
        mov     ecx, eax                ; ecx = destination
        mov     eax, -1
        mov     edx, eax                ; edx:eax = all components
        xsave   [ecx]
        pop     edx ecx eax
        ret

.no_xsave:
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE

        fxsave  [eax]
        ret

.no_SSE:
        fnsave  [eax]
        ret
||
192 | |||
;-----------------------------------------------------------------------
; fpu_restore: install an FPU state image for the current task.
;
; In:    eax = source state image
; Keeps: ecx, edx, esi, edi, eflags (interrupts are off for the duration)
; Clobb: eax
;
; If the current task owns the FPU, the image is loaded into the
; hardware registers (xrstor, fxrstor or frstor). Otherwise 512 bytes
; of it are copied into the task's APPDATA.fpu_state.
;-----------------------------------------------------------------------
align 4
fpu_restore:
        push    ecx
        push    esi
        push    edi                     ; the copy path uses edi as destination

        mov     esi, eax                ; esi = source image

        pushfd
        cli

        mov     ecx, [fpu_owner]
        mov     eax, [CURRENT_TASK]
        cmp     ecx, eax
        jne     .copy

        clts
        bt      [cpu_caps+(CAPS_OSXSAVE/8)], CAPS_OSXSAVE mod 8
        jnc     .no_xsave

        ; XRSTOR takes the component bitmap in EDX:EAX. Request every
        ; component (the CPU masks the request with XCR0) instead of
        ; using the slot number that happens to be in eax.
        push    eax edx
        mov     eax, -1
        mov     edx, eax
        xrstor  [esi]
        pop     edx eax
        jmp     .exit

.no_xsave:
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE

        fxrstor [esi]
        jmp     .exit

.no_SSE:
        fnclex                          ;fix possible problems
        frstor  [esi]
        jmp     .exit

.copy:
        shl     eax, 8                  ; slot number -> offset from SLOT_BASE
        mov     edi, [eax+SLOT_BASE+APPDATA.fpu_state]
        mov     ecx, 512/4
        cld
        rep movsd

.exit:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret
||
242 | |||
;-----------------------------------------------------------------------
; except_7: #NM (device not available) exception handler.
;
; If another slot owns the FPU, its registers are stored into that
; slot's APPDATA.fpu_state, the current task is recorded as owner, and
; the current task's stored state is loaded. If the current task is
; already the owner, only CR0.TS is cleared (clts).
;
; NOTE(review): this handler always uses fxsave/fxrstor or
; fnsave/frstor, while save_context and fpu_restore may use
; xsave/xrstor. Confirm the two image formats are never mixed for the
; same area.
;-----------------------------------------------------------------------
align 4
except_7:                               ;#NM exception handler
        save_ring3_context
        clts
        mov     ax, app_data
        mov     ds, ax
        mov     es, ax

        mov     ebx, [fpu_owner]
        cmp     ebx, [CURRENT_TASK]
        je      .exit                   ; current task already owns the FPU

        shl     ebx, 8                  ; slot number -> offset from SLOT_BASE
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
        bt      [cpu_caps], CAPS_SSE
        jc      .with_sse

        ; x87-only CPU
        fnsave  [eax]
        mov     ebx, [CURRENT_TASK]
        mov     [fpu_owner], ebx
        shl     ebx, 8
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
        frstor  [eax]
        jmp     .exit

.with_sse:
        fxsave  [eax]
        mov     ebx, [CURRENT_TASK]
        mov     [fpu_owner], ebx
        shl     ebx, 8
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
        fxrstor [eax]

.exit:
        restore_ring3_context
        iret
||
279 | |||
iglobal
; Slot number of the task whose state is currently in the FPU registers.
; It is compared against [CURRENT_TASK] and used to index SLOT_BASE.
; NOTE(review): the initial value 2 is presumably the first slot that
; runs after boot; confirm against the scheduler.
fpu_owner       dd 2
endg