Subversion Repositories Kolibri OS

Rev

Rev 7168 | Rev 7276 | Go to most recent revision | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 7168 Rev 7199
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;                                                              ;;
;; Copyright (C) KolibriOS team 2004-2017. All rights reserved. ;;
;; Distributed under terms of the GNU General Public License    ;;
;;                                                              ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

$Revision: 7199 $


;-----------------------------------------------------------------------
; init_fpu - bring up the FPU/SIMD unit and record a clean state image.
;
; Clears CR0.TS, resets the x87 unit, enables the register set selected
; below and stores the resulting state in [fpu_data].
;
; out:   [xsave_area_size] = size in bytes of one saved context
;        [fpu_data]        = saved image of the freshly initialised unit
; clobb: ecx always; ebx when the SSE path is taken; flags.
;        The (currently unreachable) XSAVE path also modifies
;        eax, ebx, ecx and edx through cpuid/xgetbv.
;
; NOTE: the XSAVE branch is deliberately disabled by the unconditional
;       jmp below; the code after it is kept for when it is re-enabled.
;-----------------------------------------------------------------------
init_fpu:
        clts
        fninit

        ; cpu_caps is addressed in bytes: dword index * 4
        bt      [cpu_caps+(CAPS_XSAVE/32)*4], CAPS_XSAVE mod 32
        jmp     .no_xsave       ; not ready to be jnc so far

        ; ---- XSAVE path (unreachable while the jmp above is in place) ----
        mov     ecx, cr4
        or      ecx, CR4_OSXSAVE
        mov     cr4, ecx

        ; CPUID leaf 0Dh, sub-leaf 0: eax = XCR0 bits the CPU supports
        mov     eax, 0x0d
        xor     ecx, ecx
        cpuid
        mov     ebx, XCR0_FPU_MMX + XCR0_SSE + XCR0_AVX + XCR0_AVX512
        and     ebx, eax                ; wanted AND supported
        xor     ecx, ecx
        xgetbv                          ; edx:eax = current XCR0
        or      eax, ebx
        xor     ecx, ecx
        xsetbv                          ; enable the selected components

        ; re-read leaf 0Dh: ebx = area size for the components now enabled
        mov     eax, 0x0d
        xor     ecx, ecx
        cpuid
        mov     [xsave_area_size], ebx
        cmp     ebx, fpu_data_size
        ja      $                       ; hang here: required area exceeds fpu_data_size

        ; edx:eax still hold the cpuid output and act as the xsave mask;
        ; init_avx512/init_avx only touch ebx and ecx.
        test    eax, XCR0_AVX512
        jz      @f
        call    init_avx512
        xsave   [fpu_data]
        ret
@@:
        test    eax, XCR0_AVX
        jz      @f
        call    init_avx
        xsave   [fpu_data]
        ret
@@:
        test    eax, XCR0_SSE
        jnz     .sse
        jmp     .fpu_mmx

        ; ---- legacy path: FXSAVE (SSE) or FNSAVE (x87/MMX only) ----
.no_xsave:
        mov     [xsave_area_size], 512  ; enough for FPU/MMX and SSE
        bt      [cpu_caps], CAPS_SSE
        jnc     .fpu_mmx
.sse:
        call    init_sse
        fxsave  [fpu_data]
        ret
.fpu_mmx:
        call    init_fpu_mmx
        fnsave  [fpu_data]
        ret

;-----------------------------------------------------------------------
; init_fpu_mmx - configure CR0 for x87/MMX-only operation.
;
; Clears CR0.EM and sets CR0.MP and CR0.NE.
; clobb: ecx, flags
;-----------------------------------------------------------------------
init_fpu_mmx:
        mov     ecx, cr0
        and     ecx, not CR0_EM
        or      ecx, CR0_MP + CR0_NE
        mov     cr0, ecx
        ret

;-----------------------------------------------------------------------
; init_sse - enable SSE and put it into a known state.
;
; Sets CR4.OSFXSR and CR4.OSXMMEXPT, clears CR0.EM and CR0.MP, sets
; CR0.NE, loads MXCSR with MXCSR_INIT and zeroes xmm0..xmm7.
; clobb: ebx, ecx, flags, xmm0-xmm7, MXCSR
;-----------------------------------------------------------------------
init_sse:
        mov     ebx, cr4
        mov     ecx, cr0
        or      ebx, CR4_OSFXSR+CR4_OSXMMEXPT
        mov     cr4, ebx

        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; Stage the MXCSR value in a real stack slot. Memory below esp is
        ; not protected in ring 0: an interrupt arriving between the store
        ; and the load would overwrite it with its own stack frame.
        push    dword MXCSR_INIT
        ldmxcsr [esp]
        lea     esp, [esp+4]            ; drop the slot without touching flags

        xorps   xmm0, xmm0
        xorps   xmm1, xmm1
        xorps   xmm2, xmm2
        xorps   xmm3, xmm3
        xorps   xmm4, xmm4
        xorps   xmm5, xmm5
        xorps   xmm6, xmm6
        xorps   xmm7, xmm7
        ret

;-----------------------------------------------------------------------
; init_avx - enable SSE/AVX and put the vector unit into a known state.
;
; Sets CR4.OSFXSR and CR4.OSXMMEXPT, clears CR0.EM and CR0.MP, sets
; CR0.NE, loads MXCSR with MXCSR_INIT and zeroes the vector registers
; with vzeroall.
; clobb: ebx, ecx, flags, vector registers, MXCSR
;-----------------------------------------------------------------------
init_avx:
        mov     ebx, cr4
        or      ebx, CR4_OSFXSR + CR4_OSXMMEXPT
        mov     cr4, ebx

        mov     ecx, cr0
        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; Stage the MXCSR value in a real stack slot. Memory below esp is
        ; not protected in ring 0: an interrupt arriving between the store
        ; and the load would overwrite it with its own stack frame.
        push    dword MXCSR_INIT
        vldmxcsr [esp]
        lea     esp, [esp+4]            ; drop the slot without touching flags

        vzeroall
        ret

;-----------------------------------------------------------------------
; init_avx512 - enable SSE/AVX-512 and put the unit into a known state.
;
; Sets CR4.OSFXSR and CR4.OSXMMEXPT, clears CR0.EM and CR0.MP, sets
; CR0.NE, loads MXCSR with MXCSR_INIT and zeroes zmm0..zmm7.
; clobb: ebx, ecx, flags, zmm0-zmm7, MXCSR
;-----------------------------------------------------------------------
init_avx512:
        mov     ebx, cr4
        or      ebx, CR4_OSFXSR + CR4_OSXMMEXPT
        mov     cr4, ebx

        mov     ecx, cr0
        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; Stage the MXCSR value in a real stack slot. Memory below esp is
        ; not protected in ring 0: an interrupt arriving between the store
        ; and the load would overwrite it with its own stack frame.
        push    dword MXCSR_INIT
        vldmxcsr [esp]
        lea     esp, [esp+4]            ; drop the slot without touching flags

        vpxorq  zmm0, zmm0, zmm0
        vpxorq  zmm1, zmm1, zmm1
        vpxorq  zmm2, zmm2, zmm2
        vpxorq  zmm3, zmm3, zmm3
        vpxorq  zmm4, zmm4, zmm4
        vpxorq  zmm5, zmm5, zmm5
        vpxorq  zmm6, zmm6, zmm6
        vpxorq  zmm7, zmm7, zmm7

        ret

;-----------------------------------------------------------------------
; fpu_save - store the current task's FPU/SSE state into a caller buffer.
;
; param
;  eax= 512 bytes memory area aligned on a 16-byte boundary
;
; If the current task owns the FPU, the live registers are written
; straight to the buffer. Otherwise the owner's live state is first
; saved to the owner's APPDATA.fpu_state, the current task becomes the
; owner, its stored 512-byte image is copied to the buffer and the FPU
; is reset with fninit.
;
; Runs with interrupts disabled; EFLAGS are restored on exit.
; preserved: ecx, esi, edi, EFLAGS
; clobb:     eax (holds the previous owner's fpu_state pointer when an
;            ownership switch happened), CR0.TS is cleared
;-----------------------------------------------------------------------
align 4
fpu_save:
        push    ecx
        push    esi
        push    edi

        pushfd
        cli

        clts
        mov     edi, eax                ; edi = destination buffer

        mov     ecx, [fpu_owner]
        mov     esi, [CURRENT_TASK]
        cmp     ecx, esi
        jne     .save

        ; current task owns the FPU: dump the live registers to [eax]
        call    save_fpu_context
        jmp     .exit
.save:
        mov     [fpu_owner], esi

        ; park the previous owner's registers in its own slot
        ; (slot address = slot number * 256 + SLOT_BASE)
        shl     ecx, 8
        mov     eax, [ecx+SLOT_BASE+APPDATA.fpu_state]

        call    save_context

; first 512 bytes of XSAVE area have the same format as FXSAVE
        shl     esi, 8
        mov     esi, [esi+SLOT_BASE+APPDATA.fpu_state]
        mov     ecx, 512/4
        cld
        rep movsd
        fninit
.exit:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret

;-----------------------------------------------------------------------
; avx_save_size - report how large a buffer avx_save/avx_restore need.
;
; out: eax = [xsave_area_size], in bytes
;-----------------------------------------------------------------------
avx_save_size:
        mov     eax, [xsave_area_size]
        ret

;-----------------------------------------------------------------------
; avx_save - store the current task's extended state into a caller buffer.
;
; param
;  eax= avx_save_size() bytes memory area aligned on a 64-byte boundary
;
; If the current task owns the FPU, the live registers are written
; straight to the buffer via save_context. Otherwise the owner's live
; state is first saved to the owner's APPDATA.fpu_state, the current
; task becomes the owner, its stored image ([xsave_area_size] bytes,
; rounded up to whole dwords) is copied to the buffer and the FPU is
; reset with fninit.
;
; Runs with interrupts disabled; EFLAGS are restored on exit.
; preserved: ecx, esi, edi, EFLAGS
; clobb:     eax (holds the previous owner's fpu_state pointer when an
;            ownership switch happened), CR0.TS is cleared
;-----------------------------------------------------------------------
avx_save:
        push    ecx
        push    esi
        push    edi

        pushfd
        cli

        clts
        mov     edi, eax                ; edi = destination buffer

        mov     ecx, [fpu_owner]
        mov     esi, [CURRENT_TASK]
        cmp     ecx, esi
        jne     .save

        ; current task owns the FPU: dump the live registers to [eax]
        call    save_context
        jmp     .exit
.save:
        mov     [fpu_owner], esi

        ; park the previous owner's registers in its own slot
        ; (slot address = slot number * 256 + SLOT_BASE)
        shl     ecx, 8
        mov     eax, [ecx+SLOT_BASE+APPDATA.fpu_state]

        call    save_context

        shl     esi, 8
        mov     esi, [esi+SLOT_BASE+APPDATA.fpu_state]
        mov     ecx, [xsave_area_size]
        add     ecx, 3                  ; round byte count up to dwords
        shr     ecx, 2
        cld                             ; caller's DF is unknown; copy must run forward
        rep movsd
        fninit
.exit:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret

;-----------------------------------------------------------------------
; save_context - write the live FPU/SIMD registers to a save area.
;
; in:  eax = save area (must satisfy the alignment of the instruction
;            that ends up being used: xsave, fxsave or fnsave)
;
; Uses xsave when the OSXSAVE bit is set in cpu_caps, otherwise jumps
; to save_fpu_context.
; preserved: all general-purpose registers
; clobb:     flags (CF from bt)
;-----------------------------------------------------------------------
align 4
save_context:
        ; cpu_caps is addressed in bytes: dword index * 4
        bt      [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
        jnc     save_fpu_context

        ; xsave takes its component mask from edx:eax, so the buffer
        ; pointer must live in another register and the mask must be
        ; set explicitly (all ones = every component enabled in XCR0).
        push    edx
        push    eax
        push    ecx
        mov     ecx, eax                ; ecx = save area
        mov     eax, -1
        mov     edx, eax                ; edx:eax = all components
        xsave   [ecx]
        pop     ecx
        pop     eax
        pop     edx
        ret
;-----------------------------------------------------------------------
; save_fpu_context - write the live x87/SSE registers to a save area.
;
; in:  eax = save area
;
; Uses fxsave when the SSE bit is set in cpu_caps, fnsave otherwise.
; preserved: all general-purpose registers
; clobb:     flags (CF from bt)
;-----------------------------------------------------------------------
save_fpu_context:
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE
        fxsave  [eax]
        ret
.no_SSE:
        fnsave  [eax]
        ret


;-----------------------------------------------------------------------
; fpu_restore - load the current task's FPU/SSE state from a buffer.
;
; in:  eax = 512-byte state image (as produced by fpu_save)
;
; If the current task owns the FPU, the image is loaded into the
; registers (fxrstor, or frstor when SSE is absent). Otherwise the
; first 512 bytes are copied into the task's APPDATA.fpu_state.
;
; Runs with interrupts disabled; EFLAGS are restored on exit.
; preserved: ecx, esi, edi, EFLAGS
; clobb:     eax; CR0.TS is cleared on the direct-load path
;-----------------------------------------------------------------------
align 4
fpu_restore:
        push    ecx
        push    esi
        push    edi                     ; used as copy destination in .copy

        mov     esi, eax                ; esi = source image

        pushfd
        cli

        mov     ecx, [fpu_owner]
        mov     eax, [CURRENT_TASK]
        cmp     ecx, eax
        jne     .copy

        ; current task owns the FPU: load the registers directly
        clts
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE

        fxrstor [esi]
        jmp     .exit
.no_SSE:
        fnclex                  ;fix possible problems
        frstor  [esi]
        jmp     .exit
.copy:
        ; another task owns the FPU: update the stored image only
        ; (slot address = slot number * 256 + SLOT_BASE)
        shl     eax, 8
        mov     edi, [eax+SLOT_BASE+APPDATA.fpu_state]
        mov     ecx, 512/4
        cld
        rep movsd
.exit:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret

;-----------------------------------------------------------------------
; avx_restore - load the current task's extended state from a buffer.
;
; in:  eax = state image of avx_save_size() bytes (as produced by
;            avx_save)
;
; If the current task owns the FPU, the image is loaded into the
; registers (xrstor when the OSXSAVE bit is set in cpu_caps, else
; fxrstor, else frstor). Otherwise [xsave_area_size] bytes, rounded up
; to whole dwords, are copied into the task's APPDATA.fpu_state.
;
; Runs with interrupts disabled; EFLAGS are restored on exit.
; preserved: ecx, edx, esi, edi, EFLAGS
; clobb:     eax; CR0.TS is cleared on the direct-load path
;-----------------------------------------------------------------------
avx_restore:
        push    ecx
        push    esi
        push    edi                     ; used as copy destination in .copy

        mov     esi, eax                ; esi = source image

        pushfd
        cli

        mov     ecx, [fpu_owner]
        mov     eax, [CURRENT_TASK]
        cmp     ecx, eax
        jne     .copy

        ; current task owns the FPU: load the registers directly
        clts
        ; cpu_caps is addressed in bytes: dword index * 4
        bt      [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
        jnc     .no_xsave

        ; xrstor takes its component mask from edx:eax; request every
        ; component instead of whatever the registers happen to hold.
        push    eax
        push    edx
        mov     eax, -1
        mov     edx, eax
        xrstor  [esi]
        pop     edx
        pop     eax
        jmp     .exit
.no_xsave:
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE

        fxrstor [esi]
        jmp     .exit
.no_SSE:
        fnclex                  ;fix possible problems
        frstor  [esi]
        jmp     .exit
.copy:
        ; another task owns the FPU: update the stored image only
        ; (slot address = slot number * 256 + SLOT_BASE)
        shl     eax, 8
        mov     edi, [eax+SLOT_BASE+APPDATA.fpu_state]
        mov     ecx, [xsave_area_size]
        add     ecx, 3                  ; round byte count up to dwords
        shr     ecx, 2
        cld
        rep movsd
.exit:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret

;-----------------------------------------------------------------------
; except_7 - #NM (device not available) exception handler.
;
; Performs the deferred FPU context switch: if the FPU is owned by a
; task other than the current one, the owner's registers are saved to
; its APPDATA.fpu_state, ownership moves to the current task and the
; current task's stored state is loaded. The save/load instruction
; pair is xsave/xrstor, fxsave/fxrstor or fnsave/frstor depending on
; the OSXSAVE and SSE bits in cpu_caps.
;
; save_ring3_context / restore_ring3_context are macros defined
; elsewhere - presumably they save and restore the interrupted task's
; registers; confirm before relying on which registers are covered.
;-----------------------------------------------------------------------
align 4
except_7:                  ;#NM exception handler
        save_ring3_context
        clts
        mov     ax, app_data
        mov     ds, ax
        mov     es, ax

        mov     ebx, [fpu_owner]
        cmp     ebx, [CURRENT_TASK]
        je      .exit                   ; already the owner: nothing to switch

        ; eax = previous owner's save area
        ; (slot address = slot number * 256 + SLOT_BASE)
        shl     ebx, 8
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
        ; cpu_caps is addressed in bytes: dword index * 4
        bt      [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
        jnc     .no_xsave

        ; xsave/xrstor take their component mask from edx:eax, so keep
        ; the area pointer in ebx and request every component.
        push    edx
        mov     ebx, eax                ; ebx = previous owner's save area
        mov     eax, -1
        mov     edx, eax                ; edx:eax = all components
        xsave   [ebx]
        mov     ebx, [CURRENT_TASK]
        mov     [fpu_owner], ebx
        shl     ebx, 8
        mov     ebx, [ebx+SLOT_BASE+APPDATA.fpu_state]
        xrstor  [ebx]                   ; mask in edx:eax is still all ones
        pop     edx
.exit:
        restore_ring3_context
        iret
.no_xsave:
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE

        fxsave  [eax]
        mov     ebx, [CURRENT_TASK]
        mov     [fpu_owner], ebx
        shl     ebx, 8
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
        fxrstor [eax]
        restore_ring3_context
        iret

.no_SSE:
        fnsave  [eax]
        mov     ebx, [CURRENT_TASK]
        mov     [fpu_owner], ebx
        shl     ebx, 8
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
        frstor  [eax]
        restore_ring3_context
        iret

; fpu_owner: slot number of the task whose state currently lives in the
; FPU registers. It is compared against [CURRENT_TASK] by the save,
; restore and #NM routines in this file. Initial value is 2.
iglobal
  fpu_owner dd 2
endg