Subversion Repositories Kolibri OS

Rev

Rev 7165 | Rev 7199 | Go to most recent revision | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 7165 Rev 7168
1
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
;;                                                              ;;
2
;;                                                              ;;
3
;; Copyright (C) KolibriOS team 2004-2017. All rights reserved. ;;
3
;; Copyright (C) KolibriOS team 2004-2017. All rights reserved. ;;
4
;; Distributed under terms of the GNU General Public License    ;;
4
;; Distributed under terms of the GNU General Public License    ;;
5
;;                                                              ;;
5
;;                                                              ;;
6
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7
 
7
 
8
$Revision: 7165 $
8
$Revision: 7168 $
9
 
9
 
10
 
10
 
11
;-----------------------------------------------------------------
; init_fpu
;   Detects the richest FPU/SIMD state-management facility of the
;   CPU (XSAVE with AVX-512 / AVX / SSE, plain FXSAVE, or legacy
;   FNSAVE), enables it, and stores an image of the freshly
;   initialised state in [fpu_data].
;
; In:    nothing
; Out:   [xsave_area_size] = size of one saved-state image in bytes
;        [fpu_data]        = image of the initial state
; Clobb: eax, ebx, ecx, edx, flags (CPUID / XGETBV / helper calls)
;-----------------------------------------------------------------
init_fpu:
        clts
        fninit

        ; cpu_caps is indexed here in 32-bit units, so the byte offset
        ; of the dword holding a flag is (flag/32)*4, not flag/32.
        bt      [cpu_caps+(CAPS_XSAVE/32)*4], CAPS_XSAVE mod 32
        jnc     .no_xsave

        ; let XGETBV/XSETBV/XSAVE execute
        mov     ecx, cr4
        or      ecx, CR4_OSXSAVE
        mov     cr4, ecx

        ; CPUID.(EAX=0Dh,ECX=0): EAX = low dword of the XCR0 bits the CPU supports
        mov     eax, 0x0d
        xor     ecx, ecx
        cpuid
        mov     ebx, XCR0_FPU_MMX + XCR0_SSE + XCR0_AVX + XCR0_AVX512
        and     ebx, eax                ; keep only the wanted features that exist

        ; XCR0 |= wanted-and-supported features
        xor     ecx, ecx
        xgetbv
        or      eax, ebx
        xor     ecx, ecx
        xsetbv

        ; query again: EBX = save-area size for the features now enabled in XCR0
        mov     eax, 0x0d
        xor     ecx, ecx
        cpuid
        mov     [xsave_area_size], ebx
        cmp     ebx, fpu_data_size
        ja      $                       ; area would not fit: spin here forever

        ; From here EDX:EAX still hold CPUID's supported-feature bitmap;
        ; the init_* helpers below do not touch them, so the pair also
        ; serves as the XSAVE instruction mask.
        test    eax, XCR0_AVX512
        jz      .try_avx
        call    init_avx512
        xsave   [fpu_data]
        ret
.try_avx:
        test    eax, XCR0_AVX
        jz      .try_sse
        call    init_avx
        xsave   [fpu_data]
        ret
.try_sse:
        test    eax, XCR0_SSE
        jnz     .sse
        jmp     .fpu_mmx
.no_xsave:
        mov     [xsave_area_size], 512  ; enough for FPU/MMX and SSE
        bt      [cpu_caps], CAPS_SSE
        jnc     .fpu_mmx
.sse:
        call    init_sse
        fxsave  [fpu_data]
        ret
.fpu_mmx:
        call    init_fpu_mmx
        fnsave  [fpu_data]
        ret
67
 
67
 
68
;-----------------------------------------------------------------
; init_fpu_mmx
;   Programs CR0 for a CPU that is driven through FNSAVE/FRSTOR only:
;   clears CR0_EM, sets CR0_MP and CR0_NE.
;
; Clobb: ecx, flags
;-----------------------------------------------------------------
init_fpu_mmx:
        mov     ecx, cr0
        and     ecx, not CR0_EM         ; drop the EM bit
        or      ecx, CR0_MP + CR0_NE    ; raise MP and NE
        mov     cr0, ecx
        ret
74
 
74
 
75
;-----------------------------------------------------------------
; init_sse
;   Enables SSE: sets CR4_OSFXSR and CR4_OSXMMEXPT, clears CR0_EM and
;   CR0_MP, sets CR0_NE, loads MXCSR with MXCSR_INIT and zeroes
;   xmm0..xmm7.
;
; Clobb: ebx (CR4 value), ecx (CR0 value), flags, MXCSR, xmm0-xmm7
;-----------------------------------------------------------------
init_sse:
        mov     ebx, cr4
        mov     ecx, cr0
        or      ebx, CR4_OSFXSR+CR4_OSXMMEXPT
        mov     cr4, ebx

        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; Stage MXCSR_INIT in a real stack slot. Memory below ESP may be
        ; overwritten by an interrupt/NMI frame pushed on this same stack
        ; between the store and the load.
        push    dword MXCSR_INIT
        ldmxcsr [esp]
        lea     esp, [esp+4]            ; release the slot without touching flags

        xorps   xmm0, xmm0
        xorps   xmm1, xmm1
        xorps   xmm2, xmm2
        xorps   xmm3, xmm3
        xorps   xmm4, xmm4
        xorps   xmm5, xmm5
        xorps   xmm6, xmm6
        xorps   xmm7, xmm7
        ret
97
 
97
 
98
;-----------------------------------------------------------------
; init_avx
;   Same CR4/CR0 setup as init_sse, then loads MXCSR with MXCSR_INIT
;   and clears the vector registers with VZEROALL.
;
; Clobb: ebx (CR4 value), ecx (CR0 value), flags, MXCSR, vector regs
;-----------------------------------------------------------------
init_avx:
        mov     ebx, cr4
        or      ebx, CR4_OSFXSR + CR4_OSXMMEXPT
        mov     cr4, ebx

        mov     ecx, cr0
        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; Stage MXCSR_INIT in a real stack slot. Memory below ESP may be
        ; overwritten by an interrupt/NMI frame pushed on this same stack
        ; between the store and the load.
        push    dword MXCSR_INIT
        vldmxcsr [esp]
        lea     esp, [esp+4]            ; release the slot without touching flags

        vzeroall
        ret
113
 
113
 
114
;-----------------------------------------------------------------
; init_avx512
;   Same CR4/CR0 setup as init_sse, then loads MXCSR with MXCSR_INIT
;   and zeroes zmm0..zmm7.
;
; Clobb: ebx (CR4 value), ecx (CR0 value), flags, MXCSR, zmm0-zmm7
;-----------------------------------------------------------------
init_avx512:
        mov     ebx, cr4
        or      ebx, CR4_OSFXSR + CR4_OSXMMEXPT
        mov     cr4, ebx

        mov     ecx, cr0
        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; Stage MXCSR_INIT in a real stack slot. Memory below ESP may be
        ; overwritten by an interrupt/NMI frame pushed on this same stack
        ; between the store and the load.
        push    dword MXCSR_INIT
        vldmxcsr [esp]
        lea     esp, [esp+4]            ; release the slot without touching flags

        vpxorq  zmm0, zmm0, zmm0
        vpxorq  zmm1, zmm1, zmm1
        vpxorq  zmm2, zmm2, zmm2
        vpxorq  zmm3, zmm3, zmm3
        vpxorq  zmm4, zmm4, zmm4
        vpxorq  zmm5, zmm5, zmm5
        vpxorq  zmm6, zmm6, zmm6
        vpxorq  zmm7, zmm7, zmm7

        ret
137
 
137
 
138
;-----------------------------------------------------------------
; fpu_save
;   Writes the current task's FPU/SSE state (FXSAVE/FNSAVE format)
;   into a caller-supplied buffer.
;
; param
;  eax= 512 bytes memory area aligned on a 16-byte boundary
;
; Preserves ecx, esi, edi and EFLAGS; eax is overwritten on the
; path where the current task does not own the FPU.
;-----------------------------------------------------------------

align 4
fpu_save:
        push    ecx
        push    esi
        push    edi

        pushfd                          ; interrupt flag is put back by popfd
        cli

        clts
        mov     edi, eax                ; edi = caller's buffer (copy destination)

        mov     esi, [CURRENT_TASK]
        mov     ecx, [fpu_owner]
        cmp     ecx, esi
        je      .is_owner

        ; Another slot owns the FPU: take ownership, store the old owner's
        ; live state into its own save area, then hand the caller a copy of
        ; the current task's stored image.
        mov     [fpu_owner], esi

        shl     ecx, 8
        mov     eax, [ecx+SLOT_BASE+APPDATA.fpu_state]

        call    save_context

; first 512 bytes of XSAVE area have the same format as FXSAVE
        shl     esi, 8
        mov     esi, [esi+SLOT_BASE+APPDATA.fpu_state]
        mov     ecx, 512/4
        cld
        rep movsd
        fninit
.exit:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret

.is_owner:
        ; The live registers already belong to the current task:
        ; dump them straight into the caller's buffer (eax).
        call    save_fpu_context
        jmp     .exit
-
 
181
 
-
 
182
;-----------------------------------------------------------------
; avx_save_size
;   Returns the buffer size that avx_save / avx_restore operate on.
;
; Out:   eax = [xsave_area_size]
;-----------------------------------------------------------------
avx_save_size:
        mov     eax, [xsave_area_size]
        ret
-
 
185
 
-
 
186
;-----------------------------------------------------------------
; avx_save
;   Writes the current task's complete extended state into a
;   caller-supplied buffer.
;
; param
;  eax= avx_save_size() bytes memory area aligned on a 64-byte boundary
;
; Preserves ecx, esi, edi and EFLAGS; eax is overwritten on the
; path where the current task does not own the FPU.
;-----------------------------------------------------------------

avx_save:
        push    ecx
        push    esi
        push    edi

        pushfd                          ; interrupt flag is put back by popfd
        cli

        clts
        mov     edi, eax                ; edi = caller's buffer (copy destination)

        mov     ecx, [fpu_owner]
        mov     esi, [CURRENT_TASK]
        cmp     ecx, esi
        jne     .save

        ; current task owns the FPU: dump the live state into the buffer
        call    save_context
        jmp     .exit
.save:
        mov     [fpu_owner], esi

        ; store the previous owner's live state into its own save area
        shl     ecx, 8
        mov     eax, [ecx+SLOT_BASE+APPDATA.fpu_state]

        call    save_context

        ; copy the current task's stored image, rounded up to whole dwords
        shl     esi, 8
        mov     esi, [esi+SLOT_BASE+APPDATA.fpu_state]
        mov     ecx, [xsave_area_size]
        add     ecx, 3
        shr     ecx, 2
        cld                             ; copy forward regardless of the caller's DF
        rep movsd
        fninit
.exit:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret
180
 
228
 
181
;-----------------------------------------------------------------
; save_context
;   Stores the live FPU/SIMD state at [eax] with the best available
;   instruction: XSAVE when CAPS_OSXSAVE is set, otherwise falls into
;   save_fpu_context.
;
; save_fpu_context
;   Stores the legacy state at [eax]: FXSAVE when CAPS_SSE is set,
;   FNSAVE otherwise.
;
; In:    eax = destination save area
; Clobb: flags only (all general registers are preserved)
;-----------------------------------------------------------------
align 4
save_context:
        ; cpu_caps is indexed here in 32-bit units, so the byte offset
        ; of the dword holding a flag is (flag/32)*4, not flag/32.
        bt      [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
        jnc     save_fpu_context

        ; XSAVE takes its requested-feature bitmap from EDX:EAX, so the
        ; pointer has to live in another register while the mask is all ones.
        push    ecx
        push    edx
        mov     ecx, eax
        mov     eax, -1
        mov     edx, -1
        xsave   [ecx]
        mov     eax, ecx                ; give the caller its pointer back
        pop     edx
        pop     ecx
        ret
save_fpu_context:
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE
        fxsave  [eax]
        ret
.no_SSE:
        fnsave  [eax]
        ret
-
 
243
 
196
 
244
 
197
;-----------------------------------------------------------------
; fpu_restore
;   Loads FPU/SSE state for the current task from a caller-supplied
;   image. If the current task owns the FPU the image is loaded into
;   the registers; otherwise it is copied into the task's save area.
;
; param
;  eax= 512 bytes memory area (16-byte aligned when FXRSTOR is used)
;
; Preserves ecx, esi, edi and EFLAGS; eax is overwritten.
;-----------------------------------------------------------------
align 4
fpu_restore:
        push    ecx
        push    esi
        push    edi                     ; .copy uses edi as the copy destination

        mov     esi, eax                ; esi = source image

        pushfd                          ; interrupt flag is put back by popfd
        cli

        mov     ecx, [fpu_owner]
        mov     eax, [CURRENT_TASK]
        cmp     ecx, eax
        jne     .copy

        clts
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE

        fxrstor [esi]
        jmp     .exit
.no_SSE:
        fnclex                  ;fix possible problems
        frstor  [esi]
        jmp     .exit
.copy:
        shl     eax, 8
        mov     edi, [eax+SLOT_BASE+APPDATA.fpu_state]
        mov     ecx, 512/4
        cld
        rep movsd
.exit:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret
-
 
286
 
-
 
287
;-----------------------------------------------------------------
; avx_restore
;   Loads the complete extended state for the current task from a
;   caller-supplied image. If the current task owns the FPU the image
;   is loaded into the registers (XRSTOR / FXRSTOR / FRSTOR);
;   otherwise it is copied into the task's save area.
;
; param
;  eax= memory area of [xsave_area_size] bytes holding the image
;
; Preserves ecx, edx, esi, edi and EFLAGS; eax is overwritten.
;-----------------------------------------------------------------
avx_restore:
        push    ecx
        push    edx                     ; edx carries the XRSTOR mask below
        push    esi
        push    edi                     ; .copy uses edi as the copy destination

        mov     esi, eax                ; esi = source image

        pushfd                          ; interrupt flag is put back by popfd
        cli

        mov     ecx, [fpu_owner]
        mov     eax, [CURRENT_TASK]
        cmp     ecx, eax
        jne     .copy

        clts
        ; cpu_caps is indexed here in 32-bit units, so the byte offset
        ; of the dword holding a flag is (flag/32)*4, not flag/32.
        bt      [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
        jnc     .no_xsave

        ; XRSTOR takes its requested-feature bitmap from EDX:EAX:
        ; request every component.
        mov     eax, -1
        mov     edx, -1
        xrstor  [esi]
        jmp     .exit
.no_xsave:
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE

        fxrstor [esi]
        jmp     .exit
.no_SSE:
        fnclex                  ;fix possible problems
        frstor  [esi]
        jmp     .exit
.copy:
        shl     eax, 8
        mov     edi, [eax+SLOT_BASE+APPDATA.fpu_state]
        mov     ecx, [xsave_area_size]
        add     ecx, 3                  ; round the byte count up to whole dwords
        shr     ecx, 2
        cld
        rep movsd
.exit:
        popfd
        pop     edi
        pop     esi
        pop     edx
        pop     ecx
        ret
246
 
337
 
247
;-----------------------------------------------------------------
; except_7 - #NM exception handler
;   If the current task does not own the FPU, stores the owner's live
;   state into its save area, makes the current task the owner and
;   loads the current task's stored state. The save/load instruction
;   pair is chosen from the CPU capabilities: XSAVE/XRSTOR,
;   FXSAVE/FXRSTOR or FNSAVE/FRSTOR.
;-----------------------------------------------------------------
align 4
except_7:                  ;#NM exception handler
        save_ring3_context
        clts
        mov     ax, app_data
        mov     ds, ax
        mov     es, ax

        mov     ebx, [fpu_owner]
        cmp     ebx, [CURRENT_TASK]
        je      .exit                   ; state in the registers is already ours

        shl     ebx, 8
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
        ; cpu_caps is indexed here in 32-bit units, so the byte offset
        ; of the dword holding a flag is (flag/32)*4, not flag/32.
        bt      [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
        jnc     .no_xsave

        ; XSAVE/XRSTOR take their requested-feature bitmap from EDX:EAX,
        ; so the area pointer moves to ebx and the mask is set to all ones.
        push    edx
        mov     ebx, eax
        mov     eax, -1
        mov     edx, -1
        xsave   [ebx]
        mov     ebx, [CURRENT_TASK]
        mov     [fpu_owner], ebx
        shl     ebx, 8
        mov     ebx, [ebx+SLOT_BASE+APPDATA.fpu_state]
        xrstor  [ebx]                   ; EDX:EAX still request every component
        pop     edx
.exit:
        restore_ring3_context
        iret
.no_xsave:
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE

        fxsave  [eax]
        mov     ebx, [CURRENT_TASK]
        mov     [fpu_owner], ebx
        shl     ebx, 8
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
        fxrstor [eax]
        restore_ring3_context
        iret

.no_SSE:
        fnsave  [eax]
        mov     ebx, [CURRENT_TASK]
        mov     [fpu_owner], ebx
        shl     ebx, 8
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
        frstor  [eax]
        restore_ring3_context
        iret
283
 
385
 
284
; fpu_owner: task slot number that the routines in this file compare
; against [CURRENT_TASK] and scale by 256 to reach that slot's
; APPDATA.fpu_state pointer under SLOT_BASE. Initial value is 2.
iglobal
  fpu_owner dd 2
endg