Subversion Repositories Kolibri OS

Rev

Rev 5363 | Rev 7164 | Go to most recent revision | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 5363 Rev 7124
1
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
;;                                                              ;;
2
;;                                                              ;;
3
;; Copyright (C) KolibriOS team 2004-2015. All rights reserved. ;;
3
;; Copyright (C) KolibriOS team 2004-2017. All rights reserved. ;;
4
;; Distributed under terms of the GNU General Public License    ;;
4
;; Distributed under terms of the GNU General Public License    ;;
5
;;                                                              ;;
5
;;                                                              ;;
6
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7
 
7
 
8
$Revision: 5363 $
8
$Revision: 7124 $
9
 
9
 
10
 
10
 
11
init_fpu:
11
init_fpu:
12
        clts
12
        clts
13
        fninit
13
        fninit
-
 
14
 
-
 
15
        bt      [cpu_caps+(CAPS_XSAVE/8)], CAPS_XSAVE mod 8
-
 
16
        jnc     .no_xsave
-
 
17
 
-
 
18
        mov     ecx, cr4
-
 
19
        or      ecx, CR4_OSXSAVE
-
 
20
        mov     cr4, ecx
-
 
21
 
-
 
22
        mov     eax, 0x0d
-
 
23
        xor     ecx, ecx
-
 
24
        cpuid
-
 
25
        mov     ebx, XCR0_FPU_MMX + XCR0_SSE + XCR0_AVX + XCR0_AVX512
-
 
26
        and     ebx, eax
-
 
27
        xor     ecx, ecx
-
 
28
        xgetbv
-
 
29
        or      eax, ebx
-
 
30
        xor     ecx, ecx
-
 
31
        xsetbv
-
 
32
 
-
 
33
        mov     eax, 0x0d
-
 
34
        xor     ecx, ecx
-
 
35
        cpuid
-
 
36
        mov     [xsave_area_size], ebx
-
 
37
 
-
 
38
        test    eax, XCR0_AVX512
-
 
39
        jz      @f
-
 
40
        call    init_avx512
-
 
41
        ret
-
 
42
@@:
-
 
43
        test    eax, XCR0_AVX
-
 
44
        jz      @f
-
 
45
        call    init_avx
-
 
46
        ret
-
 
47
@@:
-
 
48
        test    eax, XCR0_SSE
-
 
49
        jz      @f
-
 
50
        call    init_sse
-
 
51
        ret
-
 
52
@@:
-
 
53
        call    init_fpu_mmx
-
 
54
        ret
-
 
55
.no_xsave:
14
 
56
        mov     [xsave_area_size], 512  ; enough for FPU/MMX and SSE
15
        bt      [cpu_caps], CAPS_SSE
57
        bt      [cpu_caps], CAPS_SSE
-
 
58
        jnc     @f
-
 
59
        call    init_sse
-
 
60
        fxsave  [fpu_data]
-
 
61
        ret
-
 
62
@@:
-
 
63
        call    init_fpu_mmx
-
 
64
        fnsave  [fpu_data]
-
 
65
        ret
-
 
66
 
-
 
67
init_fpu_mmx:
-
 
68
        mov     ecx, cr0
-
 
69
        and     ecx, not CR0_EM
-
 
70
        or      ecx, CR0_MP + CR0_NE
-
 
71
        mov     cr0, ecx
-
 
72
        ret
16
        jnc     .no_SSE
73
 
17
 
74
init_sse:
18
        mov     ebx, cr4
75
        mov     ebx, cr4
19
        mov     ecx, cr0
76
        mov     ecx, cr0
20
        or      ebx, CR4_OSFXSR+CR4_OSXMMEXPT
77
        or      ebx, CR4_OSFXSR+CR4_OSXMMEXPT
21
        mov     cr4, ebx
78
        mov     cr4, ebx
22
 
79
 
23
        and     ecx, not (CR0_MP+CR0_EM)
80
        and     ecx, not (CR0_EM + CR0_MP)
24
        or      ecx, CR0_NE
81
        or      ecx, CR0_NE
25
        mov     cr0, ecx
82
        mov     cr0, ecx
26
 
83
 
27
        mov     dword [esp-4], SSE_INIT
84
        mov     dword [esp-4], MXCSR_INIT
28
        ldmxcsr [esp-4]
85
        ldmxcsr [esp-4]
29
 
86
 
30
        xorps   xmm0, xmm0
87
        xorps   xmm0, xmm0
31
        xorps   xmm1, xmm1
88
        xorps   xmm1, xmm1
32
        xorps   xmm2, xmm2
89
        xorps   xmm2, xmm2
33
        xorps   xmm3, xmm3
90
        xorps   xmm3, xmm3
34
        xorps   xmm4, xmm4
91
        xorps   xmm4, xmm4
35
        xorps   xmm5, xmm5
92
        xorps   xmm5, xmm5
36
        xorps   xmm6, xmm6
93
        xorps   xmm6, xmm6
37
        xorps   xmm7, xmm7
94
        xorps   xmm7, xmm7
38
        fxsave  [fpu_data]    ;[eax]
-
 
39
        ret
95
        ret
-
 
96
 
40
.no_SSE:
97
init_avx:
-
 
98
        mov     ebx, cr4
-
 
99
        or      ebx, CR4_OSFXSR + CR4_OSXMMEXPT
-
 
100
        mov     cr4, ebx
-
 
101
 
41
        mov     ecx, cr0
102
        mov     ecx, cr0
42
        and     ecx, not CR0_EM
103
        and     ecx, not (CR0_EM + CR0_MP)
43
        or      ecx, CR0_MP+CR0_NE
104
        or      ecx, CR0_NE
44
        mov     cr0, ecx
105
        mov     cr0, ecx
-
 
106
 
-
 
107
        mov     dword [esp-4], MXCSR_INIT
-
 
108
        vldmxcsr [esp-4]
-
 
109
 
-
 
110
        vzeroall
-
 
111
        ret
-
 
112
 
-
 
113
init_avx512:
-
 
114
        mov     ebx, cr4
-
 
115
        or      ebx, CR4_OSFXSR + CR4_OSXMMEXPT
-
 
116
        mov     cr4, ebx
-
 
117
 
45
        fnsave  [fpu_data]
118
        mov     ecx, cr0
-
 
119
        and     ecx, not (CR0_EM + CR0_MP)
-
 
120
        or      ecx, CR0_NE
-
 
121
        mov     cr0, ecx
-
 
122
 
-
 
123
        mov     dword [esp-4], MXCSR_INIT
-
 
124
        vldmxcsr [esp-4]
-
 
125
 
-
 
126
        vpxorq  zmm0, zmm0, zmm0
-
 
127
        vpxorq  zmm1, zmm1, zmm1
-
 
128
        vpxorq  zmm2, zmm2, zmm2
-
 
129
        vpxorq  zmm3, zmm3, zmm3
-
 
130
        vpxorq  zmm4, zmm4, zmm4
-
 
131
        vpxorq  zmm5, zmm5, zmm5
-
 
132
        vpxorq  zmm6, zmm6, zmm6
-
 
133
        vpxorq  zmm7, zmm7, zmm7
-
 
134
 
46
        ret
135
        ret
47
 
136
 
48
; param
137
; param
49
;  eax= 512 bytes memory area
138
;  eax= 512 bytes memory area
50
 
139
 
51
align 4
140
align 4
52
fpu_save:
141
fpu_save:
53
        push    ecx
142
        push    ecx
54
        push    esi
143
        push    esi
55
        push    edi
144
        push    edi
56
 
145
 
57
        pushfd
146
        pushfd
58
        cli
147
        cli
59
 
148
 
60
        clts
149
        clts
61
        mov     edi, eax
150
        mov     edi, eax
62
 
151
 
63
        mov     ecx, [fpu_owner]
152
        mov     ecx, [fpu_owner]
64
        mov     esi, [CURRENT_TASK]
153
        mov     esi, [CURRENT_TASK]
65
        cmp     ecx, esi
154
        cmp     ecx, esi
66
        jne     .save
155
        jne     .save
67
 
156
 
68
        call    save_context
157
        call    save_context
69
        jmp     .exit
158
        jmp     .exit
70
.save:
159
.save:
71
        mov     [fpu_owner], esi
160
        mov     [fpu_owner], esi
72
 
161
 
73
        shl     ecx, 8
162
        shl     ecx, 8
74
        mov     eax, [ecx+SLOT_BASE+APPDATA.fpu_state]
163
        mov     eax, [ecx+SLOT_BASE+APPDATA.fpu_state]
75
 
164
 
76
        call    save_context
165
        call    save_context
77
 
166
 
78
        shl     esi, 8
167
        shl     esi, 8
79
        mov     esi, [esi+SLOT_BASE+APPDATA.fpu_state]
168
        mov     esi, [esi+SLOT_BASE+APPDATA.fpu_state]
80
        mov     ecx, 512/4
169
        mov     ecx, 512/4
81
        cld
170
        cld
82
        rep movsd
171
        rep movsd
83
        fninit
172
        fninit
84
.exit:
173
.exit:
85
        popfd
174
        popfd
86
        pop     edi
175
        pop     edi
87
        pop     esi
176
        pop     esi
88
        pop     ecx
177
        pop     ecx
89
        ret
178
        ret
90
 
179
 
91
align 4
180
align 4
92
save_context:
181
save_context:
-
 
182
        bt      [cpu_caps+(CAPS_OSXSAVE/8)], CAPS_OSXSAVE mod 8
-
 
183
        jnc     .no_xsave
-
 
184
        xsave   [eax]
-
 
185
        ret
-
 
186
.no_xsave:
93
        bt      [cpu_caps], CAPS_SSE
187
        bt      [cpu_caps], CAPS_SSE
94
        jnc     .no_SSE
188
        jnc     .no_SSE
95
 
189
 
96
        fxsave  [eax]
190
        fxsave  [eax]
97
        ret
191
        ret
98
.no_SSE:
192
.no_SSE:
99
        fnsave  [eax]
193
        fnsave  [eax]
100
        ret
194
        ret
101
 
195
 
102
align 4
196
align 4
103
fpu_restore:
197
fpu_restore:
104
        push    ecx
198
        push    ecx
105
        push    esi
199
        push    esi
106
 
200
 
107
        mov     esi, eax
201
        mov     esi, eax
108
 
202
 
109
        pushfd
203
        pushfd
110
        cli
204
        cli
111
 
205
 
112
        mov     ecx, [fpu_owner]
206
        mov     ecx, [fpu_owner]
113
        mov     eax, [CURRENT_TASK]
207
        mov     eax, [CURRENT_TASK]
114
        cmp     ecx, eax
208
        cmp     ecx, eax
115
        jne     .copy
209
        jne     .copy
116
 
210
 
117
        clts
211
        clts
-
 
212
        bt      [cpu_caps+(CAPS_OSXSAVE/8)], CAPS_OSXSAVE mod 8
-
 
213
        jnc     .no_xsave
-
 
214
        xrstor  [esi]
-
 
215
        popfd
-
 
216
        pop     esi
-
 
217
        pop     ecx
-
 
218
        ret
-
 
219
.no_xsave:
118
        bt      [cpu_caps], CAPS_SSE
220
        bt      [cpu_caps], CAPS_SSE
119
        jnc     .no_SSE
221
        jnc     .no_SSE
120
 
222
 
121
        fxrstor [esi]
223
        fxrstor [esi]
122
        popfd
224
        popfd
123
        pop     esi
225
        pop     esi
124
        pop     ecx
226
        pop     ecx
125
        ret
227
        ret
126
.no_SSE:
228
.no_SSE:
127
        fnclex                  ;fix possible problems
229
        fnclex                  ;fix possible problems
128
        frstor  [esi]
230
        frstor  [esi]
129
        popfd
231
        popfd
130
        pop     esi
232
        pop     esi
131
        pop     ecx
233
        pop     ecx
132
        ret
234
        ret
133
.copy:
235
.copy:
134
        shl     eax, 8
236
        shl     eax, 8
135
        mov     edi, [eax+SLOT_BASE+APPDATA.fpu_state]
237
        mov     edi, [eax+SLOT_BASE+APPDATA.fpu_state]
136
        mov     ecx, 512/4
238
        mov     ecx, 512/4
137
        cld
239
        cld
138
        rep movsd
240
        rep movsd
139
        popfd
241
        popfd
140
        pop     esi
242
        pop     esi
141
        pop     ecx
243
        pop     ecx
142
        ret
244
        ret
143
 
245
 
144
align 4
246
align 4
145
except_7:                  ;#NM exception handler
247
except_7:                  ;#NM exception handler
146
        save_ring3_context
248
        save_ring3_context
147
        clts
249
        clts
148
        mov     ax, app_data;
250
        mov     ax, app_data;
149
        mov     ds, ax
251
        mov     ds, ax
150
        mov     es, ax
252
        mov     es, ax
151
 
253
 
152
        mov     ebx, [fpu_owner]
254
        mov     ebx, [fpu_owner]
153
        cmp     ebx, [CURRENT_TASK]
255
        cmp     ebx, [CURRENT_TASK]
154
        je      .exit
256
        je      .exit
155
 
257
 
156
        shl     ebx, 8
258
        shl     ebx, 8
157
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
259
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
158
        bt      [cpu_caps], CAPS_SSE
260
        bt      [cpu_caps], CAPS_SSE
159
        jnc     .no_SSE
261
        jnc     .no_SSE
160
 
262
 
161
        fxsave  [eax]
263
        fxsave  [eax]
162
        mov     ebx, [CURRENT_TASK]
264
        mov     ebx, [CURRENT_TASK]
163
        mov     [fpu_owner], ebx
265
        mov     [fpu_owner], ebx
164
        shl     ebx, 8
266
        shl     ebx, 8
165
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
267
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
166
        fxrstor [eax]
268
        fxrstor [eax]
167
.exit:
269
.exit:
168
        restore_ring3_context
270
        restore_ring3_context
169
        iret
271
        iret
170
 
272
 
171
.no_SSE:
273
.no_SSE:
172
        fnsave  [eax]
274
        fnsave  [eax]
173
        mov     ebx, [CURRENT_TASK]
275
        mov     ebx, [CURRENT_TASK]
174
        mov     [fpu_owner], ebx
276
        mov     [fpu_owner], ebx
175
        shl     ebx, 8
277
        shl     ebx, 8
176
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
278
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
177
        frstor  [eax]
279
        frstor  [eax]
178
        restore_ring3_context
280
        restore_ring3_context
179
        iret
281
        iret
180
 
282
 
181
iglobal
283
iglobal
182
  fpu_owner dd 2
284
  fpu_owner dd 2
183
endg
285
endg