/* Listing of Rev 6934 (blame: Rev 6082, author serge). */
/*
 * FPU data structures:
 */
#ifndef _ASM_X86_FPU_H
#define _ASM_X86_FPU_H

7 | /* |
||
8 | * The legacy x87 FPU state format, as saved by FSAVE and |
||
9 | * restored by the FRSTOR instructions: |
||
10 | */ |
||
11 | struct fregs_state { |
||
12 | u32 cwd; /* FPU Control Word */ |
||
13 | u32 swd; /* FPU Status Word */ |
||
14 | u32 twd; /* FPU Tag Word */ |
||
15 | u32 fip; /* FPU IP Offset */ |
||
16 | u32 fcs; /* FPU IP Selector */ |
||
17 | u32 foo; /* FPU Operand Pointer Offset */ |
||
18 | u32 fos; /* FPU Operand Pointer Selector */ |
||
19 | |||
20 | /* 8*10 bytes for each FP-reg = 80 bytes: */ |
||
21 | u32 st_space[20]; |
||
22 | |||
23 | /* Software status information [not touched by FSAVE]: */ |
||
24 | u32 status; |
||
25 | }; |
||
26 | |||
27 | /* |
||
28 | * The legacy fx SSE/MMX FPU state format, as saved by FXSAVE and |
||
29 | * restored by the FXRSTOR instructions. It's similar to the FSAVE |
||
30 | * format, but differs in some areas, plus has extensions at |
||
31 | * the end for the XMM registers. |
||
32 | */ |
||
33 | struct fxregs_state { |
||
34 | u16 cwd; /* Control Word */ |
||
35 | u16 swd; /* Status Word */ |
||
36 | u16 twd; /* Tag Word */ |
||
37 | u16 fop; /* Last Instruction Opcode */ |
||
38 | union { |
||
39 | struct { |
||
40 | u64 rip; /* Instruction Pointer */ |
||
41 | u64 rdp; /* Data Pointer */ |
||
42 | }; |
||
43 | struct { |
||
44 | u32 fip; /* FPU IP Offset */ |
||
45 | u32 fcs; /* FPU IP Selector */ |
||
46 | u32 foo; /* FPU Operand Offset */ |
||
47 | u32 fos; /* FPU Operand Selector */ |
||
48 | }; |
||
49 | }; |
||
50 | u32 mxcsr; /* MXCSR Register State */ |
||
51 | u32 mxcsr_mask; /* MXCSR Mask */ |
||
52 | |||
53 | /* 8*16 bytes for each FP-reg = 128 bytes: */ |
||
54 | u32 st_space[32]; |
||
55 | |||
56 | /* 16*16 bytes for each XMM-reg = 256 bytes: */ |
||
57 | u32 xmm_space[64]; |
||
58 | |||
59 | u32 padding[12]; |
||
60 | |||
61 | union { |
||
62 | u32 padding1[12]; |
||
63 | u32 sw_reserved[12]; |
||
64 | }; |
||
65 | |||
66 | } __attribute__((aligned(16))); |
||
67 | |||
/* Default value for fxregs_state.mxcsr: */
#define MXCSR_DEFAULT		0x1f80

71 | /* |
||
72 | * Software based FPU emulation state. This is arbitrary really, |
||
73 | * it matches the x87 format to make it easier to understand: |
||
74 | */ |
||
75 | struct swregs_state { |
||
76 | u32 cwd; |
||
77 | u32 swd; |
||
78 | u32 twd; |
||
79 | u32 fip; |
||
80 | u32 fcs; |
||
81 | u32 foo; |
||
82 | u32 fos; |
||
83 | /* 8*10 bytes for each FP-reg = 80 bytes: */ |
||
84 | u32 st_space[20]; |
||
85 | u8 ftop; |
||
86 | u8 changed; |
||
87 | u8 lookahead; |
||
88 | u8 no_update; |
||
89 | u8 rm; |
||
90 | u8 alimit; |
||
91 | struct math_emu_info *info; |
||
92 | u32 entry_eip; |
||
93 | }; |
||
94 | |||
/*
 * List of XSAVE features Linux knows about.
 *
 * The enumerator value of each feature is also its bit position in
 * the XFEATURE_MASK_* definitions below.
 */
enum xfeature {
	XFEATURE_FP,
	XFEATURE_SSE,
	/*
	 * Values above here are "legacy states".
	 * Those below are "extended states".
	 */
	XFEATURE_YMM,
	XFEATURE_BNDREGS,
	XFEATURE_BNDCSR,
	XFEATURE_OPMASK,
	XFEATURE_ZMM_Hi256,
	XFEATURE_Hi16_ZMM,

	/* Number of features listed above; not a feature itself. */
	XFEATURE_MAX,
};

/* Single-bit masks, one per feature: */
#define XFEATURE_MASK_FP		(1 << XFEATURE_FP)
#define XFEATURE_MASK_SSE		(1 << XFEATURE_SSE)
#define XFEATURE_MASK_YMM		(1 << XFEATURE_YMM)
#define XFEATURE_MASK_BNDREGS		(1 << XFEATURE_BNDREGS)
#define XFEATURE_MASK_BNDCSR		(1 << XFEATURE_BNDCSR)
#define XFEATURE_MASK_OPMASK		(1 << XFEATURE_OPMASK)
#define XFEATURE_MASK_ZMM_Hi256		(1 << XFEATURE_ZMM_Hi256)
#define XFEATURE_MASK_Hi16_ZMM		(1 << XFEATURE_Hi16_ZMM)

/* Combined masks: the two legacy states, and the three AVX-512 states. */
#define XFEATURE_MASK_FPSSE		(XFEATURE_MASK_FP | XFEATURE_MASK_SSE)
#define XFEATURE_MASK_AVX512		(XFEATURE_MASK_OPMASK \
					 | XFEATURE_MASK_ZMM_Hi256 \
					 | XFEATURE_MASK_Hi16_ZMM)

/* First feature that is an "extended state" rather than a legacy one. */
#define FIRST_EXTENDED_XFEATURE		XFEATURE_YMM

131 | struct reg_128_bit { |
||
132 | u8 regbytes[128/8]; |
||
133 | }; |
||
134 | struct reg_256_bit { |
||
135 | u8 regbytes[256/8]; |
||
136 | }; |
||
137 | struct reg_512_bit { |
||
138 | u8 regbytes[512/8]; |
||
139 | }; |
||
140 | |||
141 | /* |
||
142 | * State component 2: |
||
143 | * |
||
144 | * There are 16x 256-bit AVX registers named YMM0-YMM15. |
||
145 | * The low 128 bits are aliased to the 16 SSE registers (XMM0-XMM15) |
||
146 | * and are stored in 'struct fxregs_state::xmm_space[]' in the |
||
147 | * "legacy" area. |
||
148 | * |
||
149 | * The high 128 bits are stored here. |
||
150 | */ |
||
151 | struct ymmh_struct { |
||
152 | struct reg_128_bit hi_ymm[16]; |
||
153 | } __packed; |
||
154 | |||
155 | /* Intel MPX support: */ |
||
156 | |||
157 | struct mpx_bndreg { |
||
158 | u64 lower_bound; |
||
159 | u64 upper_bound; |
||
160 | } __packed; |
||
161 | /* |
||
162 | * State component 3 is used for the 4 128-bit bounds registers |
||
163 | */ |
||
164 | struct mpx_bndreg_state { |
||
165 | struct mpx_bndreg bndreg[4]; |
||
166 | } __packed; |
||
167 | |||
168 | /* |
||
169 | * State component 4 is used for the 64-bit user-mode MPX |
||
170 | * configuration register BNDCFGU and the 64-bit MPX status |
||
171 | * register BNDSTATUS. We call the pair "BNDCSR". |
||
172 | */ |
||
173 | struct mpx_bndcsr { |
||
174 | u64 bndcfgu; |
||
175 | u64 bndstatus; |
||
176 | } __packed; |
||
177 | |||
178 | /* |
||
179 | * The BNDCSR state is padded out to be 64-bytes in size. |
||
180 | */ |
||
181 | struct mpx_bndcsr_state { |
||
182 | union { |
||
183 | struct mpx_bndcsr bndcsr; |
||
184 | u8 pad_to_64_bytes[64]; |
||
185 | }; |
||
186 | } __packed; |
||
187 | |||
188 | /* AVX-512 Components: */ |
||
189 | |||
190 | /* |
||
191 | * State component 5 is used for the 8 64-bit opmask registers |
||
192 | * k0-k7 (opmask state). |
||
193 | */ |
||
194 | struct avx_512_opmask_state { |
||
195 | u64 opmask_reg[8]; |
||
196 | } __packed; |
||
197 | |||
198 | /* |
||
199 | * State component 6 is used for the upper 256 bits of the |
||
200 | * registers ZMM0-ZMM15. These 16 256-bit values are denoted |
||
201 | * ZMM0_H-ZMM15_H (ZMM_Hi256 state). |
||
202 | */ |
||
203 | struct avx_512_zmm_uppers_state { |
||
204 | struct reg_256_bit zmm_upper[16]; |
||
205 | } __packed; |
||
206 | |||
207 | /* |
||
208 | * State component 7 is used for the 16 512-bit registers |
||
209 | * ZMM16-ZMM31 (Hi16_ZMM state). |
||
210 | */ |
||
211 | struct avx_512_hi16_state { |
||
212 | struct reg_512_bit hi16_zmm[16]; |
||
213 | } __packed; |
||
214 | |||
215 | struct xstate_header { |
||
216 | u64 xfeatures; |
||
217 | u64 xcomp_bv; |
||
218 | u64 reserved[6]; |
||
219 | } __attribute__((packed)); |
||
220 | |||
221 | /* |
||
222 | * This is our most modern FPU state format, as saved by the XSAVE |
||
223 | * and restored by the XRSTOR instructions. |
||
224 | * |
||
225 | * It consists of a legacy fxregs portion, an xstate header and |
||
226 | * subsequent areas as defined by the xstate header. Not all CPUs |
||
227 | * support all the extensions, so the size of the extended area |
||
228 | * can vary quite a bit between CPUs. |
||
229 | */ |
||
230 | struct xregs_state { |
||
231 | struct fxregs_state i387; |
||
232 | struct xstate_header header; |
||
233 | u8 extended_state_area[0]; |
||
234 | } __attribute__ ((packed, aligned (64))); |
||
235 | |||
236 | /* |
||
237 | * This is a union of all the possible FPU state formats |
||
238 | * put together, so that we can pick the right one runtime. |
||
239 | * |
||
240 | * The size of the structure is determined by the largest |
||
241 | * member - which is the xsave area. The padding is there |
||
242 | * to ensure that statically-allocated task_structs (just |
||
243 | * the init_task today) have enough space. |
||
244 | */ |
||
245 | union fpregs_state { |
||
246 | struct fregs_state fsave; |
||
247 | struct fxregs_state fxsave; |
||
248 | struct swregs_state soft; |
||
249 | struct xregs_state xsave; |
||
250 | u8 __padding[PAGE_SIZE]; |
||
251 | }; |
||
252 | |||
253 | /* |
||
254 | * Highest level per task FPU state data structure that |
||
255 | * contains the FPU register state plus various FPU |
||
256 | * state fields: |
||
257 | */ |
||
258 | struct fpu { |
||
259 | /* |
||
260 | * @last_cpu: |
||
261 | * |
||
262 | * Records the last CPU on which this context was loaded into |
||
263 | * FPU registers. (In the lazy-restore case we might be |
||
264 | * able to reuse FPU registers across multiple context switches |
||
265 | * this way, if no intermediate task used the FPU.) |
||
266 | * |
||
267 | * A value of -1 is used to indicate that the FPU state in context |
||
268 | * memory is newer than the FPU state in registers, and that the |
||
269 | * FPU state should be reloaded next time the task is run. |
||
270 | */ |
||
271 | unsigned int last_cpu; |
||
272 | |||
273 | /* |
||
274 | * @fpstate_active: |
||
275 | * |
||
276 | * This flag indicates whether this context is active: if the task |
||
277 | * is not running then we can restore from this context, if the task |
||
278 | * is running then we should save into this context. |
||
279 | */ |
||
280 | unsigned char fpstate_active; |
||
281 | |||
282 | /* |
||
283 | * @fpregs_active: |
||
284 | * |
||
285 | * This flag determines whether a given context is actively |
||
286 | * loaded into the FPU's registers and that those registers |
||
287 | * represent the task's current FPU state. |
||
288 | * |
||
289 | * Note the interaction with fpstate_active: |
||
290 | * |
||
291 | * # task does not use the FPU: |
||
292 | * fpstate_active == 0 |
||
293 | * |
||
294 | * # task uses the FPU and regs are active: |
||
295 | * fpstate_active == 1 && fpregs_active == 1 |
||
296 | * |
||
297 | * # the regs are inactive but still match fpstate: |
||
298 | * fpstate_active == 1 && fpregs_active == 0 && fpregs_owner == fpu |
||
299 | * |
||
300 | * The third state is what we use for the lazy restore optimization |
||
301 | * on lazy-switching CPUs. |
||
302 | */ |
||
303 | unsigned char fpregs_active; |
||
304 | |||
305 | /* |
||
306 | * @counter: |
||
307 | * |
||
308 | * This counter contains the number of consecutive context switches |
||
309 | * during which the FPU stays used. If this is over a threshold, the |
||
310 | * lazy FPU restore logic becomes eager, to save the trap overhead. |
||
311 | * This is an unsigned char so that after 256 iterations the counter |
||
312 | * wraps and the context switch behavior turns lazy again; this is to |
||
313 | * deal with bursty apps that only use the FPU for a short time: |
||
314 | */ |
||
315 | unsigned char counter; |
||
316 | /* |
||
317 | * @state: |
||
318 | * |
||
319 | * In-memory copy of all FPU registers that we save/restore |
||
320 | * over context switches. If the task is using the FPU then |
||
321 | * the registers in the FPU are more recent than this state |
||
322 | * copy. If the task context-switches away then they get |
||
323 | * saved here and represent the FPU state. |
||
324 | * |
||
325 | * After context switches there may be a (short) time period |
||
326 | * during which the in-FPU hardware registers are unchanged |
||
327 | * and still perfectly match this state, if the tasks |
||
328 | * scheduled afterwards are not using the FPU. |
||
329 | * |
||
330 | * This is the 'lazy restore' window of optimization, which |
||
331 | * we track though 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'. |
||
332 | * |
||
333 | * We detect whether a subsequent task uses the FPU via setting |
||
334 | * CR0::TS to 1, which causes any FPU use to raise a #NM fault. |
||
335 | * |
||
336 | * During this window, if the task gets scheduled again, we |
||
337 | * might be able to skip having to do a restore from this |
||
338 | * memory buffer to the hardware registers - at the cost of |
||
339 | * incurring the overhead of #NM fault traps. |
||
340 | * |
||
341 | * Note that on modern CPUs that support the XSAVEOPT (or other |
||
342 | * optimized XSAVE instructions), we don't use #NM traps anymore, |
||
343 | * as the hardware can track whether FPU registers need saving |
||
344 | * or not. On such CPUs we activate the non-lazy ('eagerfpu') |
||
345 | * logic, which unconditionally saves/restores all FPU state |
||
346 | * across context switches. (if FPU state exists.) |
||
347 | */ |
||
348 | union fpregs_state state; |
||
349 | /* |
||
350 | * WARNING: 'state' is dynamically-sized. Do not put |
||
351 | * anything after it here. |
||
352 | */ |
||
353 | }; |
||
354 | |||
#endif /* _ASM_X86_FPU_H */