Rev 6084 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5060 | serge | 1 | /* |
2 | * Copyright © 2013 Intel Corporation |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
8 | * and/or sell copies of the Software, and to permit persons to whom the |
||
9 | * Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice (including the next |
||
12 | * paragraph) shall be included in all copies or substantial portions of the |
||
13 | * Software. |
||
14 | * |
||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
||
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
||
21 | * IN THE SOFTWARE. |
||
22 | * |
||
23 | * Authors: |
||
24 | * Brad Volkin |
||
25 | * |
||
26 | */ |
||
27 | |||
28 | #include "i915_drv.h" |
||
29 | |||
30 | /** |
||
31 | * DOC: batch buffer command parser |
||
32 | * |
||
33 | * Motivation: |
||
34 | * Certain OpenGL features (e.g. transform feedback, performance monitoring) |
||
35 | * require userspace code to submit batches containing commands such as |
||
36 | * MI_LOAD_REGISTER_IMM to access various registers. Unfortunately, some |
||
37 | * generations of the hardware will noop these commands in "unsecure" batches |
||
38 | * (which includes all userspace batches submitted via i915) even though the |
||
39 | * commands may be safe and represent the intended programming model of the |
||
40 | * device. |
||
41 | * |
||
42 | * The software command parser is similar in operation to the command parsing |
||
43 | * done in hardware for unsecure batches. However, the software parser allows |
||
44 | * some operations that would be noop'd by hardware, if the parser determines |
||
45 | * the operation is safe, and submits the batch as "secure" to prevent hardware |
||
46 | * parsing. |
||
47 | * |
||
48 | * Threats: |
||
49 | * At a high level, the hardware (and software) checks attempt to prevent |
||
50 | * granting userspace undue privileges. There are three categories of privilege. |
||
51 | * |
||
52 | * First, commands which are explicitly defined as privileged or which should |
||
53 | * only be used by the kernel driver. The parser generally rejects such |
||
54 | * commands, though it may allow some from the drm master process. |
||
55 | * |
||
56 | * Second, commands which access registers. To support correct/enhanced |
||
57 | * userspace functionality, particularly certain OpenGL extensions, the parser |
||
58 | * provides a whitelist of registers which userspace may safely access (for both |
||
59 | * normal and drm master processes). |
||
60 | * |
||
61 | * Third, commands which access privileged memory (i.e. GGTT, HWS page, etc). |
||
62 | * The parser always rejects such commands. |
||
63 | * |
||
64 | * The majority of the problematic commands fall in the MI_* range, with only a |
||
65 | * few specific commands on each ring (e.g. PIPE_CONTROL and MI_FLUSH_DW). |
||
66 | * |
||
67 | * Implementation: |
||
68 | * Each ring maintains tables of commands and registers which the parser uses in |
||
69 | * scanning batch buffers submitted to that ring. |
||
70 | * |
||
71 | * Since the set of commands that the parser must check for is significantly |
||
72 | * smaller than the number of commands supported, the parser tables contain only |
||
73 | * those commands required by the parser. This generally works because command |
||
74 | * opcode ranges have standard command length encodings. So for commands that |
||
75 | * the parser does not need to check, it can easily skip them. This is |
||
5354 | serge | 76 | * implemented via a per-ring length decoding vfunc. |
5060 | serge | 77 | * |
78 | * Unfortunately, there are a number of commands that do not follow the standard |
||
79 | * length encoding for their opcode range, primarily amongst the MI_* commands. |
||
80 | * To handle this, the parser provides a way to define explicit "skip" entries |
||
81 | * in the per-ring command tables. |
||
82 | * |
||
83 | * Other command table entries map fairly directly to high level categories |
||
84 | * mentioned above: rejected, master-only, register whitelist. The parser |
||
85 | * implements a number of checks, including the privileged memory checks, via a |
||
86 | * general bitmasking mechanism. |
||
87 | */ |
||
88 | |||
/*
 * Opcode-field masks used as the .cmd mask of table entries.  MI commands
 * keep their opcode in bits 31:23 and 3D commands in bits 31:16; the 2D and
 * MFX masks follow the same idea for their command ranges.
 */
#define STD_MI_OPCODE_MASK  0xFF800000
#define STD_3D_OPCODE_MASK  0xFFFF0000
#define STD_2D_OPCODE_MASK  0xFFC00000
#define STD_MFX_OPCODE_MASK 0xFFFF0000

/*
 * Build one struct drm_i915_cmd_descriptor initializer.
 *
 *   op   command opcode (first initializer of .cmd)
 *   opm  opcode mask (second initializer of .cmd)
 *   f    when true, CMD_DESC_FIXED is OR-ed into .flags
 *   lm   first initializer of .length; presumably the fixed length when f
 *        is set and the length-field mask otherwise - confirm against the
 *        struct definition in i915_drv.h
 *   fl   CMD_DESC_* action flags
 *   ...  optional extra designated initializers (e.g. .reg, .bits)
 */
#define CMD(op, opm, f, lm, fl, ...)				\
	{							\
		.flags = (fl) | ((f) ? CMD_DESC_FIXED : 0),	\
		.cmd = { (op), (opm) },				\
		.length = { (lm) },				\
		__VA_ARGS__					\
	}

/* Convenience macros to compress the tables */
#define SMI STD_MI_OPCODE_MASK
#define S3D STD_3D_OPCODE_MASK
#define S2D STD_2D_OPCODE_MASK
#define SMFX STD_MFX_OPCODE_MASK
#define F true
#define S CMD_DESC_SKIP
#define R CMD_DESC_REJECT
#define W CMD_DESC_REGISTER
#define B CMD_DESC_BITMASK
#define M CMD_DESC_MASTER
||
113 | |||
/*
 * Descriptors shared by every per-ring table list in this file.  Entries
 * must stay ordered by (opcode & mask): validate_cmds_sorted() checks this
 * when a ring is initialized.
 */
/*            Command                          Mask   Fixed Len   Action
	      ---------------------------------------------------------- */
static const struct drm_i915_cmd_descriptor common_cmds[] = {
	CMD(  MI_NOOP,                          SMI,    F,  1,      S  ),
	CMD(  MI_USER_INTERRUPT,                SMI,    F,  1,      R  ),
	CMD(  MI_WAIT_FOR_EVENT,                SMI,    F,  1,      M  ),
	CMD(  MI_ARB_CHECK,                     SMI,    F,  1,      S  ),
	CMD(  MI_REPORT_HEAD,                   SMI,    F,  1,      S  ),
	CMD(  MI_SUSPEND_FLUSH,                 SMI,    F,  1,      S  ),
	CMD(  MI_SEMAPHORE_MBOX,                SMI,   !F,  0xFF,   R  ),
	CMD(  MI_STORE_DWORD_INDEX,             SMI,   !F,  0xFF,   R  ),
	CMD(  MI_LOAD_REGISTER_IMM(1),          SMI,   !F,  0xFF,   W,
	      .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 }    ),
	CMD(  MI_STORE_REGISTER_MEM,            SMI,    F,  3,     W | B,
	      .reg = { .offset = 1, .mask = 0x007FFFFC },
	      .bits = {{
			.offset = 0,
			.mask = MI_GLOBAL_GTT,
			.expected = 0,
	      }},						       ),
	CMD(  MI_LOAD_REGISTER_MEM,             SMI,    F,  3,     W | B,
	      .reg = { .offset = 1, .mask = 0x007FFFFC },
	      .bits = {{
			.offset = 0,
			.mask = MI_GLOBAL_GTT,
			.expected = 0,
	      }},						       ),
	/*
	 * MI_BATCH_BUFFER_START requires some special handling. It's not
	 * really a 'skip' action but it doesn't seem like it's worth adding
	 * a new action. See i915_parse_cmds().
	 */
	CMD(  MI_BATCH_BUFFER_START,            SMI,   !F,  0xFF,   S  ),
};
||
148 | |||
/*
 * Render-ring descriptors, referenced by gen7_render_cmds and
 * hsw_render_ring_cmds.  Must stay ordered by (opcode & mask).
 * Columns: command, opcode mask, fixed-length flag, length, action.
 */
static const struct drm_i915_cmd_descriptor render_cmds[] = {
	CMD(  MI_FLUSH,                         SMI,    F,  1,      S  ),
	CMD(  MI_ARB_ON_OFF,                    SMI,    F,  1,      R  ),
	CMD(  MI_PREDICATE,                     SMI,    F,  1,      S  ),
	CMD(  MI_TOPOLOGY_FILTER,               SMI,    F,  1,      S  ),
	CMD(  MI_SET_APPID,                     SMI,    F,  1,      S  ),
	CMD(  MI_DISPLAY_FLIP,                  SMI,   !F,  0xFF,   R  ),
	CMD(  MI_SET_CONTEXT,                   SMI,   !F,  0xFF,   R  ),
	CMD(  MI_URB_CLEAR,                     SMI,   !F,  0xFF,   S  ),
	CMD(  MI_STORE_DWORD_IMM,               SMI,   !F,  0x3F,   B,
	      .bits = {{
			.offset = 0,
			.mask = MI_GLOBAL_GTT,
			.expected = 0,
	      }},						       ),
	CMD(  MI_UPDATE_GTT,                    SMI,   !F,  0xFF,   R  ),
	CMD(  MI_CLFLUSH,                       SMI,   !F,  0x3FF,  B,
	      .bits = {{
			.offset = 0,
			.mask = MI_GLOBAL_GTT,
			.expected = 0,
	      }},						       ),
	CMD(  MI_REPORT_PERF_COUNT,             SMI,   !F,  0x3F,   B,
	      .bits = {{
			.offset = 1,
			.mask = MI_REPORT_PERF_COUNT_GGTT,
			.expected = 0,
	      }},						       ),
	CMD(  MI_CONDITIONAL_BATCH_BUFFER_END,  SMI,   !F,  0xFF,   B,
	      .bits = {{
			.offset = 0,
			.mask = MI_GLOBAL_GTT,
			.expected = 0,
	      }},						       ),
	CMD(  GFX_OP_3DSTATE_VF_STATISTICS,     S3D,    F,  1,      S  ),
	CMD(  PIPELINE_SELECT,                  S3D,    F,  1,      S  ),
	CMD(  MEDIA_VFE_STATE,			S3D,   !F,  0xFFFF, B,
	      .bits = {{
			.offset = 2,
			.mask = MEDIA_VFE_STATE_MMIO_ACCESS_MASK,
			.expected = 0,
	      }},						       ),
	CMD(  GPGPU_OBJECT,                     S3D,   !F,  0xFF,   S  ),
	CMD(  GPGPU_WALKER,                     S3D,   !F,  0xFF,   S  ),
	CMD(  GFX_OP_3DSTATE_SO_DECL_LIST,      S3D,   !F,  0x1FF,  S  ),
	/*
	 * The second bitmask carries a condition_offset/condition_mask pair
	 * keyed on the post-sync operation field.
	 */
	CMD(  GFX_OP_PIPE_CONTROL(5),           S3D,   !F,  0xFF,   B,
	      .bits = {{
			.offset = 1,
			.mask = (PIPE_CONTROL_MMIO_WRITE | PIPE_CONTROL_NOTIFY),
			.expected = 0,
	      },
	      {
			.offset = 1,
			.mask = (PIPE_CONTROL_GLOBAL_GTT_IVB |
				 PIPE_CONTROL_STORE_DATA_INDEX),
			.expected = 0,
			.condition_offset = 1,
			.condition_mask = PIPE_CONTROL_POST_SYNC_OP_MASK,
	      }},						       ),
};
||
209 | |||
/*
 * Additional render-ring descriptors that only hsw_render_ring_cmds
 * references.  Must stay ordered by (opcode & mask).
 */
static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = {
	CMD(  MI_SET_PREDICATE,                 SMI,    F,  1,      S  ),
	CMD(  MI_RS_CONTROL,                    SMI,    F,  1,      S  ),
	CMD(  MI_URB_ATOMIC_ALLOC,              SMI,    F,  1,      S  ),
	CMD(  MI_SET_APPID,                     SMI,    F,  1,      S  ),
	CMD(  MI_RS_CONTEXT,                    SMI,    F,  1,      S  ),
	CMD(  MI_LOAD_SCAN_LINES_INCL,          SMI,   !F,  0x3F,   M  ),
	CMD(  MI_LOAD_SCAN_LINES_EXCL,          SMI,   !F,  0x3F,   R  ),
	CMD(  MI_LOAD_REGISTER_REG,             SMI,   !F,  0xFF,   R  ),
	CMD(  MI_RS_STORE_DATA_IMM,             SMI,   !F,  0xFF,   S  ),
	CMD(  MI_LOAD_URB_MEM,                  SMI,   !F,  0xFF,   S  ),
	CMD(  MI_STORE_URB_MEM,                 SMI,   !F,  0xFF,   S  ),
	CMD(  GFX_OP_3DSTATE_DX9_CONSTANTF_VS,  S3D,   !F,  0x7FF,  S  ),
	CMD(  GFX_OP_3DSTATE_DX9_CONSTANTF_PS,  S3D,   !F,  0x7FF,  S  ),

	CMD(  GFX_OP_3DSTATE_BINDING_TABLE_EDIT_VS,  S3D,   !F,  0x1FF,  S  ),
	CMD(  GFX_OP_3DSTATE_BINDING_TABLE_EDIT_GS,  S3D,   !F,  0x1FF,  S  ),
	CMD(  GFX_OP_3DSTATE_BINDING_TABLE_EDIT_HS,  S3D,   !F,  0x1FF,  S  ),
	CMD(  GFX_OP_3DSTATE_BINDING_TABLE_EDIT_DS,  S3D,   !F,  0x1FF,  S  ),
	CMD(  GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS,  S3D,   !F,  0x1FF,  S  ),
};
||
231 | |||
/*
 * Video-ring descriptors, referenced by gen7_video_cmds.  Must stay
 * ordered by (opcode & mask).
 */
static const struct drm_i915_cmd_descriptor video_cmds[] = {
	CMD(  MI_ARB_ON_OFF,                    SMI,    F,  1,      R  ),
	CMD(  MI_SET_APPID,                     SMI,    F,  1,      S  ),
	CMD(  MI_STORE_DWORD_IMM,               SMI,   !F,  0xFF,   B,
	      .bits = {{
			.offset = 0,
			.mask = MI_GLOBAL_GTT,
			.expected = 0,
	      }},						       ),
	CMD(  MI_UPDATE_GTT,                    SMI,   !F,  0x3F,   R  ),
	/*
	 * The last two bitmasks carry a condition_offset/condition_mask pair
	 * keyed on MI_FLUSH_DW_OP_MASK in dword 0.
	 */
	CMD(  MI_FLUSH_DW,                      SMI,   !F,  0x3F,   B,
	      .bits = {{
			.offset = 0,
			.mask = MI_FLUSH_DW_NOTIFY,
			.expected = 0,
	      },
	      {
			.offset = 1,
			.mask = MI_FLUSH_DW_USE_GTT,
			.expected = 0,
			.condition_offset = 0,
			.condition_mask = MI_FLUSH_DW_OP_MASK,
	      },
	      {
			.offset = 0,
			.mask = MI_FLUSH_DW_STORE_INDEX,
			.expected = 0,
			.condition_offset = 0,
			.condition_mask = MI_FLUSH_DW_OP_MASK,
	      }},						       ),
	CMD(  MI_CONDITIONAL_BATCH_BUFFER_END,  SMI,   !F,  0xFF,   B,
	      .bits = {{
			.offset = 0,
			.mask = MI_GLOBAL_GTT,
			.expected = 0,
	      }},						       ),
	/*
	 * MFX_WAIT doesn't fit the way we handle length for most commands.
	 * It has a length field but it uses a non-standard length bias.
	 * It is always 1 dword though, so just treat it as fixed length.
	 */
	CMD(  MFX_WAIT,                         SMFX,   F,  1,      S  ),
};
||
275 | |||
/*
 * Video-enhancement-ring descriptors, referenced by hsw_vebox_cmds.  Must
 * stay ordered by (opcode & mask).
 */
static const struct drm_i915_cmd_descriptor vecs_cmds[] = {
	CMD(  MI_ARB_ON_OFF,                    SMI,    F,  1,      R  ),
	CMD(  MI_SET_APPID,                     SMI,    F,  1,      S  ),
	CMD(  MI_STORE_DWORD_IMM,               SMI,   !F,  0xFF,   B,
	      .bits = {{
			.offset = 0,
			.mask = MI_GLOBAL_GTT,
			.expected = 0,
	      }},						       ),
	CMD(  MI_UPDATE_GTT,                    SMI,   !F,  0x3F,   R  ),
	/*
	 * The last two bitmasks carry a condition_offset/condition_mask pair
	 * keyed on MI_FLUSH_DW_OP_MASK in dword 0.
	 */
	CMD(  MI_FLUSH_DW,                      SMI,   !F,  0x3F,   B,
	      .bits = {{
			.offset = 0,
			.mask = MI_FLUSH_DW_NOTIFY,
			.expected = 0,
	      },
	      {
			.offset = 1,
			.mask = MI_FLUSH_DW_USE_GTT,
			.expected = 0,
			.condition_offset = 0,
			.condition_mask = MI_FLUSH_DW_OP_MASK,
	      },
	      {
			.offset = 0,
			.mask = MI_FLUSH_DW_STORE_INDEX,
			.expected = 0,
			.condition_offset = 0,
			.condition_mask = MI_FLUSH_DW_OP_MASK,
	      }},						       ),
	CMD(  MI_CONDITIONAL_BATCH_BUFFER_END,  SMI,   !F,  0xFF,   B,
	      .bits = {{
			.offset = 0,
			.mask = MI_GLOBAL_GTT,
			.expected = 0,
	      }},						       ),
};
||
313 | |||
/*
 * Blitter-ring descriptors, referenced by gen7_blt_cmds and
 * hsw_blt_ring_cmds.  Must stay ordered by (opcode & mask).
 */
static const struct drm_i915_cmd_descriptor blt_cmds[] = {
	CMD(  MI_DISPLAY_FLIP,                  SMI,   !F,  0xFF,   R  ),
	CMD(  MI_STORE_DWORD_IMM,               SMI,   !F,  0x3FF,  B,
	      .bits = {{
			.offset = 0,
			.mask = MI_GLOBAL_GTT,
			.expected = 0,
	      }},						       ),
	CMD(  MI_UPDATE_GTT,                    SMI,   !F,  0x3F,   R  ),
	/*
	 * The last two bitmasks carry a condition_offset/condition_mask pair
	 * keyed on MI_FLUSH_DW_OP_MASK in dword 0.
	 */
	CMD(  MI_FLUSH_DW,                      SMI,   !F,  0x3F,   B,
	      .bits = {{
			.offset = 0,
			.mask = MI_FLUSH_DW_NOTIFY,
			.expected = 0,
	      },
	      {
			.offset = 1,
			.mask = MI_FLUSH_DW_USE_GTT,
			.expected = 0,
			.condition_offset = 0,
			.condition_mask = MI_FLUSH_DW_OP_MASK,
	      },
	      {
			.offset = 0,
			.mask = MI_FLUSH_DW_STORE_INDEX,
			.expected = 0,
			.condition_offset = 0,
			.condition_mask = MI_FLUSH_DW_OP_MASK,
	      }},						       ),
	CMD(  COLOR_BLT,                        S2D,   !F,  0x3F,   S  ),
	CMD(  SRC_COPY_BLT,                     S2D,   !F,  0x3F,   S  ),
};
||
346 | |||
/*
 * Additional blitter-ring descriptors that only hsw_blt_ring_cmds
 * references.  Must stay ordered by (opcode & mask).
 */
static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = {
	CMD(  MI_LOAD_SCAN_LINES_INCL,          SMI,   !F,  0x3F,   M  ),
	CMD(  MI_LOAD_SCAN_LINES_EXCL,          SMI,   !F,  0x3F,   R  ),
};
||
351 | |||
352 | #undef CMD |
||
353 | #undef SMI |
||
354 | #undef S3D |
||
355 | #undef S2D |
||
356 | #undef SMFX |
||
357 | #undef F |
||
358 | #undef S |
||
359 | #undef R |
||
360 | #undef W |
||
361 | #undef B |
||
362 | #undef M |
||
363 | |||
/*
 * Per-ring lists of descriptor tables.  i915_cmd_parser_init_ring() selects
 * one list per ring and feeds every table in it to the sort validation and
 * to the command hash.  Each entry pairs a descriptor array with its length.
 */

/* Render ring, non-Haswell gen7. */
static const struct drm_i915_cmd_table gen7_render_cmds[] = {
	{ common_cmds, ARRAY_SIZE(common_cmds) },
	{ render_cmds, ARRAY_SIZE(render_cmds) },
};

/* Render ring, Haswell: adds hsw_render_cmds. */
static const struct drm_i915_cmd_table hsw_render_ring_cmds[] = {
	{ common_cmds, ARRAY_SIZE(common_cmds) },
	{ render_cmds, ARRAY_SIZE(render_cmds) },
	{ hsw_render_cmds, ARRAY_SIZE(hsw_render_cmds) },
};

/* Video ring (VCS). */
static const struct drm_i915_cmd_table gen7_video_cmds[] = {
	{ common_cmds, ARRAY_SIZE(common_cmds) },
	{ video_cmds, ARRAY_SIZE(video_cmds) },
};

/* Video enhancement ring (VECS). */
static const struct drm_i915_cmd_table hsw_vebox_cmds[] = {
	{ common_cmds, ARRAY_SIZE(common_cmds) },
	{ vecs_cmds, ARRAY_SIZE(vecs_cmds) },
};

/* Blitter ring, non-Haswell gen7. */
static const struct drm_i915_cmd_table gen7_blt_cmds[] = {
	{ common_cmds, ARRAY_SIZE(common_cmds) },
	{ blt_cmds, ARRAY_SIZE(blt_cmds) },
};

/* Blitter ring, Haswell: adds hsw_blt_cmds. */
static const struct drm_i915_cmd_table hsw_blt_ring_cmds[] = {
	{ common_cmds, ARRAY_SIZE(common_cmds) },
	{ blt_cmds, ARRAY_SIZE(blt_cmds) },
	{ hsw_blt_cmds, ARRAY_SIZE(hsw_blt_cmds) },
};
||
395 | |||
396 | /* |
||
397 | * Register whitelists, sorted by increasing register offset. |
||
6084 | serge | 398 | */ |
399 | |||
/*
 * An individual whitelist entry granting access to register addr.  If
 * mask is non-zero the argument of immediate register writes will be
 * AND-ed with mask, and the command will be rejected if the result
 * doesn't match value.
 *
 * Registers with non-zero mask are only allowed to be written using
 * LRI.
 */
struct drm_i915_reg_descriptor {
	u32 addr;	/* register offset; the key find_reg() compares against */
	u32 mask;	/* zero means the written value is not checked */
	u32 value;	/* required result of (written value & mask) */
};
||
414 | |||
/*
 * Convenience macro for adding 32-bit registers.
 *
 * Any extra arguments are appended as further designated initializers
 * (e.g. .mask = ..., .value = ...).  The address argument is parenthesized
 * so that an expression argument is always taken as a single unit.
 */
#define REG32(address, ...)                     \
	{ .addr = (address), __VA_ARGS__ }

/*
 * Convenience macro for adding 64-bit registers.
 *
 * Some registers that userspace accesses are 64 bits. The register
 * access commands only allow 32-bit accesses. Hence, we have to include
 * entries for both halves of the 64-bit registers.
 *
 * addr is parenthesized before the upper-half offset is added; otherwise
 * an argument containing an operator of lower precedence than '+'
 * (e.g. '&', '|', '?:') would produce a wrong upper-half address.
 */
#define REG64(addr)                                     \
	REG32(addr), REG32((addr) + sizeof(u32))
||
5060 | serge | 428 | |
/*
 * Register whitelist installed as reg_table for the render ring.  Entries
 * are expected in increasing address order; check_sorted() reports any
 * violation when the ring is initialized.
 */
static const struct drm_i915_reg_descriptor gen7_render_regs[] = {
	REG64(GPGPU_THREADS_DISPATCHED),
	REG64(HS_INVOCATION_COUNT),
	REG64(DS_INVOCATION_COUNT),
	REG64(IA_VERTICES_COUNT),
	REG64(IA_PRIMITIVES_COUNT),
	REG64(VS_INVOCATION_COUNT),
	REG64(GS_INVOCATION_COUNT),
	REG64(GS_PRIMITIVES_COUNT),
	REG64(CL_INVOCATION_COUNT),
	REG64(CL_PRIMITIVES_COUNT),
	REG64(PS_INVOCATION_COUNT),
	REG64(PS_DEPTH_COUNT),
	REG32(OACONTROL), /* Only allowed for LRI and SRM. See below. */
	REG64(MI_PREDICATE_SRC0),
	REG64(MI_PREDICATE_SRC1),
	REG32(GEN7_3DPRIM_END_OFFSET),
	REG32(GEN7_3DPRIM_START_VERTEX),
	REG32(GEN7_3DPRIM_VERTEX_COUNT),
	REG32(GEN7_3DPRIM_INSTANCE_COUNT),
	REG32(GEN7_3DPRIM_START_INSTANCE),
	REG32(GEN7_3DPRIM_BASE_VERTEX),
	REG32(GEN7_GPGPU_DISPATCHDIMX),
	REG32(GEN7_GPGPU_DISPATCHDIMY),
	REG32(GEN7_GPGPU_DISPATCHDIMZ),
	REG64(GEN7_SO_NUM_PRIMS_WRITTEN(0)),
	REG64(GEN7_SO_NUM_PRIMS_WRITTEN(1)),
	REG64(GEN7_SO_NUM_PRIMS_WRITTEN(2)),
	REG64(GEN7_SO_NUM_PRIMS_WRITTEN(3)),
	REG64(GEN7_SO_PRIM_STORAGE_NEEDED(0)),
	REG64(GEN7_SO_PRIM_STORAGE_NEEDED(1)),
	REG64(GEN7_SO_PRIM_STORAGE_NEEDED(2)),
	REG64(GEN7_SO_PRIM_STORAGE_NEEDED(3)),
	REG32(GEN7_SO_WRITE_OFFSET(0)),
	REG32(GEN7_SO_WRITE_OFFSET(1)),
	REG32(GEN7_SO_WRITE_OFFSET(2)),
	REG32(GEN7_SO_WRITE_OFFSET(3)),
	REG32(GEN7_L3SQCREG1),
	REG32(GEN7_L3CNTLREG2),
	REG32(GEN7_L3CNTLREG3),
	/*
	 * Masked entries: with .value = 0, every bit selected by .mask must
	 * be zero in the written value, leaving only the named
	 * atomics-disable bit(s) free to change.
	 */
	REG32(HSW_SCRATCH1,
	      .mask = ~HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE,
	      .value = 0),
	REG32(HSW_ROW_CHICKEN3,
	      .mask = ~(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE << 16 |
			HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE),
	      .value = 0),
};
477 | |||
/* Register whitelist installed as reg_table for the blitter ring. */
static const struct drm_i915_reg_descriptor gen7_blt_regs[] = {
	REG32(BCS_SWCTRL),
};

/*
 * Registers installed as master_reg_table on non-Haswell gen7 for the
 * render and blitter rings.
 */
static const struct drm_i915_reg_descriptor ivb_master_regs[] = {
	REG32(FORCEWAKE_MT),
	REG32(DERRMR),
	REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_A)),
	REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_B)),
	REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_C)),
};

/*
 * Registers installed as master_reg_table on Haswell for the render and
 * blitter rings.
 */
static const struct drm_i915_reg_descriptor hsw_master_regs[] = {
	REG32(FORCEWAKE_MT),
	REG32(DERRMR),
};
494 | |||
495 | #undef REG64 |
||
6084 | serge | 496 | #undef REG32 |
5060 | serge | 497 | |
498 | static u32 gen7_render_get_cmd_length_mask(u32 cmd_header) |
||
499 | { |
||
500 | u32 client = (cmd_header & INSTR_CLIENT_MASK) >> INSTR_CLIENT_SHIFT; |
||
501 | u32 subclient = |
||
502 | (cmd_header & INSTR_SUBCLIENT_MASK) >> INSTR_SUBCLIENT_SHIFT; |
||
503 | |||
504 | if (client == INSTR_MI_CLIENT) |
||
505 | return 0x3F; |
||
506 | else if (client == INSTR_RC_CLIENT) { |
||
507 | if (subclient == INSTR_MEDIA_SUBCLIENT) |
||
508 | return 0xFFFF; |
||
509 | else |
||
510 | return 0xFF; |
||
511 | } |
||
512 | |||
513 | DRM_DEBUG_DRIVER("CMD: Abnormal rcs cmd length! 0x%08X\n", cmd_header); |
||
514 | return 0; |
||
515 | } |
||
516 | |||
517 | static u32 gen7_bsd_get_cmd_length_mask(u32 cmd_header) |
||
518 | { |
||
519 | u32 client = (cmd_header & INSTR_CLIENT_MASK) >> INSTR_CLIENT_SHIFT; |
||
520 | u32 subclient = |
||
521 | (cmd_header & INSTR_SUBCLIENT_MASK) >> INSTR_SUBCLIENT_SHIFT; |
||
6084 | serge | 522 | u32 op = (cmd_header & INSTR_26_TO_24_MASK) >> INSTR_26_TO_24_SHIFT; |
5060 | serge | 523 | |
524 | if (client == INSTR_MI_CLIENT) |
||
525 | return 0x3F; |
||
526 | else if (client == INSTR_RC_CLIENT) { |
||
6084 | serge | 527 | if (subclient == INSTR_MEDIA_SUBCLIENT) { |
528 | if (op == 6) |
||
529 | return 0xFFFF; |
||
530 | else |
||
531 | return 0xFFF; |
||
532 | } else |
||
5060 | serge | 533 | return 0xFF; |
534 | } |
||
535 | |||
536 | DRM_DEBUG_DRIVER("CMD: Abnormal bsd cmd length! 0x%08X\n", cmd_header); |
||
537 | return 0; |
||
538 | } |
||
539 | |||
540 | static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header) |
||
541 | { |
||
542 | u32 client = (cmd_header & INSTR_CLIENT_MASK) >> INSTR_CLIENT_SHIFT; |
||
543 | |||
544 | if (client == INSTR_MI_CLIENT) |
||
545 | return 0x3F; |
||
546 | else if (client == INSTR_BC_CLIENT) |
||
547 | return 0xFF; |
||
548 | |||
549 | DRM_DEBUG_DRIVER("CMD: Abnormal blt cmd length! 0x%08X\n", cmd_header); |
||
550 | return 0; |
||
551 | } |
||
552 | |||
553 | static bool validate_cmds_sorted(struct intel_engine_cs *ring, |
||
554 | const struct drm_i915_cmd_table *cmd_tables, |
||
555 | int cmd_table_count) |
||
556 | { |
||
557 | int i; |
||
558 | bool ret = true; |
||
559 | |||
560 | if (!cmd_tables || cmd_table_count == 0) |
||
561 | return true; |
||
562 | |||
563 | for (i = 0; i < cmd_table_count; i++) { |
||
564 | const struct drm_i915_cmd_table *table = &cmd_tables[i]; |
||
565 | u32 previous = 0; |
||
566 | int j; |
||
567 | |||
568 | for (j = 0; j < table->count; j++) { |
||
569 | const struct drm_i915_cmd_descriptor *desc = |
||
6084 | serge | 570 | &table->table[j]; |
5060 | serge | 571 | u32 curr = desc->cmd.value & desc->cmd.mask; |
572 | |||
573 | if (curr < previous) { |
||
574 | DRM_ERROR("CMD: table not sorted ring=%d table=%d entry=%d cmd=0x%08X prev=0x%08X\n", |
||
575 | ring->id, i, j, curr, previous); |
||
576 | ret = false; |
||
577 | } |
||
578 | |||
579 | previous = curr; |
||
580 | } |
||
581 | } |
||
582 | |||
583 | return ret; |
||
584 | } |
||
585 | |||
6084 | serge | 586 | static bool check_sorted(int ring_id, |
587 | const struct drm_i915_reg_descriptor *reg_table, |
||
588 | int reg_count) |
||
5060 | serge | 589 | { |
590 | int i; |
||
591 | u32 previous = 0; |
||
592 | bool ret = true; |
||
593 | |||
594 | for (i = 0; i < reg_count; i++) { |
||
6084 | serge | 595 | u32 curr = reg_table[i].addr; |
5060 | serge | 596 | |
597 | if (curr < previous) { |
||
598 | DRM_ERROR("CMD: table not sorted ring=%d entry=%d reg=0x%08X prev=0x%08X\n", |
||
599 | ring_id, i, curr, previous); |
||
600 | ret = false; |
||
601 | } |
||
602 | |||
603 | previous = curr; |
||
604 | } |
||
605 | |||
606 | return ret; |
||
607 | } |
||
608 | |||
609 | static bool validate_regs_sorted(struct intel_engine_cs *ring) |
||
610 | { |
||
611 | return check_sorted(ring->id, ring->reg_table, ring->reg_count) && |
||
612 | check_sorted(ring->id, ring->master_reg_table, |
||
613 | ring->master_reg_count); |
||
614 | } |
||
615 | |||
/*
 * One entry of a ring's command hash: links a descriptor from the static
 * command tables into ring->cmd_hash.  Allocated in init_hash_table() and
 * freed in fini_hash_table().
 */
struct cmd_node {
	const struct drm_i915_cmd_descriptor *desc;	/* points into a static table; not owned */
	struct hlist_node node;				/* linkage within ring->cmd_hash */
};
||
620 | |||
621 | /* |
||
622 | * Different command ranges have different numbers of bits for the opcode. For |
||
623 | * example, MI commands use bits 31:23 while 3D commands use bits 31:16. The |
||
624 | * problem is that, for example, MI commands use bits 22:16 for other fields |
||
625 | * such as GGTT vs PPGTT bits. If we include those bits in the mask then when |
||
626 | * we mask a command from a batch it could hash to the wrong bucket due to |
||
627 | * non-opcode bits being set. But if we don't include those bits, some 3D |
||
628 | * commands may hash to the same bucket due to not including opcode bits that |
||
629 | * make the command unique. For now, we will risk hashing to the same bucket. |
||
630 | * |
||
631 | * If we attempt to generate a perfect hash, we should be able to look at bits |
||
632 | * 31:29 of a command from a batch buffer and use the full mask for that |
||
633 | * client. The existing INSTR_CLIENT_MASK/SHIFT defines can be used for this. |
||
634 | */ |
||
635 | #define CMD_HASH_MASK STD_MI_OPCODE_MASK |
||
636 | |||
637 | static int init_hash_table(struct intel_engine_cs *ring, |
||
638 | const struct drm_i915_cmd_table *cmd_tables, |
||
639 | int cmd_table_count) |
||
640 | { |
||
641 | int i, j; |
||
642 | |||
643 | hash_init(ring->cmd_hash); |
||
644 | |||
645 | for (i = 0; i < cmd_table_count; i++) { |
||
646 | const struct drm_i915_cmd_table *table = &cmd_tables[i]; |
||
647 | |||
648 | for (j = 0; j < table->count; j++) { |
||
649 | const struct drm_i915_cmd_descriptor *desc = |
||
650 | &table->table[j]; |
||
651 | struct cmd_node *desc_node = |
||
652 | kmalloc(sizeof(*desc_node), GFP_KERNEL); |
||
653 | |||
654 | if (!desc_node) |
||
655 | return -ENOMEM; |
||
656 | |||
657 | desc_node->desc = desc; |
||
658 | hash_add(ring->cmd_hash, &desc_node->node, |
||
659 | desc->cmd.value & CMD_HASH_MASK); |
||
660 | } |
||
661 | } |
||
662 | |||
663 | return 0; |
||
664 | } |
||
665 | |||
666 | static void fini_hash_table(struct intel_engine_cs *ring) |
||
667 | { |
||
668 | struct hlist_node *tmp; |
||
669 | struct cmd_node *desc_node; |
||
670 | int i; |
||
671 | |||
672 | hash_for_each_safe(ring->cmd_hash, i, tmp, desc_node, node) { |
||
673 | hash_del(&desc_node->node); |
||
674 | kfree(desc_node); |
||
675 | } |
||
676 | } |
||
677 | |||
678 | /** |
||
679 | * i915_cmd_parser_init_ring() - set cmd parser related fields for a ringbuffer |
||
680 | * @ring: the ringbuffer to initialize |
||
681 | * |
||
682 | * Optionally initializes fields related to batch buffer command parsing in the |
||
683 | * struct intel_engine_cs based on whether the platform requires software |
||
684 | * command parsing. |
||
685 | * |
||
686 | * Return: non-zero if initialization fails |
||
687 | */ |
||
688 | int i915_cmd_parser_init_ring(struct intel_engine_cs *ring) |
||
689 | { |
||
690 | const struct drm_i915_cmd_table *cmd_tables; |
||
691 | int cmd_table_count; |
||
692 | int ret; |
||
693 | |||
694 | if (!IS_GEN7(ring->dev)) |
||
695 | return 0; |
||
696 | |||
697 | switch (ring->id) { |
||
698 | case RCS: |
||
699 | if (IS_HASWELL(ring->dev)) { |
||
700 | cmd_tables = hsw_render_ring_cmds; |
||
701 | cmd_table_count = |
||
702 | ARRAY_SIZE(hsw_render_ring_cmds); |
||
703 | } else { |
||
704 | cmd_tables = gen7_render_cmds; |
||
705 | cmd_table_count = ARRAY_SIZE(gen7_render_cmds); |
||
706 | } |
||
707 | |||
708 | ring->reg_table = gen7_render_regs; |
||
709 | ring->reg_count = ARRAY_SIZE(gen7_render_regs); |
||
710 | |||
711 | if (IS_HASWELL(ring->dev)) { |
||
712 | ring->master_reg_table = hsw_master_regs; |
||
713 | ring->master_reg_count = ARRAY_SIZE(hsw_master_regs); |
||
714 | } else { |
||
715 | ring->master_reg_table = ivb_master_regs; |
||
716 | ring->master_reg_count = ARRAY_SIZE(ivb_master_regs); |
||
717 | } |
||
718 | |||
719 | ring->get_cmd_length_mask = gen7_render_get_cmd_length_mask; |
||
720 | break; |
||
721 | case VCS: |
||
722 | cmd_tables = gen7_video_cmds; |
||
723 | cmd_table_count = ARRAY_SIZE(gen7_video_cmds); |
||
724 | ring->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; |
||
725 | break; |
||
726 | case BCS: |
||
727 | if (IS_HASWELL(ring->dev)) { |
||
728 | cmd_tables = hsw_blt_ring_cmds; |
||
729 | cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmds); |
||
730 | } else { |
||
731 | cmd_tables = gen7_blt_cmds; |
||
732 | cmd_table_count = ARRAY_SIZE(gen7_blt_cmds); |
||
733 | } |
||
734 | |||
735 | ring->reg_table = gen7_blt_regs; |
||
736 | ring->reg_count = ARRAY_SIZE(gen7_blt_regs); |
||
737 | |||
738 | if (IS_HASWELL(ring->dev)) { |
||
739 | ring->master_reg_table = hsw_master_regs; |
||
740 | ring->master_reg_count = ARRAY_SIZE(hsw_master_regs); |
||
741 | } else { |
||
742 | ring->master_reg_table = ivb_master_regs; |
||
743 | ring->master_reg_count = ARRAY_SIZE(ivb_master_regs); |
||
744 | } |
||
745 | |||
746 | ring->get_cmd_length_mask = gen7_blt_get_cmd_length_mask; |
||
747 | break; |
||
748 | case VECS: |
||
749 | cmd_tables = hsw_vebox_cmds; |
||
750 | cmd_table_count = ARRAY_SIZE(hsw_vebox_cmds); |
||
751 | /* VECS can use the same length_mask function as VCS */ |
||
752 | ring->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; |
||
753 | break; |
||
754 | default: |
||
755 | DRM_ERROR("CMD: cmd_parser_init with unknown ring: %d\n", |
||
756 | ring->id); |
||
757 | BUG(); |
||
758 | } |
||
759 | |||
760 | BUG_ON(!validate_cmds_sorted(ring, cmd_tables, cmd_table_count)); |
||
761 | BUG_ON(!validate_regs_sorted(ring)); |
||
762 | |||
6084 | serge | 763 | WARN_ON(!hash_empty(ring->cmd_hash)); |
764 | |||
5060 | serge | 765 | ret = init_hash_table(ring, cmd_tables, cmd_table_count); |
766 | if (ret) { |
||
767 | DRM_ERROR("CMD: cmd_parser_init failed!\n"); |
||
768 | fini_hash_table(ring); |
||
769 | return ret; |
||
770 | } |
||
771 | |||
772 | ring->needs_cmd_parser = true; |
||
773 | |||
774 | return 0; |
||
775 | } |
||
776 | |||
777 | /** |
||
778 | * i915_cmd_parser_fini_ring() - clean up cmd parser related fields |
||
779 | * @ring: the ringbuffer to clean up |
||
780 | * |
||
781 | * Releases any resources related to command parsing that may have been |
||
782 | * initialized for the specified ring. |
||
783 | */ |
||
784 | void i915_cmd_parser_fini_ring(struct intel_engine_cs *ring) |
||
785 | { |
||
786 | if (!ring->needs_cmd_parser) |
||
787 | return; |
||
788 | |||
789 | fini_hash_table(ring); |
||
790 | } |
||
791 | |||
792 | static const struct drm_i915_cmd_descriptor* |
||
793 | find_cmd_in_table(struct intel_engine_cs *ring, |
||
794 | u32 cmd_header) |
||
795 | { |
||
796 | struct cmd_node *desc_node; |
||
797 | |||
798 | hash_for_each_possible(ring->cmd_hash, desc_node, node, |
||
799 | cmd_header & CMD_HASH_MASK) { |
||
800 | const struct drm_i915_cmd_descriptor *desc = desc_node->desc; |
||
801 | u32 masked_cmd = desc->cmd.mask & cmd_header; |
||
802 | u32 masked_value = desc->cmd.value & desc->cmd.mask; |
||
803 | |||
804 | if (masked_cmd == masked_value) |
||
805 | return desc; |
||
806 | } |
||
807 | |||
808 | return NULL; |
||
809 | } |
||
810 | |||
811 | /* |
||
812 | * Returns a pointer to a descriptor for the command specified by cmd_header. |
||
813 | * |
||
814 | * The caller must supply space for a default descriptor via the default_desc |
||
815 | * parameter. If no descriptor for the specified command exists in the ring's |
||
816 | * command parser tables, this function fills in default_desc based on the |
||
817 | * ring's default length encoding and returns default_desc. |
||
818 | */ |
||
819 | static const struct drm_i915_cmd_descriptor* |
||
820 | find_cmd(struct intel_engine_cs *ring, |
||
821 | u32 cmd_header, |
||
822 | struct drm_i915_cmd_descriptor *default_desc) |
||
823 | { |
||
824 | const struct drm_i915_cmd_descriptor *desc; |
||
825 | u32 mask; |
||
826 | |||
827 | desc = find_cmd_in_table(ring, cmd_header); |
||
828 | if (desc) |
||
829 | return desc; |
||
830 | |||
831 | mask = ring->get_cmd_length_mask(cmd_header); |
||
832 | if (!mask) |
||
833 | return NULL; |
||
834 | |||
835 | BUG_ON(!default_desc); |
||
836 | default_desc->flags = CMD_DESC_SKIP; |
||
837 | default_desc->length.mask = mask; |
||
838 | |||
839 | return default_desc; |
||
840 | } |
||
841 | |||
6084 | serge | 842 | static const struct drm_i915_reg_descriptor * |
843 | find_reg(const struct drm_i915_reg_descriptor *table, |
||
844 | int count, u32 addr) |
||
5060 | serge | 845 | { |
6084 | serge | 846 | if (table) { |
5060 | serge | 847 | int i; |
848 | |||
849 | for (i = 0; i < count; i++) { |
||
6084 | serge | 850 | if (table[i].addr == addr) |
851 | return &table[i]; |
||
5060 | serge | 852 | } |
853 | } |
||
854 | |||
6084 | serge | 855 | return NULL; |
5060 | serge | 856 | } |
857 | |||
6084 | serge | 858 | static u32 *vmap_batch(struct drm_i915_gem_object *obj, |
859 | unsigned start, unsigned len) |
||
5060 | serge | 860 | { |
861 | int i; |
||
862 | void *addr = NULL; |
||
863 | struct sg_page_iter sg_iter; |
||
6084 | serge | 864 | int first_page = start >> PAGE_SHIFT; |
865 | int last_page = (len + start + 4095) >> PAGE_SHIFT; |
||
866 | int npages = last_page - first_page; |
||
5060 | serge | 867 | struct page **pages; |
868 | |||
6088 | serge | 869 | pages = kmalloc(obj->base.size >> PAGE_SHIFT, sizeof(*pages)); |
5060 | serge | 870 | if (pages == NULL) { |
871 | DRM_DEBUG_DRIVER("Failed to get space for pages\n"); |
||
872 | goto finish; |
||
873 | } |
||
874 | |||
875 | i = 0; |
||
6084 | serge | 876 | for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, first_page) { |
877 | pages[i++] = sg_page_iter_page(&sg_iter); |
||
878 | if (i == npages) |
||
879 | break; |
||
5060 | serge | 880 | } |
881 | |||
6084 | serge | 882 | addr = vmap(pages, i, 0, PAGE_KERNEL); |
5060 | serge | 883 | if (addr == NULL) { |
884 | DRM_DEBUG_DRIVER("Failed to vmap pages\n"); |
||
885 | goto finish; |
||
886 | } |
||
887 | |||
888 | finish: |
||
889 | if (pages) |
||
6084 | serge | 890 | drm_free_large(pages); |
5060 | serge | 891 | return (u32*)addr; |
892 | } |
||
893 | |||
6084 | serge | 894 | /* Returns a vmap'd pointer to dest_obj, which the caller must unmap */ |
895 | static u32 *copy_batch(struct drm_i915_gem_object *dest_obj, |
||
896 | struct drm_i915_gem_object *src_obj, |
||
897 | u32 batch_start_offset, |
||
898 | u32 batch_len) |
||
899 | { |
||
900 | int needs_clflush = 0; |
||
901 | void *src_base, *src; |
||
902 | void *dst = NULL; |
||
903 | int ret; |
||
904 | |||
905 | if (batch_len > dest_obj->base.size || |
||
906 | batch_len + batch_start_offset > src_obj->base.size) |
||
907 | return ERR_PTR(-E2BIG); |
||
908 | |||
909 | if (WARN_ON(dest_obj->pages_pin_count == 0)) |
||
910 | return ERR_PTR(-ENODEV); |
||
911 | |||
912 | ret = i915_gem_obj_prepare_shmem_read(src_obj, &needs_clflush); |
||
913 | if (ret) { |
||
914 | DRM_DEBUG_DRIVER("CMD: failed to prepare shadow batch\n"); |
||
915 | return ERR_PTR(ret); |
||
916 | } |
||
917 | |||
918 | src_base = vmap_batch(src_obj, batch_start_offset, batch_len); |
||
919 | if (!src_base) { |
||
920 | DRM_DEBUG_DRIVER("CMD: Failed to vmap batch\n"); |
||
921 | ret = -ENOMEM; |
||
922 | goto unpin_src; |
||
923 | } |
||
924 | |||
925 | ret = i915_gem_object_set_to_cpu_domain(dest_obj, true); |
||
926 | if (ret) { |
||
927 | DRM_DEBUG_DRIVER("CMD: Failed to set shadow batch to CPU\n"); |
||
928 | goto unmap_src; |
||
929 | } |
||
930 | |||
931 | dst = vmap_batch(dest_obj, 0, batch_len); |
||
932 | if (!dst) { |
||
933 | DRM_DEBUG_DRIVER("CMD: Failed to vmap shadow batch\n"); |
||
934 | ret = -ENOMEM; |
||
935 | goto unmap_src; |
||
936 | } |
||
937 | |||
938 | src = src_base + offset_in_page(batch_start_offset); |
||
939 | if (needs_clflush) |
||
940 | drm_clflush_virt_range(src, batch_len); |
||
941 | |||
942 | memcpy(dst, src, batch_len); |
||
943 | |||
944 | unmap_src: |
||
945 | vunmap(src_base); |
||
946 | unpin_src: |
||
947 | i915_gem_object_unpin_pages(src_obj); |
||
948 | |||
949 | return ret ? ERR_PTR(ret) : dst; |
||
950 | } |
||
951 | |||
5060 | serge | 952 | /** |
953 | * i915_needs_cmd_parser() - should a given ring use software command parsing? |
||
954 | * @ring: the ring in question |
||
955 | * |
||
956 | * Only certain platforms require software batch buffer command parsing, and |
||
5354 | serge | 957 | * only when enabled via module parameter. |
5060 | serge | 958 | * |
959 | * Return: true if the ring requires software command parsing |
||
960 | */ |
||
961 | bool i915_needs_cmd_parser(struct intel_engine_cs *ring) |
||
962 | { |
||
963 | if (!ring->needs_cmd_parser) |
||
964 | return false; |
||
965 | |||
5354 | serge | 966 | if (!USES_PPGTT(ring->dev)) |
5060 | serge | 967 | return false; |
968 | |||
969 | return (i915.enable_cmd_parser == 1); |
||
970 | } |
||
971 | |||
972 | static bool check_cmd(const struct intel_engine_cs *ring, |
||
973 | const struct drm_i915_cmd_descriptor *desc, |
||
6084 | serge | 974 | const u32 *cmd, u32 length, |
5060 | serge | 975 | const bool is_master, |
976 | bool *oacontrol_set) |
||
977 | { |
||
978 | if (desc->flags & CMD_DESC_REJECT) { |
||
979 | DRM_DEBUG_DRIVER("CMD: Rejected command: 0x%08X\n", *cmd); |
||
980 | return false; |
||
981 | } |
||
982 | |||
983 | if ((desc->flags & CMD_DESC_MASTER) && !is_master) { |
||
984 | DRM_DEBUG_DRIVER("CMD: Rejected master-only command: 0x%08X\n", |
||
985 | *cmd); |
||
986 | return false; |
||
987 | } |
||
988 | |||
989 | if (desc->flags & CMD_DESC_REGISTER) { |
||
990 | /* |
||
6084 | serge | 991 | * Get the distance between individual register offset |
992 | * fields if the command can perform more than one |
||
993 | * access at a time. |
||
5060 | serge | 994 | */ |
6084 | serge | 995 | const u32 step = desc->reg.step ? desc->reg.step : length; |
996 | u32 offset; |
||
5060 | serge | 997 | |
6084 | serge | 998 | for (offset = desc->reg.offset; offset < length; |
999 | offset += step) { |
||
1000 | const u32 reg_addr = cmd[offset] & desc->reg.mask; |
||
1001 | const struct drm_i915_reg_descriptor *reg = |
||
1002 | find_reg(ring->reg_table, ring->reg_count, |
||
1003 | reg_addr); |
||
5060 | serge | 1004 | |
6084 | serge | 1005 | if (!reg && is_master) |
1006 | reg = find_reg(ring->master_reg_table, |
||
1007 | ring->master_reg_count, |
||
1008 | reg_addr); |
||
1009 | |||
1010 | if (!reg) { |
||
5060 | serge | 1011 | DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (ring=%d)\n", |
6084 | serge | 1012 | reg_addr, *cmd, ring->id); |
5060 | serge | 1013 | return false; |
1014 | } |
||
6084 | serge | 1015 | |
1016 | /* |
||
1017 | * OACONTROL requires some special handling for |
||
1018 | * writes. We want to make sure that any batch which |
||
1019 | * enables OA also disables it before the end of the |
||
1020 | * batch. The goal is to prevent one process from |
||
1021 | * snooping on the perf data from another process. To do |
||
1022 | * that, we need to check the value that will be written |
||
1023 | * to the register. Hence, limit OACONTROL writes to |
||
1024 | * only MI_LOAD_REGISTER_IMM commands. |
||
1025 | */ |
||
1026 | if (reg_addr == OACONTROL) { |
||
1027 | if (desc->cmd.value == MI_LOAD_REGISTER_MEM) { |
||
1028 | DRM_DEBUG_DRIVER("CMD: Rejected LRM to OACONTROL\n"); |
||
1029 | return false; |
||
1030 | } |
||
1031 | |||
1032 | if (desc->cmd.value == MI_LOAD_REGISTER_IMM(1)) |
||
1033 | *oacontrol_set = (cmd[offset + 1] != 0); |
||
1034 | } |
||
1035 | |||
1036 | /* |
||
1037 | * Check the value written to the register against the |
||
1038 | * allowed mask/value pair given in the whitelist entry. |
||
1039 | */ |
||
1040 | if (reg->mask) { |
||
1041 | if (desc->cmd.value == MI_LOAD_REGISTER_MEM) { |
||
1042 | DRM_DEBUG_DRIVER("CMD: Rejected LRM to masked register 0x%08X\n", |
||
1043 | reg_addr); |
||
1044 | return false; |
||
1045 | } |
||
1046 | |||
1047 | if (desc->cmd.value == MI_LOAD_REGISTER_IMM(1) && |
||
1048 | (offset + 2 > length || |
||
1049 | (cmd[offset + 1] & reg->mask) != reg->value)) { |
||
1050 | DRM_DEBUG_DRIVER("CMD: Rejected LRI to masked register 0x%08X\n", |
||
1051 | reg_addr); |
||
1052 | return false; |
||
1053 | } |
||
1054 | } |
||
5060 | serge | 1055 | } |
1056 | } |
||
1057 | |||
1058 | if (desc->flags & CMD_DESC_BITMASK) { |
||
1059 | int i; |
||
1060 | |||
1061 | for (i = 0; i < MAX_CMD_DESC_BITMASKS; i++) { |
||
1062 | u32 dword; |
||
1063 | |||
1064 | if (desc->bits[i].mask == 0) |
||
1065 | break; |
||
1066 | |||
1067 | if (desc->bits[i].condition_mask != 0) { |
||
1068 | u32 offset = |
||
1069 | desc->bits[i].condition_offset; |
||
1070 | u32 condition = cmd[offset] & |
||
1071 | desc->bits[i].condition_mask; |
||
1072 | |||
1073 | if (condition == 0) |
||
1074 | continue; |
||
1075 | } |
||
1076 | |||
1077 | dword = cmd[desc->bits[i].offset] & |
||
1078 | desc->bits[i].mask; |
||
1079 | |||
1080 | if (dword != desc->bits[i].expected) { |
||
1081 | DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (ring=%d)\n", |
||
1082 | *cmd, |
||
1083 | desc->bits[i].mask, |
||
1084 | desc->bits[i].expected, |
||
1085 | dword, ring->id); |
||
1086 | return false; |
||
1087 | } |
||
1088 | } |
||
1089 | } |
||
1090 | |||
1091 | return true; |
||
1092 | } |
||
1093 | |||
1094 | #define LENGTH_BIAS 2 |
||
1095 | |||
1096 | #if 0 |
||
1097 | /** |
||
1098 | * i915_parse_cmds() - parse a submitted batch buffer for privilege violations |
||
1099 | * @ring: the ring on which the batch is to execute |
||
1100 | * @batch_obj: the batch buffer in question |
||
6084 | serge | 1101 | * @shadow_batch_obj: copy of the batch buffer in question |
5060 | serge | 1102 | * @batch_start_offset: byte offset in the batch at which execution starts |
6084 | serge | 1103 | * @batch_len: length of the commands in batch_obj |
5060 | serge | 1104 | * @is_master: is the submitting process the drm master? |
1105 | * |
||
1106 | * Parses the specified batch buffer looking for privilege violations as |
||
1107 | * described in the overview. |
||
1108 | * |
||
5354 | serge | 1109 | * Return: non-zero if the parser finds violations or otherwise fails; -EACCES |
1110 | * if the batch appears legal but should use hardware parsing |
||
5060 | serge | 1111 | */ |
1112 | int i915_parse_cmds(struct intel_engine_cs *ring, |
||
1113 | struct drm_i915_gem_object *batch_obj, |
||
6084 | serge | 1114 | struct drm_i915_gem_object *shadow_batch_obj, |
5060 | serge | 1115 | u32 batch_start_offset, |
6084 | serge | 1116 | u32 batch_len, |
5060 | serge | 1117 | bool is_master) |
1118 | { |
||
1119 | u32 *cmd, *batch_base, *batch_end; |
||
1120 | struct drm_i915_cmd_descriptor default_desc = { 0 }; |
||
1121 | bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */ |
||
6084 | serge | 1122 | int ret = 0; |
5060 | serge | 1123 | |
6084 | serge | 1124 | batch_base = copy_batch(shadow_batch_obj, batch_obj, |
1125 | batch_start_offset, batch_len); |
||
1126 | if (IS_ERR(batch_base)) { |
||
1127 | DRM_DEBUG_DRIVER("CMD: Failed to copy batch\n"); |
||
1128 | return PTR_ERR(batch_base); |
||
5060 | serge | 1129 | } |
1130 | |||
6084 | serge | 1131 | /* |
1132 | * We use the batch length as size because the shadow object is as |
||
1133 | * large or larger and copy_batch() will write MI_NOPs to the extra |
||
1134 | * space. Parsing should be faster in some cases this way. |
||
1135 | */ |
||
1136 | batch_end = batch_base + (batch_len / sizeof(*batch_end)); |
||
5060 | serge | 1137 | |
6084 | serge | 1138 | cmd = batch_base; |
5060 | serge | 1139 | while (cmd < batch_end) { |
1140 | const struct drm_i915_cmd_descriptor *desc; |
||
1141 | u32 length; |
||
1142 | |||
1143 | if (*cmd == MI_BATCH_BUFFER_END) |
||
1144 | break; |
||
1145 | |||
1146 | desc = find_cmd(ring, *cmd, &default_desc); |
||
1147 | if (!desc) { |
||
1148 | DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n", |
||
1149 | *cmd); |
||
1150 | ret = -EINVAL; |
||
1151 | break; |
||
1152 | } |
||
1153 | |||
5354 | serge | 1154 | /* |
1155 | * If the batch buffer contains a chained batch, return an |
||
1156 | * error that tells the caller to abort and dispatch the |
||
1157 | * workload as a non-secure batch. |
||
1158 | */ |
||
1159 | if (desc->cmd.value == MI_BATCH_BUFFER_START) { |
||
1160 | ret = -EACCES; |
||
1161 | break; |
||
1162 | } |
||
1163 | |||
5060 | serge | 1164 | if (desc->flags & CMD_DESC_FIXED) |
1165 | length = desc->length.fixed; |
||
1166 | else |
||
1167 | length = ((*cmd & desc->length.mask) + LENGTH_BIAS); |
||
1168 | |||
1169 | if ((batch_end - cmd) < length) { |
||
1170 | DRM_DEBUG_DRIVER("CMD: Command length exceeds batch length: 0x%08X length=%u batchlen=%td\n", |
||
1171 | *cmd, |
||
1172 | length, |
||
1173 | batch_end - cmd); |
||
1174 | ret = -EINVAL; |
||
1175 | break; |
||
1176 | } |
||
1177 | |||
6084 | serge | 1178 | if (!check_cmd(ring, desc, cmd, length, is_master, |
1179 | &oacontrol_set)) { |
||
5060 | serge | 1180 | ret = -EINVAL; |
1181 | break; |
||
1182 | } |
||
1183 | |||
1184 | cmd += length; |
||
1185 | } |
||
1186 | |||
1187 | if (oacontrol_set) { |
||
1188 | DRM_DEBUG_DRIVER("CMD: batch set OACONTROL but did not clear it\n"); |
||
1189 | ret = -EINVAL; |
||
1190 | } |
||
1191 | |||
1192 | if (cmd >= batch_end) { |
||
1193 | DRM_DEBUG_DRIVER("CMD: Got to the end of the buffer w/o a BBE cmd!\n"); |
||
1194 | ret = -EINVAL; |
||
1195 | } |
||
1196 | |||
6084 | serge | 1197 | vunmap(batch_base); |
5060 | serge | 1198 | |
1199 | return ret; |
||
1200 | } |
||
1201 | #endif |
||
1202 | |||
/**
 * i915_cmd_parser_get_version() - get the cmd parser version number
 *
 * The version is a plain, monotonically increasing integer that userspace
 * clients can query to find out which operations the parser permits.
 *
 * Return: the current version number of the cmd parser
 */
int i915_cmd_parser_get_version(void)
{
	/*
	 * Command parser version history
	 *
	 * 1. Initial version. Checks batches and reports violations, but leaves
	 *    hardware parsing enabled (so does not allow new use cases).
	 * 2. Allow access to the MI_PREDICATE_SRC0 and
	 *    MI_PREDICATE_SRC1 registers.
	 * 3. Allow access to the GPGPU_THREADS_DISPATCHED register.
	 * 4. L3 atomic chicken bits of HSW_SCRATCH1 and HSW_ROW_CHICKEN3.
	 * 5. GPGPU dispatch compute indirect registers.
	 */
	enum { CMD_PARSER_VERSION = 5 };

	return CMD_PARSER_VERSION;
}