123,8 → 123,8 |
CMD( MI_SEMAPHORE_MBOX, SMI, !F, 0xFF, R ), |
CMD( MI_STORE_DWORD_INDEX, SMI, !F, 0xFF, R ), |
CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W, |
.reg = { .offset = 1, .mask = 0x007FFFFC } ), |
CMD( MI_STORE_REGISTER_MEM(1), SMI, !F, 0xFF, W | B, |
.reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 } ), |
CMD( MI_STORE_REGISTER_MEM, SMI, F, 3, W | B, |
.reg = { .offset = 1, .mask = 0x007FFFFC }, |
.bits = {{ |
.offset = 0, |
131,7 → 131,7 |
.mask = MI_GLOBAL_GTT, |
.expected = 0, |
}}, ), |
CMD( MI_LOAD_REGISTER_MEM, SMI, !F, 0xFF, W | B, |
CMD( MI_LOAD_REGISTER_MEM, SMI, F, 3, W | B, |
.reg = { .offset = 1, .mask = 0x007FFFFC }, |
.bits = {{ |
.offset = 0, |
151,6 → 151,7 |
CMD( MI_ARB_ON_OFF, SMI, F, 1, R ), |
CMD( MI_PREDICATE, SMI, F, 1, S ), |
CMD( MI_TOPOLOGY_FILTER, SMI, F, 1, S ), |
CMD( MI_SET_APPID, SMI, F, 1, S ), |
CMD( MI_DISPLAY_FLIP, SMI, !F, 0xFF, R ), |
CMD( MI_SET_CONTEXT, SMI, !F, 0xFF, R ), |
CMD( MI_URB_CLEAR, SMI, !F, 0xFF, S ), |
210,6 → 211,7 |
CMD( MI_SET_PREDICATE, SMI, F, 1, S ), |
CMD( MI_RS_CONTROL, SMI, F, 1, S ), |
CMD( MI_URB_ATOMIC_ALLOC, SMI, F, 1, S ), |
CMD( MI_SET_APPID, SMI, F, 1, S ), |
CMD( MI_RS_CONTEXT, SMI, F, 1, S ), |
CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, M ), |
CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ), |
229,6 → 231,7 |
|
static const struct drm_i915_cmd_descriptor video_cmds[] = { |
CMD( MI_ARB_ON_OFF, SMI, F, 1, R ), |
CMD( MI_SET_APPID, SMI, F, 1, S ), |
CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, B, |
.bits = {{ |
.offset = 0, |
272,6 → 275,7 |
|
static const struct drm_i915_cmd_descriptor vecs_cmds[] = { |
CMD( MI_ARB_ON_OFF, SMI, F, 1, R ), |
CMD( MI_SET_APPID, SMI, F, 1, S ), |
CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, B, |
.bits = {{ |
.offset = 0, |
391,16 → 395,39 |
|
/* |
* Register whitelists, sorted by increasing register offset. |
*/ |
|
/* |
* An individual whitelist entry granting access to register addr. If |
* mask is non-zero the argument of immediate register writes will be |
* AND-ed with mask, and the command will be rejected if the result |
* doesn't match value. |
* |
* Registers with non-zero mask are only allowed to be written using |
* LRI. |
*/ |
struct drm_i915_reg_descriptor {
	u32 addr;	/* register offset this entry whitelists */
	u32 mask;	/* non-zero: LRI payload is AND-ed with this mask... */
	u32 value;	/* ...and rejected unless the result equals this value */
};
|
/* Convenience macro for adding 32-bit registers. */ |
#define REG32(address, ...) \ |
{ .addr = address, __VA_ARGS__ } |
|
/* |
* Convenience macro for adding 64-bit registers. |
* |
* Some registers that userspace accesses are 64 bits. The register |
* access commands only allow 32-bit accesses. Hence, we have to include |
* entries for both halves of the 64-bit registers. |
*/ |
#define REG64(addr) \ |
REG32(addr), REG32(addr + sizeof(u32)) |
|
/* Convenience macro for adding 64-bit registers */ |
#define REG64(addr) (addr), (addr + sizeof(u32)) |
|
static const u32 gen7_render_regs[] = { |
static const struct drm_i915_reg_descriptor gen7_render_regs[] = { |
REG64(GPGPU_THREADS_DISPATCHED), |
REG64(HS_INVOCATION_COUNT), |
REG64(DS_INVOCATION_COUNT), |
REG64(IA_VERTICES_COUNT), |
412,15 → 439,18 |
REG64(CL_PRIMITIVES_COUNT), |
REG64(PS_INVOCATION_COUNT), |
REG64(PS_DEPTH_COUNT), |
OACONTROL, /* Only allowed for LRI and SRM. See below. */ |
REG32(OACONTROL), /* Only allowed for LRI and SRM. See below. */ |
REG64(MI_PREDICATE_SRC0), |
REG64(MI_PREDICATE_SRC1), |
GEN7_3DPRIM_END_OFFSET, |
GEN7_3DPRIM_START_VERTEX, |
GEN7_3DPRIM_VERTEX_COUNT, |
GEN7_3DPRIM_INSTANCE_COUNT, |
GEN7_3DPRIM_START_INSTANCE, |
GEN7_3DPRIM_BASE_VERTEX, |
REG32(GEN7_3DPRIM_END_OFFSET), |
REG32(GEN7_3DPRIM_START_VERTEX), |
REG32(GEN7_3DPRIM_VERTEX_COUNT), |
REG32(GEN7_3DPRIM_INSTANCE_COUNT), |
REG32(GEN7_3DPRIM_START_INSTANCE), |
REG32(GEN7_3DPRIM_BASE_VERTEX), |
REG32(GEN7_GPGPU_DISPATCHDIMX), |
REG32(GEN7_GPGPU_DISPATCHDIMY), |
REG32(GEN7_GPGPU_DISPATCHDIMZ), |
REG64(GEN7_SO_NUM_PRIMS_WRITTEN(0)), |
REG64(GEN7_SO_NUM_PRIMS_WRITTEN(1)), |
REG64(GEN7_SO_NUM_PRIMS_WRITTEN(2)), |
429,33 → 459,41 |
REG64(GEN7_SO_PRIM_STORAGE_NEEDED(1)), |
REG64(GEN7_SO_PRIM_STORAGE_NEEDED(2)), |
REG64(GEN7_SO_PRIM_STORAGE_NEEDED(3)), |
GEN7_SO_WRITE_OFFSET(0), |
GEN7_SO_WRITE_OFFSET(1), |
GEN7_SO_WRITE_OFFSET(2), |
GEN7_SO_WRITE_OFFSET(3), |
GEN7_L3SQCREG1, |
GEN7_L3CNTLREG2, |
GEN7_L3CNTLREG3, |
REG32(GEN7_SO_WRITE_OFFSET(0)), |
REG32(GEN7_SO_WRITE_OFFSET(1)), |
REG32(GEN7_SO_WRITE_OFFSET(2)), |
REG32(GEN7_SO_WRITE_OFFSET(3)), |
REG32(GEN7_L3SQCREG1), |
REG32(GEN7_L3CNTLREG2), |
REG32(GEN7_L3CNTLREG3), |
REG32(HSW_SCRATCH1, |
.mask = ~HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE, |
.value = 0), |
REG32(HSW_ROW_CHICKEN3, |
.mask = ~(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE << 16 | |
HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE), |
.value = 0), |
}; |
|
static const u32 gen7_blt_regs[] = { |
BCS_SWCTRL, |
static const struct drm_i915_reg_descriptor gen7_blt_regs[] = { |
REG32(BCS_SWCTRL), |
}; |
|
static const u32 ivb_master_regs[] = { |
FORCEWAKE_MT, |
DERRMR, |
GEN7_PIPE_DE_LOAD_SL(PIPE_A), |
GEN7_PIPE_DE_LOAD_SL(PIPE_B), |
GEN7_PIPE_DE_LOAD_SL(PIPE_C), |
static const struct drm_i915_reg_descriptor ivb_master_regs[] = { |
REG32(FORCEWAKE_MT), |
REG32(DERRMR), |
REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_A)), |
REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_B)), |
REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_C)), |
}; |
|
static const u32 hsw_master_regs[] = { |
FORCEWAKE_MT, |
DERRMR, |
static const struct drm_i915_reg_descriptor hsw_master_regs[] = { |
REG32(FORCEWAKE_MT), |
REG32(DERRMR), |
}; |
|
#undef REG64 |
#undef REG32 |
|
static u32 gen7_render_get_cmd_length_mask(u32 cmd_header) |
{ |
481,13 → 519,17 |
u32 client = (cmd_header & INSTR_CLIENT_MASK) >> INSTR_CLIENT_SHIFT; |
u32 subclient = |
(cmd_header & INSTR_SUBCLIENT_MASK) >> INSTR_SUBCLIENT_SHIFT; |
u32 op = (cmd_header & INSTR_26_TO_24_MASK) >> INSTR_26_TO_24_SHIFT; |
|
if (client == INSTR_MI_CLIENT) |
return 0x3F; |
else if (client == INSTR_RC_CLIENT) { |
if (subclient == INSTR_MEDIA_SUBCLIENT) |
if (subclient == INSTR_MEDIA_SUBCLIENT) { |
if (op == 6) |
return 0xFFFF; |
else |
return 0xFFF; |
else |
} else |
return 0xFF; |
} |
|
525,7 → 567,7 |
|
for (j = 0; j < table->count; j++) { |
const struct drm_i915_cmd_descriptor *desc = |
&table->table[i]; |
&table->table[j]; |
u32 curr = desc->cmd.value & desc->cmd.mask; |
|
if (curr < previous) { |
541,7 → 583,9 |
return ret; |
} |
|
static bool check_sorted(int ring_id, const u32 *reg_table, int reg_count) |
static bool check_sorted(int ring_id, |
const struct drm_i915_reg_descriptor *reg_table, |
int reg_count) |
{ |
int i; |
u32 previous = 0; |
548,7 → 592,7 |
bool ret = true; |
|
for (i = 0; i < reg_count; i++) { |
u32 curr = reg_table[i]; |
u32 curr = reg_table[i].addr; |
|
if (curr < previous) { |
DRM_ERROR("CMD: table not sorted ring=%d entry=%d reg=0x%08X prev=0x%08X\n", |
716,7 → 760,8 |
BUG_ON(!validate_cmds_sorted(ring, cmd_tables, cmd_table_count)); |
BUG_ON(!validate_regs_sorted(ring)); |
|
if (hash_empty(ring->cmd_hash)) { |
WARN_ON(!hash_empty(ring->cmd_hash)); |
|
ret = init_hash_table(ring, cmd_tables, cmd_table_count); |
if (ret) { |
DRM_ERROR("CMD: cmd_parser_init failed!\n"); |
723,7 → 768,6 |
fini_hash_table(ring); |
return ret; |
} |
} |
|
ring->needs_cmd_parser = true; |
|
795,25 → 839,31 |
return default_desc; |
} |
|
/*
 * Look up @addr in the whitelist @table of @count entries.
 *
 * Returns the matching register descriptor, or NULL if @addr is not
 * whitelisted (or @table is NULL). A linear scan is fine here: the
 * whitelist tables are small and this runs per register access, not
 * per dword.
 */
static const struct drm_i915_reg_descriptor *
find_reg(const struct drm_i915_reg_descriptor *table,
	 int count, u32 addr)
{
	if (table) {
		int i;

		for (i = 0; i < count; i++) {
			if (table[i].addr == addr)
				return &table[i];
		}
	}

	return NULL;
}
|
static u32 *vmap_batch(struct drm_i915_gem_object *obj) |
static u32 *vmap_batch(struct drm_i915_gem_object *obj, |
unsigned start, unsigned len) |
{ |
int i; |
void *addr = NULL; |
struct sg_page_iter sg_iter; |
int first_page = start >> PAGE_SHIFT; |
int last_page = (len + start + 4095) >> PAGE_SHIFT; |
int npages = last_page - first_page; |
struct page **pages; |
|
pages = kmalloc(obj->base.size >> PAGE_SHIFT, sizeof(*pages)); |
823,12 → 873,13 |
} |
|
i = 0; |
for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) { |
pages[i] = sg_page_iter_page(&sg_iter); |
i++; |
for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, first_page) { |
pages[i++] = sg_page_iter_page(&sg_iter); |
if (i == npages) |
break; |
} |
|
// addr = vmap(pages, i, 0, PAGE_KERNEL); |
addr = vmap(pages, i, 0, PAGE_KERNEL); |
if (addr == NULL) { |
DRM_DEBUG_DRIVER("Failed to vmap pages\n"); |
goto finish; |
836,10 → 887,68 |
|
finish: |
if (pages) |
free(pages); |
drm_free_large(pages); |
return (u32*)addr; |
} |
|
/*
 * copy_batch() - copy a user batch buffer into the shadow batch object.
 *
 * Copies @batch_len bytes starting at @batch_start_offset in @src_obj
 * into the start of @dest_obj, so the command parser can scan a stable
 * copy that userspace cannot modify after validation.
 *
 * Returns a vmap'd pointer to dest_obj, which the caller must unmap,
 * or an ERR_PTR on failure.
 */
static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
		       struct drm_i915_gem_object *src_obj,
		       u32 batch_start_offset,
		       u32 batch_len)
{
	int needs_clflush = 0;
	void *src_base, *src;
	void *dst = NULL;
	int ret;

	/*
	 * Reject batches that would overrun either object.
	 * NOTE(review): batch_len + batch_start_offset is u32 arithmetic
	 * and could wrap for very large values — confirm callers bound
	 * these, or that a wrap cannot pass this check.
	 */
	if (batch_len > dest_obj->base.size ||
	    batch_len + batch_start_offset > src_obj->base.size)
		return ERR_PTR(-E2BIG);

	/* Caller is expected to hold a pages pin on the shadow object. */
	if (WARN_ON(dest_obj->pages_pin_count == 0))
		return ERR_PTR(-ENODEV);

	/* Pin source pages for CPU reads; tells us if clflush is needed. */
	ret = i915_gem_obj_prepare_shmem_read(src_obj, &needs_clflush);
	if (ret) {
		DRM_DEBUG_DRIVER("CMD: failed to prepare shadow batch\n");
		return ERR_PTR(ret);
	}

	/* Map only the pages covering the batch range, not the whole object. */
	src_base = vmap_batch(src_obj, batch_start_offset, batch_len);
	if (!src_base) {
		DRM_DEBUG_DRIVER("CMD: Failed to vmap batch\n");
		ret = -ENOMEM;
		goto unpin_src;
	}

	/* Make the destination CPU-writable before we memcpy into it. */
	ret = i915_gem_object_set_to_cpu_domain(dest_obj, true);
	if (ret) {
		DRM_DEBUG_DRIVER("CMD: Failed to set shadow batch to CPU\n");
		goto unmap_src;
	}

	dst = vmap_batch(dest_obj, 0, batch_len);
	if (!dst) {
		DRM_DEBUG_DRIVER("CMD: Failed to vmap shadow batch\n");
		ret = -ENOMEM;
		goto unmap_src;
	}

	/* vmap_batch maps whole pages; adjust to the offset within the page. */
	src = src_base + offset_in_page(batch_start_offset);
	if (needs_clflush)
		drm_clflush_virt_range(src, batch_len);

	memcpy(dst, src, batch_len);

unmap_src:
	vunmap(src_base);
unpin_src:
	i915_gem_object_unpin_pages(src_obj);

	/* On success dst stays mapped for the caller; on error it is NULL. */
	return ret ? ERR_PTR(ret) : dst;
}
|
/** |
* i915_needs_cmd_parser() - should a given ring use software command parsing? |
* @ring: the ring in question |
862,7 → 971,7 |
|
static bool check_cmd(const struct intel_engine_cs *ring, |
const struct drm_i915_cmd_descriptor *desc, |
const u32 *cmd, |
const u32 *cmd, u32 length, |
const bool is_master, |
bool *oacontrol_set) |
{ |
878,16 → 987,41 |
} |
|
if (desc->flags & CMD_DESC_REGISTER) { |
u32 reg_addr = cmd[desc->reg.offset] & desc->reg.mask; |
/* |
* Get the distance between individual register offset |
* fields if the command can perform more than one |
* access at a time. |
*/ |
const u32 step = desc->reg.step ? desc->reg.step : length; |
u32 offset; |
|
for (offset = desc->reg.offset; offset < length; |
offset += step) { |
const u32 reg_addr = cmd[offset] & desc->reg.mask; |
const struct drm_i915_reg_descriptor *reg = |
find_reg(ring->reg_table, ring->reg_count, |
reg_addr); |
|
if (!reg && is_master) |
reg = find_reg(ring->master_reg_table, |
ring->master_reg_count, |
reg_addr); |
|
if (!reg) { |
DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (ring=%d)\n", |
reg_addr, *cmd, ring->id); |
return false; |
} |
|
/* |
* OACONTROL requires some special handling for writes. We |
* want to make sure that any batch which enables OA also |
* disables it before the end of the batch. The goal is to |
* prevent one process from snooping on the perf data from |
* another process. To do that, we need to check the value |
* that will be written to the register. Hence, limit |
* OACONTROL writes to only MI_LOAD_REGISTER_IMM commands. |
* OACONTROL requires some special handling for |
* writes. We want to make sure that any batch which |
* enables OA also disables it before the end of the |
* batch. The goal is to prevent one process from |
* snooping on the perf data from another process. To do |
* that, we need to check the value that will be written |
* to the register. Hence, limit OACONTROL writes to |
* only MI_LOAD_REGISTER_IMM commands. |
*/ |
if (reg_addr == OACONTROL) { |
if (desc->cmd.value == MI_LOAD_REGISTER_MEM) { |
896,23 → 1030,30 |
} |
|
if (desc->cmd.value == MI_LOAD_REGISTER_IMM(1)) |
*oacontrol_set = (cmd[2] != 0); |
*oacontrol_set = (cmd[offset + 1] != 0); |
} |
|
if (!valid_reg(ring->reg_table, |
ring->reg_count, reg_addr)) { |
if (!is_master || |
!valid_reg(ring->master_reg_table, |
ring->master_reg_count, |
reg_addr)) { |
DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (ring=%d)\n", |
reg_addr, |
*cmd, |
ring->id); |
/* |
* Check the value written to the register against the |
* allowed mask/value pair given in the whitelist entry. |
*/ |
if (reg->mask) { |
if (desc->cmd.value == MI_LOAD_REGISTER_MEM) { |
DRM_DEBUG_DRIVER("CMD: Rejected LRM to masked register 0x%08X\n", |
reg_addr); |
return false; |
} |
|
if (desc->cmd.value == MI_LOAD_REGISTER_IMM(1) && |
(offset + 2 > length || |
(cmd[offset + 1] & reg->mask) != reg->value)) { |
DRM_DEBUG_DRIVER("CMD: Rejected LRI to masked register 0x%08X\n", |
reg_addr); |
return false; |
} |
} |
} |
} |
|
if (desc->flags & CMD_DESC_BITMASK) { |
int i; |
957,7 → 1098,9 |
* i915_parse_cmds() - parse a submitted batch buffer for privilege violations |
* @ring: the ring on which the batch is to execute |
* @batch_obj: the batch buffer in question |
* @shadow_batch_obj: copy of the batch buffer in question |
* @batch_start_offset: byte offset in the batch at which execution starts |
* @batch_len: length of the commands in batch_obj |
* @is_master: is the submitting process the drm master? |
* |
* Parses the specified batch buffer looking for privilege violations as |
968,34 → 1111,31 |
*/ |
int i915_parse_cmds(struct intel_engine_cs *ring, |
struct drm_i915_gem_object *batch_obj, |
struct drm_i915_gem_object *shadow_batch_obj, |
u32 batch_start_offset, |
u32 batch_len, |
bool is_master) |
{ |
int ret = 0; |
u32 *cmd, *batch_base, *batch_end; |
struct drm_i915_cmd_descriptor default_desc = { 0 }; |
int needs_clflush = 0; |
bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */ |
int ret = 0; |
|
ret = i915_gem_obj_prepare_shmem_read(batch_obj, &needs_clflush); |
if (ret) { |
DRM_DEBUG_DRIVER("CMD: failed to prep read\n"); |
return ret; |
batch_base = copy_batch(shadow_batch_obj, batch_obj, |
batch_start_offset, batch_len); |
if (IS_ERR(batch_base)) { |
DRM_DEBUG_DRIVER("CMD: Failed to copy batch\n"); |
return PTR_ERR(batch_base); |
} |
|
batch_base = vmap_batch(batch_obj); |
if (!batch_base) { |
DRM_DEBUG_DRIVER("CMD: Failed to vmap batch\n"); |
i915_gem_object_unpin_pages(batch_obj); |
return -ENOMEM; |
} |
/* |
* We use the batch length as size because the shadow object is as |
* large or larger and copy_batch() will write MI_NOPs to the extra |
* space. Parsing should be faster in some cases this way. |
*/ |
batch_end = batch_base + (batch_len / sizeof(*batch_end)); |
|
if (needs_clflush) |
drm_clflush_virt_range((char *)batch_base, batch_obj->base.size); |
|
cmd = batch_base + (batch_start_offset / sizeof(*cmd)); |
batch_end = cmd + (batch_obj->base.size / sizeof(*batch_end)); |
|
cmd = batch_base; |
while (cmd < batch_end) { |
const struct drm_i915_cmd_descriptor *desc; |
u32 length; |
1035,7 → 1175,8 |
break; |
} |
|
if (!check_cmd(ring, desc, cmd, is_master, &oacontrol_set)) { |
if (!check_cmd(ring, desc, cmd, length, is_master, |
&oacontrol_set)) { |
ret = -EINVAL; |
break; |
} |
1053,10 → 1194,8 |
ret = -EINVAL; |
} |
|
// vunmap(batch_base); |
vunmap(batch_base); |
|
i915_gem_object_unpin_pages(batch_obj); |
|
return ret; |
} |
#endif |
1078,6 → 1217,9 |
* hardware parsing enabled (so does not allow new use cases). |
* 2. Allow access to the MI_PREDICATE_SRC0 and |
* MI_PREDICATE_SRC1 registers. |
* 3. Allow access to the GPGPU_THREADS_DISPATCHED register. |
* 4. L3 atomic chicken bits of HSW_SCRATCH1 and HSW_ROW_CHICKEN3. |
* 5. GPGPU dispatch compute indirect registers. |
*/ |
return 2; |
return 5; |
} |