/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_buffer.h |
---|
0,0 → 1,90 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_BUFFER_H |
#define ILO_BUFFER_H |
#include "intel_winsys.h" |
#include "ilo_core.h" |
#include "ilo_dev.h" |
/**
 * A buffer: the size its backing bo should have, and the bo itself.
 */
struct ilo_buffer {
   /* size of the backing bo in bytes, as computed by ilo_buffer_init() */
   unsigned bo_size;

   /* backing bo; installed with ilo_buffer_set_bo() */
   struct intel_bo *bo;
};
static inline void |
ilo_buffer_init(struct ilo_buffer *buf, const struct ilo_dev *dev, |
unsigned size, uint32_t bind, uint32_t flags) |
{ |
buf->bo_size = size; |
/* |
* From the Sandy Bridge PRM, volume 1 part 1, page 118: |
* |
* "For buffers, which have no inherent "height," padding requirements |
* are different. A buffer must be padded to the next multiple of 256 |
* array elements, with an additional 16 bytes added beyond that to |
* account for the L1 cache line." |
*/ |
if (bind & PIPE_BIND_SAMPLER_VIEW) |
buf->bo_size = align(buf->bo_size, 256) + 16; |
if ((bind & PIPE_BIND_VERTEX_BUFFER) && ilo_dev_gen(dev) < ILO_GEN(7.5)) { |
/* |
* As noted in ilo_format_translate(), we treat some 3-component formats |
* as 4-component formats to work around hardware limitations. Imagine |
* the case where the vertex buffer holds a single |
* PIPE_FORMAT_R16G16B16_FLOAT vertex, and buf->bo_size is 6. The |
* hardware would fail to fetch it at boundary check because the vertex |
* buffer is expected to hold a PIPE_FORMAT_R16G16B16A16_FLOAT vertex |
* and that takes at least 8 bytes. |
* |
* For the workaround to work, we should add 2 to the bo size. But that |
* would waste a page when the bo size is already page aligned. Let's |
* round it to page size for now and revisit this when needed. |
*/ |
buf->bo_size = align(buf->bo_size, 4096); |
} |
} |
static inline void |
ilo_buffer_cleanup(struct ilo_buffer *buf) |
{ |
intel_bo_unref(buf->bo); |
} |
static inline void |
ilo_buffer_set_bo(struct ilo_buffer *buf, struct intel_bo *bo) |
{ |
intel_bo_unref(buf->bo); |
buf->bo = intel_bo_ref(bo); |
} |
#endif /* ILO_BUFFER_H */ |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_builder.c |
---|
0,0 → 1,495 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2014 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include "ilo_builder.h" |
#include "ilo_builder_render.h" /* for ilo_builder_batch_patch_sba() */ |
/**
 * Internal per-writer flags, stored in ilo_builder_writer::flags.
 */
enum ilo_builder_writer_flags {
   /*
    * Append mode: ilo_builder_begin() does not reallocate the bo and new
    * data are appended to what is already there.
    */
   WRITER_FLAG_APPEND    = 0x1,

   /*
    * Growable: the writer grows when it is full.  Without this bit, callers
    * must make sure the writer never needs to grow.
    */
   WRITER_FLAG_GROW      = 0x2,

   /*
    * The writer will be mapped directly.
    */
   WRITER_FLAG_MAP       = 0x4,
};
/** |
* Set the initial size and flags of a writer. |
*/ |
static void |
ilo_builder_writer_init(struct ilo_builder *builder, |
enum ilo_builder_writer_type which) |
{ |
struct ilo_builder_writer *writer = &builder->writers[which]; |
switch (which) { |
case ILO_BUILDER_WRITER_BATCH: |
writer->size = sizeof(uint32_t) * 8192; |
break; |
case ILO_BUILDER_WRITER_INSTRUCTION: |
/* |
* The EUs pretch some instructions. But since the kernel invalidates |
* the instruction cache between batch buffers, we can set |
* WRITER_FLAG_APPEND without worrying the EUs would see invalid |
* instructions prefetched. |
*/ |
writer->flags = WRITER_FLAG_APPEND | WRITER_FLAG_GROW; |
writer->size = 8192; |
break; |
default: |
assert(!"unknown builder writer"); |
return; |
break; |
} |
if (builder->dev->has_llc) |
writer->flags |= WRITER_FLAG_MAP; |
} |
/** |
* Free all resources used by a writer. Note that the initial size is not |
* reset. |
*/ |
static void |
ilo_builder_writer_reset(struct ilo_builder *builder, |
enum ilo_builder_writer_type which) |
{ |
struct ilo_builder_writer *writer = &builder->writers[which]; |
if (writer->ptr) { |
if (writer->flags & WRITER_FLAG_MAP) |
intel_bo_unmap(writer->bo); |
else |
FREE(writer->ptr); |
writer->ptr = NULL; |
} |
intel_bo_unref(writer->bo); |
writer->bo = NULL; |
writer->used = 0; |
writer->stolen = 0; |
if (writer->items) { |
FREE(writer->items); |
writer->item_alloc = 0; |
writer->item_used = 0; |
} |
} |
/** |
* Discard everything written so far. |
*/ |
void |
ilo_builder_writer_discard(struct ilo_builder *builder, |
enum ilo_builder_writer_type which) |
{ |
struct ilo_builder_writer *writer = &builder->writers[which]; |
intel_bo_truncate_relocs(writer->bo, 0); |
writer->used = 0; |
writer->stolen = 0; |
writer->item_used = 0; |
} |
static struct intel_bo * |
alloc_writer_bo(struct intel_winsys *winsys, |
enum ilo_builder_writer_type which, |
unsigned size) |
{ |
static const char *writer_names[ILO_BUILDER_WRITER_COUNT] = { |
[ILO_BUILDER_WRITER_BATCH] = "batch", |
[ILO_BUILDER_WRITER_INSTRUCTION] = "instruction", |
}; |
return intel_winsys_alloc_bo(winsys, writer_names[which], size, true); |
} |
static void * |
map_writer_bo(struct intel_bo *bo, unsigned flags) |
{ |
assert(flags & WRITER_FLAG_MAP); |
if (flags & WRITER_FLAG_APPEND) |
return intel_bo_map_gtt_async(bo); |
else |
return intel_bo_map(bo, true); |
} |
/** |
* Allocate and map the buffer for writing. |
*/ |
static bool |
ilo_builder_writer_alloc_and_map(struct ilo_builder *builder, |
enum ilo_builder_writer_type which) |
{ |
struct ilo_builder_writer *writer = &builder->writers[which]; |
/* allocate a new bo when not appending */ |
if (!(writer->flags & WRITER_FLAG_APPEND) || !writer->bo) { |
struct intel_bo *bo; |
bo = alloc_writer_bo(builder->winsys, which, writer->size); |
if (bo) { |
intel_bo_unref(writer->bo); |
writer->bo = bo; |
} else if (writer->bo) { |
/* reuse the old bo */ |
ilo_builder_writer_discard(builder, which); |
} else { |
return false; |
} |
writer->used = 0; |
writer->stolen = 0; |
writer->item_used = 0; |
} |
/* map the bo or allocate the staging system memory */ |
if (writer->flags & WRITER_FLAG_MAP) |
writer->ptr = map_writer_bo(writer->bo, writer->flags); |
else if (!writer->ptr) |
writer->ptr = MALLOC(writer->size); |
return (writer->ptr != NULL); |
} |
/** |
* Unmap the buffer for submission. |
*/ |
static bool |
ilo_builder_writer_unmap(struct ilo_builder *builder, |
enum ilo_builder_writer_type which) |
{ |
struct ilo_builder_writer *writer = &builder->writers[which]; |
unsigned offset; |
int err = 0; |
if (writer->flags & WRITER_FLAG_MAP) { |
intel_bo_unmap(writer->bo); |
writer->ptr = NULL; |
return true; |
} |
offset = builder->begin_used[which]; |
if (writer->used > offset) { |
err = intel_bo_pwrite(writer->bo, offset, writer->used - offset, |
(char *) writer->ptr + offset); |
} |
if (writer->stolen && !err) { |
const unsigned offset = writer->size - writer->stolen; |
err = intel_bo_pwrite(writer->bo, offset, writer->stolen, |
(const char *) writer->ptr + offset); |
} |
/* keep writer->ptr */ |
return !err; |
} |
/** |
* Grow a mapped writer to at least \p new_size. |
*/ |
bool |
ilo_builder_writer_grow(struct ilo_builder *builder, |
enum ilo_builder_writer_type which, |
unsigned new_size, bool preserve) |
{ |
struct ilo_builder_writer *writer = &builder->writers[which]; |
struct intel_bo *new_bo; |
void *new_ptr; |
if (!(writer->flags & WRITER_FLAG_GROW)) |
return false; |
/* stolen data may already be referenced and cannot be moved */ |
if (writer->stolen) |
return false; |
if (new_size < writer->size << 1) |
new_size = writer->size << 1; |
/* STATE_BASE_ADDRESS requires page-aligned buffers */ |
new_size = align(new_size, 4096); |
new_bo = alloc_writer_bo(builder->winsys, which, new_size); |
if (!new_bo) |
return false; |
/* map and copy the data over */ |
if (writer->flags & WRITER_FLAG_MAP) { |
new_ptr = map_writer_bo(new_bo, writer->flags); |
/* |
* When WRITER_FLAG_APPEND and WRITER_FLAG_GROW are both set, we may end |
* up copying between two GTT-mapped BOs. That is slow. The issue |
* could be solved by adding intel_bo_map_async(), or callers may choose |
* to manually grow the writer without preserving the data. |
*/ |
if (new_ptr && preserve) |
memcpy(new_ptr, writer->ptr, writer->used); |
} else if (preserve) { |
new_ptr = REALLOC(writer->ptr, writer->size, new_size); |
} else { |
new_ptr = MALLOC(new_size); |
} |
if (!new_ptr) { |
intel_bo_unref(new_bo); |
return false; |
} |
if (writer->flags & WRITER_FLAG_MAP) |
intel_bo_unmap(writer->bo); |
else if (!preserve) |
FREE(writer->ptr); |
intel_bo_unref(writer->bo); |
writer->size = new_size; |
writer->bo = new_bo; |
writer->ptr = new_ptr; |
return true; |
} |
/** |
* Record an item for later decoding. |
*/ |
bool |
ilo_builder_writer_record(struct ilo_builder *builder, |
enum ilo_builder_writer_type which, |
enum ilo_builder_item_type type, |
unsigned offset, unsigned size) |
{ |
struct ilo_builder_writer *writer = &builder->writers[which]; |
struct ilo_builder_item *item; |
if (writer->item_used == writer->item_alloc) { |
const unsigned new_alloc = (writer->item_alloc) ? |
writer->item_alloc << 1 : 256; |
struct ilo_builder_item *items; |
items = REALLOC(writer->items, |
sizeof(writer->items[0]) * writer->item_alloc, |
sizeof(writer->items[0]) * new_alloc); |
if (!items) |
return false; |
writer->items = items; |
writer->item_alloc = new_alloc; |
} |
item = &writer->items[writer->item_used++]; |
item->type = type; |
item->offset = offset; |
item->size = size; |
return true; |
} |
/** |
* Initialize the builder. |
*/ |
void |
ilo_builder_init(struct ilo_builder *builder, |
const struct ilo_dev *dev, |
struct intel_winsys *winsys) |
{ |
int i; |
memset(builder, 0, sizeof(*builder)); |
builder->dev = dev; |
builder->winsys = winsys; |
/* gen6_SURFACE_STATE() may override this */ |
switch (ilo_dev_gen(dev)) { |
case ILO_GEN(8): |
builder->mocs = GEN8_MOCS_MT_WB | GEN8_MOCS_CT_L3; |
break; |
case ILO_GEN(7.5): |
case ILO_GEN(7): |
builder->mocs = GEN7_MOCS_L3_WB; |
break; |
default: |
builder->mocs = 0; |
break; |
} |
for (i = 0; i < ILO_BUILDER_WRITER_COUNT; i++) |
ilo_builder_writer_init(builder, i); |
} |
/** |
* Reset the builder and free all resources used. After resetting, the |
* builder behaves as if it is newly initialized, except for potentially |
* larger initial bo sizes. |
*/ |
void |
ilo_builder_reset(struct ilo_builder *builder) |
{ |
int i; |
for (i = 0; i < ILO_BUILDER_WRITER_COUNT; i++) |
ilo_builder_writer_reset(builder, i); |
} |
/** |
* Allocate and map the BOs. It may re-allocate or reuse existing BOs if |
* there is any. |
* |
* Most builder functions can only be called after ilo_builder_begin() and |
* before ilo_builder_end(). |
*/ |
bool |
ilo_builder_begin(struct ilo_builder *builder) |
{ |
int i; |
for (i = 0; i < ILO_BUILDER_WRITER_COUNT; i++) { |
if (!ilo_builder_writer_alloc_and_map(builder, i)) { |
ilo_builder_reset(builder); |
return false; |
} |
builder->begin_used[i] = builder->writers[i].used; |
} |
builder->unrecoverable_error = false; |
builder->sba_instruction_pos = 0; |
return true; |
} |
/** |
* Unmap BOs and make sure the written data landed the BOs. The batch buffer |
* ready for submission is returned. |
*/ |
struct intel_bo * |
ilo_builder_end(struct ilo_builder *builder, unsigned *used) |
{ |
struct ilo_builder_writer *bat; |
int i; |
ilo_builder_batch_patch_sba(builder); |
assert(ilo_builder_validate(builder, 0, NULL)); |
for (i = 0; i < ILO_BUILDER_WRITER_COUNT; i++) { |
if (!ilo_builder_writer_unmap(builder, i)) |
builder->unrecoverable_error = true; |
} |
if (builder->unrecoverable_error) |
return NULL; |
bat = &builder->writers[ILO_BUILDER_WRITER_BATCH]; |
*used = bat->used; |
return bat->bo; |
} |
/** |
* Return true if the builder is in a valid state, after accounting for the |
* additional BOs specified. The additional BOs can be listed to avoid |
* snapshotting and restoring when they are known ahead of time. |
* |
* The number of additional BOs should not be more than a few. Like two, for |
* copying between two BOs. |
* |
* Callers must make sure the builder is in a valid state when |
* ilo_builder_end() is called. |
*/ |
bool |
ilo_builder_validate(struct ilo_builder *builder, |
unsigned bo_count, struct intel_bo **bos) |
{ |
const unsigned max_bo_count = 2; |
struct intel_bo *bos_to_submit[ILO_BUILDER_WRITER_COUNT + max_bo_count]; |
int i; |
for (i = 0; i < ILO_BUILDER_WRITER_COUNT; i++) |
bos_to_submit[i] = builder->writers[i].bo; |
if (bo_count) { |
assert(bo_count <= max_bo_count); |
if (bo_count > max_bo_count) |
return false; |
memcpy(&bos_to_submit[ILO_BUILDER_WRITER_COUNT], |
bos, sizeof(*bos) * bo_count); |
i += bo_count; |
} |
return intel_winsys_can_submit_bo(builder->winsys, bos_to_submit, i); |
} |
/** |
* Take a snapshot of the writer state. |
*/ |
void |
ilo_builder_batch_snapshot(const struct ilo_builder *builder, |
struct ilo_builder_snapshot *snapshot) |
{ |
const enum ilo_builder_writer_type which = ILO_BUILDER_WRITER_BATCH; |
const struct ilo_builder_writer *writer = &builder->writers[which]; |
snapshot->reloc_count = intel_bo_get_reloc_count(writer->bo); |
snapshot->used = writer->used; |
snapshot->stolen = writer->stolen; |
snapshot->item_used = writer->item_used; |
} |
/** |
* Restore the writer state to when the snapshot was taken, except that it |
* does not (unnecessarily) shrink BOs or the item array. |
*/ |
void |
ilo_builder_batch_restore(struct ilo_builder *builder, |
const struct ilo_builder_snapshot *snapshot) |
{ |
const enum ilo_builder_writer_type which = ILO_BUILDER_WRITER_BATCH; |
struct ilo_builder_writer *writer = &builder->writers[which]; |
intel_bo_truncate_relocs(writer->bo, snapshot->reloc_count); |
writer->used = snapshot->used; |
writer->stolen = snapshot->stolen; |
writer->item_used = snapshot->item_used; |
} |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_builder.h |
---|
0,0 → 1,557 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2014 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_BUILDER_H |
#define ILO_BUILDER_H |
#include "intel_winsys.h" |
#include "ilo_core.h" |
#include "ilo_debug.h" |
#include "ilo_dev.h" |
/**
 * The writers of a builder.  The values index ilo_builder::writers.
 */
enum ilo_builder_writer_type {
   /* commands at the bottom; dynamic and surface states at the top */
   ILO_BUILDER_WRITER_BATCH = 0,
   /* kernels */
   ILO_BUILDER_WRITER_INSTRUCTION,

   /* number of writers; not a writer itself */
   ILO_BUILDER_WRITER_COUNT,
};
/**
 * Types of the items that can be recorded for later decoding.
 */
enum ilo_builder_item_type {
   /* items of the dynamic buffer */
   ILO_BUILDER_ITEM_BLOB = 0,
   ILO_BUILDER_ITEM_CLIP_VIEWPORT,
   ILO_BUILDER_ITEM_SF_VIEWPORT,
   ILO_BUILDER_ITEM_SCISSOR_RECT,
   ILO_BUILDER_ITEM_CC_VIEWPORT,
   ILO_BUILDER_ITEM_COLOR_CALC,
   ILO_BUILDER_ITEM_DEPTH_STENCIL,
   ILO_BUILDER_ITEM_BLEND,
   ILO_BUILDER_ITEM_SAMPLER,
   ILO_BUILDER_ITEM_INTERFACE_DESCRIPTOR,

   /* items of the surface buffer */
   ILO_BUILDER_ITEM_SURFACE,
   ILO_BUILDER_ITEM_BINDING_TABLE,

   /* items of the instruction buffer */
   ILO_BUILDER_ITEM_KERNEL,

   /* number of item types; not an item type itself */
   ILO_BUILDER_ITEM_COUNT,
};
struct ilo_builder_item { |
enum ilo_builder_item_type type; |
unsigned offset; |
unsigned size; |
}; |
/**
 * State of a single writer.  Data can be written from both ends: `used`
 * counts what is at the bottom and `stolen` what is at the top.
 */
struct ilo_builder_writer {
   /* internal flags */
   unsigned flags;

   /* current size of the writer */
   unsigned size;
   /* the bo that receives the data */
   struct intel_bo *bo;
   /* where data are written through */
   void *ptr;

   /* data written to the bottom */
   unsigned used;
   /* data written to the top */
   unsigned stolen;

   /* for decoding: recorded items, array capacity, entries in use */
   struct ilo_builder_item *items;
   unsigned item_alloc;
   unsigned item_used;
};
/**
 * Writer state saved by ilo_builder_batch_snapshot() and reinstated by
 * ilo_builder_batch_restore().
 */
struct ilo_builder_snapshot {
   /* number of relocations of the writer bo */
   unsigned reloc_count;

   /* copies of the writer counters of the same names */
   unsigned used;
   unsigned stolen;
   unsigned item_used;
};
struct ilo_builder { |
const struct ilo_dev *dev; |
struct intel_winsys *winsys; |
uint32_t mocs; |
struct ilo_builder_writer writers[ILO_BUILDER_WRITER_COUNT]; |
bool unrecoverable_error; |
/* for writers that have their data appended */ |
unsigned begin_used[ILO_BUILDER_WRITER_COUNT]; |
/* for STATE_BASE_ADDRESS */ |
unsigned sba_instruction_pos; |
}; |
/* builder setup and teardown */
void ilo_builder_init(struct ilo_builder *builder,
                      const struct ilo_dev *dev,
                      struct intel_winsys *winsys);

void ilo_builder_reset(struct ilo_builder *builder);

void ilo_builder_decode(struct ilo_builder *builder);

/* bracket a round of writing; validate can be called in between */
bool ilo_builder_begin(struct ilo_builder *builder);

struct intel_bo *ilo_builder_end(struct ilo_builder *builder,
                                 unsigned *used);

bool ilo_builder_validate(struct ilo_builder *builder,
                          unsigned bo_count,
                          struct intel_bo **bos);
/** |
* Return true if the builder has a relocation entry for \p bo. |
*/ |
static inline bool |
ilo_builder_has_reloc(const struct ilo_builder *builder, |
struct intel_bo *bo) |
{ |
int i; |
for (i = 0; i < ILO_BUILDER_WRITER_COUNT; i++) { |
const struct ilo_builder_writer *writer = &builder->writers[i]; |
if (intel_bo_has_reloc(writer->bo, bo)) |
return true; |
} |
return false; |
} |
/* low-level writer operations */
void ilo_builder_writer_discard(struct ilo_builder *builder,
                                enum ilo_builder_writer_type which);

bool ilo_builder_writer_grow(struct ilo_builder *builder,
                             enum ilo_builder_writer_type which,
                             unsigned new_size,
                             bool preserve);

bool ilo_builder_writer_record(struct ilo_builder *builder,
                               enum ilo_builder_writer_type which,
                               enum ilo_builder_item_type type,
                               unsigned offset,
                               unsigned size);
static inline void |
ilo_builder_writer_checked_record(struct ilo_builder *builder, |
enum ilo_builder_writer_type which, |
enum ilo_builder_item_type item, |
unsigned offset, unsigned size) |
{ |
if (unlikely(ilo_debug & (ILO_DEBUG_BATCH | ILO_DEBUG_HANG))) { |
if (!ilo_builder_writer_record(builder, which, item, offset, size)) { |
builder->unrecoverable_error = true; |
builder->writers[which].item_used = 0; |
} |
} |
} |
/** |
* Return an offset to a region that is aligned to \p alignment and has at |
* least \p size bytes. The region is reserved from the bottom. |
*/ |
static inline unsigned |
ilo_builder_writer_reserve_bottom(struct ilo_builder *builder, |
enum ilo_builder_writer_type which, |
unsigned alignment, unsigned size) |
{ |
struct ilo_builder_writer *writer = &builder->writers[which]; |
unsigned offset; |
assert(alignment && util_is_power_of_two(alignment)); |
offset = align(writer->used, alignment); |
if (unlikely(offset + size > writer->size - writer->stolen)) { |
if (!ilo_builder_writer_grow(builder, which, |
offset + size + writer->stolen, true)) { |
builder->unrecoverable_error = true; |
ilo_builder_writer_discard(builder, which); |
offset = 0; |
} |
assert(offset + size <= writer->size - writer->stolen); |
} |
return offset; |
} |
/** |
* Similar to ilo_builder_writer_reserve_bottom(), but reserve from the top. |
*/ |
static inline unsigned |
ilo_builder_writer_reserve_top(struct ilo_builder *builder, |
enum ilo_builder_writer_type which, |
unsigned alignment, unsigned size) |
{ |
struct ilo_builder_writer *writer = &builder->writers[which]; |
unsigned offset; |
assert(alignment && util_is_power_of_two(alignment)); |
offset = (writer->size - writer->stolen - size) & ~(alignment - 1); |
if (unlikely(offset < writer->used || |
size > writer->size - writer->stolen)) { |
if (!ilo_builder_writer_grow(builder, which, |
align(writer->used, alignment) + size + writer->stolen, true)) { |
builder->unrecoverable_error = true; |
ilo_builder_writer_discard(builder, which); |
} |
offset = (writer->size - writer->stolen - size) & ~(alignment - 1); |
assert(offset + size <= writer->size - writer->stolen); |
} |
return offset; |
} |
/** |
* Add a relocation entry to the writer. |
*/ |
static inline void |
ilo_builder_writer_reloc(struct ilo_builder *builder, |
enum ilo_builder_writer_type which, |
unsigned offset, struct intel_bo *bo, |
unsigned bo_offset, unsigned reloc_flags, |
bool write_presumed_offset_hi) |
{ |
struct ilo_builder_writer *writer = &builder->writers[which]; |
uint64_t presumed_offset; |
int err; |
if (write_presumed_offset_hi) |
ILO_DEV_ASSERT(builder->dev, 8, 8); |
else |
ILO_DEV_ASSERT(builder->dev, 6, 7.5); |
assert(offset + sizeof(uint32_t) <= writer->used || |
(offset >= writer->size - writer->stolen && |
offset + sizeof(uint32_t) <= writer->size)); |
err = intel_bo_add_reloc(writer->bo, offset, bo, bo_offset, |
reloc_flags, &presumed_offset); |
if (unlikely(err)) |
builder->unrecoverable_error = true; |
if (write_presumed_offset_hi) { |
*((uint64_t *) ((char *) writer->ptr + offset)) = presumed_offset; |
} else { |
/* 32-bit addressing */ |
assert(presumed_offset == (uint64_t) ((uint32_t) presumed_offset)); |
*((uint32_t *) ((char *) writer->ptr + offset)) = presumed_offset; |
} |
} |
/** |
* Reserve a region from the dynamic buffer. Both the offset, in bytes, and |
* the pointer to the reserved region are returned. The pointer is only valid |
* until the next reserve call. |
* |
* Note that \p alignment is in bytes and \p len is in DWords. |
*/ |
static inline uint32_t |
ilo_builder_dynamic_pointer(struct ilo_builder *builder, |
enum ilo_builder_item_type item, |
unsigned alignment, unsigned len, |
uint32_t **dw) |
{ |
const enum ilo_builder_writer_type which = ILO_BUILDER_WRITER_BATCH; |
const unsigned size = len << 2; |
const unsigned offset = ilo_builder_writer_reserve_top(builder, |
which, alignment, size); |
struct ilo_builder_writer *writer = &builder->writers[which]; |
/* all states are at least aligned to 32-bytes */ |
if (item != ILO_BUILDER_ITEM_BLOB) |
assert(alignment % 32 == 0); |
*dw = (uint32_t *) ((char *) writer->ptr + offset); |
writer->stolen = writer->size - offset; |
ilo_builder_writer_checked_record(builder, which, item, offset, size); |
return offset; |
} |
/** |
* Write a dynamic state to the dynamic buffer. |
*/ |
static inline uint32_t |
ilo_builder_dynamic_write(struct ilo_builder *builder, |
enum ilo_builder_item_type item, |
unsigned alignment, unsigned len, |
const uint32_t *dw) |
{ |
uint32_t offset, *dst; |
offset = ilo_builder_dynamic_pointer(builder, item, alignment, len, &dst); |
memcpy(dst, dw, len << 2); |
return offset; |
} |
/** |
* Reserve some space from the top (for prefetches). |
*/ |
static inline void |
ilo_builder_dynamic_pad_top(struct ilo_builder *builder, unsigned len) |
{ |
const enum ilo_builder_writer_type which = ILO_BUILDER_WRITER_BATCH; |
const unsigned size = len << 2; |
struct ilo_builder_writer *writer = &builder->writers[which]; |
if (writer->stolen < size) { |
ilo_builder_writer_reserve_top(builder, which, |
1, size - writer->stolen); |
writer->stolen = size; |
} |
} |
static inline unsigned |
ilo_builder_dynamic_used(const struct ilo_builder *builder) |
{ |
const enum ilo_builder_writer_type which = ILO_BUILDER_WRITER_BATCH; |
const struct ilo_builder_writer *writer = &builder->writers[which]; |
return writer->stolen >> 2; |
} |
/** |
* Reserve a region from the surface buffer. Both the offset, in bytes, and |
* the pointer to the reserved region are returned. The pointer is only valid |
* until the next reserve call. |
* |
* Note that \p alignment is in bytes and \p len is in DWords. |
*/ |
static inline uint32_t |
ilo_builder_surface_pointer(struct ilo_builder *builder, |
enum ilo_builder_item_type item, |
unsigned alignment, unsigned len, |
uint32_t **dw) |
{ |
assert(item == ILO_BUILDER_ITEM_SURFACE || |
item == ILO_BUILDER_ITEM_BINDING_TABLE); |
return ilo_builder_dynamic_pointer(builder, item, alignment, len, dw); |
} |
/** |
* Add a relocation entry for a DWord of a surface state. |
*/ |
static inline void |
ilo_builder_surface_reloc(struct ilo_builder *builder, |
uint32_t offset, unsigned dw_index, |
struct intel_bo *bo, unsigned bo_offset, |
unsigned reloc_flags) |
{ |
const enum ilo_builder_writer_type which = ILO_BUILDER_WRITER_BATCH; |
ilo_builder_writer_reloc(builder, which, offset + (dw_index << 2), |
bo, bo_offset, reloc_flags, false); |
} |
static inline void |
ilo_builder_surface_reloc64(struct ilo_builder *builder, |
uint32_t offset, unsigned dw_index, |
struct intel_bo *bo, unsigned bo_offset, |
unsigned reloc_flags) |
{ |
const enum ilo_builder_writer_type which = ILO_BUILDER_WRITER_BATCH; |
ilo_builder_writer_reloc(builder, which, offset + (dw_index << 2), |
bo, bo_offset, reloc_flags, true); |
} |
/**
 * Return the value of ilo_builder_dynamic_used(); surface states are
 * reserved from the dynamic buffer.
 */
static inline unsigned
ilo_builder_surface_used(const struct ilo_builder *builder)
{
   const unsigned dwords = ilo_builder_dynamic_used(builder);

   return dwords;
}
/** |
* Write a kernel to the instruction buffer. The offset, in bytes, of the |
* kernel is returned. |
*/ |
static inline uint32_t |
ilo_builder_instruction_write(struct ilo_builder *builder, |
unsigned size, const void *kernel) |
{ |
const enum ilo_builder_writer_type which = ILO_BUILDER_WRITER_INSTRUCTION; |
/* |
* From the Sandy Bridge PRM, volume 4 part 2, page 112: |
* |
* "Due to prefetch of the instruction stream, the EUs may attempt to |
* access up to 8 instructions (128 bytes) beyond the end of the |
* kernel program - possibly into the next memory page. Although |
* these instructions will not be executed, software must account for |
* the prefetch in order to avoid invalid page access faults." |
*/ |
const unsigned reserved_size = size + 128; |
/* kernels are aligned to 64 bytes */ |
const unsigned alignment = 64; |
const unsigned offset = ilo_builder_writer_reserve_bottom(builder, |
which, alignment, reserved_size); |
struct ilo_builder_writer *writer = &builder->writers[which]; |
memcpy((char *) writer->ptr + offset, kernel, size); |
writer->used = offset + size; |
ilo_builder_writer_checked_record(builder, which, |
ILO_BUILDER_ITEM_KERNEL, offset, size); |
return offset; |
} |
/** |
* Reserve a region from the batch buffer. Both the offset, in DWords, and |
* the pointer to the reserved region are returned. The pointer is only valid |
* until the next reserve call. |
* |
* Note that \p len is in DWords. |
*/ |
static inline unsigned |
ilo_builder_batch_pointer(struct ilo_builder *builder, |
unsigned len, uint32_t **dw) |
{ |
const enum ilo_builder_writer_type which = ILO_BUILDER_WRITER_BATCH; |
/* |
* We know the batch bo is always aligned. Using 1 here should allow the |
* compiler to optimize away aligning. |
*/ |
const unsigned alignment = 1; |
const unsigned size = len << 2; |
const unsigned offset = ilo_builder_writer_reserve_bottom(builder, |
which, alignment, size); |
struct ilo_builder_writer *writer = &builder->writers[which]; |
assert(offset % 4 == 0); |
*dw = (uint32_t *) ((char *) writer->ptr + offset); |
writer->used = offset + size; |
return offset >> 2; |
} |
/**
 * Copy a command of \p len DWords from \p dw into the batch buffer and
 * return the position, in DWords, it was written to.
 */
static inline unsigned
ilo_builder_batch_write(struct ilo_builder *builder,
                        unsigned len, const uint32_t *dw)
{
   uint32_t *region;
   const unsigned pos = ilo_builder_batch_pointer(builder, len, &region);

   memcpy(region, dw, len << 2);

   return pos;
}
/** |
* Add a relocation entry for a DWord of a command. |
*/ |
static inline void |
ilo_builder_batch_reloc(struct ilo_builder *builder, unsigned pos, |
struct intel_bo *bo, unsigned bo_offset, |
unsigned reloc_flags) |
{ |
const enum ilo_builder_writer_type which = ILO_BUILDER_WRITER_BATCH; |
ilo_builder_writer_reloc(builder, which, pos << 2, |
bo, bo_offset, reloc_flags, false); |
} |
static inline void |
ilo_builder_batch_reloc64(struct ilo_builder *builder, unsigned pos, |
struct intel_bo *bo, unsigned bo_offset, |
unsigned reloc_flags) |
{ |
const enum ilo_builder_writer_type which = ILO_BUILDER_WRITER_BATCH; |
ilo_builder_writer_reloc(builder, which, pos << 2, |
bo, bo_offset, reloc_flags, true); |
} |
static inline unsigned |
ilo_builder_batch_used(const struct ilo_builder *builder) |
{ |
const enum ilo_builder_writer_type which = ILO_BUILDER_WRITER_BATCH; |
const struct ilo_builder_writer *writer = &builder->writers[which]; |
return writer->used >> 2; |
} |
static inline unsigned |
ilo_builder_batch_space(const struct ilo_builder *builder) |
{ |
const enum ilo_builder_writer_type which = ILO_BUILDER_WRITER_BATCH; |
const struct ilo_builder_writer *writer = &builder->writers[which]; |
return (writer->size - writer->stolen - writer->used) >> 2; |
} |
static inline void |
ilo_builder_batch_discard(struct ilo_builder *builder) |
{ |
ilo_builder_writer_discard(builder, ILO_BUILDER_WRITER_BATCH); |
} |
static inline void |
ilo_builder_batch_print_stats(const struct ilo_builder *builder) |
{ |
const enum ilo_builder_writer_type which = ILO_BUILDER_WRITER_BATCH; |
const struct ilo_builder_writer *writer = &builder->writers[which]; |
ilo_printf("%d+%d bytes (%d%% full)\n", |
writer->used, writer->stolen, |
(writer->used + writer->stolen) * 100 / writer->size); |
} |
/* save and reinstate the state of the batch writer */
void ilo_builder_batch_snapshot(const struct ilo_builder *builder,
                                struct ilo_builder_snapshot *snapshot);

void ilo_builder_batch_restore(struct ilo_builder *builder,
                               const struct ilo_builder_snapshot *snapshot);
#endif /* ILO_BUILDER_H */ |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_builder_3d.h |
---|
0,0 → 1,92 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2014 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_BUILDER_3D_H |
#define ILO_BUILDER_3D_H |
#include "genhw/genhw.h" |
#include "ilo_core.h" |
#include "ilo_dev.h" |
#include "ilo_builder_3d_top.h" |
#include "ilo_builder_3d_bottom.h" |
static inline void |
gen6_3DPRIMITIVE(struct ilo_builder *builder, |
const struct pipe_draw_info *info, |
const struct ilo_ib_state *ib) |
{ |
const uint8_t cmd_len = 6; |
const int prim = gen6_3d_translate_pipe_prim(info->mode); |
const int vb_access = (info->indexed) ? |
GEN6_3DPRIM_DW0_ACCESS_RANDOM : GEN6_3DPRIM_DW0_ACCESS_SEQUENTIAL; |
const uint32_t vb_start = info->start + |
((info->indexed) ? ib->draw_start_offset : 0); |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 6, 6); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DPRIMITIVE) | |
vb_access | |
prim << GEN6_3DPRIM_DW0_TYPE__SHIFT | |
(cmd_len - 2); |
dw[1] = info->count; |
dw[2] = vb_start; |
dw[3] = info->instance_count; |
dw[4] = info->start_instance; |
dw[5] = info->index_bias; |
} |
static inline void |
gen7_3DPRIMITIVE(struct ilo_builder *builder, |
const struct pipe_draw_info *info, |
const struct ilo_ib_state *ib) |
{ |
const uint8_t cmd_len = 7; |
const int prim = gen6_3d_translate_pipe_prim(info->mode); |
const int vb_access = (info->indexed) ? |
GEN7_3DPRIM_DW1_ACCESS_RANDOM : GEN7_3DPRIM_DW1_ACCESS_SEQUENTIAL; |
const uint32_t vb_start = info->start + |
((info->indexed) ? ib->draw_start_offset : 0); |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 7, 8); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DPRIMITIVE) | (cmd_len - 2); |
dw[1] = vb_access | prim; |
dw[2] = info->count; |
dw[3] = vb_start; |
dw[4] = info->instance_count; |
dw[5] = info->start_instance; |
dw[6] = info->index_bias; |
} |
#endif /* ILO_BUILDER_3D_H */ |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h |
---|
0,0 → 1,1815 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2014 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_BUILDER_3D_BOTTOM_H |
#define ILO_BUILDER_3D_BOTTOM_H |
#include "genhw/genhw.h" |
#include "../ilo_shader.h" |
#include "intel_winsys.h" |
#include "ilo_core.h" |
#include "ilo_dev.h" |
#include "ilo_format.h" |
#include "ilo_builder.h" |
#include "ilo_builder_3d_top.h" |
static inline void |
gen6_3DSTATE_CLIP(struct ilo_builder *builder, |
const struct ilo_rasterizer_state *rasterizer, |
const struct ilo_shader_state *fs, |
bool enable_guardband, |
int num_viewports) |
{ |
const uint8_t cmd_len = 4; |
uint32_t dw1, dw2, dw3, *dw; |
int interps; |
ILO_DEV_ASSERT(builder->dev, 6, 8); |
dw1 = rasterizer->clip.payload[0]; |
dw2 = rasterizer->clip.payload[1]; |
dw3 = rasterizer->clip.payload[2]; |
if (enable_guardband && rasterizer->clip.can_enable_guardband) |
dw2 |= GEN6_CLIP_DW2_GB_TEST_ENABLE; |
interps = (fs) ? ilo_shader_get_kernel_param(fs, |
ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) : 0; |
if (interps & (GEN6_INTERP_NONPERSPECTIVE_PIXEL | |
GEN6_INTERP_NONPERSPECTIVE_CENTROID | |
GEN6_INTERP_NONPERSPECTIVE_SAMPLE)) |
dw2 |= GEN6_CLIP_DW2_NONPERSPECTIVE_BARYCENTRIC_ENABLE; |
dw3 |= GEN6_CLIP_DW3_RTAINDEX_FORCED_ZERO | |
(num_viewports - 1); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CLIP) | (cmd_len - 2); |
dw[1] = dw1; |
dw[2] = dw2; |
dw[3] = dw3; |
} |
static inline void |
gen6_disable_3DSTATE_CLIP(struct ilo_builder *builder) |
{ |
const uint8_t cmd_len = 4; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 6, 7.5); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CLIP) | (cmd_len - 2); |
dw[1] = 0; |
dw[2] = 0; |
dw[3] = 0; |
} |
static inline void |
gen7_internal_3dstate_sf(struct ilo_builder *builder, |
uint8_t cmd_len, uint32_t *dw, |
const struct ilo_rasterizer_sf *sf, |
int num_samples) |
{ |
ILO_DEV_ASSERT(builder->dev, 6, 7.5); |
assert(cmd_len == 7); |
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SF) | (cmd_len - 2); |
if (!sf) { |
dw[1] = 0; |
dw[2] = (num_samples > 1) ? GEN7_SF_DW2_MSRASTMODE_ON_PATTERN : 0; |
dw[3] = 0; |
dw[4] = 0; |
dw[5] = 0; |
dw[6] = 0; |
return; |
} |
/* see rasterizer_init_sf_gen6() */ |
STATIC_ASSERT(Elements(sf->payload) >= 3); |
dw[1] = sf->payload[0]; |
dw[2] = sf->payload[1]; |
dw[3] = sf->payload[2]; |
if (num_samples > 1) |
dw[2] |= sf->dw_msaa; |
dw[4] = sf->dw_depth_offset_const; |
dw[5] = sf->dw_depth_offset_scale; |
dw[6] = sf->dw_depth_offset_clamp; |
} |
static inline void |
gen8_internal_3dstate_sbe(struct ilo_builder *builder, |
uint8_t cmd_len, uint32_t *dw, |
const struct ilo_shader_state *fs, |
int sprite_coord_mode) |
{ |
const struct ilo_kernel_routing *routing; |
int vue_offset, vue_len, out_count; |
ILO_DEV_ASSERT(builder->dev, 6, 8); |
assert(cmd_len == 4); |
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SBE) | (cmd_len - 2); |
if (!fs) { |
dw[1] = 1 << GEN7_SBE_DW1_URB_READ_LEN__SHIFT; |
dw[2] = 0; |
dw[3] = 0; |
return; |
} |
routing = ilo_shader_get_kernel_routing(fs); |
vue_offset = routing->source_skip; |
assert(vue_offset % 2 == 0); |
vue_offset /= 2; |
vue_len = (routing->source_len + 1) / 2; |
if (!vue_len) |
vue_len = 1; |
out_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT); |
assert(out_count <= 32); |
dw[1] = out_count << GEN7_SBE_DW1_ATTR_COUNT__SHIFT | |
vue_len << GEN7_SBE_DW1_URB_READ_LEN__SHIFT; |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { |
dw[1] |= GEN8_SBE_DW1_USE_URB_READ_LEN | |
GEN8_SBE_DW1_USE_URB_READ_OFFSET | |
vue_offset << GEN8_SBE_DW1_URB_READ_OFFSET__SHIFT; |
} else { |
dw[1] |= vue_offset << GEN7_SBE_DW1_URB_READ_OFFSET__SHIFT; |
} |
if (routing->swizzle_enable) |
dw[1] |= GEN7_SBE_DW1_ATTR_SWIZZLE_ENABLE; |
switch (sprite_coord_mode) { |
case PIPE_SPRITE_COORD_UPPER_LEFT: |
dw[1] |= GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_UPPERLEFT; |
break; |
case PIPE_SPRITE_COORD_LOWER_LEFT: |
dw[1] |= GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_LOWERLEFT; |
break; |
} |
/* |
* From the Ivy Bridge PRM, volume 2 part 1, page 268: |
* |
* "This field (Point Sprite Texture Coordinate Enable) must be |
* programmed to 0 when non-point primitives are rendered." |
* |
* TODO We do not check that yet. |
*/ |
dw[2] = routing->point_sprite_enable; |
dw[3] = routing->const_interp_enable; |
} |
static inline void |
gen8_internal_3dstate_sbe_swiz(struct ilo_builder *builder, |
uint8_t cmd_len, uint32_t *dw, |
const struct ilo_shader_state *fs) |
{ |
const struct ilo_kernel_routing *routing; |
ILO_DEV_ASSERT(builder->dev, 6, 8); |
assert(cmd_len == 11); |
dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_SBE_SWIZ) | (cmd_len - 2); |
if (!fs) { |
memset(&dw[1], 0, sizeof(*dw) * (cmd_len - 1)); |
return; |
} |
routing = ilo_shader_get_kernel_routing(fs); |
STATIC_ASSERT(sizeof(routing->swizzles) >= sizeof(*dw) * 8); |
memcpy(&dw[1], routing->swizzles, sizeof(*dw) * 8); |
/* WrapShortest enables */ |
dw[9] = 0; |
dw[10] = 0; |
} |
static inline void |
gen6_3DSTATE_SF(struct ilo_builder *builder, |
const struct ilo_rasterizer_state *rasterizer, |
const struct ilo_shader_state *fs, |
int sample_count) |
{ |
const uint8_t cmd_len = 20; |
uint32_t gen8_3dstate_sbe[4], gen8_3dstate_sbe_swiz[11]; |
uint32_t gen7_3dstate_sf[7]; |
const struct ilo_rasterizer_sf *sf; |
int sprite_coord_mode; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 6, 6); |
sf = (rasterizer) ? &rasterizer->sf : NULL; |
sprite_coord_mode = (rasterizer) ? rasterizer->state.sprite_coord_mode : 0; |
gen8_internal_3dstate_sbe(builder, Elements(gen8_3dstate_sbe), |
gen8_3dstate_sbe, fs, sprite_coord_mode); |
gen8_internal_3dstate_sbe_swiz(builder, Elements(gen8_3dstate_sbe_swiz), |
gen8_3dstate_sbe_swiz, fs); |
gen7_internal_3dstate_sf(builder, Elements(gen7_3dstate_sf), |
gen7_3dstate_sf, sf, sample_count); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SF) | (cmd_len - 2); |
dw[1] = gen8_3dstate_sbe[1]; |
memcpy(&dw[2], &gen7_3dstate_sf[1], sizeof(*dw) * 6); |
memcpy(&dw[8], &gen8_3dstate_sbe_swiz[1], sizeof(*dw) * 8); |
dw[16] = gen8_3dstate_sbe[2]; |
dw[17] = gen8_3dstate_sbe[3]; |
dw[18] = gen8_3dstate_sbe_swiz[9]; |
dw[19] = gen8_3dstate_sbe_swiz[10]; |
} |
static inline void |
gen7_3DSTATE_SF(struct ilo_builder *builder, |
const struct ilo_rasterizer_sf *sf, |
enum pipe_format zs_format, |
int sample_count) |
{ |
const uint8_t cmd_len = 7; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 7, 7.5); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
gen7_internal_3dstate_sf(builder, cmd_len, dw, sf, sample_count); |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) { |
int hw_format; |
/* separate stencil */ |
switch (zs_format) { |
case PIPE_FORMAT_Z16_UNORM: |
hw_format = GEN6_ZFORMAT_D16_UNORM; |
break; |
case PIPE_FORMAT_Z32_FLOAT: |
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: |
hw_format = GEN6_ZFORMAT_D32_FLOAT; |
break; |
case PIPE_FORMAT_Z24X8_UNORM: |
case PIPE_FORMAT_Z24_UNORM_S8_UINT: |
hw_format = GEN6_ZFORMAT_D24_UNORM_X8_UINT; |
break; |
default: |
/* FLOAT surface is assumed when there is no depth buffer */ |
hw_format = GEN6_ZFORMAT_D32_FLOAT; |
break; |
} |
dw[1] |= hw_format << GEN7_SF_DW1_DEPTH_FORMAT__SHIFT; |
} |
} |
static inline void |
gen8_3DSTATE_SF(struct ilo_builder *builder, |
const struct ilo_rasterizer_sf *sf) |
{ |
const uint8_t cmd_len = 4; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 8, 8); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SF) | (cmd_len - 2); |
/* see rasterizer_init_sf_gen8() */ |
STATIC_ASSERT(Elements(sf->payload) >= 3); |
dw[1] = sf->payload[0]; |
dw[2] = sf->payload[1]; |
dw[3] = sf->payload[2]; |
} |
static inline void |
gen7_3DSTATE_SBE(struct ilo_builder *builder, |
const struct ilo_shader_state *fs, |
int sprite_coord_mode) |
{ |
const uint8_t cmd_len = 14; |
uint32_t gen8_3dstate_sbe[4], gen8_3dstate_sbe_swiz[11]; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 7, 7.5); |
gen8_internal_3dstate_sbe(builder, Elements(gen8_3dstate_sbe), |
gen8_3dstate_sbe, fs, sprite_coord_mode); |
gen8_internal_3dstate_sbe_swiz(builder, Elements(gen8_3dstate_sbe_swiz), |
gen8_3dstate_sbe_swiz, fs); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SBE) | (cmd_len - 2); |
dw[1] = gen8_3dstate_sbe[1]; |
memcpy(&dw[2], &gen8_3dstate_sbe_swiz[1], sizeof(*dw) * 8); |
dw[10] = gen8_3dstate_sbe[2]; |
dw[11] = gen8_3dstate_sbe[3]; |
dw[12] = gen8_3dstate_sbe_swiz[9]; |
dw[13] = gen8_3dstate_sbe_swiz[10]; |
} |
static inline void |
gen8_3DSTATE_SBE(struct ilo_builder *builder, |
const struct ilo_shader_state *fs, |
int sprite_coord_mode) |
{ |
const uint8_t cmd_len = 4; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 8, 8); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
gen8_internal_3dstate_sbe(builder, cmd_len, dw, fs, sprite_coord_mode); |
} |
static inline void |
gen8_3DSTATE_SBE_SWIZ(struct ilo_builder *builder, |
const struct ilo_shader_state *fs) |
{ |
const uint8_t cmd_len = 11; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 8, 8); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
gen8_internal_3dstate_sbe_swiz(builder, cmd_len, dw, fs); |
} |
static inline void |
gen8_3DSTATE_RASTER(struct ilo_builder *builder, |
const struct ilo_rasterizer_sf *sf) |
{ |
const uint8_t cmd_len = 5; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 8, 8); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_RASTER) | (cmd_len - 2); |
dw[1] = sf->dw_raster; |
dw[2] = sf->dw_depth_offset_const; |
dw[3] = sf->dw_depth_offset_scale; |
dw[4] = sf->dw_depth_offset_clamp; |
} |
static inline void |
gen6_3DSTATE_WM(struct ilo_builder *builder, |
const struct ilo_shader_state *fs, |
const struct ilo_rasterizer_state *rasterizer, |
bool dual_blend, bool cc_may_kill) |
{ |
const uint8_t cmd_len = 9; |
const int num_samples = 1; |
const struct ilo_shader_cso *cso; |
uint32_t dw2, dw4, dw5, dw6, *dw; |
ILO_DEV_ASSERT(builder->dev, 6, 6); |
cso = ilo_shader_get_kernel_cso(fs); |
dw2 = cso->payload[0]; |
dw4 = cso->payload[1]; |
dw5 = cso->payload[2]; |
dw6 = cso->payload[3]; |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 248: |
* |
* "This bit (Statistics Enable) must be disabled if either of these |
* bits is set: Depth Buffer Clear , Hierarchical Depth Buffer Resolve |
* Enable or Depth Buffer Resolve Enable." |
*/ |
dw4 |= GEN6_WM_DW4_STATISTICS; |
if (cc_may_kill) |
dw5 |= GEN6_WM_DW5_PS_KILL_PIXEL | GEN6_WM_DW5_PS_DISPATCH_ENABLE; |
if (dual_blend) |
dw5 |= GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND; |
dw5 |= rasterizer->wm.payload[0]; |
dw6 |= rasterizer->wm.payload[1]; |
if (num_samples > 1) { |
dw6 |= rasterizer->wm.dw_msaa_rast | |
rasterizer->wm.dw_msaa_disp; |
} |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2); |
dw[1] = ilo_shader_get_kernel_offset(fs); |
dw[2] = dw2; |
dw[3] = 0; /* scratch */ |
dw[4] = dw4; |
dw[5] = dw5; |
dw[6] = dw6; |
dw[7] = 0; /* kernel 1 */ |
dw[8] = 0; /* kernel 2 */ |
} |
static inline void |
gen6_hiz_3DSTATE_WM(struct ilo_builder *builder, uint32_t hiz_op) |
{ |
const uint8_t cmd_len = 9; |
const int max_threads = (builder->dev->gt == 2) ? 80 : 40; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 6, 6); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2); |
dw[1] = 0; |
dw[2] = 0; |
dw[3] = 0; |
dw[4] = hiz_op; |
/* honor the valid range even if dispatching is disabled */ |
dw[5] = (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT; |
dw[6] = 0; |
dw[7] = 0; |
dw[8] = 0; |
} |
static inline void |
gen7_3DSTATE_WM(struct ilo_builder *builder, |
const struct ilo_shader_state *fs, |
const struct ilo_rasterizer_state *rasterizer, |
bool cc_may_kill) |
{ |
const uint8_t cmd_len = 3; |
const int num_samples = 1; |
const struct ilo_shader_cso *cso; |
uint32_t dw1, dw2, *dw; |
ILO_DEV_ASSERT(builder->dev, 7, 7.5); |
/* see rasterizer_init_wm_gen7() */ |
dw1 = rasterizer->wm.payload[0]; |
dw2 = rasterizer->wm.payload[1]; |
/* see fs_init_cso_gen7() */ |
cso = ilo_shader_get_kernel_cso(fs); |
dw1 |= cso->payload[3]; |
dw1 |= GEN7_WM_DW1_STATISTICS; |
if (cc_may_kill) |
dw1 |= GEN7_WM_DW1_PS_DISPATCH_ENABLE | GEN7_WM_DW1_PS_KILL_PIXEL; |
if (num_samples > 1) { |
dw1 |= rasterizer->wm.dw_msaa_rast; |
dw2 |= rasterizer->wm.dw_msaa_disp; |
} |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2); |
dw[1] = dw1; |
dw[2] = dw2; |
} |
static inline void |
gen8_3DSTATE_WM(struct ilo_builder *builder, |
const struct ilo_shader_state *fs, |
const struct ilo_rasterizer_state *rasterizer) |
{ |
const uint8_t cmd_len = 2; |
const struct ilo_shader_cso *cso; |
uint32_t dw1, interps, *dw; |
ILO_DEV_ASSERT(builder->dev, 8, 8); |
/* see rasterizer_get_wm_gen8() */ |
dw1 = rasterizer->wm.payload[0]; |
dw1 |= GEN7_WM_DW1_STATISTICS; |
/* see fs_init_cso_gen8() */ |
cso = ilo_shader_get_kernel_cso(fs); |
interps = cso->payload[4]; |
assert(!(dw1 & interps)); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2); |
dw[1] = dw1 | interps; |
} |
static inline void |
gen7_hiz_3DSTATE_WM(struct ilo_builder *builder, uint32_t hiz_op) |
{ |
const uint8_t cmd_len = 3; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 7, 7.5); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2); |
dw[1] = hiz_op; |
dw[2] = 0; |
} |
static inline void |
gen8_3DSTATE_WM_DEPTH_STENCIL(struct ilo_builder *builder, |
const struct ilo_dsa_state *dsa) |
{ |
const uint8_t cmd_len = 3; |
uint32_t dw1, dw2, *dw; |
ILO_DEV_ASSERT(builder->dev, 8, 8); |
dw1 = dsa->payload[0]; |
dw2 = dsa->payload[1]; |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_WM_DEPTH_STENCIL) | (cmd_len - 2); |
dw[1] = dw1; |
dw[2] = dw2; |
} |
static inline void |
gen8_3DSTATE_WM_HZ_OP(struct ilo_builder *builder, uint32_t op, |
uint16_t width, uint16_t height, int sample_count) |
{ |
const uint8_t cmd_len = 5; |
const uint32_t sample_mask = ((1 << sample_count) - 1) | 0x1; |
uint32_t dw1, *dw; |
ILO_DEV_ASSERT(builder->dev, 8, 8); |
dw1 = op; |
switch (sample_count) { |
case 0: |
case 1: |
dw1 |= GEN8_WM_HZ_DW1_NUMSAMPLES_1; |
break; |
case 2: |
dw1 |= GEN8_WM_HZ_DW1_NUMSAMPLES_2; |
break; |
case 4: |
dw1 |= GEN8_WM_HZ_DW1_NUMSAMPLES_4; |
break; |
case 8: |
dw1 |= GEN8_WM_HZ_DW1_NUMSAMPLES_8; |
break; |
case 16: |
dw1 |= GEN8_WM_HZ_DW1_NUMSAMPLES_16; |
break; |
default: |
assert(!"unsupported sample count"); |
dw1 |= GEN8_WM_HZ_DW1_NUMSAMPLES_1; |
break; |
} |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_WM_HZ_OP) | (cmd_len - 2); |
dw[1] = dw1; |
dw[2] = 0; |
/* exclusive? */ |
dw[3] = height << 16 | width; |
dw[4] = sample_mask; |
} |
static inline void |
gen8_disable_3DSTATE_WM_HZ_OP(struct ilo_builder *builder) |
{ |
const uint8_t cmd_len = 5; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 8, 8); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_WM_HZ_OP) | (cmd_len - 2); |
dw[1] = 0; |
dw[2] = 0; |
dw[3] = 0; |
dw[4] = 0; |
} |
static inline void |
gen8_3DSTATE_WM_CHROMAKEY(struct ilo_builder *builder) |
{ |
const uint8_t cmd_len = 2; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 8, 8); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_WM_CHROMAKEY) | (cmd_len - 2); |
dw[1] = 0; |
} |
static inline void |
gen7_3DSTATE_PS(struct ilo_builder *builder, |
const struct ilo_shader_state *fs, |
bool dual_blend) |
{ |
const uint8_t cmd_len = 8; |
const struct ilo_shader_cso *cso; |
uint32_t dw2, dw4, dw5, *dw; |
ILO_DEV_ASSERT(builder->dev, 7, 7.5); |
/* see fs_init_cso_gen7() */ |
cso = ilo_shader_get_kernel_cso(fs); |
dw2 = cso->payload[0]; |
dw4 = cso->payload[1]; |
dw5 = cso->payload[2]; |
if (dual_blend) |
dw4 |= GEN7_PS_DW4_DUAL_SOURCE_BLEND; |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2); |
dw[1] = ilo_shader_get_kernel_offset(fs); |
dw[2] = dw2; |
dw[3] = 0; /* scratch */ |
dw[4] = dw4; |
dw[5] = dw5; |
dw[6] = 0; /* kernel 1 */ |
dw[7] = 0; /* kernel 2 */ |
} |
static inline void |
gen7_disable_3DSTATE_PS(struct ilo_builder *builder) |
{ |
const uint8_t cmd_len = 8; |
int max_threads; |
uint32_t dw4, *dw; |
ILO_DEV_ASSERT(builder->dev, 7, 7.5); |
/* GPU hangs if none of the dispatch enable bits is set */ |
dw4 = GEN6_PS_DISPATCH_8 << GEN7_PS_DW4_DISPATCH_MODE__SHIFT; |
/* see brwCreateContext() */ |
switch (ilo_dev_gen(builder->dev)) { |
case ILO_GEN(7.5): |
max_threads = (builder->dev->gt == 3) ? 408 : |
(builder->dev->gt == 2) ? 204 : 102; |
dw4 |= (max_threads - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT; |
break; |
case ILO_GEN(7): |
default: |
max_threads = (builder->dev->gt == 2) ? 172 : 48; |
dw4 |= (max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT; |
break; |
} |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2); |
dw[1] = 0; |
dw[2] = 0; |
dw[3] = 0; |
dw[4] = dw4; |
dw[5] = 0; |
dw[6] = 0; |
dw[7] = 0; |
} |
static inline void |
gen8_3DSTATE_PS(struct ilo_builder *builder, |
const struct ilo_shader_state *fs) |
{ |
const uint8_t cmd_len = 12; |
const struct ilo_shader_cso *cso; |
uint32_t dw3, dw6, dw7, *dw; |
ILO_DEV_ASSERT(builder->dev, 8, 8); |
/* see fs_init_cso_gen8() */ |
cso = ilo_shader_get_kernel_cso(fs); |
dw3 = cso->payload[0]; |
dw6 = cso->payload[1]; |
dw7 = cso->payload[2]; |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2); |
dw[1] = ilo_shader_get_kernel_offset(fs); |
dw[2] = 0; |
dw[3] = dw3; |
dw[4] = 0; /* scratch */ |
dw[5] = 0; |
dw[6] = dw6; |
dw[7] = dw7; |
dw[8] = 0; /* kernel 1 */ |
dw[9] = 0; |
dw[10] = 0; /* kernel 2 */ |
dw[11] = 0; |
} |
static inline void |
gen8_3DSTATE_PS_EXTRA(struct ilo_builder *builder, |
const struct ilo_shader_state *fs, |
bool cc_may_kill, bool per_sample) |
{ |
const uint8_t cmd_len = 2; |
const struct ilo_shader_cso *cso; |
uint32_t dw1, *dw; |
ILO_DEV_ASSERT(builder->dev, 8, 8); |
/* see fs_init_cso_gen8() */ |
cso = ilo_shader_get_kernel_cso(fs); |
dw1 = cso->payload[3]; |
if (cc_may_kill) |
dw1 |= GEN8_PSX_DW1_DISPATCH_ENABLE | GEN8_PSX_DW1_KILL_PIXEL; |
if (per_sample) |
dw1 |= GEN8_PSX_DW1_PER_SAMPLE; |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_PS_EXTRA) | (cmd_len - 2); |
dw[1] = dw1; |
} |
static inline void |
gen8_3DSTATE_PS_BLEND(struct ilo_builder *builder, |
const struct ilo_blend_state *blend, |
const struct ilo_fb_state *fb, |
const struct ilo_dsa_state *dsa) |
{ |
const uint8_t cmd_len = 2; |
uint32_t dw1, *dw; |
ILO_DEV_ASSERT(builder->dev, 8, 8); |
dw1 = 0; |
if (blend->alpha_to_coverage && fb->num_samples > 1) |
dw1 |= GEN8_PS_BLEND_DW1_ALPHA_TO_COVERAGE; |
if (fb->state.nr_cbufs && fb->state.cbufs[0]) { |
const struct ilo_fb_blend_caps *caps = &fb->blend_caps[0]; |
dw1 |= GEN8_PS_BLEND_DW1_WRITABLE_RT; |
if (caps->can_blend) { |
if (caps->dst_alpha_forced_one) |
dw1 |= blend->dw_ps_blend_dst_alpha_forced_one; |
else |
dw1 |= blend->dw_ps_blend; |
} |
if (caps->can_alpha_test) |
dw1 |= dsa->dw_ps_blend_alpha; |
} else { |
dw1 |= dsa->dw_ps_blend_alpha; |
} |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_PS_BLEND) | (cmd_len - 2); |
dw[1] = dw1; |
} |
static inline void |
gen6_3DSTATE_CONSTANT_PS(struct ilo_builder *builder, |
const uint32_t *bufs, const int *sizes, |
int num_bufs) |
{ |
gen6_3dstate_constant(builder, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_PS, |
bufs, sizes, num_bufs); |
} |
static inline void |
gen7_3DSTATE_CONSTANT_PS(struct ilo_builder *builder, |
const uint32_t *bufs, const int *sizes, |
int num_bufs) |
{ |
gen7_3dstate_constant(builder, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_PS, |
bufs, sizes, num_bufs); |
} |
static inline void |
gen7_3DSTATE_BINDING_TABLE_POINTERS_PS(struct ilo_builder *builder, |
uint32_t binding_table) |
{ |
ILO_DEV_ASSERT(builder->dev, 7, 8); |
gen7_3dstate_pointer(builder, |
GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_PS, |
binding_table); |
} |
static inline void |
gen7_3DSTATE_SAMPLER_STATE_POINTERS_PS(struct ilo_builder *builder, |
uint32_t sampler_state) |
{ |
ILO_DEV_ASSERT(builder->dev, 7, 8); |
gen7_3dstate_pointer(builder, |
GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_PS, |
sampler_state); |
} |
static inline void |
gen6_3DSTATE_MULTISAMPLE(struct ilo_builder *builder, |
int num_samples, const uint32_t *pattern, |
bool pixel_location_center) |
{ |
const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? 4 : 3; |
uint32_t dw1, dw2, dw3, *dw; |
ILO_DEV_ASSERT(builder->dev, 6, 7.5); |
dw1 = (pixel_location_center) ? GEN6_MULTISAMPLE_DW1_PIXLOC_CENTER : |
GEN6_MULTISAMPLE_DW1_PIXLOC_UL_CORNER; |
switch (num_samples) { |
case 0: |
case 1: |
dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1; |
dw2 = 0; |
dw3 = 0; |
break; |
case 4: |
dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_4; |
dw2 = pattern[0]; |
dw3 = 0; |
break; |
case 8: |
assert(ilo_dev_gen(builder->dev) >= ILO_GEN(7)); |
dw1 |= GEN7_MULTISAMPLE_DW1_NUMSAMPLES_8; |
dw2 = pattern[0]; |
dw3 = pattern[1]; |
break; |
default: |
assert(!"unsupported sample count"); |
dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1; |
dw2 = 0; |
dw3 = 0; |
break; |
} |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_MULTISAMPLE) | (cmd_len - 2); |
dw[1] = dw1; |
dw[2] = dw2; |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) |
dw[3] = dw3; |
} |
static inline void |
gen8_3DSTATE_MULTISAMPLE(struct ilo_builder *builder, |
int num_samples, |
bool pixel_location_center) |
{ |
const uint8_t cmd_len = 2; |
uint32_t dw1, *dw; |
ILO_DEV_ASSERT(builder->dev, 8, 8); |
dw1 = (pixel_location_center) ? GEN6_MULTISAMPLE_DW1_PIXLOC_CENTER : |
GEN6_MULTISAMPLE_DW1_PIXLOC_UL_CORNER; |
switch (num_samples) { |
case 0: |
case 1: |
dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1; |
break; |
case 2: |
dw1 |= GEN8_MULTISAMPLE_DW1_NUMSAMPLES_2; |
break; |
case 4: |
dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_4; |
break; |
case 8: |
dw1 |= GEN7_MULTISAMPLE_DW1_NUMSAMPLES_8; |
break; |
case 16: |
dw1 |= GEN8_MULTISAMPLE_DW1_NUMSAMPLES_16; |
break; |
default: |
assert(!"unsupported sample count"); |
dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1; |
break; |
} |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_MULTISAMPLE) | (cmd_len - 2); |
dw[1] = dw1; |
} |
static inline void |
gen8_3DSTATE_SAMPLE_PATTERN(struct ilo_builder *builder, |
const uint32_t *pattern_1x, |
const uint32_t *pattern_2x, |
const uint32_t *pattern_4x, |
const uint32_t *pattern_8x, |
const uint32_t *pattern_16x) |
{ |
const uint8_t cmd_len = 9; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 8, 8); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_SAMPLE_PATTERN) | (cmd_len - 2); |
dw[1] = pattern_16x[3]; |
dw[2] = pattern_16x[2]; |
dw[3] = pattern_16x[1]; |
dw[4] = pattern_16x[0]; |
dw[5] = pattern_8x[1]; |
dw[6] = pattern_8x[0]; |
dw[7] = pattern_4x[0]; |
dw[8] = pattern_1x[0] << 16 | |
pattern_2x[0]; |
} |
static inline void |
gen6_3DSTATE_SAMPLE_MASK(struct ilo_builder *builder, |
unsigned sample_mask) |
{ |
const uint8_t cmd_len = 2; |
const unsigned valid_mask = 0xf; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 6, 6); |
sample_mask &= valid_mask; |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLE_MASK) | (cmd_len - 2); |
dw[1] = sample_mask; |
} |
static inline void |
gen7_3DSTATE_SAMPLE_MASK(struct ilo_builder *builder, |
unsigned sample_mask, |
int num_samples) |
{ |
const uint8_t cmd_len = 2; |
const unsigned valid_mask = ((1 << num_samples) - 1) | 0x1; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 7, 8); |
/* |
* From the Ivy Bridge PRM, volume 2 part 1, page 294: |
* |
* "If Number of Multisamples is NUMSAMPLES_1, bits 7:1 of this field |
* (Sample Mask) must be zero. |
* |
* If Number of Multisamples is NUMSAMPLES_4, bits 7:4 of this field |
* must be zero." |
*/ |
sample_mask &= valid_mask; |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLE_MASK) | (cmd_len - 2); |
dw[1] = sample_mask; |
} |
static inline void |
gen6_3DSTATE_DRAWING_RECTANGLE(struct ilo_builder *builder, |
unsigned x, unsigned y, |
unsigned width, unsigned height) |
{ |
const uint8_t cmd_len = 4; |
unsigned xmax = x + width - 1; |
unsigned ymax = y + height - 1; |
unsigned rect_limit; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 6, 8); |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) { |
rect_limit = 16383; |
} |
else { |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 230: |
* |
* "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min) |
* must be an even number" |
*/ |
assert(y % 2 == 0); |
rect_limit = 8191; |
} |
if (x > rect_limit) x = rect_limit; |
if (y > rect_limit) y = rect_limit; |
if (xmax > rect_limit) xmax = rect_limit; |
if (ymax > rect_limit) ymax = rect_limit; |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_DRAWING_RECTANGLE) | (cmd_len - 2); |
dw[1] = y << 16 | x; |
dw[2] = ymax << 16 | xmax; |
/* |
* There is no need to set the origin. It is intended to support front |
* buffer rendering. |
*/ |
dw[3] = 0; |
} |
static inline void |
gen6_3DSTATE_POLY_STIPPLE_OFFSET(struct ilo_builder *builder, |
int x_offset, int y_offset) |
{ |
const uint8_t cmd_len = 2; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 6, 8); |
assert(x_offset >= 0 && x_offset <= 31); |
assert(y_offset >= 0 && y_offset <= 31); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_POLY_STIPPLE_OFFSET) | (cmd_len - 2); |
dw[1] = x_offset << 8 | y_offset; |
} |
static inline void |
gen6_3DSTATE_POLY_STIPPLE_PATTERN(struct ilo_builder *builder, |
const struct pipe_poly_stipple *pattern) |
{ |
const uint8_t cmd_len = 33; |
uint32_t *dw; |
int i; |
ILO_DEV_ASSERT(builder->dev, 6, 8); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_POLY_STIPPLE_PATTERN) | (cmd_len - 2); |
dw++; |
STATIC_ASSERT(Elements(pattern->stipple) == 32); |
for (i = 0; i < 32; i++) |
dw[i] = pattern->stipple[i]; |
} |
static inline void |
gen6_3DSTATE_LINE_STIPPLE(struct ilo_builder *builder, |
unsigned pattern, unsigned factor) |
{ |
const uint8_t cmd_len = 3; |
unsigned inverse; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 6, 8); |
assert((pattern & 0xffff) == pattern); |
assert(factor >= 1 && factor <= 256); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_LINE_STIPPLE) | (cmd_len - 2); |
dw[1] = pattern; |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) { |
/* in U1.16 */ |
inverse = 65536 / factor; |
dw[2] = inverse << GEN7_LINE_STIPPLE_DW2_INVERSE_REPEAT_COUNT__SHIFT | |
factor; |
} |
else { |
/* in U1.13 */ |
inverse = 8192 / factor; |
dw[2] = inverse << GEN6_LINE_STIPPLE_DW2_INVERSE_REPEAT_COUNT__SHIFT | |
factor; |
} |
} |
static inline void |
gen6_3DSTATE_AA_LINE_PARAMETERS(struct ilo_builder *builder) |
{ |
const uint8_t cmd_len = 3; |
const uint32_t dw[3] = { |
GEN6_RENDER_CMD(3D, 3DSTATE_AA_LINE_PARAMETERS) | (cmd_len - 2), |
0 << GEN6_AA_LINE_DW1_BIAS__SHIFT | 0, |
0 << GEN6_AA_LINE_DW2_CAP_BIAS__SHIFT | 0, |
}; |
ILO_DEV_ASSERT(builder->dev, 6, 8); |
ilo_builder_batch_write(builder, cmd_len, dw); |
} |
static inline void |
gen6_3DSTATE_DEPTH_BUFFER(struct ilo_builder *builder, |
const struct ilo_zs_surface *zs, |
bool aligned_8x4) |
{ |
const uint32_t cmd = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? |
GEN7_RENDER_CMD(3D, 3DSTATE_DEPTH_BUFFER) : |
GEN6_RENDER_CMD(3D, 3DSTATE_DEPTH_BUFFER); |
const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 8 : 7; |
uint32_t *dw; |
unsigned pos; |
ILO_DEV_ASSERT(builder->dev, 6, 8); |
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = cmd | (cmd_len - 2); |
dw[1] = zs->payload[0]; |
dw[2] = 0; |
/* see ilo_gpe_init_zs_surface() */ |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { |
dw[3] = 0; |
dw[4] = (aligned_8x4) ? zs->dw_aligned_8x4 : zs->payload[2]; |
dw[5] = zs->payload[3]; |
dw[6] = zs->payload[4]; |
dw[7] = zs->payload[5]; |
dw[5] |= builder->mocs << GEN8_DEPTH_DW5_MOCS__SHIFT; |
if (zs->bo) { |
ilo_builder_batch_reloc64(builder, pos + 2, zs->bo, |
zs->payload[1], INTEL_RELOC_WRITE); |
} |
} else { |
dw[3] = (aligned_8x4) ? zs->dw_aligned_8x4 : zs->payload[2]; |
dw[4] = zs->payload[3]; |
dw[5] = zs->payload[4]; |
dw[6] = zs->payload[5]; |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) |
dw[4] |= builder->mocs << GEN7_DEPTH_DW4_MOCS__SHIFT; |
else |
dw[6] |= builder->mocs << GEN6_DEPTH_DW6_MOCS__SHIFT; |
if (zs->bo) { |
ilo_builder_batch_reloc(builder, pos + 2, zs->bo, |
zs->payload[1], INTEL_RELOC_WRITE); |
} |
} |
} |
static inline void |
gen6_3DSTATE_STENCIL_BUFFER(struct ilo_builder *builder, |
const struct ilo_zs_surface *zs) |
{ |
const uint32_t cmd = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? |
GEN7_RENDER_CMD(3D, 3DSTATE_STENCIL_BUFFER) : |
GEN6_RENDER_CMD(3D, 3DSTATE_STENCIL_BUFFER); |
const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 5 : 3; |
uint32_t *dw; |
unsigned pos; |
ILO_DEV_ASSERT(builder->dev, 6, 8); |
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = cmd | (cmd_len - 2); |
/* see ilo_gpe_init_zs_surface() */ |
dw[1] = zs->payload[6]; |
dw[2] = 0; |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { |
dw[1] |= builder->mocs << GEN8_STENCIL_DW1_MOCS__SHIFT; |
dw[3] = 0; |
dw[4] = zs->payload[8]; |
if (zs->separate_s8_bo) { |
ilo_builder_batch_reloc64(builder, pos + 2, |
zs->separate_s8_bo, zs->payload[7], INTEL_RELOC_WRITE); |
} |
} else { |
dw[1] |= builder->mocs << GEN6_STENCIL_DW1_MOCS__SHIFT; |
if (zs->separate_s8_bo) { |
ilo_builder_batch_reloc(builder, pos + 2, |
zs->separate_s8_bo, zs->payload[7], INTEL_RELOC_WRITE); |
} |
} |
} |
static inline void |
gen6_3DSTATE_HIER_DEPTH_BUFFER(struct ilo_builder *builder, |
const struct ilo_zs_surface *zs) |
{ |
const uint32_t cmd = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? |
GEN7_RENDER_CMD(3D, 3DSTATE_HIER_DEPTH_BUFFER) : |
GEN6_RENDER_CMD(3D, 3DSTATE_HIER_DEPTH_BUFFER); |
const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 5 : 3; |
uint32_t *dw; |
unsigned pos; |
ILO_DEV_ASSERT(builder->dev, 6, 8); |
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = cmd | (cmd_len - 2); |
/* see ilo_gpe_init_zs_surface() */ |
dw[1] = zs->payload[9]; |
dw[2] = 0; |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { |
dw[1] |= builder->mocs << GEN8_HIZ_DW1_MOCS__SHIFT; |
dw[3] = 0; |
dw[4] = zs->payload[11]; |
if (zs->hiz_bo) { |
ilo_builder_batch_reloc64(builder, pos + 2, |
zs->hiz_bo, zs->payload[10], INTEL_RELOC_WRITE); |
} |
} else { |
dw[1] |= builder->mocs << GEN6_HIZ_DW1_MOCS__SHIFT; |
if (zs->hiz_bo) { |
ilo_builder_batch_reloc(builder, pos + 2, |
zs->hiz_bo, zs->payload[10], INTEL_RELOC_WRITE); |
} |
} |
} |
static inline void |
gen6_3DSTATE_CLEAR_PARAMS(struct ilo_builder *builder, |
uint32_t clear_val) |
{ |
const uint8_t cmd_len = 2; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 6, 6); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CLEAR_PARAMS) | |
GEN6_CLEAR_PARAMS_DW0_VALID | |
(cmd_len - 2); |
dw[1] = clear_val; |
} |
static inline void |
gen7_3DSTATE_CLEAR_PARAMS(struct ilo_builder *builder, |
uint32_t clear_val) |
{ |
const uint8_t cmd_len = 3; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 7, 8); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_CLEAR_PARAMS) | (cmd_len - 2); |
dw[1] = clear_val; |
dw[2] = GEN7_CLEAR_PARAMS_DW2_VALID; |
} |
static inline void |
gen6_3DSTATE_VIEWPORT_STATE_POINTERS(struct ilo_builder *builder, |
uint32_t clip_viewport, |
uint32_t sf_viewport, |
uint32_t cc_viewport) |
{ |
const uint8_t cmd_len = 4; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 6, 6); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VIEWPORT_STATE_POINTERS) | |
GEN6_VP_PTR_DW0_CLIP_CHANGED | |
GEN6_VP_PTR_DW0_SF_CHANGED | |
GEN6_VP_PTR_DW0_CC_CHANGED | |
(cmd_len - 2); |
dw[1] = clip_viewport; |
dw[2] = sf_viewport; |
dw[3] = cc_viewport; |
} |
static inline void |
gen6_3DSTATE_SCISSOR_STATE_POINTERS(struct ilo_builder *builder, |
uint32_t scissor_rect) |
{ |
const uint8_t cmd_len = 2; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 6, 8); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SCISSOR_STATE_POINTERS) | |
(cmd_len - 2); |
dw[1] = scissor_rect; |
} |
static inline void |
gen6_3DSTATE_CC_STATE_POINTERS(struct ilo_builder *builder, |
uint32_t blend_state, |
uint32_t depth_stencil_state, |
uint32_t color_calc_state) |
{ |
const uint8_t cmd_len = 4; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 6, 6); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CC_STATE_POINTERS) | (cmd_len - 2); |
dw[1] = blend_state | GEN6_CC_PTR_DW1_BLEND_CHANGED; |
dw[2] = depth_stencil_state | GEN6_CC_PTR_DW2_ZS_CHANGED; |
dw[3] = color_calc_state | GEN6_CC_PTR_DW3_CC_CHANGED; |
} |
static inline void |
gen7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(struct ilo_builder *builder, |
uint32_t sf_clip_viewport) |
{ |
ILO_DEV_ASSERT(builder->dev, 7, 8); |
gen7_3dstate_pointer(builder, |
GEN7_RENDER_OPCODE_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, |
sf_clip_viewport); |
} |
static inline void |
gen7_3DSTATE_VIEWPORT_STATE_POINTERS_CC(struct ilo_builder *builder, |
uint32_t cc_viewport) |
{ |
ILO_DEV_ASSERT(builder->dev, 7, 8); |
gen7_3dstate_pointer(builder, |
GEN7_RENDER_OPCODE_3DSTATE_VIEWPORT_STATE_POINTERS_CC, |
cc_viewport); |
} |
static inline void |
gen7_3DSTATE_CC_STATE_POINTERS(struct ilo_builder *builder, |
uint32_t color_calc_state) |
{ |
ILO_DEV_ASSERT(builder->dev, 7, 8); |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) |
color_calc_state |= 1; |
gen7_3dstate_pointer(builder, |
GEN6_RENDER_OPCODE_3DSTATE_CC_STATE_POINTERS, color_calc_state); |
} |
static inline void |
gen7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(struct ilo_builder *builder, |
uint32_t depth_stencil_state) |
{ |
ILO_DEV_ASSERT(builder->dev, 7, 8); |
gen7_3dstate_pointer(builder, |
GEN7_RENDER_OPCODE_3DSTATE_DEPTH_STENCIL_STATE_POINTERS, |
depth_stencil_state); |
} |
static inline void |
gen7_3DSTATE_BLEND_STATE_POINTERS(struct ilo_builder *builder, |
uint32_t blend_state) |
{ |
ILO_DEV_ASSERT(builder->dev, 7, 8); |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) |
blend_state |= 1; |
gen7_3dstate_pointer(builder, |
GEN7_RENDER_OPCODE_3DSTATE_BLEND_STATE_POINTERS, |
blend_state); |
} |
static inline uint32_t |
gen6_CLIP_VIEWPORT(struct ilo_builder *builder, |
const struct ilo_viewport_cso *viewports, |
unsigned num_viewports) |
{ |
const int state_align = 32; |
const int state_len = 4 * num_viewports; |
uint32_t state_offset, *dw; |
unsigned i; |
ILO_DEV_ASSERT(builder->dev, 6, 6); |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 193: |
* |
* "The viewport-related state is stored as an array of up to 16 |
* elements..." |
*/ |
assert(num_viewports && num_viewports <= 16); |
state_offset = ilo_builder_dynamic_pointer(builder, |
ILO_BUILDER_ITEM_CLIP_VIEWPORT, state_align, state_len, &dw); |
for (i = 0; i < num_viewports; i++) { |
const struct ilo_viewport_cso *vp = &viewports[i]; |
dw[0] = fui(vp->min_gbx); |
dw[1] = fui(vp->max_gbx); |
dw[2] = fui(vp->min_gby); |
dw[3] = fui(vp->max_gby); |
dw += 4; |
} |
return state_offset; |
} |
static inline uint32_t |
gen6_SF_VIEWPORT(struct ilo_builder *builder, |
const struct ilo_viewport_cso *viewports, |
unsigned num_viewports) |
{ |
const int state_align = 32; |
const int state_len = 8 * num_viewports; |
uint32_t state_offset, *dw; |
unsigned i; |
ILO_DEV_ASSERT(builder->dev, 6, 6); |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 262: |
* |
* "The viewport-specific state used by the SF unit (SF_VIEWPORT) is |
* stored as an array of up to 16 elements..." |
*/ |
assert(num_viewports && num_viewports <= 16); |
state_offset = ilo_builder_dynamic_pointer(builder, |
ILO_BUILDER_ITEM_SF_VIEWPORT, state_align, state_len, &dw); |
for (i = 0; i < num_viewports; i++) { |
const struct ilo_viewport_cso *vp = &viewports[i]; |
dw[0] = fui(vp->m00); |
dw[1] = fui(vp->m11); |
dw[2] = fui(vp->m22); |
dw[3] = fui(vp->m30); |
dw[4] = fui(vp->m31); |
dw[5] = fui(vp->m32); |
dw[6] = 0; |
dw[7] = 0; |
dw += 8; |
} |
return state_offset; |
} |
static inline uint32_t |
gen7_SF_CLIP_VIEWPORT(struct ilo_builder *builder, |
const struct ilo_viewport_cso *viewports, |
unsigned num_viewports) |
{ |
const int state_align = 64; |
const int state_len = 16 * num_viewports; |
uint32_t state_offset, *dw; |
unsigned i; |
ILO_DEV_ASSERT(builder->dev, 7, 8); |
/* |
* From the Ivy Bridge PRM, volume 2 part 1, page 270: |
* |
* "The viewport-specific state used by both the SF and CL units |
* (SF_CLIP_VIEWPORT) is stored as an array of up to 16 elements, each |
* of which contains the DWords described below. The start of each |
* element is spaced 16 DWords apart. The location of first element of |
* the array, as specified by both Pointer to SF_VIEWPORT and Pointer |
* to CLIP_VIEWPORT, is aligned to a 64-byte boundary." |
*/ |
assert(num_viewports && num_viewports <= 16); |
state_offset = ilo_builder_dynamic_pointer(builder, |
ILO_BUILDER_ITEM_SF_VIEWPORT, state_align, state_len, &dw); |
for (i = 0; i < num_viewports; i++) { |
const struct ilo_viewport_cso *vp = &viewports[i]; |
dw[0] = fui(vp->m00); |
dw[1] = fui(vp->m11); |
dw[2] = fui(vp->m22); |
dw[3] = fui(vp->m30); |
dw[4] = fui(vp->m31); |
dw[5] = fui(vp->m32); |
dw[6] = 0; |
dw[7] = 0; |
dw[8] = fui(vp->min_gbx); |
dw[9] = fui(vp->max_gbx); |
dw[10] = fui(vp->min_gby); |
dw[11] = fui(vp->max_gby); |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { |
dw[12] = fui(vp->min_x); |
dw[13] = fui(vp->max_x - 1.0f); |
dw[14] = fui(vp->min_y); |
dw[15] = fui(vp->max_y - 1.0f); |
} else { |
dw[12] = 0; |
dw[13] = 0; |
dw[14] = 0; |
dw[15] = 0; |
} |
dw += 16; |
} |
return state_offset; |
} |
static inline uint32_t |
gen6_CC_VIEWPORT(struct ilo_builder *builder, |
const struct ilo_viewport_cso *viewports, |
unsigned num_viewports) |
{ |
const int state_align = 32; |
const int state_len = 2 * num_viewports; |
uint32_t state_offset, *dw; |
unsigned i; |
ILO_DEV_ASSERT(builder->dev, 6, 8); |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 385: |
* |
* "The viewport state is stored as an array of up to 16 elements..." |
*/ |
assert(num_viewports && num_viewports <= 16); |
state_offset = ilo_builder_dynamic_pointer(builder, |
ILO_BUILDER_ITEM_CC_VIEWPORT, state_align, state_len, &dw); |
for (i = 0; i < num_viewports; i++) { |
const struct ilo_viewport_cso *vp = &viewports[i]; |
dw[0] = fui(vp->min_z); |
dw[1] = fui(vp->max_z); |
dw += 2; |
} |
return state_offset; |
} |
static inline uint32_t |
gen6_SCISSOR_RECT(struct ilo_builder *builder, |
const struct ilo_scissor_state *scissor, |
unsigned num_viewports) |
{ |
const int state_align = 32; |
const int state_len = 2 * num_viewports; |
ILO_DEV_ASSERT(builder->dev, 6, 8); |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 263: |
* |
* "The viewport-specific state used by the SF unit (SCISSOR_RECT) is |
* stored as an array of up to 16 elements..." |
*/ |
assert(num_viewports && num_viewports <= 16); |
assert(Elements(scissor->payload) >= state_len); |
return ilo_builder_dynamic_write(builder, ILO_BUILDER_ITEM_SCISSOR_RECT, |
state_align, state_len, scissor->payload); |
} |
static inline uint32_t |
gen6_COLOR_CALC_STATE(struct ilo_builder *builder, |
const struct pipe_stencil_ref *stencil_ref, |
ubyte alpha_ref, |
const struct pipe_blend_color *blend_color) |
{ |
const int state_align = 64; |
const int state_len = 6; |
uint32_t state_offset, *dw; |
ILO_DEV_ASSERT(builder->dev, 6, 8); |
state_offset = ilo_builder_dynamic_pointer(builder, |
ILO_BUILDER_ITEM_COLOR_CALC, state_align, state_len, &dw); |
dw[0] = stencil_ref->ref_value[0] << 24 | |
stencil_ref->ref_value[1] << 16 | |
GEN6_CC_DW0_ALPHATEST_UNORM8; |
dw[1] = alpha_ref; |
dw[2] = fui(blend_color->color[0]); |
dw[3] = fui(blend_color->color[1]); |
dw[4] = fui(blend_color->color[2]); |
dw[5] = fui(blend_color->color[3]); |
return state_offset; |
} |
static inline uint32_t |
gen6_DEPTH_STENCIL_STATE(struct ilo_builder *builder, |
const struct ilo_dsa_state *dsa) |
{ |
const int state_align = 64; |
const int state_len = 3; |
ILO_DEV_ASSERT(builder->dev, 6, 7.5); |
STATIC_ASSERT(Elements(dsa->payload) >= state_len); |
return ilo_builder_dynamic_write(builder, ILO_BUILDER_ITEM_DEPTH_STENCIL, |
state_align, state_len, dsa->payload); |
} |
static inline uint32_t |
gen6_BLEND_STATE(struct ilo_builder *builder, |
const struct ilo_blend_state *blend, |
const struct ilo_fb_state *fb, |
const struct ilo_dsa_state *dsa) |
{ |
const int state_align = 64; |
int state_len; |
uint32_t state_offset, *dw; |
unsigned num_targets, i; |
ILO_DEV_ASSERT(builder->dev, 6, 7.5); |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 376: |
* |
* "The blend state is stored as an array of up to 8 elements..." |
*/ |
num_targets = fb->state.nr_cbufs; |
assert(num_targets <= 8); |
if (!num_targets) { |
if (!dsa->dw_blend_alpha) |
return 0; |
/* to be able to reference alpha func */ |
num_targets = 1; |
} |
state_len = 2 * num_targets; |
state_offset = ilo_builder_dynamic_pointer(builder, |
ILO_BUILDER_ITEM_BLEND, state_align, state_len, &dw); |
for (i = 0; i < num_targets; i++) { |
const struct ilo_blend_cso *cso = &blend->cso[i]; |
dw[0] = cso->payload[0]; |
dw[1] = cso->payload[1] | blend->dw_shared; |
if (i < fb->state.nr_cbufs && fb->state.cbufs[i]) { |
const struct ilo_fb_blend_caps *caps = &fb->blend_caps[i]; |
if (caps->can_blend) { |
if (caps->dst_alpha_forced_one) |
dw[0] |= cso->dw_blend_dst_alpha_forced_one; |
else |
dw[0] |= cso->dw_blend; |
} |
if (caps->can_logicop) |
dw[1] |= blend->dw_logicop; |
if (caps->can_alpha_test) |
dw[1] |= dsa->dw_blend_alpha; |
} else { |
dw[1] |= GEN6_RT_DW1_WRITE_DISABLE_A | |
GEN6_RT_DW1_WRITE_DISABLE_R | |
GEN6_RT_DW1_WRITE_DISABLE_G | |
GEN6_RT_DW1_WRITE_DISABLE_B | |
dsa->dw_blend_alpha; |
} |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 356: |
* |
* "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage |
* Dither both must be disabled." |
* |
* There is no such limitation on GEN7, or for AlphaToOne. But GL |
* requires that anyway. |
*/ |
if (fb->num_samples > 1) |
dw[1] |= blend->dw_alpha_mod; |
dw += 2; |
} |
return state_offset; |
} |
static inline uint32_t |
gen8_BLEND_STATE(struct ilo_builder *builder, |
const struct ilo_blend_state *blend, |
const struct ilo_fb_state *fb, |
const struct ilo_dsa_state *dsa) |
{ |
const int state_align = 64; |
const int state_len = 1 + 2 * fb->state.nr_cbufs; |
uint32_t state_offset, *dw; |
unsigned i; |
ILO_DEV_ASSERT(builder->dev, 8, 8); |
assert(fb->state.nr_cbufs <= 8); |
state_offset = ilo_builder_dynamic_pointer(builder, |
ILO_BUILDER_ITEM_BLEND, state_align, state_len, &dw); |
dw[0] = blend->dw_shared; |
if (fb->num_samples > 1) |
dw[0] |= blend->dw_alpha_mod; |
if (!fb->state.nr_cbufs || fb->blend_caps[0].can_alpha_test) |
dw[0] |= dsa->dw_blend_alpha; |
dw++; |
for (i = 0; i < fb->state.nr_cbufs; i++) { |
const struct ilo_fb_blend_caps *caps = &fb->blend_caps[i]; |
const struct ilo_blend_cso *cso = &blend->cso[i]; |
dw[0] = cso->payload[0]; |
dw[1] = cso->payload[1]; |
if (fb->state.cbufs[i]) { |
if (caps->can_blend) { |
if (caps->dst_alpha_forced_one) |
dw[0] |= cso->dw_blend_dst_alpha_forced_one; |
else |
dw[0] |= cso->dw_blend; |
} |
if (caps->can_logicop) |
dw[1] |= blend->dw_logicop; |
} else { |
dw[0] |= GEN8_RT_DW0_WRITE_DISABLE_A | |
GEN8_RT_DW0_WRITE_DISABLE_R | |
GEN8_RT_DW0_WRITE_DISABLE_G | |
GEN8_RT_DW0_WRITE_DISABLE_B; |
} |
dw += 2; |
} |
return state_offset; |
} |
#endif /* ILO_BUILDER_3D_BOTTOM_H */ |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h |
---|
0,0 → 1,1899 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2014 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_BUILDER_3D_TOP_H |
#define ILO_BUILDER_3D_TOP_H |
#include "genhw/genhw.h" |
#include "../ilo_resource.h" |
#include "../ilo_shader.h" |
#include "intel_winsys.h" |
#include "ilo_core.h" |
#include "ilo_dev.h" |
#include "ilo_state_3d.h" |
#include "ilo_builder.h" |
static inline void |
gen6_3DSTATE_URB(struct ilo_builder *builder, |
int vs_total_size, int gs_total_size, |
int vs_entry_size, int gs_entry_size) |
{ |
const uint8_t cmd_len = 3; |
const int row_size = 128; /* 1024 bits */ |
int vs_alloc_size, gs_alloc_size; |
int vs_num_entries, gs_num_entries; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 6, 6); |
/* in 1024-bit URB rows */ |
vs_alloc_size = (vs_entry_size + row_size - 1) / row_size; |
gs_alloc_size = (gs_entry_size + row_size - 1) / row_size; |
/* the valid range is [1, 5] */ |
if (!vs_alloc_size) |
vs_alloc_size = 1; |
if (!gs_alloc_size) |
gs_alloc_size = 1; |
assert(vs_alloc_size <= 5 && gs_alloc_size <= 5); |
/* the valid range is [24, 256] in multiples of 4 */ |
vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3; |
if (vs_num_entries > 256) |
vs_num_entries = 256; |
assert(vs_num_entries >= 24); |
/* the valid range is [0, 256] in multiples of 4 */ |
gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3; |
if (gs_num_entries > 256) |
gs_num_entries = 256; |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_URB) | (cmd_len - 2); |
dw[1] = (vs_alloc_size - 1) << GEN6_URB_DW1_VS_ENTRY_SIZE__SHIFT | |
vs_num_entries << GEN6_URB_DW1_VS_ENTRY_COUNT__SHIFT; |
dw[2] = gs_num_entries << GEN6_URB_DW2_GS_ENTRY_COUNT__SHIFT | |
(gs_alloc_size - 1) << GEN6_URB_DW2_GS_ENTRY_SIZE__SHIFT; |
} |
static inline void |
gen7_3dstate_push_constant_alloc(struct ilo_builder *builder, |
int subop, int offset, int size) |
{ |
const uint32_t cmd = GEN6_RENDER_TYPE_RENDER | |
GEN6_RENDER_SUBTYPE_3D | |
subop; |
const uint8_t cmd_len = 2; |
const int slice_count = ((ilo_dev_gen(builder->dev) == ILO_GEN(7.5) && |
builder->dev->gt == 3) || |
ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 2 : 1; |
uint32_t *dw; |
int end; |
ILO_DEV_ASSERT(builder->dev, 7, 8); |
/* VS, HS, DS, GS, and PS variants */ |
assert(subop >= GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_VS && |
subop <= GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_PS); |
/* |
* From the Ivy Bridge PRM, volume 2 part 1, page 68: |
* |
* "(A table that says the maximum size of each constant buffer is |
* 16KB") |
* |
* From the Ivy Bridge PRM, volume 2 part 1, page 115: |
* |
* "The sum of the Constant Buffer Offset and the Constant Buffer Size |
* may not exceed the maximum value of the Constant Buffer Size." |
* |
* Thus, the valid range of buffer end is [0KB, 16KB]. |
*/ |
end = (offset + size) / 1024; |
if (end > 16 * slice_count) { |
assert(!"invalid constant buffer end"); |
end = 16 * slice_count; |
} |
/* the valid range of buffer offset is [0KB, 15KB] */ |
offset = (offset + 1023) / 1024; |
if (offset > 15 * slice_count) { |
assert(!"invalid constant buffer offset"); |
offset = 15 * slice_count; |
} |
if (offset > end) { |
assert(!size); |
offset = end; |
} |
/* the valid range of buffer size is [0KB, 15KB] */ |
size = end - offset; |
if (size > 15 * slice_count) { |
assert(!"invalid constant buffer size"); |
size = 15 * slice_count; |
} |
assert(offset % slice_count == 0 && size % slice_count == 0); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = cmd | (cmd_len - 2); |
dw[1] = offset << GEN7_PCB_ALLOC_DW1_OFFSET__SHIFT | |
size; |
} |
static inline void |
gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(struct ilo_builder *builder, |
int offset, int size) |
{ |
gen7_3dstate_push_constant_alloc(builder, |
GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_VS, offset, size); |
} |
static inline void |
gen7_3DSTATE_PUSH_CONSTANT_ALLOC_HS(struct ilo_builder *builder, |
int offset, int size) |
{ |
gen7_3dstate_push_constant_alloc(builder, |
GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_HS, offset, size); |
} |
static inline void |
gen7_3DSTATE_PUSH_CONSTANT_ALLOC_DS(struct ilo_builder *builder, |
int offset, int size) |
{ |
gen7_3dstate_push_constant_alloc(builder, |
GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_DS, offset, size); |
} |
static inline void |
gen7_3DSTATE_PUSH_CONSTANT_ALLOC_GS(struct ilo_builder *builder, |
int offset, int size) |
{ |
gen7_3dstate_push_constant_alloc(builder, |
GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_GS, offset, size); |
} |
static inline void |
gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(struct ilo_builder *builder, |
int offset, int size) |
{ |
gen7_3dstate_push_constant_alloc(builder, |
GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_PS, offset, size); |
} |
static inline void |
gen7_3dstate_urb(struct ilo_builder *builder, |
int subop, int offset, int size, |
int entry_size) |
{ |
const uint32_t cmd = GEN6_RENDER_TYPE_RENDER | |
GEN6_RENDER_SUBTYPE_3D | |
subop; |
const uint8_t cmd_len = 2; |
const int row_size = 64; /* 512 bits */ |
int alloc_size, num_entries, min_entries, max_entries; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 7, 8); |
/* VS, HS, DS, and GS variants */ |
assert(subop >= GEN7_RENDER_OPCODE_3DSTATE_URB_VS && |
subop <= GEN7_RENDER_OPCODE_3DSTATE_URB_GS); |
/* in multiples of 8KB */ |
assert(offset % 8192 == 0); |
offset /= 8192; |
/* in multiple of 512-bit rows */ |
alloc_size = (entry_size + row_size - 1) / row_size; |
if (!alloc_size) |
alloc_size = 1; |
/* |
* From the Ivy Bridge PRM, volume 2 part 1, page 34: |
* |
* "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may |
* cause performance to decrease due to banking in the URB. Element |
* sizes of 16 to 20 should be programmed with six 512-bit URB rows." |
*/ |
if (subop == GEN7_RENDER_OPCODE_3DSTATE_URB_VS && alloc_size == 5) |
alloc_size = 6; |
/* in multiples of 8 */ |
num_entries = (size / row_size / alloc_size) & ~7; |
switch (subop) { |
case GEN7_RENDER_OPCODE_3DSTATE_URB_VS: |
switch (ilo_dev_gen(builder->dev)) { |
case ILO_GEN(8): |
max_entries = 2560; |
min_entries = 64; |
break; |
case ILO_GEN(7.5): |
max_entries = (builder->dev->gt >= 2) ? 1664 : 640; |
min_entries = (builder->dev->gt >= 2) ? 64 : 32; |
break; |
case ILO_GEN(7): |
default: |
max_entries = (builder->dev->gt == 2) ? 704 : 512; |
min_entries = 32; |
break; |
} |
assert(num_entries >= min_entries); |
if (num_entries > max_entries) |
num_entries = max_entries; |
break; |
case GEN7_RENDER_OPCODE_3DSTATE_URB_HS: |
max_entries = (builder->dev->gt == 2) ? 64 : 32; |
if (num_entries > max_entries) |
num_entries = max_entries; |
break; |
case GEN7_RENDER_OPCODE_3DSTATE_URB_DS: |
if (num_entries) |
assert(num_entries >= 138); |
break; |
case GEN7_RENDER_OPCODE_3DSTATE_URB_GS: |
switch (ilo_dev_gen(builder->dev)) { |
case ILO_GEN(8): |
max_entries = 960; |
break; |
case ILO_GEN(7.5): |
max_entries = (builder->dev->gt >= 2) ? 640 : 256; |
break; |
case ILO_GEN(7): |
default: |
max_entries = (builder->dev->gt == 2) ? 320 : 192; |
break; |
} |
if (num_entries > max_entries) |
num_entries = max_entries; |
break; |
default: |
break; |
} |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = cmd | (cmd_len - 2); |
dw[1] = offset << GEN7_URB_DW1_OFFSET__SHIFT | |
(alloc_size - 1) << GEN7_URB_DW1_ENTRY_SIZE__SHIFT | |
num_entries; |
} |
static inline void |
gen7_3DSTATE_URB_VS(struct ilo_builder *builder, |
int offset, int size, int entry_size) |
{ |
gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_VS, |
offset, size, entry_size); |
} |
static inline void |
gen7_3DSTATE_URB_HS(struct ilo_builder *builder, |
int offset, int size, int entry_size) |
{ |
gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_HS, |
offset, size, entry_size); |
} |
static inline void |
gen7_3DSTATE_URB_DS(struct ilo_builder *builder, |
int offset, int size, int entry_size) |
{ |
gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_DS, |
offset, size, entry_size); |
} |
static inline void |
gen7_3DSTATE_URB_GS(struct ilo_builder *builder, |
int offset, int size, int entry_size) |
{ |
gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_GS, |
offset, size, entry_size); |
} |
static inline void |
gen75_3DSTATE_VF(struct ilo_builder *builder, |
bool enable_cut_index, |
uint32_t cut_index) |
{ |
const uint8_t cmd_len = 2; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 7.5, 8); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN75_RENDER_CMD(3D, 3DSTATE_VF) | (cmd_len - 2); |
if (enable_cut_index) |
dw[0] |= GEN75_VF_DW0_CUT_INDEX_ENABLE; |
dw[1] = cut_index; |
} |
static inline void |
gen6_3DSTATE_VF_STATISTICS(struct ilo_builder *builder, |
bool enable) |
{ |
const uint8_t cmd_len = 1; |
const uint32_t dw0 = GEN6_RENDER_CMD(SINGLE_DW, 3DSTATE_VF_STATISTICS) | |
enable; |
ILO_DEV_ASSERT(builder->dev, 6, 8); |
ilo_builder_batch_write(builder, cmd_len, &dw0); |
} |
/** |
* Translate a pipe primitive type to the matching hardware primitive type. |
*/ |
static inline int |
gen6_3d_translate_pipe_prim(unsigned prim) |
{ |
static const int prim_mapping[ILO_PRIM_MAX] = { |
[PIPE_PRIM_POINTS] = GEN6_3DPRIM_POINTLIST, |
[PIPE_PRIM_LINES] = GEN6_3DPRIM_LINELIST, |
[PIPE_PRIM_LINE_LOOP] = GEN6_3DPRIM_LINELOOP, |
[PIPE_PRIM_LINE_STRIP] = GEN6_3DPRIM_LINESTRIP, |
[PIPE_PRIM_TRIANGLES] = GEN6_3DPRIM_TRILIST, |
[PIPE_PRIM_TRIANGLE_STRIP] = GEN6_3DPRIM_TRISTRIP, |
[PIPE_PRIM_TRIANGLE_FAN] = GEN6_3DPRIM_TRIFAN, |
[PIPE_PRIM_QUADS] = GEN6_3DPRIM_QUADLIST, |
[PIPE_PRIM_QUAD_STRIP] = GEN6_3DPRIM_QUADSTRIP, |
[PIPE_PRIM_POLYGON] = GEN6_3DPRIM_POLYGON, |
[PIPE_PRIM_LINES_ADJACENCY] = GEN6_3DPRIM_LINELIST_ADJ, |
[PIPE_PRIM_LINE_STRIP_ADJACENCY] = GEN6_3DPRIM_LINESTRIP_ADJ, |
[PIPE_PRIM_TRIANGLES_ADJACENCY] = GEN6_3DPRIM_TRILIST_ADJ, |
[PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = GEN6_3DPRIM_TRISTRIP_ADJ, |
[ILO_PRIM_RECTANGLES] = GEN6_3DPRIM_RECTLIST, |
}; |
assert(prim_mapping[prim]); |
return prim_mapping[prim]; |
} |
static inline void |
gen8_3DSTATE_VF_TOPOLOGY(struct ilo_builder *builder, unsigned pipe_prim) |
{ |
const uint8_t cmd_len = 2; |
const int prim = gen6_3d_translate_pipe_prim(pipe_prim); |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 8, 8); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_VF_TOPOLOGY) | (cmd_len - 2); |
dw[1] = prim; |
} |
static inline void |
gen8_3DSTATE_VF_INSTANCING(struct ilo_builder *builder, |
int vb_index, uint32_t step_rate) |
{ |
const uint8_t cmd_len = 3; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 8, 8); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_VF_INSTANCING) | (cmd_len - 2); |
dw[1] = vb_index; |
if (step_rate) |
dw[1] |= GEN8_INSTANCING_DW1_ENABLE; |
dw[2] = step_rate; |
} |
static inline void |
gen8_3DSTATE_VF_SGVS(struct ilo_builder *builder, |
bool vid_enable, int vid_ve, int vid_comp, |
bool iid_enable, int iid_ve, int iid_comp) |
{ |
const uint8_t cmd_len = 2; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 8, 8); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_VF_SGVS) | (cmd_len - 2); |
dw[1] = 0; |
if (iid_enable) { |
dw[1] |= GEN8_SGVS_DW1_IID_ENABLE | |
vid_comp << GEN8_SGVS_DW1_IID_VE_COMP__SHIFT | |
vid_ve << GEN8_SGVS_DW1_IID_VE_INDEX__SHIFT; |
} |
if (vid_enable) { |
dw[1] |= GEN8_SGVS_DW1_VID_ENABLE | |
vid_comp << GEN8_SGVS_DW1_VID_VE_COMP__SHIFT | |
vid_ve << GEN8_SGVS_DW1_VID_VE_INDEX__SHIFT; |
} |
} |
static inline void |
gen6_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder, |
const struct ilo_ve_state *ve, |
const struct ilo_vb_state *vb) |
{ |
uint8_t cmd_len; |
uint32_t *dw; |
unsigned pos, hw_idx; |
ILO_DEV_ASSERT(builder->dev, 6, 8); |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 82: |
* |
* "From 1 to 33 VBs can be specified..." |
*/ |
assert(ve->vb_count <= 33); |
if (!ve->vb_count) |
return; |
cmd_len = 1 + 4 * ve->vb_count; |
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_BUFFERS) | (cmd_len - 2); |
dw++; |
pos++; |
for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) { |
const unsigned instance_divisor = ve->instance_divisors[hw_idx]; |
const unsigned pipe_idx = ve->vb_mapping[hw_idx]; |
const struct pipe_vertex_buffer *cso = &vb->states[pipe_idx]; |
dw[0] = hw_idx << GEN6_VB_DW0_INDEX__SHIFT; |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) |
dw[0] |= builder->mocs << GEN8_VB_DW0_MOCS__SHIFT; |
else |
dw[0] |= builder->mocs << GEN6_VB_DW0_MOCS__SHIFT; |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) |
dw[0] |= GEN7_VB_DW0_ADDR_MODIFIED; |
if (instance_divisor) |
dw[0] |= GEN6_VB_DW0_ACCESS_INSTANCEDATA; |
else |
dw[0] |= GEN6_VB_DW0_ACCESS_VERTEXDATA; |
/* use null vb if there is no buffer or the stride is out of range */ |
if (!cso->buffer || cso->stride > 2048) { |
dw[0] |= GEN6_VB_DW0_IS_NULL; |
dw[1] = 0; |
dw[2] = 0; |
dw[3] = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? |
0 : instance_divisor; |
continue; |
} |
dw[0] |= cso->stride << GEN6_VB_DW0_PITCH__SHIFT; |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { |
const struct ilo_buffer *buf = ilo_buffer(cso->buffer); |
const uint32_t start_offset = cso->buffer_offset; |
ilo_builder_batch_reloc64(builder, pos + 1, |
buf->bo, start_offset, 0); |
dw[3] = buf->bo_size; |
} else { |
const struct ilo_buffer *buf = ilo_buffer(cso->buffer); |
const uint32_t start_offset = cso->buffer_offset; |
const uint32_t end_offset = buf->bo_size - 1; |
dw[3] = instance_divisor; |
ilo_builder_batch_reloc(builder, pos + 1, buf->bo, start_offset, 0); |
ilo_builder_batch_reloc(builder, pos + 2, buf->bo, end_offset, 0); |
} |
dw += 4; |
pos += 4; |
} |
} |
/* the user vertex buffer must be uploaded with gen6_user_vertex_buffer() */ |
static inline void |
gen6_user_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder, |
uint32_t vb_begin, uint32_t vb_end, |
uint32_t stride) |
{ |
const struct ilo_builder_writer *bat = |
&builder->writers[ILO_BUILDER_WRITER_BATCH]; |
const uint8_t cmd_len = 1 + 4; |
uint32_t *dw; |
unsigned pos; |
ILO_DEV_ASSERT(builder->dev, 6, 7.5); |
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_BUFFERS) | (cmd_len - 2); |
dw++; |
pos++; |
/* VERTEX_BUFFER_STATE */ |
dw[0] = 0 << GEN6_VB_DW0_INDEX__SHIFT | |
GEN6_VB_DW0_ACCESS_VERTEXDATA | |
stride << GEN6_VB_DW0_PITCH__SHIFT; |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) |
dw[0] |= GEN7_VB_DW0_ADDR_MODIFIED; |
dw[3] = 0; |
ilo_builder_batch_reloc(builder, pos + 1, bat->bo, vb_begin, 0); |
ilo_builder_batch_reloc(builder, pos + 2, bat->bo, vb_end, 0); |
} |
static inline void |
gen6_3DSTATE_VERTEX_ELEMENTS(struct ilo_builder *builder, |
const struct ilo_ve_state *ve) |
{ |
uint8_t cmd_len; |
uint32_t *dw; |
unsigned i; |
ILO_DEV_ASSERT(builder->dev, 6, 8); |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 92: |
* |
* "At least one VERTEX_ELEMENT_STATE structure must be included." |
* |
* From the Sandy Bridge PRM, volume 2 part 1, page 93: |
* |
* "Up to 34 (DevSNB+) vertex elements are supported." |
*/ |
assert(ve->count + ve->prepend_nosrc_cso >= 1); |
assert(ve->count + ve->prepend_nosrc_cso <= 34); |
STATIC_ASSERT(Elements(ve->cso[0].payload) == 2); |
cmd_len = 1 + 2 * (ve->count + ve->prepend_nosrc_cso); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_ELEMENTS) | (cmd_len - 2); |
dw++; |
if (ve->prepend_nosrc_cso) { |
memcpy(dw, ve->nosrc_cso.payload, sizeof(ve->nosrc_cso.payload)); |
dw += 2; |
} |
for (i = 0; i < ve->count - ve->last_cso_edgeflag; i++) { |
memcpy(dw, ve->cso[i].payload, sizeof(ve->cso[i].payload)); |
dw += 2; |
} |
if (ve->last_cso_edgeflag) |
memcpy(dw, ve->edgeflag_cso.payload, sizeof(ve->edgeflag_cso.payload)); |
} |
static inline void |
gen6_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder, |
const struct ilo_ib_state *ib, |
bool enable_cut_index) |
{ |
const uint8_t cmd_len = 3; |
struct ilo_buffer *buf = ilo_buffer(ib->hw_resource); |
uint32_t start_offset, end_offset; |
int format; |
uint32_t *dw; |
unsigned pos; |
ILO_DEV_ASSERT(builder->dev, 6, 7.5); |
if (!buf) |
return; |
/* this is moved to the new 3DSTATE_VF */ |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(7.5)) |
assert(!enable_cut_index); |
switch (ib->hw_index_size) { |
case 4: |
format = GEN6_IB_DW0_FORMAT_DWORD; |
break; |
case 2: |
format = GEN6_IB_DW0_FORMAT_WORD; |
break; |
case 1: |
format = GEN6_IB_DW0_FORMAT_BYTE; |
break; |
default: |
assert(!"unknown index size"); |
format = GEN6_IB_DW0_FORMAT_BYTE; |
break; |
} |
/* |
* set start_offset to 0 here and adjust pipe_draw_info::start with |
* ib->draw_start_offset in 3DPRIMITIVE |
*/ |
start_offset = 0; |
end_offset = buf->bo_size; |
/* end_offset must also be aligned and is inclusive */ |
end_offset -= (end_offset % ib->hw_index_size); |
end_offset--; |
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_INDEX_BUFFER) | (cmd_len - 2) | |
builder->mocs << GEN6_IB_DW0_MOCS__SHIFT | |
format; |
if (enable_cut_index) |
dw[0] |= GEN6_IB_DW0_CUT_INDEX_ENABLE; |
ilo_builder_batch_reloc(builder, pos + 1, buf->bo, start_offset, 0); |
ilo_builder_batch_reloc(builder, pos + 2, buf->bo, end_offset, 0); |
} |
static inline void |
gen8_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder, |
const struct ilo_ib_state *ib) |
{ |
const uint8_t cmd_len = 5; |
struct ilo_buffer *buf = ilo_buffer(ib->hw_resource); |
int format; |
uint32_t *dw; |
unsigned pos; |
ILO_DEV_ASSERT(builder->dev, 8, 8); |
if (!buf) |
return; |
switch (ib->hw_index_size) { |
case 4: |
format = GEN8_IB_DW1_FORMAT_DWORD; |
break; |
case 2: |
format = GEN8_IB_DW1_FORMAT_WORD; |
break; |
case 1: |
format = GEN8_IB_DW1_FORMAT_BYTE; |
break; |
default: |
assert(!"unknown index size"); |
format = GEN8_IB_DW1_FORMAT_BYTE; |
break; |
} |
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_INDEX_BUFFER) | (cmd_len - 2); |
dw[1] = format | |
builder->mocs << GEN8_IB_DW1_MOCS__SHIFT; |
dw[4] = buf->bo_size; |
/* ignore ib->offset here in favor of adjusting 3DPRIMITIVE */ |
ilo_builder_batch_reloc64(builder, pos + 2, buf->bo, 0, 0); |
} |
static inline void |
gen6_3DSTATE_VS(struct ilo_builder *builder, |
const struct ilo_shader_state *vs) |
{ |
const uint8_t cmd_len = 6; |
const struct ilo_shader_cso *cso; |
uint32_t dw2, dw4, dw5, *dw; |
ILO_DEV_ASSERT(builder->dev, 6, 7.5); |
cso = ilo_shader_get_kernel_cso(vs); |
dw2 = cso->payload[0]; |
dw4 = cso->payload[1]; |
dw5 = cso->payload[2]; |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2); |
dw[1] = ilo_shader_get_kernel_offset(vs); |
dw[2] = dw2; |
dw[3] = 0; /* scratch */ |
dw[4] = dw4; |
dw[5] = dw5; |
} |
static inline void |
gen8_3DSTATE_VS(struct ilo_builder *builder, |
const struct ilo_shader_state *vs, |
uint32_t clip_plane_enable) |
{ |
const uint8_t cmd_len = 9; |
const struct ilo_shader_cso *cso; |
uint32_t dw3, dw6, dw7, dw8, *dw; |
ILO_DEV_ASSERT(builder->dev, 8, 8); |
cso = ilo_shader_get_kernel_cso(vs); |
dw3 = cso->payload[0]; |
dw6 = cso->payload[1]; |
dw7 = cso->payload[2]; |
dw8 = clip_plane_enable << GEN8_VS_DW8_UCP_CLIP_ENABLES__SHIFT; |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2); |
dw[1] = ilo_shader_get_kernel_offset(vs); |
dw[2] = 0; |
dw[3] = dw3; |
dw[4] = 0; /* scratch */ |
dw[5] = 0; |
dw[6] = dw6; |
dw[7] = dw7; |
dw[8] = dw8; |
} |
static inline void |
gen6_disable_3DSTATE_VS(struct ilo_builder *builder) |
{ |
const uint8_t cmd_len = 6; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 6, 7.5); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2); |
dw[1] = 0; |
dw[2] = 0; |
dw[3] = 0; |
dw[4] = 0; |
dw[5] = 0; |
} |
static inline void |
gen7_disable_3DSTATE_HS(struct ilo_builder *builder) |
{ |
const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 9 : 7; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 7, 8); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_HS) | (cmd_len - 2); |
dw[1] = 0; |
dw[2] = 0; |
dw[3] = 0; |
dw[4] = 0; |
dw[5] = 0; |
dw[6] = 0; |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { |
dw[7] = 0; |
dw[8] = 0; |
} |
} |
static inline void |
gen7_3DSTATE_TE(struct ilo_builder *builder) |
{ |
const uint8_t cmd_len = 4; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 7, 8); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_TE) | (cmd_len - 2); |
dw[1] = 0; |
dw[2] = 0; |
dw[3] = 0; |
} |
static inline void |
gen7_disable_3DSTATE_DS(struct ilo_builder *builder) |
{ |
const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 9 : 6; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 7, 8); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_DS) | (cmd_len - 2); |
dw[1] = 0; |
dw[2] = 0; |
dw[3] = 0; |
dw[4] = 0; |
dw[5] = 0; |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { |
dw[6] = 0; |
dw[7] = 0; |
dw[8] = 0; |
} |
} |
static inline void |
gen6_3DSTATE_GS(struct ilo_builder *builder, |
const struct ilo_shader_state *gs) |
{ |
const uint8_t cmd_len = 7; |
const struct ilo_shader_cso *cso; |
uint32_t dw2, dw4, dw5, dw6, *dw; |
ILO_DEV_ASSERT(builder->dev, 6, 6); |
cso = ilo_shader_get_kernel_cso(gs); |
dw2 = cso->payload[0]; |
dw4 = cso->payload[1]; |
dw5 = cso->payload[2]; |
dw6 = cso->payload[3]; |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2); |
dw[1] = ilo_shader_get_kernel_offset(gs); |
dw[2] = dw2; |
dw[3] = 0; /* scratch */ |
dw[4] = dw4; |
dw[5] = dw5; |
dw[6] = dw6; |
} |
static inline void |
gen6_so_3DSTATE_GS(struct ilo_builder *builder, |
const struct ilo_shader_state *vs, |
int verts_per_prim) |
{ |
const uint8_t cmd_len = 7; |
struct ilo_shader_cso cso; |
enum ilo_kernel_param param; |
uint32_t dw2, dw4, dw5, dw6, *dw; |
ILO_DEV_ASSERT(builder->dev, 6, 6); |
assert(ilo_shader_get_kernel_param(vs, ILO_KERNEL_VS_GEN6_SO)); |
switch (verts_per_prim) { |
case 1: |
param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET; |
break; |
case 2: |
param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET; |
break; |
default: |
param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET; |
break; |
} |
/* cannot use VS's CSO */ |
ilo_gpe_init_gs_cso(builder->dev, vs, &cso); |
dw2 = cso.payload[0]; |
dw4 = cso.payload[1]; |
dw5 = cso.payload[2]; |
dw6 = cso.payload[3]; |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2); |
dw[1] = ilo_shader_get_kernel_offset(vs) + |
ilo_shader_get_kernel_param(vs, param); |
dw[2] = dw2; |
dw[3] = 0; |
dw[4] = dw4; |
dw[5] = dw5; |
dw[6] = dw6; |
} |
static inline void |
gen6_disable_3DSTATE_GS(struct ilo_builder *builder) |
{ |
const uint8_t cmd_len = 7; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 6, 6); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2); |
dw[1] = 0; |
dw[2] = 0; |
dw[3] = 0; |
/* honor the valid range of URB read length */ |
dw[4] = 1 << GEN6_GS_DW4_URB_READ_LEN__SHIFT; |
dw[5] = GEN6_GS_DW5_STATISTICS; |
dw[6] = 0; |
} |
static inline void |
gen6_3DSTATE_GS_SVB_INDEX(struct ilo_builder *builder, |
int index, unsigned svbi, |
unsigned max_svbi, |
bool load_vertex_count) |
{ |
const uint8_t cmd_len = 4; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 6, 6); |
assert(index >= 0 && index < 4); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS_SVB_INDEX) | (cmd_len - 2); |
dw[1] = index << GEN6_SVBI_DW1_INDEX__SHIFT; |
if (load_vertex_count) |
dw[1] |= GEN6_SVBI_DW1_LOAD_INTERNAL_VERTEX_COUNT; |
dw[2] = svbi; |
dw[3] = max_svbi; |
} |
static inline void |
gen7_3DSTATE_GS(struct ilo_builder *builder, |
const struct ilo_shader_state *gs) |
{ |
const uint8_t cmd_len = 7; |
const struct ilo_shader_cso *cso; |
uint32_t dw2, dw4, dw5, *dw; |
ILO_DEV_ASSERT(builder->dev, 7, 7.5); |
cso = ilo_shader_get_kernel_cso(gs); |
dw2 = cso->payload[0]; |
dw4 = cso->payload[1]; |
dw5 = cso->payload[2]; |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2); |
dw[1] = ilo_shader_get_kernel_offset(gs); |
dw[2] = dw2; |
dw[3] = 0; /* scratch */ |
dw[4] = dw4; |
dw[5] = dw5; |
dw[6] = 0; |
} |
static inline void |
gen7_disable_3DSTATE_GS(struct ilo_builder *builder) |
{ |
const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 10 : 7; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 7, 8); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2); |
dw[1] = 0; |
dw[2] = 0; |
dw[3] = 0; |
dw[4] = 0; |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { |
dw[7] = GEN8_GS_DW7_STATISTICS; |
dw[8] = 0; |
dw[9] = 0; |
} else { |
dw[5] = GEN7_GS_DW5_STATISTICS; |
dw[6] = 0; |
} |
} |
static inline void |
gen7_3DSTATE_STREAMOUT(struct ilo_builder *builder, |
int render_stream, |
bool render_disable, |
int vertex_attrib_count, |
const int *buf_strides) |
{ |
const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 5 : 3; |
uint32_t *dw; |
int buf_mask; |
ILO_DEV_ASSERT(builder->dev, 7, 8); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_STREAMOUT) | (cmd_len - 2); |
dw[1] = render_stream << GEN7_SO_DW1_RENDER_STREAM_SELECT__SHIFT; |
if (render_disable) |
dw[1] |= GEN7_SO_DW1_RENDER_DISABLE; |
if (buf_strides) { |
buf_mask = ((bool) buf_strides[3]) << 3 | |
((bool) buf_strides[2]) << 2 | |
((bool) buf_strides[1]) << 1 | |
((bool) buf_strides[0]); |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { |
dw[3] = buf_strides[1] << 16 | buf_strides[0]; |
dw[4] = buf_strides[3] << 16 | buf_strides[1]; |
} |
} else { |
buf_mask = 0; |
} |
if (buf_mask) { |
int read_len; |
dw[1] |= GEN7_SO_DW1_SO_ENABLE | |
GEN7_SO_DW1_STATISTICS; |
/* API_OPENGL */ |
if (true) |
dw[1] |= GEN7_SO_DW1_REORDER_TRAILING; |
if (ilo_dev_gen(builder->dev) < ILO_GEN(8)) |
dw[1] |= buf_mask << GEN7_SO_DW1_BUFFER_ENABLES__SHIFT; |
read_len = (vertex_attrib_count + 1) / 2; |
if (!read_len) |
read_len = 1; |
dw[2] = 0 << GEN7_SO_DW2_STREAM3_READ_OFFSET__SHIFT | |
(read_len - 1) << GEN7_SO_DW2_STREAM3_READ_LEN__SHIFT | |
0 << GEN7_SO_DW2_STREAM2_READ_OFFSET__SHIFT | |
(read_len - 1) << GEN7_SO_DW2_STREAM2_READ_LEN__SHIFT | |
0 << GEN7_SO_DW2_STREAM1_READ_OFFSET__SHIFT | |
(read_len - 1) << GEN7_SO_DW2_STREAM1_READ_LEN__SHIFT | |
0 << GEN7_SO_DW2_STREAM0_READ_OFFSET__SHIFT | |
(read_len - 1) << GEN7_SO_DW2_STREAM0_READ_LEN__SHIFT; |
} else { |
dw[2] = 0; |
} |
} |
static inline void |
gen7_3DSTATE_SO_DECL_LIST(struct ilo_builder *builder, |
const struct pipe_stream_output_info *so_info) |
{ |
/* |
* Note that "DWord Length" has 9 bits for this command and the type of |
* cmd_len cannot be uint8_t. |
*/ |
uint16_t cmd_len; |
struct { |
int buf_selects; |
int decl_count; |
uint16_t decls[128]; |
} streams[4]; |
unsigned buf_offsets[PIPE_MAX_SO_BUFFERS]; |
int hw_decl_count, i; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 7, 8); |
memset(streams, 0, sizeof(streams)); |
memset(buf_offsets, 0, sizeof(buf_offsets)); |
for (i = 0; i < so_info->num_outputs; i++) { |
unsigned decl, st, buf, reg, mask; |
st = so_info->output[i].stream; |
buf = so_info->output[i].output_buffer; |
/* pad with holes */ |
while (buf_offsets[buf] < so_info->output[i].dst_offset) { |
int num_dwords; |
num_dwords = so_info->output[i].dst_offset - buf_offsets[buf]; |
if (num_dwords > 4) |
num_dwords = 4; |
decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT | |
GEN7_SO_DECL_HOLE_FLAG | |
((1 << num_dwords) - 1) << GEN7_SO_DECL_COMPONENT_MASK__SHIFT; |
assert(streams[st].decl_count < Elements(streams[st].decls)); |
streams[st].decls[streams[st].decl_count++] = decl; |
buf_offsets[buf] += num_dwords; |
} |
assert(buf_offsets[buf] == so_info->output[i].dst_offset); |
reg = so_info->output[i].register_index; |
mask = ((1 << so_info->output[i].num_components) - 1) << |
so_info->output[i].start_component; |
decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT | |
reg << GEN7_SO_DECL_REG_INDEX__SHIFT | |
mask << GEN7_SO_DECL_COMPONENT_MASK__SHIFT; |
assert(streams[st].decl_count < Elements(streams[st].decls)); |
streams[st].buf_selects |= 1 << buf; |
streams[st].decls[streams[st].decl_count++] = decl; |
buf_offsets[buf] += so_info->output[i].num_components; |
} |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(7.5)) { |
hw_decl_count = MAX4(streams[0].decl_count, streams[1].decl_count, |
streams[2].decl_count, streams[3].decl_count); |
} else { |
/* |
* From the Ivy Bridge PRM, volume 2 part 1, page 201: |
* |
* "Errata: All 128 decls for all four streams must be included |
* whenever this command is issued. The "Num Entries [n]" fields |
* still contain the actual numbers of valid decls." |
*/ |
hw_decl_count = 128; |
} |
cmd_len = 3 + 2 * hw_decl_count; |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SO_DECL_LIST) | (cmd_len - 2); |
dw[1] = streams[3].buf_selects << GEN7_SO_DECL_DW1_STREAM3_BUFFER_SELECTS__SHIFT | |
streams[2].buf_selects << GEN7_SO_DECL_DW1_STREAM2_BUFFER_SELECTS__SHIFT | |
streams[1].buf_selects << GEN7_SO_DECL_DW1_STREAM1_BUFFER_SELECTS__SHIFT | |
streams[0].buf_selects << GEN7_SO_DECL_DW1_STREAM0_BUFFER_SELECTS__SHIFT; |
dw[2] = streams[3].decl_count << GEN7_SO_DECL_DW2_STREAM3_ENTRY_COUNT__SHIFT | |
streams[2].decl_count << GEN7_SO_DECL_DW2_STREAM2_ENTRY_COUNT__SHIFT | |
streams[1].decl_count << GEN7_SO_DECL_DW2_STREAM1_ENTRY_COUNT__SHIFT | |
streams[0].decl_count << GEN7_SO_DECL_DW2_STREAM0_ENTRY_COUNT__SHIFT; |
dw += 3; |
for (i = 0; i < hw_decl_count; i++) { |
dw[0] = streams[1].decls[i] << 16 | streams[0].decls[i]; |
dw[1] = streams[3].decls[i] << 16 | streams[2].decls[i]; |
dw += 2; |
} |
} |
static inline void |
gen7_3DSTATE_SO_BUFFER(struct ilo_builder *builder, int index, int stride, |
const struct pipe_stream_output_target *so_target) |
{ |
const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 8 : 4; |
struct ilo_buffer *buf; |
int start, end; |
uint32_t *dw; |
unsigned pos; |
ILO_DEV_ASSERT(builder->dev, 7, 8); |
buf = ilo_buffer(so_target->buffer); |
/* DWord-aligned */ |
assert(stride % 4 == 0); |
assert(so_target->buffer_offset % 4 == 0); |
stride &= ~3; |
start = so_target->buffer_offset & ~3; |
end = (start + so_target->buffer_size) & ~3; |
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SO_BUFFER) | (cmd_len - 2); |
dw[1] = index << GEN7_SO_BUF_DW1_INDEX__SHIFT | |
stride; |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { |
dw[1] |= builder->mocs << GEN8_SO_BUF_DW1_MOCS__SHIFT; |
dw[4] = end - start; |
dw[5] = 0; |
dw[6] = 0; |
dw[7] = 0; |
ilo_builder_batch_reloc64(builder, pos + 2, |
buf->bo, start, INTEL_RELOC_WRITE); |
} else { |
dw[1] |= builder->mocs << GEN7_SO_BUF_DW1_MOCS__SHIFT; |
ilo_builder_batch_reloc(builder, pos + 2, |
buf->bo, start, INTEL_RELOC_WRITE); |
ilo_builder_batch_reloc(builder, pos + 3, |
buf->bo, end, INTEL_RELOC_WRITE); |
} |
} |
static inline void |
gen7_disable_3DSTATE_SO_BUFFER(struct ilo_builder *builder, int index) |
{ |
const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 8 : 4; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 7, 8); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SO_BUFFER) | (cmd_len - 2); |
dw[1] = index << GEN7_SO_BUF_DW1_INDEX__SHIFT; |
dw[2] = 0; |
dw[3] = 0; |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { |
dw[4] = 0; |
dw[5] = 0; |
dw[6] = 0; |
dw[7] = 0; |
} |
} |
static inline void |
gen6_3DSTATE_BINDING_TABLE_POINTERS(struct ilo_builder *builder, |
uint32_t vs_binding_table, |
uint32_t gs_binding_table, |
uint32_t ps_binding_table) |
{ |
const uint8_t cmd_len = 4; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 6, 6); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_BINDING_TABLE_POINTERS) | |
GEN6_BINDING_TABLE_PTR_DW0_VS_CHANGED | |
GEN6_BINDING_TABLE_PTR_DW0_GS_CHANGED | |
GEN6_BINDING_TABLE_PTR_DW0_PS_CHANGED | |
(cmd_len - 2); |
dw[1] = vs_binding_table; |
dw[2] = gs_binding_table; |
dw[3] = ps_binding_table; |
} |
static inline void |
gen6_3DSTATE_SAMPLER_STATE_POINTERS(struct ilo_builder *builder, |
uint32_t vs_sampler_state, |
uint32_t gs_sampler_state, |
uint32_t ps_sampler_state) |
{ |
const uint8_t cmd_len = 4; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 6, 6); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLER_STATE_POINTERS) | |
GEN6_SAMPLER_PTR_DW0_VS_CHANGED | |
GEN6_SAMPLER_PTR_DW0_GS_CHANGED | |
GEN6_SAMPLER_PTR_DW0_PS_CHANGED | |
(cmd_len - 2); |
dw[1] = vs_sampler_state; |
dw[2] = gs_sampler_state; |
dw[3] = ps_sampler_state; |
} |
static inline void |
gen7_3dstate_pointer(struct ilo_builder *builder, |
int subop, uint32_t pointer) |
{ |
const uint32_t cmd = GEN6_RENDER_TYPE_RENDER | |
GEN6_RENDER_SUBTYPE_3D | |
subop; |
const uint8_t cmd_len = 2; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 7, 8); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = cmd | (cmd_len - 2); |
dw[1] = pointer; |
} |
static inline void |
gen7_3DSTATE_BINDING_TABLE_POINTERS_VS(struct ilo_builder *builder, |
uint32_t binding_table) |
{ |
gen7_3dstate_pointer(builder, |
GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_VS, |
binding_table); |
} |
static inline void |
gen7_3DSTATE_BINDING_TABLE_POINTERS_HS(struct ilo_builder *builder, |
uint32_t binding_table) |
{ |
gen7_3dstate_pointer(builder, |
GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_HS, |
binding_table); |
} |
static inline void |
gen7_3DSTATE_BINDING_TABLE_POINTERS_DS(struct ilo_builder *builder, |
uint32_t binding_table) |
{ |
gen7_3dstate_pointer(builder, |
GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_DS, |
binding_table); |
} |
static inline void |
gen7_3DSTATE_BINDING_TABLE_POINTERS_GS(struct ilo_builder *builder, |
uint32_t binding_table) |
{ |
gen7_3dstate_pointer(builder, |
GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_GS, |
binding_table); |
} |
static inline void |
gen7_3DSTATE_SAMPLER_STATE_POINTERS_VS(struct ilo_builder *builder, |
uint32_t sampler_state) |
{ |
gen7_3dstate_pointer(builder, |
GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_VS, |
sampler_state); |
} |
static inline void |
gen7_3DSTATE_SAMPLER_STATE_POINTERS_HS(struct ilo_builder *builder, |
uint32_t sampler_state) |
{ |
gen7_3dstate_pointer(builder, |
GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_HS, |
sampler_state); |
} |
static inline void |
gen7_3DSTATE_SAMPLER_STATE_POINTERS_DS(struct ilo_builder *builder, |
uint32_t sampler_state) |
{ |
gen7_3dstate_pointer(builder, |
GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_DS, |
sampler_state); |
} |
static inline void |
gen7_3DSTATE_SAMPLER_STATE_POINTERS_GS(struct ilo_builder *builder, |
uint32_t sampler_state) |
{ |
gen7_3dstate_pointer(builder, |
GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_GS, |
sampler_state); |
} |
static inline void |
gen6_3dstate_constant(struct ilo_builder *builder, int subop, |
const uint32_t *bufs, const int *sizes, |
int num_bufs) |
{ |
const uint32_t cmd = GEN6_RENDER_TYPE_RENDER | |
GEN6_RENDER_SUBTYPE_3D | |
subop; |
const uint8_t cmd_len = 5; |
unsigned buf_enabled = 0x0; |
uint32_t buf_dw[4], *dw; |
int max_read_length, total_read_length; |
int i; |
ILO_DEV_ASSERT(builder->dev, 6, 6); |
assert(num_bufs <= 4); |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 138: |
* |
* "(3DSTATE_CONSTANT_VS) The sum of all four read length fields (each |
* incremented to represent the actual read length) must be less than |
* or equal to 32" |
* |
* From the Sandy Bridge PRM, volume 2 part 1, page 161: |
* |
* "(3DSTATE_CONSTANT_GS) The sum of all four read length fields (each |
* incremented to represent the actual read length) must be less than |
* or equal to 64" |
* |
* From the Sandy Bridge PRM, volume 2 part 1, page 287: |
* |
* "(3DSTATE_CONSTANT_PS) The sum of all four read length fields (each |
* incremented to represent the actual read length) must be less than |
* or equal to 64" |
*/ |
switch (subop) { |
case GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_VS: |
max_read_length = 32; |
break; |
case GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_GS: |
case GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_PS: |
max_read_length = 64; |
break; |
default: |
assert(!"unknown pcb subop"); |
max_read_length = 0; |
break; |
} |
total_read_length = 0; |
for (i = 0; i < 4; i++) { |
if (i < num_bufs && sizes[i]) { |
/* in 256-bit units */ |
const int read_len = (sizes[i] + 31) / 32; |
assert(bufs[i] % 32 == 0); |
assert(read_len <= 32); |
buf_enabled |= 1 << i; |
buf_dw[i] = bufs[i] | (read_len - 1); |
total_read_length += read_len; |
} else { |
buf_dw[i] = 0; |
} |
} |
assert(total_read_length <= max_read_length); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = cmd | (cmd_len - 2) | |
buf_enabled << GEN6_CONSTANT_DW0_BUFFER_ENABLES__SHIFT | |
builder->mocs << GEN6_CONSTANT_DW0_MOCS__SHIFT; |
memcpy(&dw[1], buf_dw, sizeof(buf_dw)); |
} |
static inline void |
gen6_3DSTATE_CONSTANT_VS(struct ilo_builder *builder, |
const uint32_t *bufs, const int *sizes, |
int num_bufs) |
{ |
gen6_3dstate_constant(builder, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_VS, |
bufs, sizes, num_bufs); |
} |
static inline void |
gen6_3DSTATE_CONSTANT_GS(struct ilo_builder *builder, |
const uint32_t *bufs, const int *sizes, |
int num_bufs) |
{ |
gen6_3dstate_constant(builder, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_GS, |
bufs, sizes, num_bufs); |
} |
static inline void |
gen7_3dstate_constant(struct ilo_builder *builder, |
int subop, |
const uint32_t *bufs, const int *sizes, |
int num_bufs) |
{ |
const uint32_t cmd = GEN6_RENDER_TYPE_RENDER | |
GEN6_RENDER_SUBTYPE_3D | |
subop; |
const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 11 : 7; |
uint32_t payload[6], *dw; |
int total_read_length, i; |
ILO_DEV_ASSERT(builder->dev, 7, 8); |
/* VS, HS, DS, GS, and PS variants */ |
assert(subop >= GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_VS && |
subop <= GEN7_RENDER_OPCODE_3DSTATE_CONSTANT_DS && |
subop != GEN6_RENDER_OPCODE_3DSTATE_SAMPLE_MASK); |
assert(num_bufs <= 4); |
payload[0] = 0; |
payload[1] = 0; |
total_read_length = 0; |
for (i = 0; i < 4; i++) { |
int read_len; |
/* |
* From the Ivy Bridge PRM, volume 2 part 1, page 112: |
* |
* "Constant buffers must be enabled in order from Constant Buffer 0 |
* to Constant Buffer 3 within this command. For example, it is |
* not allowed to enable Constant Buffer 1 by programming a |
* non-zero value in the VS Constant Buffer 1 Read Length without a |
* non-zero value in VS Constant Buffer 0 Read Length." |
*/ |
if (i >= num_bufs || !sizes[i]) { |
for (; i < 4; i++) { |
assert(i >= num_bufs || !sizes[i]); |
payload[2 + i] = 0; |
} |
break; |
} |
/* read lengths are in 256-bit units */ |
read_len = (sizes[i] + 31) / 32; |
/* the lower 5 bits are used for memory object control state */ |
assert(bufs[i] % 32 == 0); |
payload[i / 2] |= read_len << ((i % 2) ? 16 : 0); |
payload[2 + i] = bufs[i]; |
total_read_length += read_len; |
} |
/* |
* From the Ivy Bridge PRM, volume 2 part 1, page 113: |
* |
* "The sum of all four read length fields must be less than or equal |
* to the size of 64" |
*/ |
assert(total_read_length <= 64); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = cmd | (cmd_len - 2); |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { |
dw[1] = payload[0]; |
dw[2] = payload[1]; |
dw[3] = payload[2]; |
dw[4] = 0; |
dw[5] = payload[3]; |
dw[6] = 0; |
dw[7] = payload[4]; |
dw[8] = 0; |
dw[9] = payload[5]; |
dw[10] = 0; |
} else { |
payload[2] |= builder->mocs << GEN7_CONSTANT_DW_ADDR_MOCS__SHIFT; |
memcpy(&dw[1], payload, sizeof(payload)); |
} |
} |
static inline void |
gen7_3DSTATE_CONSTANT_VS(struct ilo_builder *builder, |
const uint32_t *bufs, const int *sizes, |
int num_bufs) |
{ |
gen7_3dstate_constant(builder, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_VS, |
bufs, sizes, num_bufs); |
} |
static inline void |
gen7_3DSTATE_CONSTANT_HS(struct ilo_builder *builder, |
const uint32_t *bufs, const int *sizes, |
int num_bufs) |
{ |
gen7_3dstate_constant(builder, GEN7_RENDER_OPCODE_3DSTATE_CONSTANT_HS, |
bufs, sizes, num_bufs); |
} |
static inline void |
gen7_3DSTATE_CONSTANT_DS(struct ilo_builder *builder, |
const uint32_t *bufs, const int *sizes, |
int num_bufs) |
{ |
gen7_3dstate_constant(builder, GEN7_RENDER_OPCODE_3DSTATE_CONSTANT_DS, |
bufs, sizes, num_bufs); |
} |
static inline void |
gen7_3DSTATE_CONSTANT_GS(struct ilo_builder *builder, |
const uint32_t *bufs, const int *sizes, |
int num_bufs) |
{ |
gen7_3dstate_constant(builder, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_GS, |
bufs, sizes, num_bufs); |
} |
static inline uint32_t |
gen6_BINDING_TABLE_STATE(struct ilo_builder *builder, |
const uint32_t *surface_states, |
int num_surface_states) |
{ |
const int state_align = 32; |
const int state_len = num_surface_states; |
uint32_t state_offset, *dw; |
ILO_DEV_ASSERT(builder->dev, 6, 8); |
/* |
* From the Sandy Bridge PRM, volume 4 part 1, page 69: |
* |
* "It is stored as an array of up to 256 elements..." |
*/ |
assert(num_surface_states <= 256); |
if (!num_surface_states) |
return 0; |
state_offset = ilo_builder_surface_pointer(builder, |
ILO_BUILDER_ITEM_BINDING_TABLE, state_align, state_len, &dw); |
memcpy(dw, surface_states, state_len << 2); |
return state_offset; |
} |
static inline uint32_t |
gen6_SURFACE_STATE(struct ilo_builder *builder, |
const struct ilo_view_surface *surf, |
bool for_render) |
{ |
int state_align, state_len; |
uint32_t state_offset, *dw; |
ILO_DEV_ASSERT(builder->dev, 6, 8); |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { |
state_align = 64; |
state_len = 13; |
state_offset = ilo_builder_surface_pointer(builder, |
ILO_BUILDER_ITEM_SURFACE, state_align, state_len, &dw); |
memcpy(dw, surf->payload, state_len << 2); |
if (surf->bo) { |
const uint32_t mocs = (surf->scanout) ? |
(GEN8_MOCS_MT_PTE | GEN8_MOCS_CT_L3) : builder->mocs; |
dw[1] |= mocs << GEN8_SURFACE_DW1_MOCS__SHIFT; |
ilo_builder_surface_reloc64(builder, state_offset, 8, surf->bo, |
surf->payload[8], (for_render) ? INTEL_RELOC_WRITE : 0); |
} |
} else { |
state_align = 32; |
state_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? 8 : 6; |
state_offset = ilo_builder_surface_pointer(builder, |
ILO_BUILDER_ITEM_SURFACE, state_align, state_len, &dw); |
memcpy(dw, surf->payload, state_len << 2); |
if (surf->bo) { |
/* |
* For scanouts, we should not enable caching in LLC. Since we only |
* enable that on Gen8+, we are fine here. |
*/ |
dw[5] |= builder->mocs << GEN6_SURFACE_DW5_MOCS__SHIFT; |
ilo_builder_surface_reloc(builder, state_offset, 1, surf->bo, |
surf->payload[1], (for_render) ? INTEL_RELOC_WRITE : 0); |
} |
} |
return state_offset; |
} |
static inline uint32_t |
gen6_so_SURFACE_STATE(struct ilo_builder *builder, |
const struct pipe_stream_output_target *so, |
const struct pipe_stream_output_info *so_info, |
int so_index) |
{ |
struct ilo_buffer *buf = ilo_buffer(so->buffer); |
unsigned bo_offset, struct_size; |
enum pipe_format elem_format; |
struct ilo_view_surface surf; |
ILO_DEV_ASSERT(builder->dev, 6, 6); |
bo_offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4; |
struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4; |
switch (so_info->output[so_index].num_components) { |
case 1: |
elem_format = PIPE_FORMAT_R32_FLOAT; |
break; |
case 2: |
elem_format = PIPE_FORMAT_R32G32_FLOAT; |
break; |
case 3: |
elem_format = PIPE_FORMAT_R32G32B32_FLOAT; |
break; |
case 4: |
elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT; |
break; |
default: |
assert(!"unexpected SO components length"); |
elem_format = PIPE_FORMAT_R32_FLOAT; |
break; |
} |
ilo_gpe_init_view_surface_for_buffer(builder->dev, buf, bo_offset, |
so->buffer_size, struct_size, elem_format, false, true, &surf); |
return gen6_SURFACE_STATE(builder, &surf, false); |
} |
static inline uint32_t |
gen6_SAMPLER_STATE(struct ilo_builder *builder, |
const struct ilo_sampler_cso * const *samplers, |
const struct pipe_sampler_view * const *views, |
const uint32_t *sampler_border_colors, |
int num_samplers) |
{ |
const int state_align = 32; |
const int state_len = 4 * num_samplers; |
uint32_t state_offset, *dw; |
int i; |
ILO_DEV_ASSERT(builder->dev, 6, 8); |
/* |
* From the Sandy Bridge PRM, volume 4 part 1, page 101: |
* |
* "The sampler state is stored as an array of up to 16 elements..." |
*/ |
assert(num_samplers <= 16); |
if (!num_samplers) |
return 0; |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 132: |
* |
* "(Sampler Count of 3DSTATE_VS) Specifies how many samplers (in |
* multiples of 4) the vertex shader 0 kernel uses. Used only for |
* prefetching the associated sampler state entries. |
* |
* It also applies to other shader stages. |
*/ |
ilo_builder_dynamic_pad_top(builder, 4 * (4 - (num_samplers % 4))); |
state_offset = ilo_builder_dynamic_pointer(builder, |
ILO_BUILDER_ITEM_SAMPLER, state_align, state_len, &dw); |
for (i = 0; i < num_samplers; i++) { |
const struct ilo_sampler_cso *sampler = samplers[i]; |
const struct pipe_sampler_view *view = views[i]; |
const uint32_t border_color = sampler_border_colors[i]; |
uint32_t dw_filter, dw_wrap; |
/* there may be holes */ |
if (!sampler || !view) { |
/* disabled sampler */ |
dw[0] = 1 << 31; |
dw[1] = 0; |
dw[2] = 0; |
dw[3] = 0; |
dw += 4; |
continue; |
} |
/* determine filter and wrap modes */ |
switch (view->texture->target) { |
case PIPE_TEXTURE_1D: |
dw_filter = (sampler->anisotropic) ? |
sampler->dw_filter_aniso : sampler->dw_filter; |
dw_wrap = sampler->dw_wrap_1d; |
break; |
case PIPE_TEXTURE_3D: |
/* |
* From the Sandy Bridge PRM, volume 4 part 1, page 103: |
* |
* "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for |
* surfaces of type SURFTYPE_3D." |
*/ |
dw_filter = sampler->dw_filter; |
dw_wrap = sampler->dw_wrap; |
break; |
case PIPE_TEXTURE_CUBE: |
dw_filter = (sampler->anisotropic) ? |
sampler->dw_filter_aniso : sampler->dw_filter; |
dw_wrap = sampler->dw_wrap_cube; |
break; |
default: |
dw_filter = (sampler->anisotropic) ? |
sampler->dw_filter_aniso : sampler->dw_filter; |
dw_wrap = sampler->dw_wrap; |
break; |
} |
dw[0] = sampler->payload[0]; |
dw[1] = sampler->payload[1]; |
assert(!(border_color & 0x1f)); |
dw[2] = border_color; |
dw[3] = sampler->payload[2]; |
dw[0] |= dw_filter; |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) { |
dw[3] |= dw_wrap; |
} |
else { |
/* |
* From the Sandy Bridge PRM, volume 4 part 1, page 21: |
* |
* "[DevSNB] Errata: Incorrect behavior is observed in cases |
* where the min and mag mode filters are different and |
* SurfMinLOD is nonzero. The determination of MagMode uses the |
* following equation instead of the one in the above |
* pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)" |
* |
* As a way to work around that, we set Base to |
* view->u.tex.first_level. |
*/ |
dw[0] |= view->u.tex.first_level << 22; |
dw[1] |= dw_wrap; |
} |
dw += 4; |
} |
return state_offset; |
} |
static inline uint32_t |
gen6_SAMPLER_BORDER_COLOR_STATE(struct ilo_builder *builder, |
const struct ilo_sampler_cso *sampler) |
{ |
const int state_align = |
(ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 64 : 32; |
const int state_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? 4 : 12; |
ILO_DEV_ASSERT(builder->dev, 6, 8); |
assert(Elements(sampler->payload) >= 3 + state_len); |
/* see ilo_gpe_init_sampler_cso() */ |
return ilo_builder_dynamic_write(builder, ILO_BUILDER_ITEM_BLOB, |
state_align, state_len, &sampler->payload[3]); |
} |
static inline uint32_t |
gen6_push_constant_buffer(struct ilo_builder *builder, |
int size, void **pcb) |
{ |
/* |
* For all VS, GS, FS, and CS push constant buffers, they must be aligned |
* to 32 bytes, and their sizes are specified in 256-bit units. |
*/ |
const int state_align = 32; |
const int state_len = align(size, 32) / 4; |
uint32_t state_offset; |
char *buf; |
ILO_DEV_ASSERT(builder->dev, 6, 8); |
state_offset = ilo_builder_dynamic_pointer(builder, |
ILO_BUILDER_ITEM_BLOB, state_align, state_len, (uint32_t **) &buf); |
/* zero out the unused range */ |
if (size < state_len * 4) |
memset(&buf[size], 0, state_len * 4 - size); |
if (pcb) |
*pcb = buf; |
return state_offset; |
} |
static inline uint32_t |
gen6_user_vertex_buffer(struct ilo_builder *builder, |
int size, const void *vertices) |
{ |
const int state_align = 8; |
const int state_len = size / 4; |
ILO_DEV_ASSERT(builder->dev, 6, 7.5); |
assert(size % 4 == 0); |
return ilo_builder_dynamic_write(builder, ILO_BUILDER_ITEM_BLOB, |
state_align, state_len, vertices); |
} |
#endif /* ILO_BUILDER_3D_TOP_H */ |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_builder_blt.h |
---|
0,0 → 1,322 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2014 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_BUILDER_BLT_H |
#define ILO_BUILDER_BLT_H |
#include "genhw/genhw.h" |
#include "intel_winsys.h" |
#include "ilo_core.h" |
#include "ilo_dev.h" |
#include "ilo_builder.h" |
/*
 * Selects the pixel size (when used as a value mask) or the channels that
 * get written (when used as a write mask) of a BLT command; see the
 * gen6_blt_translate_*() helpers for the exact mapping.
 */
enum gen6_blt_mask {
   GEN6_BLT_MASK_8     = 0,  /* 1 byte per pixel */
   GEN6_BLT_MASK_16    = 1,  /* 2 bytes per pixel */
   GEN6_BLT_MASK_32    = 2,  /* 4 bytes per pixel; writes RGB and A */
   GEN6_BLT_MASK_32_LO = 3,  /* 4 bytes per pixel; writes RGB only */
   GEN6_BLT_MASK_32_HI = 4,  /* 4 bytes per pixel; writes A only */
};
/* A linear blit operand, as used by COLOR_BLT and SRC_COPY_BLT. */
struct gen6_blt_bo {
   /* buffer object the relocation points at */
   struct intel_bo *bo;

   /* offset into bo passed to the relocation */
   uint32_t offset;

   /* value for the 16-bit pitch field of the command */
   int16_t pitch;
};
struct gen6_blt_xy_bo { |
struct intel_bo *bo; |
uint32_t offset; |
int16_t pitch; |
enum gen_surface_tiling tiling; |
int16_t x, y; |
}; |
/* |
* From the Sandy Bridge PRM, volume 1 part 5, page 7: |
* |
* "The BLT engine is capable of transferring very large quantities of |
* graphics data. Any graphics data read from and written to the |
* destination is permitted to represent a number of pixels that occupies |
* up to 65,536 scan lines and up to 32,768 bytes per scan line at the |
* destination. The maximum number of pixels that may be represented per |
* scan line's worth of graphics data depends on the color depth." |
*/ |
/* BLT engine limits: 32 KiB per scan line and 64 Ki scan lines. */
static const int gen6_blt_max_bytes_per_scanline = 32 * 1024;
static const int gen6_blt_max_scanlines = 64 * 1024;
static inline uint32_t |
gen6_blt_translate_value_mask(enum gen6_blt_mask value_mask) |
{ |
switch (value_mask) { |
case GEN6_BLT_MASK_8: return GEN6_BLITTER_BR13_FORMAT_8; |
case GEN6_BLT_MASK_16: return GEN6_BLITTER_BR13_FORMAT_565; |
default: return GEN6_BLITTER_BR13_FORMAT_8888; |
} |
} |
static inline uint32_t |
gen6_blt_translate_value_cpp(enum gen6_blt_mask value_mask) |
{ |
switch (value_mask) { |
case GEN6_BLT_MASK_8: return 1; |
case GEN6_BLT_MASK_16: return 2; |
default: return 4; |
} |
} |
static inline uint32_t |
gen6_blt_translate_write_mask(enum gen6_blt_mask write_mask) |
{ |
switch (write_mask) { |
case GEN6_BLT_MASK_32: return GEN6_BLITTER_BR00_WRITE_RGB | |
GEN6_BLITTER_BR00_WRITE_A; |
case GEN6_BLT_MASK_32_LO: return GEN6_BLITTER_BR00_WRITE_RGB; |
case GEN6_BLT_MASK_32_HI: return GEN6_BLITTER_BR00_WRITE_A; |
default: return 0; |
} |
} |
static inline void |
gen6_COLOR_BLT(struct ilo_builder *builder, |
const struct gen6_blt_bo *dst, uint32_t pattern, |
uint16_t width, uint16_t height, uint8_t rop, |
enum gen6_blt_mask value_mask, |
enum gen6_blt_mask write_mask) |
{ |
const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 6 : 5; |
const int cpp = gen6_blt_translate_value_cpp(value_mask); |
uint32_t *dw; |
unsigned pos; |
ILO_DEV_ASSERT(builder->dev, 6, 8); |
assert(width < gen6_blt_max_bytes_per_scanline); |
assert(height < gen6_blt_max_scanlines); |
/* offsets are naturally aligned and pitches are dword-aligned */ |
assert(dst->offset % cpp == 0 && dst->pitch % 4 == 0); |
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_BLITTER_CMD(COLOR_BLT) | |
gen6_blt_translate_write_mask(write_mask) | |
(cmd_len - 2); |
dw[1] = rop << GEN6_BLITTER_BR13_ROP__SHIFT | |
gen6_blt_translate_value_mask(value_mask) | |
dst->pitch; |
dw[2] = height << 16 | width; |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { |
dw[5] = pattern; |
ilo_builder_batch_reloc64(builder, pos + 3, |
dst->bo, dst->offset, INTEL_RELOC_WRITE); |
} else { |
dw[4] = pattern; |
ilo_builder_batch_reloc(builder, pos + 3, |
dst->bo, dst->offset, INTEL_RELOC_WRITE); |
} |
} |
static inline void |
gen6_XY_COLOR_BLT(struct ilo_builder *builder, |
const struct gen6_blt_xy_bo *dst, uint32_t pattern, |
uint16_t width, uint16_t height, uint8_t rop, |
enum gen6_blt_mask value_mask, |
enum gen6_blt_mask write_mask) |
{ |
const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 7 : 6; |
const int cpp = gen6_blt_translate_value_cpp(value_mask); |
int dst_align = 4, dst_pitch_shift = 0; |
uint32_t *dw; |
unsigned pos; |
ILO_DEV_ASSERT(builder->dev, 6, 8); |
assert(width * cpp < gen6_blt_max_bytes_per_scanline); |
assert(height < gen6_blt_max_scanlines); |
/* INT16_MAX */ |
assert(dst->x + width <= 32767 && dst->y + height <= 32767); |
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_BLITTER_CMD(XY_COLOR_BLT) | |
gen6_blt_translate_write_mask(write_mask) | |
(cmd_len - 2); |
if (dst->tiling != GEN6_TILING_NONE) { |
dw[0] |= GEN6_BLITTER_BR00_DST_TILED; |
assert(dst->tiling == GEN6_TILING_X || dst->tiling == GEN6_TILING_Y); |
dst_align = (dst->tiling == GEN6_TILING_Y) ? 128 : 512; |
/* in dwords when tiled */ |
dst_pitch_shift = 2; |
} |
assert(dst->offset % dst_align == 0 && dst->pitch % dst_align == 0); |
dw[1] = rop << GEN6_BLITTER_BR13_ROP__SHIFT | |
gen6_blt_translate_value_mask(value_mask) | |
dst->pitch >> dst_pitch_shift; |
dw[2] = dst->y << 16 | dst->x; |
dw[3] = (dst->y + height) << 16 | (dst->x + width); |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { |
dw[6] = pattern; |
ilo_builder_batch_reloc64(builder, pos + 4, |
dst->bo, dst->offset, INTEL_RELOC_WRITE); |
} else { |
dw[5] = pattern; |
ilo_builder_batch_reloc(builder, pos + 4, |
dst->bo, dst->offset, INTEL_RELOC_WRITE); |
} |
} |
static inline void |
gen6_SRC_COPY_BLT(struct ilo_builder *builder, |
const struct gen6_blt_bo *dst, |
const struct gen6_blt_bo *src, |
uint16_t width, uint16_t height, uint8_t rop, |
enum gen6_blt_mask value_mask, |
enum gen6_blt_mask write_mask) |
{ |
const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 8 : 6; |
const int cpp = gen6_blt_translate_value_cpp(value_mask); |
uint32_t *dw; |
unsigned pos; |
ILO_DEV_ASSERT(builder->dev, 6, 8); |
assert(width < gen6_blt_max_bytes_per_scanline); |
assert(height < gen6_blt_max_scanlines); |
/* offsets are naturally aligned and pitches are dword-aligned */ |
assert(dst->offset % cpp == 0 && dst->pitch % 4 == 0); |
assert(src->offset % cpp == 0 && src->pitch % 4 == 0); |
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_BLITTER_CMD(SRC_COPY_BLT) | |
gen6_blt_translate_write_mask(write_mask) | |
(cmd_len - 2); |
dw[1] = rop << GEN6_BLITTER_BR13_ROP__SHIFT | |
gen6_blt_translate_value_mask(value_mask) | |
dst->pitch; |
dw[2] = height << 16 | width; |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { |
dw[5] = src->pitch; |
ilo_builder_batch_reloc64(builder, pos + 3, |
dst->bo, dst->offset, INTEL_RELOC_WRITE); |
ilo_builder_batch_reloc64(builder, pos + 6, src->bo, src->offset, 0); |
} else { |
dw[4] = src->pitch; |
ilo_builder_batch_reloc(builder, pos + 3, |
dst->bo, dst->offset, INTEL_RELOC_WRITE); |
ilo_builder_batch_reloc(builder, pos + 5, src->bo, src->offset, 0); |
} |
} |
static inline void |
gen6_XY_SRC_COPY_BLT(struct ilo_builder *builder, |
const struct gen6_blt_xy_bo *dst, |
const struct gen6_blt_xy_bo *src, |
uint16_t width, uint16_t height, uint8_t rop, |
enum gen6_blt_mask value_mask, |
enum gen6_blt_mask write_mask) |
{ |
const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 10 : 8; |
const int cpp = gen6_blt_translate_value_cpp(value_mask); |
int dst_align = 4, dst_pitch_shift = 0; |
int src_align = 4, src_pitch_shift = 0; |
uint32_t *dw; |
unsigned pos; |
ILO_DEV_ASSERT(builder->dev, 6, 8); |
assert(width * cpp < gen6_blt_max_bytes_per_scanline); |
assert(height < gen6_blt_max_scanlines); |
/* INT16_MAX */ |
assert(dst->x + width <= 32767 && dst->y + height <= 32767); |
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_BLITTER_CMD(XY_SRC_COPY_BLT) | |
gen6_blt_translate_write_mask(write_mask) | |
(cmd_len - 2); |
if (dst->tiling != GEN6_TILING_NONE) { |
dw[0] |= GEN6_BLITTER_BR00_DST_TILED; |
assert(dst->tiling == GEN6_TILING_X || dst->tiling == GEN6_TILING_Y); |
dst_align = (dst->tiling == GEN6_TILING_Y) ? 128 : 512; |
/* in dwords when tiled */ |
dst_pitch_shift = 2; |
} |
if (src->tiling != GEN6_TILING_NONE) { |
dw[0] |= GEN6_BLITTER_BR00_SRC_TILED; |
assert(src->tiling == GEN6_TILING_X || src->tiling == GEN6_TILING_Y); |
src_align = (src->tiling == GEN6_TILING_Y) ? 128 : 512; |
/* in dwords when tiled */ |
src_pitch_shift = 2; |
} |
assert(dst->offset % dst_align == 0 && dst->pitch % dst_align == 0); |
assert(src->offset % src_align == 0 && src->pitch % src_align == 0); |
dw[1] = rop << GEN6_BLITTER_BR13_ROP__SHIFT | |
gen6_blt_translate_value_mask(value_mask) | |
dst->pitch >> dst_pitch_shift; |
dw[2] = dst->y << 16 | dst->x; |
dw[3] = (dst->y + height) << 16 | (dst->x + width); |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { |
dw[6] = src->y << 16 | src->x; |
dw[7] = src->pitch >> src_pitch_shift; |
ilo_builder_batch_reloc64(builder, pos + 4, |
dst->bo, dst->offset, INTEL_RELOC_WRITE); |
ilo_builder_batch_reloc64(builder, pos + 8, src->bo, src->offset, 0); |
} else { |
dw[5] = src->y << 16 | src->x; |
dw[6] = src->pitch >> src_pitch_shift; |
ilo_builder_batch_reloc(builder, pos + 4, |
dst->bo, dst->offset, INTEL_RELOC_WRITE); |
ilo_builder_batch_reloc(builder, pos + 7, src->bo, src->offset, 0); |
} |
} |
#endif /* ILO_BUILDER_BLT_H */ |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_builder_decode.c |
---|
0,0 → 1,685 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2014 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include <stdio.h> |
#include <stdarg.h> |
#include "genhw/genhw.h" |
#include "../shader/toy_compiler.h" |
#include "intel_winsys.h" |
#include "ilo_builder.h" |
static const uint32_t * |
writer_pointer(const struct ilo_builder *builder, |
enum ilo_builder_writer_type which, |
unsigned offset) |
{ |
const struct ilo_builder_writer *writer = &builder->writers[which]; |
return (const uint32_t *) ((const char *) writer->ptr + offset); |
} |
/**
 * Print one dword of a writer item and return its value.
 *
 * The output is the byte address (\p offset plus 4 * \p dw_index), the raw
 * dword, and a printf-style description truncated to 15 characters.  When the
 * formatted description ends with a newline, the newline is moved after the
 * trailing ": " so that the line is terminated; otherwise the caller is
 * expected to finish the line.
 *
 * \param offset    byte offset of the item inside the writer
 * \param dw_index  index of the dword inside the item
 * \param format    printf-style format of the description
 */
static uint32_t _util_printf_format(5, 6)
writer_dw(const struct ilo_builder *builder,
          enum ilo_builder_writer_type which,
          unsigned offset, unsigned dw_index,
          const char *format, ...)
{
   const uint32_t *dw = writer_pointer(builder, which, offset);
   va_list ap;
   char desc[16];
   int len;

   ilo_printf("0x%08x: 0x%08x: ",
         offset + (dw_index << 2), dw[dw_index]);

   va_start(ap, format);
   len = vsnprintf(desc, sizeof(desc), format, ap);
   va_end(ap);

   if (len < 0) {
      /* formatting failed; desc may be uninitialized, print nothing */
      desc[0] = '\0';
      len = 0;
   } else if ((size_t) len >= sizeof(desc)) {
      /* truncated; clamp to what actually fits */
      len = sizeof(desc) - 1;
      desc[len] = '\0';
   }

   /* len > 0 keeps an empty description from reading desc[-1] */
   if (len > 0 && desc[len - 1] == '\n') {
      desc[len - 1] = '\0';
      ilo_printf("%8s: \n", desc);
   } else {
      ilo_printf("%8s: ", desc);
   }

   return dw[dw_index];
}
static void |
writer_decode_blob(const struct ilo_builder *builder, |
enum ilo_builder_writer_type which, |
const struct ilo_builder_item *item) |
{ |
const unsigned state_size = sizeof(uint32_t); |
const unsigned count = item->size / state_size; |
unsigned offset = item->offset; |
unsigned i; |
for (i = 0; i < count; i += 4) { |
const uint32_t *dw = writer_pointer(builder, which, offset); |
writer_dw(builder, which, offset, 0, "BLOB%d", i / 4); |
switch (count - i) { |
case 1: |
ilo_printf("(%10.4f, %10c, %10c, %10c) " |
"(0x%08x, %10c, %10c, %10c)\n", |
uif(dw[0]), 'X', 'X', 'X', |
dw[0], 'X', 'X', 'X'); |
break; |
case 2: |
ilo_printf("(%10.4f, %10.4f, %10c, %10c) " |
"(0x%08x, 0x%08x, %10c, %10c)\n", |
uif(dw[0]), uif(dw[1]), 'X', 'X', |
dw[0], dw[1], 'X', 'X'); |
break; |
case 3: |
ilo_printf("(%10.4f, %10.4f, %10.4f, %10c) " |
"(0x%08x, 0x%08x, 0x%08x, %10c)\n", |
uif(dw[0]), uif(dw[1]), uif(dw[2]), 'X', |
dw[0], dw[1], dw[2], 'X'); |
break; |
default: |
ilo_printf("(%10.4f, %10.4f, %10.4f, %10.4f) " |
"(0x%08x, 0x%08x, 0x%08x, 0x%08x)\n", |
uif(dw[0]), uif(dw[1]), uif(dw[2]), uif(dw[3]), |
dw[0], dw[1], dw[2], dw[3]); |
break; |
} |
offset += state_size * 4; |
} |
} |
static void |
writer_decode_clip_viewport(const struct ilo_builder *builder, |
enum ilo_builder_writer_type which, |
const struct ilo_builder_item *item) |
{ |
const unsigned state_size = sizeof(uint32_t) * 4; |
const unsigned count = item->size / state_size; |
unsigned offset = item->offset; |
unsigned i; |
for (i = 0; i < count; i++) { |
uint32_t dw; |
dw = writer_dw(builder, which, offset, 0, "CLIP VP%d", i); |
ilo_printf("xmin = %f\n", uif(dw)); |
dw = writer_dw(builder, which, offset, 1, "CLIP VP%d", i); |
ilo_printf("xmax = %f\n", uif(dw)); |
dw = writer_dw(builder, which, offset, 2, "CLIP VP%d", i); |
ilo_printf("ymin = %f\n", uif(dw)); |
dw = writer_dw(builder, which, offset, 3, "CLIP VP%d", i); |
ilo_printf("ymax = %f\n", uif(dw)); |
offset += state_size; |
} |
} |
static void |
writer_decode_sf_clip_viewport_gen7(const struct ilo_builder *builder, |
enum ilo_builder_writer_type which, |
const struct ilo_builder_item *item) |
{ |
const unsigned state_size = sizeof(uint32_t) * 16; |
const unsigned count = item->size / state_size; |
unsigned offset = item->offset; |
unsigned i; |
for (i = 0; i < count; i++) { |
uint32_t dw; |
dw = writer_dw(builder, which, offset, 0, "SF_CLIP VP%d", i); |
ilo_printf("m00 = %f\n", uif(dw)); |
dw = writer_dw(builder, which, offset, 1, "SF_CLIP VP%d", i); |
ilo_printf("m11 = %f\n", uif(dw)); |
dw = writer_dw(builder, which, offset, 2, "SF_CLIP VP%d", i); |
ilo_printf("m22 = %f\n", uif(dw)); |
dw = writer_dw(builder, which, offset, 3, "SF_CLIP VP%d", i); |
ilo_printf("m30 = %f\n", uif(dw)); |
dw = writer_dw(builder, which, offset, 4, "SF_CLIP VP%d", i); |
ilo_printf("m31 = %f\n", uif(dw)); |
dw = writer_dw(builder, which, offset, 5, "SF_CLIP VP%d", i); |
ilo_printf("m32 = %f\n", uif(dw)); |
dw = writer_dw(builder, which, offset, 8, "SF_CLIP VP%d", i); |
ilo_printf("guardband xmin = %f\n", uif(dw)); |
dw = writer_dw(builder, which, offset, 9, "SF_CLIP VP%d", i); |
ilo_printf("guardband xmax = %f\n", uif(dw)); |
dw = writer_dw(builder, which, offset, 10, "SF_CLIP VP%d", i); |
ilo_printf("guardband ymin = %f\n", uif(dw)); |
dw = writer_dw(builder, which, offset, 11, "SF_CLIP VP%d", i); |
ilo_printf("guardband ymax = %f\n", uif(dw)); |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { |
dw = writer_dw(builder, which, offset, 12, "SF_CLIP VP%d", i); |
ilo_printf("extent xmin = %f\n", uif(dw)); |
dw = writer_dw(builder, which, offset, 13, "SF_CLIP VP%d", i); |
ilo_printf("extent xmax = %f\n", uif(dw)); |
dw = writer_dw(builder, which, offset, 14, "SF_CLIP VP%d", i); |
ilo_printf("extent ymin = %f\n", uif(dw)); |
dw = writer_dw(builder, which, offset, 15, "SF_CLIP VP%d", i); |
ilo_printf("extent ymax = %f\n", uif(dw)); |
} |
offset += state_size; |
} |
} |
static void |
writer_decode_sf_viewport_gen6(const struct ilo_builder *builder, |
enum ilo_builder_writer_type which, |
const struct ilo_builder_item *item) |
{ |
const unsigned state_size = sizeof(uint32_t) * 8; |
const unsigned count = item->size / state_size; |
unsigned offset = item->offset; |
unsigned i; |
for (i = 0; i < count; i++) { |
uint32_t dw; |
dw = writer_dw(builder, which, offset, 0, "SF VP%d", i); |
ilo_printf("m00 = %f\n", uif(dw)); |
dw = writer_dw(builder, which, offset, 1, "SF VP%d", i); |
ilo_printf("m11 = %f\n", uif(dw)); |
dw = writer_dw(builder, which, offset, 2, "SF VP%d", i); |
ilo_printf("m22 = %f\n", uif(dw)); |
dw = writer_dw(builder, which, offset, 3, "SF VP%d", i); |
ilo_printf("m30 = %f\n", uif(dw)); |
dw = writer_dw(builder, which, offset, 4, "SF VP%d", i); |
ilo_printf("m31 = %f\n", uif(dw)); |
dw = writer_dw(builder, which, offset, 5, "SF VP%d", i); |
ilo_printf("m32 = %f\n", uif(dw)); |
offset += state_size; |
} |
} |
static void |
writer_decode_sf_viewport(const struct ilo_builder *builder, |
enum ilo_builder_writer_type which, |
const struct ilo_builder_item *item) |
{ |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) |
writer_decode_sf_clip_viewport_gen7(builder, which, item); |
else |
writer_decode_sf_viewport_gen6(builder, which, item); |
} |
static void |
writer_decode_scissor_rect(const struct ilo_builder *builder, |
enum ilo_builder_writer_type which, |
const struct ilo_builder_item *item) |
{ |
const unsigned state_size = sizeof(uint32_t) * 2; |
const unsigned count = item->size / state_size; |
unsigned offset = item->offset; |
unsigned i; |
for (i = 0; i < count; i++) { |
uint32_t dw; |
dw = writer_dw(builder, which, offset, 0, "SCISSOR%d", i); |
ilo_printf("xmin %d, ymin %d\n", |
GEN_EXTRACT(dw, GEN6_SCISSOR_DW0_MIN_X), |
GEN_EXTRACT(dw, GEN6_SCISSOR_DW0_MIN_Y)); |
dw = writer_dw(builder, which, offset, 1, "SCISSOR%d", i); |
ilo_printf("xmax %d, ymax %d\n", |
GEN_EXTRACT(dw, GEN6_SCISSOR_DW1_MAX_X), |
GEN_EXTRACT(dw, GEN6_SCISSOR_DW1_MAX_Y)); |
offset += state_size; |
} |
} |
static void |
writer_decode_cc_viewport(const struct ilo_builder *builder, |
enum ilo_builder_writer_type which, |
const struct ilo_builder_item *item) |
{ |
const unsigned state_size = sizeof(uint32_t) * 2; |
const unsigned count = item->size / state_size; |
unsigned offset = item->offset; |
unsigned i; |
for (i = 0; i < count; i++) { |
uint32_t dw; |
dw = writer_dw(builder, which, offset, 0, "CC VP%d", i); |
ilo_printf("min_depth = %f\n", uif(dw)); |
dw = writer_dw(builder, which, offset, 1, "CC VP%d", i); |
ilo_printf("max_depth = %f\n", uif(dw)); |
offset += state_size; |
} |
} |
static void |
writer_decode_color_calc(const struct ilo_builder *builder, |
enum ilo_builder_writer_type which, |
const struct ilo_builder_item *item) |
{ |
uint32_t dw; |
dw = writer_dw(builder, which, item->offset, 0, "CC"); |
ilo_printf("alpha test format %s, round disable %d, " |
"stencil ref %d, bf stencil ref %d\n", |
GEN_EXTRACT(dw, GEN6_CC_DW0_ALPHATEST) ? "FLOAT32" : "UNORM8", |
(bool) (dw & GEN6_CC_DW0_ROUND_DISABLE_DISABLE), |
GEN_EXTRACT(dw, GEN6_CC_DW0_STENCIL0_REF), |
GEN_EXTRACT(dw, GEN6_CC_DW0_STENCIL1_REF)); |
writer_dw(builder, which, item->offset, 1, "CC\n"); |
dw = writer_dw(builder, which, item->offset, 2, "CC"); |
ilo_printf("constant red %f\n", uif(dw)); |
dw = writer_dw(builder, which, item->offset, 3, "CC"); |
ilo_printf("constant green %f\n", uif(dw)); |
dw = writer_dw(builder, which, item->offset, 4, "CC"); |
ilo_printf("constant blue %f\n", uif(dw)); |
dw = writer_dw(builder, which, item->offset, 5, "CC"); |
ilo_printf("constant alpha %f\n", uif(dw)); |
} |
static void |
writer_decode_depth_stencil(const struct ilo_builder *builder, |
enum ilo_builder_writer_type which, |
const struct ilo_builder_item *item) |
{ |
uint32_t dw; |
dw = writer_dw(builder, which, item->offset, 0, "D_S"); |
ilo_printf("stencil %sable, func %d, write %sable\n", |
(dw & GEN6_ZS_DW0_STENCIL_TEST_ENABLE) ? "en" : "dis", |
GEN_EXTRACT(dw, GEN6_ZS_DW0_STENCIL0_FUNC), |
(dw & GEN6_ZS_DW0_STENCIL_WRITE_ENABLE) ? "en" : "dis"); |
dw = writer_dw(builder, which, item->offset, 1, "D_S"); |
ilo_printf("stencil test mask 0x%x, write mask 0x%x\n", |
GEN_EXTRACT(dw, GEN6_ZS_DW1_STENCIL0_VALUEMASK), |
GEN_EXTRACT(dw, GEN6_ZS_DW1_STENCIL0_WRITEMASK)); |
dw = writer_dw(builder, which, item->offset, 2, "D_S"); |
ilo_printf("depth test %sable, func %d, write %sable\n", |
(dw & GEN6_ZS_DW2_DEPTH_TEST_ENABLE) ? "en" : "dis", |
GEN_EXTRACT(dw, GEN6_ZS_DW2_DEPTH_FUNC), |
(dw & GEN6_ZS_DW2_DEPTH_WRITE_ENABLE) ? "en" : "dis"); |
} |
static void |
writer_decode_blend(const struct ilo_builder *builder, |
enum ilo_builder_writer_type which, |
const struct ilo_builder_item *item) |
{ |
const unsigned state_size = sizeof(uint32_t) * 2; |
const unsigned count = item->size / state_size; |
unsigned offset = item->offset; |
unsigned i; |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { |
writer_dw(builder, which, offset, 0, "BLEND\n"); |
offset += 4; |
} |
for (i = 0; i < count; i++) { |
writer_dw(builder, which, offset, 0, "BLEND%d\n", i); |
writer_dw(builder, which, offset, 1, "BLEND%d\n", i); |
offset += state_size; |
} |
} |
static void |
writer_decode_sampler(const struct ilo_builder *builder, |
enum ilo_builder_writer_type which, |
const struct ilo_builder_item *item) |
{ |
const unsigned state_size = sizeof(uint32_t) * 4; |
const unsigned count = item->size / state_size; |
unsigned offset = item->offset; |
unsigned i; |
for (i = 0; i < count; i++) { |
writer_dw(builder, which, offset, 0, "WM SAMP%d", i); |
ilo_printf("filtering\n"); |
writer_dw(builder, which, offset, 1, "WM SAMP%d", i); |
ilo_printf("wrapping, lod\n"); |
writer_dw(builder, which, offset, 2, "WM SAMP%d", i); |
ilo_printf("default color pointer\n"); |
writer_dw(builder, which, offset, 3, "WM SAMP%d", i); |
ilo_printf("chroma key, aniso\n"); |
offset += state_size; |
} |
} |
static void |
writer_decode_interface_descriptor(const struct ilo_builder *builder, |
enum ilo_builder_writer_type which, |
const struct ilo_builder_item *item) |
{ |
const unsigned state_size = sizeof(uint32_t) * 8; |
const unsigned count = item->size / state_size; |
unsigned offset = item->offset; |
unsigned i; |
for (i = 0; i < count; i++) { |
writer_dw(builder, which, offset, 0, "IDRT[%d]", i); |
ilo_printf("kernel\n"); |
writer_dw(builder, which, offset, 1, "IDRT[%d]", i); |
ilo_printf("spf, fp mode\n"); |
writer_dw(builder, which, offset, 2, "IDRT[%d]", i); |
ilo_printf("sampler\n"); |
writer_dw(builder, which, offset, 3, "IDRT[%d]", i); |
ilo_printf("binding table\n"); |
writer_dw(builder, which, offset, 4, "IDRT[%d]", i); |
ilo_printf("curbe read len\n"); |
writer_dw(builder, which, offset, 5, "IDRT[%d]", i); |
ilo_printf("rounding mode, slm size\n"); |
writer_dw(builder, which, offset, 6, "IDRT[%d]", i); |
ilo_printf("cross-thread curbe read len\n"); |
writer_dw(builder, which, offset, 7, "IDRT[%d]", i); |
ilo_printf("mbz\n"); |
offset += state_size; |
} |
} |
static void |
writer_decode_surface_gen7(const struct ilo_builder *builder, |
enum ilo_builder_writer_type which, |
const struct ilo_builder_item *item) |
{ |
uint32_t dw; |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { |
dw = writer_dw(builder, which, item->offset, 0, "SURF"); |
ilo_printf("type 0x%x, format 0x%x, tiling %d, %s array\n", |
GEN_EXTRACT(dw, GEN7_SURFACE_DW0_TYPE), |
GEN_EXTRACT(dw, GEN7_SURFACE_DW0_FORMAT), |
GEN_EXTRACT(dw, GEN8_SURFACE_DW0_TILING), |
(dw & GEN7_SURFACE_DW0_IS_ARRAY) ? "is" : "not"); |
writer_dw(builder, which, item->offset, 1, "SURF"); |
ilo_printf("qpitch\n"); |
} else { |
dw = writer_dw(builder, which, item->offset, 0, "SURF"); |
ilo_printf("type 0x%x, format 0x%x, tiling %d, %s array\n", |
GEN_EXTRACT(dw, GEN7_SURFACE_DW0_TYPE), |
GEN_EXTRACT(dw, GEN7_SURFACE_DW0_FORMAT), |
GEN_EXTRACT(dw, GEN7_SURFACE_DW0_TILING), |
(dw & GEN7_SURFACE_DW0_IS_ARRAY) ? "is" : "not"); |
writer_dw(builder, which, item->offset, 1, "SURF"); |
ilo_printf("offset\n"); |
} |
dw = writer_dw(builder, which, item->offset, 2, "SURF"); |
ilo_printf("%dx%d size\n", |
GEN_EXTRACT(dw, GEN7_SURFACE_DW2_WIDTH), |
GEN_EXTRACT(dw, GEN7_SURFACE_DW2_HEIGHT)); |
dw = writer_dw(builder, which, item->offset, 3, "SURF"); |
ilo_printf("depth %d, pitch %d\n", |
GEN_EXTRACT(dw, GEN7_SURFACE_DW3_DEPTH), |
GEN_EXTRACT(dw, GEN7_SURFACE_DW3_PITCH)); |
dw = writer_dw(builder, which, item->offset, 4, "SURF"); |
ilo_printf("min array element %d, array extent %d\n", |
GEN_EXTRACT(dw, GEN7_SURFACE_DW4_MIN_ARRAY_ELEMENT), |
GEN_EXTRACT(dw, GEN7_SURFACE_DW4_RT_VIEW_EXTENT)); |
dw = writer_dw(builder, which, item->offset, 5, "SURF"); |
ilo_printf("mip base %d, mips %d, x,y offset: %d,%d\n", |
GEN_EXTRACT(dw, GEN7_SURFACE_DW5_MIN_LOD), |
GEN_EXTRACT(dw, GEN7_SURFACE_DW5_MIP_COUNT_LOD), |
GEN_EXTRACT(dw, GEN7_SURFACE_DW5_X_OFFSET), |
GEN_EXTRACT(dw, GEN7_SURFACE_DW5_Y_OFFSET)); |
writer_dw(builder, which, item->offset, 6, "SURF\n"); |
writer_dw(builder, which, item->offset, 7, "SURF\n"); |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { |
writer_dw(builder, which, item->offset, 8, "SURF\n"); |
writer_dw(builder, which, item->offset, 9, "SURF\n"); |
writer_dw(builder, which, item->offset, 10, "SURF\n"); |
writer_dw(builder, which, item->offset, 11, "SURF\n"); |
writer_dw(builder, which, item->offset, 12, "SURF\n"); |
} |
} |
static void |
writer_decode_surface_gen6(const struct ilo_builder *builder, |
enum ilo_builder_writer_type which, |
const struct ilo_builder_item *item) |
{ |
uint32_t dw; |
dw = writer_dw(builder, which, item->offset, 0, "SURF"); |
ilo_printf("type 0x%x, format 0x%x\n", |
GEN_EXTRACT(dw, GEN6_SURFACE_DW0_TYPE), |
GEN_EXTRACT(dw, GEN6_SURFACE_DW0_FORMAT)); |
writer_dw(builder, which, item->offset, 1, "SURF"); |
ilo_printf("offset\n"); |
dw = writer_dw(builder, which, item->offset, 2, "SURF"); |
ilo_printf("%dx%d size, %d mips\n", |
GEN_EXTRACT(dw, GEN6_SURFACE_DW2_WIDTH), |
GEN_EXTRACT(dw, GEN6_SURFACE_DW2_HEIGHT), |
GEN_EXTRACT(dw, GEN6_SURFACE_DW2_MIP_COUNT_LOD)); |
dw = writer_dw(builder, which, item->offset, 3, "SURF"); |
ilo_printf("pitch %d, tiling %d\n", |
GEN_EXTRACT(dw, GEN6_SURFACE_DW3_PITCH), |
GEN_EXTRACT(dw, GEN6_SURFACE_DW3_TILING)); |
dw = writer_dw(builder, which, item->offset, 4, "SURF"); |
ilo_printf("mip base %d\n", |
GEN_EXTRACT(dw, GEN6_SURFACE_DW4_MIN_LOD)); |
dw = writer_dw(builder, which, item->offset, 5, "SURF"); |
ilo_printf("x,y offset: %d,%d\n", |
GEN_EXTRACT(dw, GEN6_SURFACE_DW5_X_OFFSET), |
GEN_EXTRACT(dw, GEN6_SURFACE_DW5_Y_OFFSET)); |
} |
static void |
writer_decode_surface(const struct ilo_builder *builder, |
enum ilo_builder_writer_type which, |
const struct ilo_builder_item *item) |
{ |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) |
writer_decode_surface_gen7(builder, which, item); |
else |
writer_decode_surface_gen6(builder, which, item); |
} |
static void |
writer_decode_binding_table(const struct ilo_builder *builder, |
enum ilo_builder_writer_type which, |
const struct ilo_builder_item *item) |
{ |
const unsigned state_size = sizeof(uint32_t) * 1; |
const unsigned count = item->size / state_size; |
unsigned offset = item->offset; |
unsigned i; |
for (i = 0; i < count; i++) { |
writer_dw(builder, which, offset, 0, "BIND"); |
ilo_printf("BINDING_TABLE_STATE[%d]\n", i); |
offset += state_size; |
} |
} |
static void |
writer_decode_kernel(const struct ilo_builder *builder, |
enum ilo_builder_writer_type which, |
const struct ilo_builder_item *item) |
{ |
const void *kernel; |
ilo_printf("0x%08x:\n", item->offset); |
kernel = (const void *) writer_pointer(builder, which, item->offset); |
toy_compiler_disassemble(builder->dev, kernel, item->size, true); |
} |
static const struct { |
void (*func)(const struct ilo_builder *builder, |
enum ilo_builder_writer_type which, |
const struct ilo_builder_item *item); |
} writer_decode_table[ILO_BUILDER_ITEM_COUNT] = { |
[ILO_BUILDER_ITEM_BLOB] = { writer_decode_blob }, |
[ILO_BUILDER_ITEM_CLIP_VIEWPORT] = { writer_decode_clip_viewport }, |
[ILO_BUILDER_ITEM_SF_VIEWPORT] = { writer_decode_sf_viewport }, |
[ILO_BUILDER_ITEM_SCISSOR_RECT] = { writer_decode_scissor_rect }, |
[ILO_BUILDER_ITEM_CC_VIEWPORT] = { writer_decode_cc_viewport }, |
[ILO_BUILDER_ITEM_COLOR_CALC] = { writer_decode_color_calc }, |
[ILO_BUILDER_ITEM_DEPTH_STENCIL] = { writer_decode_depth_stencil }, |
[ILO_BUILDER_ITEM_BLEND] = { writer_decode_blend }, |
[ILO_BUILDER_ITEM_SAMPLER] = { writer_decode_sampler }, |
[ILO_BUILDER_ITEM_INTERFACE_DESCRIPTOR] = { writer_decode_interface_descriptor }, |
[ILO_BUILDER_ITEM_SURFACE] = { writer_decode_surface }, |
[ILO_BUILDER_ITEM_BINDING_TABLE] = { writer_decode_binding_table }, |
[ILO_BUILDER_ITEM_KERNEL] = { writer_decode_kernel }, |
}; |
static void |
ilo_builder_writer_decode_items(struct ilo_builder *builder, |
enum ilo_builder_writer_type which) |
{ |
struct ilo_builder_writer *writer = &builder->writers[which]; |
int i; |
if (!writer->item_used) |
return; |
writer->ptr = intel_bo_map(writer->bo, false); |
if (!writer->ptr) |
return; |
for (i = 0; i < writer->item_used; i++) { |
const struct ilo_builder_item *item = &writer->items[i]; |
writer_decode_table[item->type].func(builder, which, item); |
} |
intel_bo_unmap(writer->bo); |
writer->ptr = NULL; |
} |
static void |
ilo_builder_writer_decode(struct ilo_builder *builder, |
enum ilo_builder_writer_type which) |
{ |
struct ilo_builder_writer *writer = &builder->writers[which]; |
assert(writer->bo && !writer->ptr); |
switch (which) { |
case ILO_BUILDER_WRITER_BATCH: |
ilo_printf("decoding batch buffer: %d bytes\n", writer->used); |
if (writer->used) |
intel_winsys_decode_bo(builder->winsys, writer->bo, writer->used); |
ilo_printf("decoding dynamic/surface buffer: %d states\n", |
writer->item_used); |
ilo_builder_writer_decode_items(builder, which); |
break; |
case ILO_BUILDER_WRITER_INSTRUCTION: |
if (true) { |
ilo_printf("skipping instruction buffer: %d kernels\n", |
writer->item_used); |
} else { |
ilo_printf("decoding instruction buffer: %d kernels\n", |
writer->item_used); |
ilo_builder_writer_decode_items(builder, which); |
} |
break; |
default: |
break; |
} |
} |
/** |
* Decode the builder according to the recorded items. This can be called |
* only after a successful ilo_builder_end(). |
*/ |
void |
ilo_builder_decode(struct ilo_builder *builder) |
{ |
int i; |
assert(!builder->unrecoverable_error); |
for (i = 0; i < ILO_BUILDER_WRITER_COUNT; i++) |
ilo_builder_writer_decode(builder, i); |
} |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_builder_media.h |
---|
0,0 → 1,277 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2014 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_BUILDER_MEDIA_H |
#define ILO_BUILDER_MEDIA_H |
#include "genhw/genhw.h" |
#include "../ilo_shader.h" |
#include "intel_winsys.h" |
#include "ilo_core.h" |
#include "ilo_dev.h" |
#include "ilo_builder.h" |
/**
 * Inputs for one interface descriptor, consumed by
 * gen6_INTERFACE_DESCRIPTOR_DATA().
 */
struct gen6_idrt_data {
   /* compute shader; supplies the kernel offset and kernel parameters */
   const struct ilo_shader_state *cs;

   /* OR'ed into DW2 together with the sampler count */
   uint32_t sampler_offset;
   /* OR'ed into DW3 together with the binding table size */
   uint32_t binding_table_offset;

   /* divided by 32 to form the CURBE read length; must fit in 63 units */
   unsigned curbe_size;
   /* written to the DW5 thread group size field when SLM is used */
   unsigned thread_group_size;
};
static inline void |
gen6_MEDIA_VFE_STATE(struct ilo_builder *builder, |
unsigned curbe_alloc, bool use_slm) |
{ |
const uint8_t cmd_len = 8; |
const unsigned idrt_alloc = |
((ilo_dev_gen(builder->dev) >= ILO_GEN(7.5)) ? 64 : 32) * 32; |
int max_threads; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 7, 7.5); |
max_threads = builder->dev->thread_count; |
curbe_alloc = align(curbe_alloc, 32); |
assert(idrt_alloc + curbe_alloc <= builder->dev->urb_size / (use_slm + 1)); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(MEDIA, MEDIA_VFE_STATE) | (cmd_len - 2); |
dw[1] = 0; /* scratch */ |
dw[2] = (max_threads - 1) << GEN6_VFE_DW2_MAX_THREADS__SHIFT | |
0 << GEN6_VFE_DW2_URB_ENTRY_COUNT__SHIFT | |
GEN6_VFE_DW2_RESET_GATEWAY_TIMER | |
GEN6_VFE_DW2_BYPASS_GATEWAY_CONTROL; |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) |
dw[2] |= GEN7_VFE_DW2_GPGPU_MODE; |
dw[3] = 0; |
dw[4] = 0 << GEN6_VFE_DW4_URB_ENTRY_SIZE__SHIFT | |
(curbe_alloc / 32); |
dw[5] = 0; |
dw[6] = 0; |
dw[7] = 0; |
} |
static inline void |
gen6_MEDIA_CURBE_LOAD(struct ilo_builder *builder, |
uint32_t offset, unsigned size) |
{ |
const uint8_t cmd_len = 4; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 7, 7.5); |
assert(offset % 32 == 0 && size % 32 == 0); |
/* GPU hangs if size is zero */ |
assert(size); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(MEDIA, MEDIA_CURBE_LOAD) | (cmd_len - 2); |
dw[1] = 0; |
dw[2] = size; |
dw[3] = offset; |
} |
static inline void |
gen6_MEDIA_INTERFACE_DESCRIPTOR_LOAD(struct ilo_builder *builder, |
uint32_t offset, unsigned size) |
{ |
const uint8_t cmd_len = 4; |
const unsigned idrt_alloc = |
((ilo_dev_gen(builder->dev) >= ILO_GEN(7.5)) ? 64 : 32) * 32; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 7, 7.5); |
assert(offset % 32 == 0 && size % 32 == 0); |
assert(size && size <= idrt_alloc); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(MEDIA, MEDIA_INTERFACE_DESCRIPTOR_LOAD) | |
(cmd_len - 2); |
dw[1] = 0; |
dw[2] = size; |
dw[3] = offset; |
} |
static inline void |
gen6_MEDIA_STATE_FLUSH(struct ilo_builder *builder) |
{ |
const uint8_t cmd_len = 2; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 7, 7.5); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(MEDIA, MEDIA_STATE_FLUSH) | (cmd_len - 2); |
dw[1] = 0; |
} |
static inline void |
gen7_GPGPU_WALKER(struct ilo_builder *builder, |
const unsigned thread_group_offset[3], |
const unsigned thread_group_dim[3], |
unsigned thread_group_size, |
unsigned simd_size) |
{ |
const uint8_t cmd_len = 11; |
uint32_t right_execmask, bottom_execmask; |
unsigned thread_count; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 7, 7.5); |
assert(simd_size == 16 || simd_size == 8); |
thread_count = (thread_group_size + simd_size - 1) / simd_size; |
assert(thread_count <= 64); |
right_execmask = thread_group_size % simd_size; |
if (right_execmask) |
right_execmask = (1 << right_execmask) - 1; |
else |
right_execmask = (1 << simd_size) - 1; |
bottom_execmask = 0xffffffff; |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN7_RENDER_CMD(MEDIA, GPGPU_WALKER) | (cmd_len - 2); |
dw[1] = 0; /* always first IDRT */ |
dw[2] = (thread_count - 1) << GEN7_GPGPU_DW2_THREAD_MAX_X__SHIFT; |
if (simd_size == 16) |
dw[2] |= GEN7_GPGPU_DW2_SIMD_SIZE_SIMD16; |
else |
dw[2] |= GEN7_GPGPU_DW2_SIMD_SIZE_SIMD8; |
dw[3] = thread_group_offset[0]; |
dw[4] = thread_group_dim[0]; |
dw[5] = thread_group_offset[1]; |
dw[6] = thread_group_dim[1]; |
dw[7] = thread_group_offset[2]; |
dw[8] = thread_group_dim[2]; |
dw[9] = right_execmask; |
dw[10] = bottom_execmask; |
} |
static inline uint32_t |
gen6_INTERFACE_DESCRIPTOR_DATA(struct ilo_builder *builder, |
const struct gen6_idrt_data *data, |
int idrt_count) |
{ |
/* |
* From the Sandy Bridge PRM, volume 2 part 2, page 34: |
* |
* "(Interface Descriptor Total Length) This field must have the same |
* alignment as the Interface Descriptor Data Start Address. |
* |
* It must be DQWord (32-byte) aligned..." |
* |
* From the Sandy Bridge PRM, volume 2 part 2, page 35: |
* |
* "(Interface Descriptor Data Start Address) Specifies the 32-byte |
* aligned address of the Interface Descriptor data." |
*/ |
const int state_align = 32; |
const int state_len = (32 / 4) * idrt_count; |
uint32_t state_offset, *dw; |
int i; |
ILO_DEV_ASSERT(builder->dev, 7, 7.5); |
state_offset = ilo_builder_dynamic_pointer(builder, |
ILO_BUILDER_ITEM_INTERFACE_DESCRIPTOR, state_align, state_len, &dw); |
for (i = 0; i < idrt_count; i++) { |
const struct gen6_idrt_data *idrt = &data[i]; |
const struct ilo_shader_state *cs = idrt->cs; |
unsigned sampler_count, bt_size, slm_size; |
sampler_count = |
ilo_shader_get_kernel_param(cs, ILO_KERNEL_SAMPLER_COUNT); |
assert(sampler_count <= 16); |
sampler_count = (sampler_count + 3) / 4; |
bt_size = |
ilo_shader_get_kernel_param(cs, ILO_KERNEL_SURFACE_TOTAL_COUNT); |
if (bt_size > 31) |
bt_size = 31; |
slm_size = ilo_shader_get_kernel_param(cs, ILO_KERNEL_CS_LOCAL_SIZE); |
assert(idrt->curbe_size / 32 <= 63); |
dw[0] = ilo_shader_get_kernel_offset(idrt->cs); |
dw[1] = 0; |
dw[2] = idrt->sampler_offset | |
sampler_count << GEN6_IDRT_DW2_SAMPLER_COUNT__SHIFT; |
dw[3] = idrt->binding_table_offset | |
bt_size << GEN6_IDRT_DW3_BINDING_TABLE_SIZE__SHIFT; |
dw[4] = (idrt->curbe_size / 32) << GEN6_IDRT_DW4_CURBE_READ_LEN__SHIFT | |
0 << GEN6_IDRT_DW4_CURBE_READ_OFFSET__SHIFT; |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) { |
dw[5] = GEN7_IDRT_DW5_ROUNDING_MODE_RTNE; |
if (slm_size) { |
assert(slm_size <= 64 * 1024); |
slm_size = util_next_power_of_two((slm_size + 4095) / 4096); |
dw[5] |= GEN7_IDRT_DW5_BARRIER_ENABLE | |
slm_size << GEN7_IDRT_DW5_SLM_SIZE__SHIFT | |
idrt->thread_group_size << |
GEN7_IDRT_DW5_THREAD_GROUP_SIZE__SHIFT; |
} |
} else { |
dw[5] = 0; |
} |
dw[6] = 0; |
dw[7] = 0; |
dw += 8; |
} |
return state_offset; |
} |
#endif /* ILO_BUILDER_MEDIA_H */ |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_builder_mi.h |
---|
0,0 → 1,220 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2014 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_BUILDER_MI_H |
#define ILO_BUILDER_MI_H |
#include "genhw/genhw.h" |
#include "intel_winsys.h" |
#include "ilo_core.h" |
#include "ilo_dev.h" |
#include "ilo_builder.h" |
static inline void |
gen6_MI_STORE_DATA_IMM(struct ilo_builder *builder, |
struct intel_bo *bo, uint32_t bo_offset, |
uint64_t val) |
{ |
const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 6 : 5; |
uint32_t reloc_flags = INTEL_RELOC_WRITE; |
uint32_t *dw; |
unsigned pos; |
ILO_DEV_ASSERT(builder->dev, 6, 8); |
assert(bo_offset % 8 == 0); |
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_MI_CMD(MI_STORE_DATA_IMM) | (cmd_len - 2); |
/* must use GGTT on GEN6 as in PIPE_CONTROL */ |
if (ilo_dev_gen(builder->dev) == ILO_GEN(6)) { |
dw[0] |= GEN6_MI_STORE_DATA_IMM_DW0_USE_GGTT; |
reloc_flags |= INTEL_RELOC_GGTT; |
} |
dw[1] = 0; /* MBZ */ |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { |
dw[4] = (uint32_t) val; |
dw[5] = (uint32_t) (val >> 32); |
ilo_builder_batch_reloc64(builder, pos + 2, bo, bo_offset, reloc_flags); |
} else { |
dw[3] = (uint32_t) val; |
dw[4] = (uint32_t) (val >> 32); |
ilo_builder_batch_reloc(builder, pos + 2, bo, bo_offset, reloc_flags); |
} |
} |
static inline void |
gen6_MI_LOAD_REGISTER_IMM(struct ilo_builder *builder, |
uint32_t reg, uint32_t val) |
{ |
const uint8_t cmd_len = 3; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 6, 8); |
assert(reg % 4 == 0); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_MI_CMD(MI_LOAD_REGISTER_IMM) | (cmd_len - 2); |
dw[1] = reg; |
dw[2] = val; |
} |
static inline void |
gen6_MI_STORE_REGISTER_MEM(struct ilo_builder *builder, uint32_t reg, |
struct intel_bo *bo, uint32_t bo_offset) |
{ |
const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 4 : 3; |
uint32_t reloc_flags = INTEL_RELOC_WRITE; |
uint32_t *dw; |
unsigned pos; |
ILO_DEV_ASSERT(builder->dev, 6, 8); |
assert(reg % 4 == 0 && bo_offset % 4 == 0); |
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_MI_CMD(MI_STORE_REGISTER_MEM) | (cmd_len - 2); |
dw[1] = reg; |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { |
ilo_builder_batch_reloc64(builder, pos + 2, bo, bo_offset, reloc_flags); |
} else { |
/* must use GGTT on Gen6 as in PIPE_CONTROL */ |
if (ilo_dev_gen(builder->dev) == ILO_GEN(6)) { |
dw[0] |= GEN6_MI_STORE_REGISTER_MEM_DW0_USE_GGTT; |
reloc_flags |= INTEL_RELOC_GGTT; |
} |
ilo_builder_batch_reloc(builder, pos + 2, bo, bo_offset, reloc_flags); |
} |
} |
static inline void |
gen6_MI_FLUSH_DW(struct ilo_builder *builder) |
{ |
const uint8_t cmd_len = 4; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 6, 8); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_MI_CMD(MI_FLUSH_DW) | (cmd_len - 2); |
dw[1] = 0; |
dw[2] = 0; |
dw[3] = 0; |
} |
static inline void |
gen6_MI_REPORT_PERF_COUNT(struct ilo_builder *builder, |
struct intel_bo *bo, uint32_t bo_offset, |
uint32_t report_id) |
{ |
const uint8_t cmd_len = 3; |
uint32_t reloc_flags = INTEL_RELOC_WRITE; |
uint32_t *dw; |
unsigned pos; |
ILO_DEV_ASSERT(builder->dev, 6, 7.5); |
assert(bo_offset % 64 == 0); |
/* must use GGTT on GEN6 as in PIPE_CONTROL */ |
if (ilo_dev_gen(builder->dev) == ILO_GEN(6)) { |
bo_offset |= GEN6_MI_REPORT_PERF_COUNT_DW1_USE_GGTT; |
reloc_flags |= INTEL_RELOC_GGTT; |
} |
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_MI_CMD(MI_REPORT_PERF_COUNT) | (cmd_len - 2); |
dw[2] = report_id; |
ilo_builder_batch_reloc(builder, pos + 1, bo, bo_offset, reloc_flags); |
} |
static inline void |
gen7_MI_LOAD_REGISTER_MEM(struct ilo_builder *builder, uint32_t reg, |
struct intel_bo *bo, uint32_t bo_offset) |
{ |
const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 4 : 3; |
uint32_t *dw; |
unsigned pos; |
ILO_DEV_ASSERT(builder->dev, 7, 8); |
assert(reg % 4 == 0 && bo_offset % 4 == 0); |
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN7_MI_CMD(MI_LOAD_REGISTER_MEM) | (cmd_len - 2); |
dw[1] = reg; |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) |
ilo_builder_batch_reloc64(builder, pos + 2, bo, bo_offset, 0); |
else |
ilo_builder_batch_reloc(builder, pos + 2, bo, bo_offset, 0); |
} |
/** |
* Add a MI_BATCH_BUFFER_END to the batch buffer. Pad with MI_NOOP if |
* necessary. |
*/ |
static inline void |
gen6_mi_batch_buffer_end(struct ilo_builder *builder) |
{ |
/* |
* From the Sandy Bridge PRM, volume 1 part 1, page 107: |
* |
* "The batch buffer must be QWord aligned and a multiple of QWords in |
* length." |
*/ |
const bool pad = !(builder->writers[ILO_BUILDER_WRITER_BATCH].used & 0x7); |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 6, 8); |
if (pad) { |
ilo_builder_batch_pointer(builder, 2, &dw); |
dw[0] = GEN6_MI_CMD(MI_BATCH_BUFFER_END); |
dw[1] = GEN6_MI_CMD(MI_NOOP); |
} else { |
ilo_builder_batch_pointer(builder, 1, &dw); |
dw[0] = GEN6_MI_CMD(MI_BATCH_BUFFER_END); |
} |
} |
#endif /* ILO_BUILDER_MI_H */ |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_builder_render.h |
---|
0,0 → 1,303 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2014 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_BUILDER_RENDER_H |
#define ILO_BUILDER_RENDER_H |
#include "genhw/genhw.h" |
#include "intel_winsys.h" |
#include "ilo_core.h" |
#include "ilo_dev.h" |
#include "ilo_builder.h" |
static inline void |
gen6_STATE_SIP(struct ilo_builder *builder, uint32_t sip) |
{ |
const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 3 : 2; |
uint32_t *dw; |
ILO_DEV_ASSERT(builder->dev, 6, 8); |
ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(COMMON, STATE_SIP) | (cmd_len - 2); |
dw[1] = sip; |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) |
dw[2] = 0; |
} |
static inline void |
gen6_PIPELINE_SELECT(struct ilo_builder *builder, int pipeline) |
{ |
const uint8_t cmd_len = 1; |
const uint32_t dw0 = GEN6_RENDER_CMD(SINGLE_DW, PIPELINE_SELECT) | |
pipeline; |
ILO_DEV_ASSERT(builder->dev, 6, 8); |
switch (pipeline) { |
case GEN6_PIPELINE_SELECT_DW0_SELECT_3D: |
case GEN6_PIPELINE_SELECT_DW0_SELECT_MEDIA: |
break; |
case GEN7_PIPELINE_SELECT_DW0_SELECT_GPGPU: |
assert(ilo_dev_gen(builder->dev) >= ILO_GEN(7)); |
break; |
default: |
assert(!"unknown pipeline"); |
break; |
} |
ilo_builder_batch_write(builder, cmd_len, &dw0); |
} |
static inline void |
gen6_PIPE_CONTROL(struct ilo_builder *builder, uint32_t dw1, |
struct intel_bo *bo, uint32_t bo_offset, |
uint64_t imm) |
{ |
const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 6 : 5; |
uint32_t reloc_flags = INTEL_RELOC_WRITE; |
uint32_t *dw; |
unsigned pos; |
ILO_DEV_ASSERT(builder->dev, 6, 8); |
if (dw1 & GEN6_PIPE_CONTROL_CS_STALL) { |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 73: |
* |
* "1 of the following must also be set (when CS stall is set): |
* |
* * Depth Cache Flush Enable ([0] of DW1) |
* * Stall at Pixel Scoreboard ([1] of DW1) |
* * Depth Stall ([13] of DW1) |
* * Post-Sync Operation ([13] of DW1) |
* * Render Target Cache Flush Enable ([12] of DW1) |
* * Notify Enable ([8] of DW1)" |
* |
* From the Ivy Bridge PRM, volume 2 part 1, page 61: |
* |
* "One of the following must also be set (when CS stall is set): |
* |
* * Render Target Cache Flush Enable ([12] of DW1) |
* * Depth Cache Flush Enable ([0] of DW1) |
* * Stall at Pixel Scoreboard ([1] of DW1) |
* * Depth Stall ([13] of DW1) |
* * Post-Sync Operation ([13] of DW1)" |
*/ |
uint32_t bit_test = GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH | |
GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH | |
GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL | |
GEN6_PIPE_CONTROL_DEPTH_STALL; |
/* post-sync op */ |
bit_test |= GEN6_PIPE_CONTROL_WRITE_IMM | |
GEN6_PIPE_CONTROL_WRITE_PS_DEPTH_COUNT | |
GEN6_PIPE_CONTROL_WRITE_TIMESTAMP; |
if (ilo_dev_gen(builder->dev) == ILO_GEN(6)) |
bit_test |= GEN6_PIPE_CONTROL_NOTIFY_ENABLE; |
assert(dw1 & bit_test); |
} |
if (dw1 & GEN6_PIPE_CONTROL_DEPTH_STALL) { |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 73: |
* |
* "Following bits must be clear (when Depth Stall is set): |
* |
* * Render Target Cache Flush Enable ([12] of DW1) |
* * Depth Cache Flush Enable ([0] of DW1)" |
*/ |
assert(!(dw1 & (GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH | |
GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH))); |
} |
switch (dw1 & GEN6_PIPE_CONTROL_WRITE__MASK) { |
case GEN6_PIPE_CONTROL_WRITE_PS_DEPTH_COUNT: |
case GEN6_PIPE_CONTROL_WRITE_TIMESTAMP: |
assert(!imm); |
break; |
default: |
break; |
} |
assert(bo_offset % 8 == 0); |
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(3D, PIPE_CONTROL) | (cmd_len - 2); |
dw[1] = dw1; |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { |
dw[4] = (uint32_t) imm; |
dw[5] = (uint32_t) (imm >> 32); |
if (bo) { |
ilo_builder_batch_reloc64(builder, pos + 2, |
bo, bo_offset, reloc_flags); |
} else { |
dw[2] = 0; |
dw[3] = 0; |
} |
} else { |
dw[3] = (uint32_t) imm; |
dw[4] = (uint32_t) (imm >> 32); |
if (bo) { |
/* |
* From the Sandy Bridge PRM, volume 1 part 3, page 19: |
* |
* "[DevSNB] PPGTT memory writes by MI_* (such as |
* MI_STORE_DATA_IMM) and PIPE_CONTROL are not supported." |
*/ |
if (ilo_dev_gen(builder->dev) == ILO_GEN(6)) { |
bo_offset |= GEN6_PIPE_CONTROL_DW2_USE_GGTT; |
reloc_flags |= INTEL_RELOC_GGTT; |
} |
ilo_builder_batch_reloc(builder, pos + 2, |
bo, bo_offset, reloc_flags); |
} else { |
dw[2] = 0; |
} |
} |
} |
static inline void |
ilo_builder_batch_patch_sba(struct ilo_builder *builder) |
{ |
const struct ilo_builder_writer *inst = |
&builder->writers[ILO_BUILDER_WRITER_INSTRUCTION]; |
if (!builder->sba_instruction_pos) |
return; |
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) { |
ilo_builder_batch_reloc64(builder, builder->sba_instruction_pos, |
inst->bo, |
builder->mocs << GEN8_SBA_MOCS__SHIFT | GEN6_SBA_ADDR_MODIFIED, |
0); |
} else { |
ilo_builder_batch_reloc(builder, builder->sba_instruction_pos, inst->bo, |
builder->mocs << GEN6_SBA_MOCS__SHIFT | GEN6_SBA_ADDR_MODIFIED, |
0); |
} |
} |
/** |
* Add a STATE_BASE_ADDRESS to the batch buffer. The relocation entry for the |
* instruction buffer is not added until ilo_builder_end() or next |
* gen6_state_base_address(). |
*/ |
static inline void |
gen6_state_base_address(struct ilo_builder *builder, bool init_all) |
{ |
const uint8_t cmd_len = 10; |
const struct ilo_builder_writer *bat = |
&builder->writers[ILO_BUILDER_WRITER_BATCH]; |
uint32_t *dw; |
unsigned pos; |
ILO_DEV_ASSERT(builder->dev, 6, 7.5); |
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(COMMON, STATE_BASE_ADDRESS) | (cmd_len - 2); |
dw[1] = builder->mocs << GEN6_SBA_MOCS__SHIFT | |
builder->mocs << GEN6_SBA_DW1_GENERAL_STATELESS_MOCS__SHIFT | |
init_all; |
ilo_builder_batch_reloc(builder, pos + 2, bat->bo, |
builder->mocs << GEN6_SBA_MOCS__SHIFT | GEN6_SBA_ADDR_MODIFIED, |
0); |
ilo_builder_batch_reloc(builder, pos + 3, bat->bo, |
builder->mocs << GEN6_SBA_MOCS__SHIFT | GEN6_SBA_ADDR_MODIFIED, |
0); |
dw[4] = builder->mocs << GEN6_SBA_MOCS__SHIFT | init_all; |
/* |
* Since the instruction writer has WRITER_FLAG_APPEND set, it is tempting |
* not to set Instruction Base Address. The problem is that we do not know |
* if the bo has been or will be moved by the kernel. We need a relocation |
* entry because of that. |
* |
* And since we also set WRITER_FLAG_GROW, we have to wait until |
* ilo_builder_end(), when the final bo is known, to add the relocation |
* entry. |
*/ |
ilo_builder_batch_patch_sba(builder); |
builder->sba_instruction_pos = pos + 5; |
/* skip range checks */ |
dw[6] = init_all; |
dw[7] = 0xfffff000 + init_all; |
dw[8] = 0xfffff000 + init_all; |
dw[9] = init_all; |
} |
static inline void |
gen8_state_base_address(struct ilo_builder *builder, bool init_all) |
{ |
const uint8_t cmd_len = 16; |
const struct ilo_builder_writer *bat = |
&builder->writers[ILO_BUILDER_WRITER_BATCH]; |
uint32_t *dw; |
unsigned pos; |
ILO_DEV_ASSERT(builder->dev, 8, 8); |
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); |
dw[0] = GEN6_RENDER_CMD(COMMON, STATE_BASE_ADDRESS) | (cmd_len - 2); |
dw[1] = builder->mocs << GEN8_SBA_MOCS__SHIFT | init_all; |
dw[2] = 0; |
dw[3] = builder->mocs << GEN8_SBA_DW3_STATELESS_MOCS__SHIFT; |
ilo_builder_batch_reloc64(builder, pos + 4, bat->bo, |
builder->mocs << GEN8_SBA_MOCS__SHIFT | GEN6_SBA_ADDR_MODIFIED, |
0); |
ilo_builder_batch_reloc64(builder, pos + 6, bat->bo, |
builder->mocs << GEN8_SBA_MOCS__SHIFT | GEN6_SBA_ADDR_MODIFIED, |
0); |
dw[8] = builder->mocs << GEN8_SBA_MOCS__SHIFT | init_all; |
dw[9] = 0; |
ilo_builder_batch_patch_sba(builder); |
builder->sba_instruction_pos = pos + 10; |
/* skip range checks */ |
dw[12] = 0xfffff000 + init_all; |
dw[13] = 0xfffff000 + init_all; |
dw[14] = 0xfffff000 + init_all; |
dw[15] = 0xfffff000 + init_all; |
} |
#endif /* ILO_BUILDER_RENDER_H */ |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_core.h |
---|
0,0 → 1,46 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_CORE_H |
#define ILO_CORE_H |
#include "pipe/p_compiler.h" |
#include "pipe/p_defines.h" |
#include "pipe/p_format.h" |
#include "util/u_debug.h" |
#include "util/list.h" |
#include "util/u_format.h" |
#include "util/u_inlines.h" |
#include "util/u_math.h" |
#include "util/u_memory.h" |
#include "util/u_pointer.h" |
/*
 * Driver-private primitive type, numbered right after the last gallium
 * primitive, and the resulting primitive count.
 */
#define ILO_PRIM_RECTANGLES   PIPE_PRIM_MAX
#define ILO_PRIM_MAX          (ILO_PRIM_RECTANGLES + 1)
#endif /* ILO_CORE_H */ |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_debug.c |
---|
0,0 → 1,51 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include "ilo_debug.h" |
static const struct debug_named_value ilo_debug_flags[] = { |
{ "batch", ILO_DEBUG_BATCH, "Dump batch/dynamic/surface/instruction buffers" }, |
{ "vs", ILO_DEBUG_VS, "Dump vertex shaders" }, |
{ "gs", ILO_DEBUG_GS, "Dump geometry shaders" }, |
{ "fs", ILO_DEBUG_FS, "Dump fragment shaders" }, |
{ "cs", ILO_DEBUG_CS, "Dump compute shaders" }, |
{ "draw", ILO_DEBUG_DRAW, "Show draw information" }, |
{ "submit", ILO_DEBUG_SUBMIT, "Show batch buffer submissions" }, |
{ "hang", ILO_DEBUG_HANG, "Detect GPU hangs" }, |
{ "nohw", ILO_DEBUG_NOHW, "Do not send commands to HW" }, |
{ "nocache", ILO_DEBUG_NOCACHE, "Always invalidate HW caches" }, |
{ "nohiz", ILO_DEBUG_NOHIZ, "Disable HiZ" }, |
DEBUG_NAMED_VALUE_END |
}; |
int ilo_debug; |
void |
ilo_debug_init(const char *name) |
{ |
ilo_debug = debug_get_flags_option(name, ilo_debug_flags, 0); |
} |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_debug.h |
---|
0,0 → 1,103 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_DEBUG_H |
#define ILO_DEBUG_H |
#include "ilo_core.h" |
/*
 * Debug flags that affect hot paths are only enabled in debug builds:
 * ILO_DEBUG_HOT is 1 when DEBUG is defined and 0 otherwise.
 */
#if defined(DEBUG)
#  define ILO_DEBUG_HOT 1
#else
#  define ILO_DEBUG_HOT 0
#endif
enum ilo_debug { |
ILO_DEBUG_BATCH = 1 << 0, |
ILO_DEBUG_VS = 1 << 1, |
ILO_DEBUG_GS = 1 << 2, |
ILO_DEBUG_FS = 1 << 3, |
ILO_DEBUG_CS = 1 << 4, |
ILO_DEBUG_DRAW = ILO_DEBUG_HOT << 5, |
ILO_DEBUG_SUBMIT = 1 << 6, |
ILO_DEBUG_HANG = 1 << 7, |
/* flags that affect the behaviors of the driver */ |
ILO_DEBUG_NOHW = 1 << 20, |
ILO_DEBUG_NOCACHE = 1 << 21, |
ILO_DEBUG_NOHIZ = 1 << 22, |
}; |
/* Active debug flags, a bitmask of enum ilo_debug values. */
extern int ilo_debug;

/* Set ilo_debug from the debug option called name. */
void ilo_debug_init(const char *name);
/** |
* Print a message, for dumping or debugging. |
*/ |
static inline void _util_printf_format(1, 2) |
ilo_printf(const char *format, ...) |
{ |
va_list ap; |
va_start(ap, format); |
_debug_vprintf(format, ap); |
va_end(ap); |
} |
/** |
* Print a critical error. |
*/ |
static inline void _util_printf_format(1, 2) |
ilo_err(const char *format, ...) |
{ |
va_list ap; |
va_start(ap, format); |
_debug_vprintf(format, ap); |
va_end(ap); |
} |
/** |
* Print a warning, silenced for release builds. |
*/ |
static inline void _util_printf_format(1, 2) |
ilo_warn(const char *format, ...) |
{ |
#ifdef DEBUG |
va_list ap; |
va_start(ap, format); |
_debug_vprintf(format, ap); |
va_end(ap); |
#else |
#endif |
} |
#endif /* ILO_DEBUG_H */ |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_dev.c |
---|
0,0 → 1,186 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include "genhw/genhw.h" |
#include "intel_winsys.h" |
#include "ilo_debug.h" |
#include "ilo_dev.h" |
/** |
* Initialize the \p dev from \p winsys. \p winsys is considered owned by \p |
* dev and will be destroyed in \p ilo_dev_cleanup(). |
*/ |
bool |
ilo_dev_init(struct ilo_dev *dev, struct intel_winsys *winsys) |
{ |
const struct intel_winsys_info *info; |
info = intel_winsys_get_info(winsys); |
dev->winsys = winsys; |
dev->devid = info->devid; |
dev->aperture_total = info->aperture_total; |
dev->aperture_mappable = info->aperture_mappable; |
dev->has_llc = info->has_llc; |
dev->has_address_swizzling = info->has_address_swizzling; |
dev->has_logical_context = info->has_logical_context; |
dev->has_ppgtt = info->has_ppgtt; |
dev->has_timestamp = info->has_timestamp; |
dev->has_gen7_sol_reset = info->has_gen7_sol_reset; |
if (!dev->has_logical_context) { |
ilo_err("missing hardware logical context support\n"); |
return false; |
} |
/* |
* PIPE_CONTROL and MI_* use PPGTT writes on GEN7+ and privileged GGTT |
* writes on GEN6. |
* |
* From the Sandy Bridge PRM, volume 1 part 3, page 101: |
* |
* "[DevSNB] When Per-Process GTT Enable is set, it is assumed that all |
* code is in a secure environment, independent of address space. |
* Under this condition, this bit only specifies the address space |
* (GGTT or PPGTT). All commands are executed "as-is"" |
* |
* We need PPGTT to be enabled on GEN6 too. |
*/ |
if (!dev->has_ppgtt) { |
/* experiments show that it does not really matter... */ |
ilo_warn("PPGTT disabled\n"); |
} |
if (gen_is_bdw(info->devid) || gen_is_chv(info->devid)) { |
dev->gen_opaque = ILO_GEN(8); |
dev->gt = (gen_is_bdw(info->devid)) ? gen_get_bdw_gt(info->devid) : 1; |
/* XXX random values */ |
if (dev->gt == 3) { |
dev->eu_count = 48; |
dev->thread_count = 336; |
dev->urb_size = 384 * 1024; |
} else if (dev->gt == 2) { |
dev->eu_count = 24; |
dev->thread_count = 168; |
dev->urb_size = 384 * 1024; |
} else { |
dev->eu_count = 12; |
dev->thread_count = 84; |
dev->urb_size = 192 * 1024; |
} |
} else if (gen_is_hsw(info->devid)) { |
/* |
* From the Haswell PRM, volume 4, page 8: |
* |
* "Description GT3 GT2 GT1.5 GT1 |
* (...) |
* EUs (Total) 40 20 12 10 |
* Threads (Total) 280 140 84 70 |
* (...) |
* URB Size (max, within L3$) 512KB 256KB 256KB 128KB |
*/ |
dev->gen_opaque = ILO_GEN(7.5); |
dev->gt = gen_get_hsw_gt(info->devid); |
if (dev->gt == 3) { |
dev->eu_count = 40; |
dev->thread_count = 280; |
dev->urb_size = 512 * 1024; |
} else if (dev->gt == 2) { |
dev->eu_count = 20; |
dev->thread_count = 140; |
dev->urb_size = 256 * 1024; |
} else { |
dev->eu_count = 10; |
dev->thread_count = 70; |
dev->urb_size = 128 * 1024; |
} |
} else if (gen_is_ivb(info->devid) || gen_is_vlv(info->devid)) { |
/* |
* From the Ivy Bridge PRM, volume 1 part 1, page 18: |
* |
* "Device # of EUs #Threads/EU |
* Ivy Bridge (GT2) 16 8 |
* Ivy Bridge (GT1) 6 6" |
* |
* From the Ivy Bridge PRM, volume 4 part 2, page 17: |
* |
* "URB Size URB Rows URB Rows when SLM Enabled |
* 128k 4096 2048 |
* 256k 8096 4096" |
*/ |
dev->gen_opaque = ILO_GEN(7); |
dev->gt = (gen_is_ivb(info->devid)) ? gen_get_ivb_gt(info->devid) : 1; |
if (dev->gt == 2) { |
dev->eu_count = 16; |
dev->thread_count = 128; |
dev->urb_size = 256 * 1024; |
} else { |
dev->eu_count = 6; |
dev->thread_count = 36; |
dev->urb_size = 128 * 1024; |
} |
} else if (gen_is_snb(info->devid)) { |
/* |
* From the Sandy Bridge PRM, volume 1 part 1, page 22: |
* |
* "Device # of EUs #Threads/EU |
* SNB GT2 12 5 |
* SNB GT1 6 4" |
* |
* From the Sandy Bridge PRM, volume 4 part 2, page 18: |
* |
* "[DevSNB]: The GT1 product's URB provides 32KB of storage, |
* arranged as 1024 256-bit rows. The GT2 product's URB provides |
* 64KB of storage, arranged as 2048 256-bit rows. A row |
* corresponds in size to an EU GRF register. Read/write access to |
* the URB is generally supported on a row-granular basis." |
*/ |
dev->gen_opaque = ILO_GEN(6); |
dev->gt = gen_get_snb_gt(info->devid); |
if (dev->gt == 2) { |
dev->eu_count = 12; |
dev->thread_count = 60; |
dev->urb_size = 64 * 1024; |
} else { |
dev->eu_count = 6; |
dev->thread_count = 24; |
dev->urb_size = 32 * 1024; |
} |
} else { |
ilo_err("unknown GPU generation\n"); |
return false; |
} |
return true; |
} |
void |
ilo_dev_cleanup(struct ilo_dev *dev) |
{ |
intel_winsys_destroy(dev->winsys); |
} |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_dev.h |
---|
0,0 → 1,81 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_DEV_H |
#define ILO_DEV_H |
#include "ilo_core.h" |
/*
 * Encode a hardware generation as an integer: the generation number times
 * 100, so that half-generations stay integral (ILO_GEN(7.5) == 750).
 *
 * The argument is parenthesized so that compound expressions are scaled as a
 * whole, e.g. ILO_GEN(7 + 0.5) is 750 rather than 57.
 */
#define ILO_GEN(gen) ((int) ((gen) * 100))

/*
 * Assert that the generation of \p dev lies within [min_gen, max_gen]; both
 * bounds are given as plain generation numbers and are encoded with
 * ILO_GEN() before being handed to ilo_dev_assert().
 */
#define ILO_DEV_ASSERT(dev, min_gen, max_gen) \
   ilo_dev_assert(dev, ILO_GEN(min_gen), ILO_GEN(max_gen))
struct intel_winsys; |
/**
 * Device description filled in by ilo_dev_init().
 */
struct ilo_dev {
   /* destroyed by ilo_dev_cleanup() */
   struct intel_winsys *winsys;

   /* these mirror intel_winsys_info */
   int devid;
   size_t aperture_total;
   size_t aperture_mappable;
   bool has_llc;
   bool has_address_swizzling;
   bool has_logical_context;
   bool has_ppgtt;
   bool has_timestamp;
   bool has_gen7_sol_reset;

   /* ILO_GEN()-encoded generation; use ilo_dev_gen() to access */
   int gen_opaque;

   /* GT level and the limits ilo_dev_init() derives from it */
   int gt;
   int eu_count;
   int thread_count;
   int urb_size;
};
bool |
ilo_dev_init(struct ilo_dev *dev, struct intel_winsys *winsys); |
void |
ilo_dev_cleanup(struct ilo_dev *dev); |
static inline int |
ilo_dev_gen(const struct ilo_dev *dev) |
{ |
return dev->gen_opaque; |
} |
static inline void |
ilo_dev_assert(const struct ilo_dev *dev, int min_opqaue, int max_opqaue) |
{ |
assert(dev->gen_opaque >= min_opqaue && dev->gen_opaque <= max_opqaue); |
} |
#endif /* ILO_DEV_H */ |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_fence.h |
---|
0,0 → 1,73 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_FENCE_H |
#define ILO_FENCE_H |
#include "intel_winsys.h" |
#include "ilo_core.h" |
#include "ilo_dev.h" |
/**
 * A fence backed by a sequence bo.
 */
struct ilo_fence {
   /* bo waited on by ilo_fence_wait(); no bo means already signaled */
   struct intel_bo *seq_bo;
};
static inline void |
ilo_fence_init(struct ilo_fence *fence, const struct ilo_dev *dev) |
{ |
/* no-op */ |
} |
static inline void |
ilo_fence_cleanup(struct ilo_fence *fence) |
{ |
intel_bo_unref(fence->seq_bo); |
} |
/** |
* Set the sequence bo for waiting. The fence is considered signaled when |
* there is no sequence bo. |
*/ |
static inline void |
ilo_fence_set_seq_bo(struct ilo_fence *fence, struct intel_bo *seq_bo) |
{ |
intel_bo_unref(fence->seq_bo); |
fence->seq_bo = intel_bo_ref(seq_bo); |
} |
/** |
* Wait for the fence to be signaled or until \p timeout nanoseconds has |
* passed. It will wait indefinitely when \p timeout is negative. |
*/ |
static inline bool |
ilo_fence_wait(struct ilo_fence *fence, int64_t timeout) |
{ |
return (!fence->seq_bo || intel_bo_wait(fence->seq_bo, timeout) == 0); |
} |
#endif /* ILO_FENCE_H */ |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_format.c |
---|
0,0 → 1,755 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include "genhw/genhw.h" |
#include "ilo_format.h" |
/*
 * Vertex fetch capability of a hardware format.  The field holds an
 * ILO_GEN()-encoded generation that ilo_format_support_vb() compares the
 * device generation against; zero means the capability is absent.
 */
struct ilo_vf_cap {
   int vertex_element;
};
/*
 * Stream output capability of a hardware format.  The field holds an
 * ILO_GEN()-encoded generation that ilo_format_support_sol() compares the
 * device generation against; zero means the capability is absent.
 */
struct ilo_sol_cap {
   int buffer;
};
/*
 * Sampler capabilities of a hardware format.  Each field holds an
 * ILO_GEN()-encoded generation; ilo_format_support_sampler() treats a zero
 * sampling or filtering value as "not supported".
 */
struct ilo_sampler_cap {
   int sampling;
   int filtering;
   int shadow_map;
   int chroma_key;
};
/*
 * Data port capabilities of a hardware format.  Each field holds an
 * ILO_GEN()-encoded generation; ilo_format_support_rt() treats a zero
 * rt_write or rt_write_blending value as "not supported".
 */
struct ilo_dp_cap {
   int rt_write;
   int rt_write_blending;
   int typed_write;
   int media_color_processing;
};
/* |
* This table is based on: |
* |
* - the Sandy Bridge PRM, volume 4 part 1, page 88-97 |
* - the Ivy Bridge PRM, volume 2 part 1, page 97-99 |
* - the Haswell PRM, volume 7, page 467-470 |
*/ |
static const struct ilo_vf_cap ilo_vf_caps[] = { |
#define CAP(vertex_element) { ILO_GEN(vertex_element) } |
[GEN6_FORMAT_R32G32B32A32_FLOAT] = CAP( 1), |
[GEN6_FORMAT_R32G32B32A32_SINT] = CAP( 1), |
[GEN6_FORMAT_R32G32B32A32_UINT] = CAP( 1), |
[GEN6_FORMAT_R32G32B32A32_UNORM] = CAP( 1), |
[GEN6_FORMAT_R32G32B32A32_SNORM] = CAP( 1), |
[GEN6_FORMAT_R64G64_FLOAT] = CAP( 1), |
[GEN6_FORMAT_R32G32B32A32_SSCALED] = CAP( 1), |
[GEN6_FORMAT_R32G32B32A32_USCALED] = CAP( 1), |
[GEN6_FORMAT_R32G32B32A32_SFIXED] = CAP(7.5), |
[GEN6_FORMAT_R32G32B32_FLOAT] = CAP( 1), |
[GEN6_FORMAT_R32G32B32_SINT] = CAP( 1), |
[GEN6_FORMAT_R32G32B32_UINT] = CAP( 1), |
[GEN6_FORMAT_R32G32B32_UNORM] = CAP( 1), |
[GEN6_FORMAT_R32G32B32_SNORM] = CAP( 1), |
[GEN6_FORMAT_R32G32B32_SSCALED] = CAP( 1), |
[GEN6_FORMAT_R32G32B32_USCALED] = CAP( 1), |
[GEN6_FORMAT_R32G32B32_SFIXED] = CAP(7.5), |
[GEN6_FORMAT_R16G16B16A16_UNORM] = CAP( 1), |
[GEN6_FORMAT_R16G16B16A16_SNORM] = CAP( 1), |
[GEN6_FORMAT_R16G16B16A16_SINT] = CAP( 1), |
[GEN6_FORMAT_R16G16B16A16_UINT] = CAP( 1), |
[GEN6_FORMAT_R16G16B16A16_FLOAT] = CAP( 1), |
[GEN6_FORMAT_R32G32_FLOAT] = CAP( 1), |
[GEN6_FORMAT_R32G32_SINT] = CAP( 1), |
[GEN6_FORMAT_R32G32_UINT] = CAP( 1), |
[GEN6_FORMAT_R32G32_UNORM] = CAP( 1), |
[GEN6_FORMAT_R32G32_SNORM] = CAP( 1), |
[GEN6_FORMAT_R64_FLOAT] = CAP( 1), |
[GEN6_FORMAT_R16G16B16A16_SSCALED] = CAP( 1), |
[GEN6_FORMAT_R16G16B16A16_USCALED] = CAP( 1), |
[GEN6_FORMAT_R32G32_SSCALED] = CAP( 1), |
[GEN6_FORMAT_R32G32_USCALED] = CAP( 1), |
[GEN6_FORMAT_R32G32_SFIXED] = CAP(7.5), |
[GEN6_FORMAT_B8G8R8A8_UNORM] = CAP( 1), |
[GEN6_FORMAT_R10G10B10A2_UNORM] = CAP( 1), |
[GEN6_FORMAT_R10G10B10A2_UINT] = CAP( 1), |
[GEN6_FORMAT_R10G10B10_SNORM_A2_UNORM] = CAP( 1), |
[GEN6_FORMAT_R8G8B8A8_UNORM] = CAP( 1), |
[GEN6_FORMAT_R8G8B8A8_SNORM] = CAP( 1), |
[GEN6_FORMAT_R8G8B8A8_SINT] = CAP( 1), |
[GEN6_FORMAT_R8G8B8A8_UINT] = CAP( 1), |
[GEN6_FORMAT_R16G16_UNORM] = CAP( 1), |
[GEN6_FORMAT_R16G16_SNORM] = CAP( 1), |
[GEN6_FORMAT_R16G16_SINT] = CAP( 1), |
[GEN6_FORMAT_R16G16_UINT] = CAP( 1), |
[GEN6_FORMAT_R16G16_FLOAT] = CAP( 1), |
[GEN6_FORMAT_B10G10R10A2_UNORM] = CAP(7.5), |
[GEN6_FORMAT_R11G11B10_FLOAT] = CAP( 1), |
[GEN6_FORMAT_R32_SINT] = CAP( 1), |
[GEN6_FORMAT_R32_UINT] = CAP( 1), |
[GEN6_FORMAT_R32_FLOAT] = CAP( 1), |
[GEN6_FORMAT_R32_UNORM] = CAP( 1), |
[GEN6_FORMAT_R32_SNORM] = CAP( 1), |
[GEN6_FORMAT_R10G10B10X2_USCALED] = CAP( 1), |
[GEN6_FORMAT_R8G8B8A8_SSCALED] = CAP( 1), |
[GEN6_FORMAT_R8G8B8A8_USCALED] = CAP( 1), |
[GEN6_FORMAT_R16G16_SSCALED] = CAP( 1), |
[GEN6_FORMAT_R16G16_USCALED] = CAP( 1), |
[GEN6_FORMAT_R32_SSCALED] = CAP( 1), |
[GEN6_FORMAT_R32_USCALED] = CAP( 1), |
[GEN6_FORMAT_R8G8_UNORM] = CAP( 1), |
[GEN6_FORMAT_R8G8_SNORM] = CAP( 1), |
[GEN6_FORMAT_R8G8_SINT] = CAP( 1), |
[GEN6_FORMAT_R8G8_UINT] = CAP( 1), |
[GEN6_FORMAT_R16_UNORM] = CAP( 1), |
[GEN6_FORMAT_R16_SNORM] = CAP( 1), |
[GEN6_FORMAT_R16_SINT] = CAP( 1), |
[GEN6_FORMAT_R16_UINT] = CAP( 1), |
[GEN6_FORMAT_R16_FLOAT] = CAP( 1), |
[GEN6_FORMAT_R8G8_SSCALED] = CAP( 1), |
[GEN6_FORMAT_R8G8_USCALED] = CAP( 1), |
[GEN6_FORMAT_R16_SSCALED] = CAP( 1), |
[GEN6_FORMAT_R16_USCALED] = CAP( 1), |
[GEN6_FORMAT_R8_UNORM] = CAP( 1), |
[GEN6_FORMAT_R8_SNORM] = CAP( 1), |
[GEN6_FORMAT_R8_SINT] = CAP( 1), |
[GEN6_FORMAT_R8_UINT] = CAP( 1), |
[GEN6_FORMAT_R8_SSCALED] = CAP( 1), |
[GEN6_FORMAT_R8_USCALED] = CAP( 1), |
[GEN6_FORMAT_R8G8B8_UNORM] = CAP( 1), |
[GEN6_FORMAT_R8G8B8_SNORM] = CAP( 1), |
[GEN6_FORMAT_R8G8B8_SSCALED] = CAP( 1), |
[GEN6_FORMAT_R8G8B8_USCALED] = CAP( 1), |
[GEN6_FORMAT_R64G64B64A64_FLOAT] = CAP( 1), |
[GEN6_FORMAT_R64G64B64_FLOAT] = CAP( 1), |
[GEN6_FORMAT_R16G16B16_FLOAT] = CAP( 6), |
[GEN6_FORMAT_R16G16B16_UNORM] = CAP( 1), |
[GEN6_FORMAT_R16G16B16_SNORM] = CAP( 1), |
[GEN6_FORMAT_R16G16B16_SSCALED] = CAP( 1), |
[GEN6_FORMAT_R16G16B16_USCALED] = CAP( 1), |
[GEN6_FORMAT_R16G16B16_UINT] = CAP(7.5), |
[GEN6_FORMAT_R16G16B16_SINT] = CAP(7.5), |
[GEN6_FORMAT_R32_SFIXED] = CAP(7.5), |
[GEN6_FORMAT_R10G10B10A2_SNORM] = CAP(7.5), |
[GEN6_FORMAT_R10G10B10A2_USCALED] = CAP(7.5), |
[GEN6_FORMAT_R10G10B10A2_SSCALED] = CAP(7.5), |
[GEN6_FORMAT_R10G10B10A2_SINT] = CAP(7.5), |
[GEN6_FORMAT_B10G10R10A2_SNORM] = CAP(7.5), |
[GEN6_FORMAT_B10G10R10A2_USCALED] = CAP(7.5), |
[GEN6_FORMAT_B10G10R10A2_SSCALED] = CAP(7.5), |
[GEN6_FORMAT_B10G10R10A2_UINT] = CAP(7.5), |
[GEN6_FORMAT_B10G10R10A2_SINT] = CAP(7.5), |
[GEN6_FORMAT_R8G8B8_UINT] = CAP(7.5), |
[GEN6_FORMAT_R8G8B8_SINT] = CAP(7.5), |
#undef CAP |
}; |
/* |
* This table is based on: |
* |
* - the Sandy Bridge PRM, volume 4 part 1, page 88-97 |
* - the Ivy Bridge PRM, volume 2 part 1, page 195 |
* - the Haswell PRM, volume 7, page 535 |
*/ |
static const struct ilo_sol_cap ilo_sol_caps[] = { |
#define CAP(buffer) { ILO_GEN(buffer) } |
[GEN6_FORMAT_R32G32B32A32_FLOAT] = CAP( 1), |
[GEN6_FORMAT_R32G32B32A32_SINT] = CAP( 1), |
[GEN6_FORMAT_R32G32B32A32_UINT] = CAP( 1), |
[GEN6_FORMAT_R32G32B32_FLOAT] = CAP( 1), |
[GEN6_FORMAT_R32G32B32_SINT] = CAP( 1), |
[GEN6_FORMAT_R32G32B32_UINT] = CAP( 1), |
[GEN6_FORMAT_R32G32_FLOAT] = CAP( 1), |
[GEN6_FORMAT_R32G32_SINT] = CAP( 1), |
[GEN6_FORMAT_R32G32_UINT] = CAP( 1), |
[GEN6_FORMAT_R32_SINT] = CAP( 1), |
[GEN6_FORMAT_R32_UINT] = CAP( 1), |
[GEN6_FORMAT_R32_FLOAT] = CAP( 1), |
#undef CAP |
}; |
/* |
* This table is based on: |
* |
* - the Sandy Bridge PRM, volume 4 part 1, page 88-97 |
* - the Ivy Bridge PRM, volume 4 part 1, page 84-87 |
*/ |
static const struct ilo_sampler_cap ilo_sampler_caps[] = { |
#define CAP(sampling, filtering, shadow_map, chroma_key) \ |
{ ILO_GEN(sampling), ILO_GEN(filtering), ILO_GEN(shadow_map), ILO_GEN(chroma_key) } |
[GEN6_FORMAT_R32G32B32A32_FLOAT] = CAP( 1, 5, 0, 0), |
[GEN6_FORMAT_R32G32B32A32_SINT] = CAP( 1, 0, 0, 0), |
[GEN6_FORMAT_R32G32B32A32_UINT] = CAP( 1, 0, 0, 0), |
[GEN6_FORMAT_R32G32B32X32_FLOAT] = CAP( 1, 5, 0, 0), |
[GEN6_FORMAT_R32G32B32_FLOAT] = CAP( 1, 5, 0, 0), |
[GEN6_FORMAT_R32G32B32_SINT] = CAP( 1, 0, 0, 0), |
[GEN6_FORMAT_R32G32B32_UINT] = CAP( 1, 0, 0, 0), |
[GEN6_FORMAT_R16G16B16A16_UNORM] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_R16G16B16A16_SNORM] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_R16G16B16A16_SINT] = CAP( 1, 0, 0, 0), |
[GEN6_FORMAT_R16G16B16A16_UINT] = CAP( 1, 0, 0, 0), |
[GEN6_FORMAT_R16G16B16A16_FLOAT] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_R32G32_FLOAT] = CAP( 1, 5, 0, 0), |
[GEN6_FORMAT_R32G32_SINT] = CAP( 1, 0, 0, 0), |
[GEN6_FORMAT_R32G32_UINT] = CAP( 1, 0, 0, 0), |
[GEN6_FORMAT_R32_FLOAT_X8X24_TYPELESS] = CAP( 1, 5, 1, 0), |
[GEN6_FORMAT_X32_TYPELESS_G8X24_UINT] = CAP( 1, 0, 0, 0), |
[GEN6_FORMAT_L32A32_FLOAT] = CAP( 1, 5, 0, 0), |
[GEN6_FORMAT_R16G16B16X16_UNORM] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_R16G16B16X16_FLOAT] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_A32X32_FLOAT] = CAP( 1, 5, 0, 0), |
[GEN6_FORMAT_L32X32_FLOAT] = CAP( 1, 5, 0, 0), |
[GEN6_FORMAT_I32X32_FLOAT] = CAP( 1, 5, 0, 0), |
[GEN6_FORMAT_B8G8R8A8_UNORM] = CAP( 1, 1, 0, 1), |
[GEN6_FORMAT_B8G8R8A8_UNORM_SRGB] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_R10G10B10A2_UNORM] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_R10G10B10A2_UNORM_SRGB] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_R10G10B10A2_UINT] = CAP( 1, 0, 0, 0), |
[GEN6_FORMAT_R10G10B10_SNORM_A2_UNORM] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_R8G8B8A8_UNORM] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_R8G8B8A8_UNORM_SRGB] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_R8G8B8A8_SNORM] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_R8G8B8A8_SINT] = CAP( 1, 0, 0, 0), |
[GEN6_FORMAT_R8G8B8A8_UINT] = CAP( 1, 0, 0, 0), |
[GEN6_FORMAT_R16G16_UNORM] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_R16G16_SNORM] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_R16G16_SINT] = CAP( 1, 0, 0, 0), |
[GEN6_FORMAT_R16G16_UINT] = CAP( 1, 0, 0, 0), |
[GEN6_FORMAT_R16G16_FLOAT] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_B10G10R10A2_UNORM] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_B10G10R10A2_UNORM_SRGB] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_R11G11B10_FLOAT] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_R32_SINT] = CAP( 1, 0, 0, 0), |
[GEN6_FORMAT_R32_UINT] = CAP( 1, 0, 0, 0), |
[GEN6_FORMAT_R32_FLOAT] = CAP( 1, 5, 1, 0), |
[GEN6_FORMAT_R24_UNORM_X8_TYPELESS] = CAP( 1, 5, 1, 0), |
[GEN6_FORMAT_X24_TYPELESS_G8_UINT] = CAP( 1, 0, 0, 0), |
[GEN6_FORMAT_L16A16_UNORM] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_I24X8_UNORM] = CAP( 1, 5, 1, 0), |
[GEN6_FORMAT_L24X8_UNORM] = CAP( 1, 5, 1, 0), |
[GEN6_FORMAT_A24X8_UNORM] = CAP( 1, 5, 1, 0), |
[GEN6_FORMAT_I32_FLOAT] = CAP( 1, 5, 1, 0), |
[GEN6_FORMAT_L32_FLOAT] = CAP( 1, 5, 1, 0), |
[GEN6_FORMAT_A32_FLOAT] = CAP( 1, 5, 1, 0), |
[GEN6_FORMAT_B8G8R8X8_UNORM] = CAP( 1, 1, 0, 1), |
[GEN6_FORMAT_B8G8R8X8_UNORM_SRGB] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_R8G8B8X8_UNORM] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_R8G8B8X8_UNORM_SRGB] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_R9G9B9E5_SHAREDEXP] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_B10G10R10X2_UNORM] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_L16A16_FLOAT] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_B5G6R5_UNORM] = CAP( 1, 1, 0, 1), |
[GEN6_FORMAT_B5G6R5_UNORM_SRGB] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_B5G5R5A1_UNORM] = CAP( 1, 1, 0, 1), |
[GEN6_FORMAT_B5G5R5A1_UNORM_SRGB] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_B4G4R4A4_UNORM] = CAP( 1, 1, 0, 1), |
[GEN6_FORMAT_B4G4R4A4_UNORM_SRGB] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_R8G8_UNORM] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_R8G8_SNORM] = CAP( 1, 1, 0, 1), |
[GEN6_FORMAT_R8G8_SINT] = CAP( 1, 0, 0, 0), |
[GEN6_FORMAT_R8G8_UINT] = CAP( 1, 0, 0, 0), |
[GEN6_FORMAT_R16_UNORM] = CAP( 1, 1, 1, 0), |
[GEN6_FORMAT_R16_SNORM] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_R16_SINT] = CAP( 1, 0, 0, 0), |
[GEN6_FORMAT_R16_UINT] = CAP( 1, 0, 0, 0), |
[GEN6_FORMAT_R16_FLOAT] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_A8P8_UNORM_PALETTE0] = CAP( 5, 5, 0, 0), |
[GEN6_FORMAT_A8P8_UNORM_PALETTE1] = CAP( 5, 5, 0, 0), |
[GEN6_FORMAT_I16_UNORM] = CAP( 1, 1, 1, 0), |
[GEN6_FORMAT_L16_UNORM] = CAP( 1, 1, 1, 0), |
[GEN6_FORMAT_A16_UNORM] = CAP( 1, 1, 1, 0), |
[GEN6_FORMAT_L8A8_UNORM] = CAP( 1, 1, 0, 1), |
[GEN6_FORMAT_I16_FLOAT] = CAP( 1, 1, 1, 0), |
[GEN6_FORMAT_L16_FLOAT] = CAP( 1, 1, 1, 0), |
[GEN6_FORMAT_A16_FLOAT] = CAP( 1, 1, 1, 0), |
[GEN6_FORMAT_L8A8_UNORM_SRGB] = CAP(4.5, 4.5, 0, 0), |
[GEN6_FORMAT_R5G5_SNORM_B6_UNORM] = CAP( 1, 1, 0, 1), |
[GEN6_FORMAT_P8A8_UNORM_PALETTE0] = CAP( 5, 5, 0, 0), |
[GEN6_FORMAT_P8A8_UNORM_PALETTE1] = CAP( 5, 5, 0, 0), |
[GEN6_FORMAT_R8_UNORM] = CAP( 1, 1, 0, 4.5), |
[GEN6_FORMAT_R8_SNORM] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_R8_SINT] = CAP( 1, 0, 0, 0), |
[GEN6_FORMAT_R8_UINT] = CAP( 1, 0, 0, 0), |
[GEN6_FORMAT_A8_UNORM] = CAP( 1, 1, 0, 1), |
[GEN6_FORMAT_I8_UNORM] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_L8_UNORM] = CAP( 1, 1, 0, 1), |
[GEN6_FORMAT_P4A4_UNORM_PALETTE0] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_A4P4_UNORM_PALETTE0] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_P8_UNORM_PALETTE0] = CAP(4.5, 4.5, 0, 0), |
[GEN6_FORMAT_L8_UNORM_SRGB] = CAP(4.5, 4.5, 0, 0), |
[GEN6_FORMAT_P8_UNORM_PALETTE1] = CAP(4.5, 4.5, 0, 0), |
[GEN6_FORMAT_P4A4_UNORM_PALETTE1] = CAP(4.5, 4.5, 0, 0), |
[GEN6_FORMAT_A4P4_UNORM_PALETTE1] = CAP(4.5, 4.5, 0, 0), |
[GEN6_FORMAT_DXT1_RGB_SRGB] = CAP(4.5, 4.5, 0, 0), |
[GEN6_FORMAT_R1_UNORM] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_YCRCB_NORMAL] = CAP( 1, 1, 0, 1), |
[GEN6_FORMAT_YCRCB_SWAPUVY] = CAP( 1, 1, 0, 1), |
[GEN6_FORMAT_P2_UNORM_PALETTE0] = CAP(4.5, 4.5, 0, 0), |
[GEN6_FORMAT_P2_UNORM_PALETTE1] = CAP(4.5, 4.5, 0, 0), |
[GEN6_FORMAT_BC1_UNORM] = CAP( 1, 1, 0, 1), |
[GEN6_FORMAT_BC2_UNORM] = CAP( 1, 1, 0, 1), |
[GEN6_FORMAT_BC3_UNORM] = CAP( 1, 1, 0, 1), |
[GEN6_FORMAT_BC4_UNORM] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_BC5_UNORM] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_BC1_UNORM_SRGB] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_BC2_UNORM_SRGB] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_BC3_UNORM_SRGB] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_MONO8] = CAP( 1, 0, 0, 0), |
[GEN6_FORMAT_YCRCB_SWAPUV] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_YCRCB_SWAPY] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_DXT1_RGB] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_FXT1] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_BC4_SNORM] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_BC5_SNORM] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_R16G16B16_FLOAT] = CAP( 5, 5, 0, 0), |
[GEN6_FORMAT_BC6H_SF16] = CAP( 7, 7, 0, 0), |
[GEN6_FORMAT_BC7_UNORM] = CAP( 7, 7, 0, 0), |
[GEN6_FORMAT_BC7_UNORM_SRGB] = CAP( 7, 7, 0, 0), |
[GEN6_FORMAT_BC6H_UF16] = CAP( 7, 7, 0, 0), |
#undef CAP |
}; |
/* |
* This table is based on: |
* |
* - the Sandy Bridge PRM, volume 4 part 1, page 88-97 |
* - the Ivy Bridge PRM, volume 4 part 1, page 172, 252-253, and 277-278 |
* - the Haswell PRM, volume 7, page 262-264 |
*/ |
static const struct ilo_dp_cap ilo_dp_caps[] = { |
#define CAP(rt_write, rt_write_blending, typed_write, media_color_processing) \ |
{ ILO_GEN(rt_write), ILO_GEN(rt_write_blending), ILO_GEN(typed_write), ILO_GEN(media_color_processing) } |
[GEN6_FORMAT_R32G32B32A32_FLOAT] = CAP( 1, 1, 7, 0), |
[GEN6_FORMAT_R32G32B32A32_SINT] = CAP( 1, 0, 7, 0), |
[GEN6_FORMAT_R32G32B32A32_UINT] = CAP( 1, 0, 7, 0), |
[GEN6_FORMAT_R16G16B16A16_UNORM] = CAP( 1, 4.5, 7, 6), |
[GEN6_FORMAT_R16G16B16A16_SNORM] = CAP( 1, 6, 7, 0), |
[GEN6_FORMAT_R16G16B16A16_SINT] = CAP( 1, 0, 7, 0), |
[GEN6_FORMAT_R16G16B16A16_UINT] = CAP( 1, 0, 7, 0), |
[GEN6_FORMAT_R16G16B16A16_FLOAT] = CAP( 1, 1, 7, 0), |
[GEN6_FORMAT_R32G32_FLOAT] = CAP( 1, 1, 7, 0), |
[GEN6_FORMAT_R32G32_SINT] = CAP( 1, 0, 7, 0), |
[GEN6_FORMAT_R32G32_UINT] = CAP( 1, 0, 7, 0), |
[GEN6_FORMAT_B8G8R8A8_UNORM] = CAP( 1, 1, 7, 6), |
[GEN6_FORMAT_B8G8R8A8_UNORM_SRGB] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_R10G10B10A2_UNORM] = CAP( 1, 1, 7, 6), |
[GEN6_FORMAT_R10G10B10A2_UNORM_SRGB] = CAP( 0, 0, 0, 6), |
[GEN6_FORMAT_R10G10B10A2_UINT] = CAP( 1, 0, 7, 0), |
[GEN6_FORMAT_R8G8B8A8_UNORM] = CAP( 1, 1, 7, 6), |
[GEN6_FORMAT_R8G8B8A8_UNORM_SRGB] = CAP( 1, 1, 0, 6), |
[GEN6_FORMAT_R8G8B8A8_SNORM] = CAP( 1, 6, 7, 0), |
[GEN6_FORMAT_R8G8B8A8_SINT] = CAP( 1, 0, 7, 0), |
[GEN6_FORMAT_R8G8B8A8_UINT] = CAP( 1, 0, 7, 0), |
[GEN6_FORMAT_R16G16_UNORM] = CAP( 1, 4.5, 7, 0), |
[GEN6_FORMAT_R16G16_SNORM] = CAP( 1, 6, 7, 0), |
[GEN6_FORMAT_R16G16_SINT] = CAP( 1, 0, 7, 0), |
[GEN6_FORMAT_R16G16_UINT] = CAP( 1, 0, 7, 0), |
[GEN6_FORMAT_R16G16_FLOAT] = CAP( 1, 1, 7, 0), |
[GEN6_FORMAT_B10G10R10A2_UNORM] = CAP( 1, 1, 7, 6), |
[GEN6_FORMAT_B10G10R10A2_UNORM_SRGB] = CAP( 1, 1, 0, 6), |
[GEN6_FORMAT_R11G11B10_FLOAT] = CAP( 1, 1, 7, 0), |
[GEN6_FORMAT_R32_SINT] = CAP( 1, 0, 7, 0), |
[GEN6_FORMAT_R32_UINT] = CAP( 1, 0, 7, 0), |
[GEN6_FORMAT_R32_FLOAT] = CAP( 1, 1, 7, 0), |
[GEN6_FORMAT_B8G8R8X8_UNORM] = CAP( 0, 0, 0, 6), |
[GEN6_FORMAT_B5G6R5_UNORM] = CAP( 1, 1, 7, 0), |
[GEN6_FORMAT_B5G6R5_UNORM_SRGB] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_B5G5R5A1_UNORM] = CAP( 1, 1, 7, 0), |
[GEN6_FORMAT_B5G5R5A1_UNORM_SRGB] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_B4G4R4A4_UNORM] = CAP( 1, 1, 7, 0), |
[GEN6_FORMAT_B4G4R4A4_UNORM_SRGB] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_R8G8_UNORM] = CAP( 1, 1, 7, 0), |
[GEN6_FORMAT_R8G8_SNORM] = CAP( 1, 6, 7, 0), |
[GEN6_FORMAT_R8G8_SINT] = CAP( 1, 0, 7, 0), |
[GEN6_FORMAT_R8G8_UINT] = CAP( 1, 0, 7, 0), |
[GEN6_FORMAT_R16_UNORM] = CAP( 1, 4.5, 7, 7), |
[GEN6_FORMAT_R16_SNORM] = CAP( 1, 6, 7, 0), |
[GEN6_FORMAT_R16_SINT] = CAP( 1, 0, 7, 0), |
[GEN6_FORMAT_R16_UINT] = CAP( 1, 0, 7, 0), |
[GEN6_FORMAT_R16_FLOAT] = CAP( 1, 1, 7, 0), |
[GEN6_FORMAT_B5G5R5X1_UNORM] = CAP( 1, 1, 7, 0), |
[GEN6_FORMAT_B5G5R5X1_UNORM_SRGB] = CAP( 1, 1, 0, 0), |
[GEN6_FORMAT_R8_UNORM] = CAP( 1, 1, 7, 0), |
[GEN6_FORMAT_R8_SNORM] = CAP( 1, 6, 7, 0), |
[GEN6_FORMAT_R8_SINT] = CAP( 1, 0, 7, 0), |
[GEN6_FORMAT_R8_UINT] = CAP( 1, 0, 7, 0), |
[GEN6_FORMAT_A8_UNORM] = CAP( 1, 1, 7, 0), |
[GEN6_FORMAT_YCRCB_NORMAL] = CAP( 1, 0, 0, 6), |
[GEN6_FORMAT_YCRCB_SWAPUVY] = CAP( 1, 0, 0, 6), |
[GEN6_FORMAT_YCRCB_SWAPUV] = CAP( 1, 0, 0, 6), |
[GEN6_FORMAT_YCRCB_SWAPY] = CAP( 1, 0, 0, 6), |
#undef CAP |
}; |
bool |
ilo_format_support_vb(const struct ilo_dev *dev, |
enum pipe_format format) |
{ |
const int idx = ilo_format_translate(dev, format, PIPE_BIND_VERTEX_BUFFER); |
const struct ilo_vf_cap *cap = (idx >= 0 && idx < Elements(ilo_vf_caps)) ? |
&ilo_vf_caps[idx] : NULL; |
return (cap && cap->vertex_element && |
ilo_dev_gen(dev) >= cap->vertex_element); |
} |
bool |
ilo_format_support_sol(const struct ilo_dev *dev, |
enum pipe_format format) |
{ |
const int idx = ilo_format_translate(dev, format, PIPE_BIND_STREAM_OUTPUT); |
const struct ilo_sol_cap *cap = (idx >= 0 && idx < Elements(ilo_sol_caps)) ? |
&ilo_sol_caps[idx] : NULL; |
return (cap && cap->buffer && ilo_dev_gen(dev) >= cap->buffer); |
} |
bool |
ilo_format_support_sampler(const struct ilo_dev *dev, |
enum pipe_format format) |
{ |
const int idx = ilo_format_translate(dev, format, PIPE_BIND_SAMPLER_VIEW); |
const struct ilo_sampler_cap *cap = (idx >= 0 && |
idx < Elements(ilo_sampler_caps)) ? &ilo_sampler_caps[idx] : NULL; |
if (!cap || !cap->sampling) |
return false; |
assert(!cap->filtering || cap->filtering >= cap->sampling); |
if (util_format_is_pure_integer(format)) |
return (ilo_dev_gen(dev) >= cap->sampling); |
else if (cap->filtering) |
return (ilo_dev_gen(dev) >= cap->filtering); |
else |
return false; |
} |
bool |
ilo_format_support_rt(const struct ilo_dev *dev, |
enum pipe_format format) |
{ |
const int idx = ilo_format_translate(dev, format, PIPE_BIND_RENDER_TARGET); |
const struct ilo_dp_cap *cap = (idx >= 0 && idx < Elements(ilo_dp_caps)) ? |
&ilo_dp_caps[idx] : NULL; |
if (!cap || !cap->rt_write) |
return false; |
assert(!cap->rt_write_blending || cap->rt_write_blending >= cap->rt_write); |
if (util_format_is_pure_integer(format)) |
return (ilo_dev_gen(dev) >= cap->rt_write); |
else if (cap->rt_write_blending) |
return (ilo_dev_gen(dev) >= cap->rt_write_blending); |
else |
return false; |
} |
bool |
ilo_format_support_zs(const struct ilo_dev *dev, |
enum pipe_format format) |
{ |
switch (format) { |
case PIPE_FORMAT_Z16_UNORM: |
case PIPE_FORMAT_Z24X8_UNORM: |
case PIPE_FORMAT_Z32_FLOAT: |
case PIPE_FORMAT_Z24_UNORM_S8_UINT: |
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: |
return true; |
case PIPE_FORMAT_S8_UINT: |
/* TODO separate stencil */ |
default: |
return false; |
} |
} |
/** |
* Translate a color (non-depth/stencil) pipe format to the matching hardware |
* format. Return -1 on errors. |
*/ |
int |
ilo_format_translate_color(const struct ilo_dev *dev, |
enum pipe_format format) |
{ |
static const int format_mapping[PIPE_FORMAT_COUNT] = { |
[PIPE_FORMAT_NONE] = 0, |
[PIPE_FORMAT_B8G8R8A8_UNORM] = GEN6_FORMAT_B8G8R8A8_UNORM, |
[PIPE_FORMAT_B8G8R8X8_UNORM] = GEN6_FORMAT_B8G8R8X8_UNORM, |
[PIPE_FORMAT_A8R8G8B8_UNORM] = 0, |
[PIPE_FORMAT_X8R8G8B8_UNORM] = 0, |
[PIPE_FORMAT_B5G5R5A1_UNORM] = GEN6_FORMAT_B5G5R5A1_UNORM, |
[PIPE_FORMAT_B4G4R4A4_UNORM] = GEN6_FORMAT_B4G4R4A4_UNORM, |
[PIPE_FORMAT_B5G6R5_UNORM] = GEN6_FORMAT_B5G6R5_UNORM, |
[PIPE_FORMAT_R10G10B10A2_UNORM] = GEN6_FORMAT_R10G10B10A2_UNORM, |
[PIPE_FORMAT_L8_UNORM] = GEN6_FORMAT_L8_UNORM, |
[PIPE_FORMAT_A8_UNORM] = GEN6_FORMAT_A8_UNORM, |
[PIPE_FORMAT_I8_UNORM] = GEN6_FORMAT_I8_UNORM, |
[PIPE_FORMAT_L8A8_UNORM] = GEN6_FORMAT_L8A8_UNORM, |
[PIPE_FORMAT_L16_UNORM] = GEN6_FORMAT_L16_UNORM, |
[PIPE_FORMAT_UYVY] = GEN6_FORMAT_YCRCB_SWAPUVY, |
[PIPE_FORMAT_YUYV] = GEN6_FORMAT_YCRCB_NORMAL, |
[PIPE_FORMAT_Z16_UNORM] = 0, |
[PIPE_FORMAT_Z32_UNORM] = 0, |
[PIPE_FORMAT_Z32_FLOAT] = 0, |
[PIPE_FORMAT_Z24_UNORM_S8_UINT] = 0, |
[PIPE_FORMAT_S8_UINT_Z24_UNORM] = 0, |
[PIPE_FORMAT_Z24X8_UNORM] = 0, |
[PIPE_FORMAT_X8Z24_UNORM] = 0, |
[PIPE_FORMAT_S8_UINT] = 0, |
[PIPE_FORMAT_R64_FLOAT] = GEN6_FORMAT_R64_FLOAT, |
[PIPE_FORMAT_R64G64_FLOAT] = GEN6_FORMAT_R64G64_FLOAT, |
[PIPE_FORMAT_R64G64B64_FLOAT] = GEN6_FORMAT_R64G64B64_FLOAT, |
[PIPE_FORMAT_R64G64B64A64_FLOAT] = GEN6_FORMAT_R64G64B64A64_FLOAT, |
[PIPE_FORMAT_R32_FLOAT] = GEN6_FORMAT_R32_FLOAT, |
[PIPE_FORMAT_R32G32_FLOAT] = GEN6_FORMAT_R32G32_FLOAT, |
[PIPE_FORMAT_R32G32B32_FLOAT] = GEN6_FORMAT_R32G32B32_FLOAT, |
[PIPE_FORMAT_R32G32B32A32_FLOAT] = GEN6_FORMAT_R32G32B32A32_FLOAT, |
[PIPE_FORMAT_R32_UNORM] = GEN6_FORMAT_R32_UNORM, |
[PIPE_FORMAT_R32G32_UNORM] = GEN6_FORMAT_R32G32_UNORM, |
[PIPE_FORMAT_R32G32B32_UNORM] = GEN6_FORMAT_R32G32B32_UNORM, |
[PIPE_FORMAT_R32G32B32A32_UNORM] = GEN6_FORMAT_R32G32B32A32_UNORM, |
[PIPE_FORMAT_R32_USCALED] = GEN6_FORMAT_R32_USCALED, |
[PIPE_FORMAT_R32G32_USCALED] = GEN6_FORMAT_R32G32_USCALED, |
[PIPE_FORMAT_R32G32B32_USCALED] = GEN6_FORMAT_R32G32B32_USCALED, |
[PIPE_FORMAT_R32G32B32A32_USCALED] = GEN6_FORMAT_R32G32B32A32_USCALED, |
[PIPE_FORMAT_R32_SNORM] = GEN6_FORMAT_R32_SNORM, |
[PIPE_FORMAT_R32G32_SNORM] = GEN6_FORMAT_R32G32_SNORM, |
[PIPE_FORMAT_R32G32B32_SNORM] = GEN6_FORMAT_R32G32B32_SNORM, |
[PIPE_FORMAT_R32G32B32A32_SNORM] = GEN6_FORMAT_R32G32B32A32_SNORM, |
[PIPE_FORMAT_R32_SSCALED] = GEN6_FORMAT_R32_SSCALED, |
[PIPE_FORMAT_R32G32_SSCALED] = GEN6_FORMAT_R32G32_SSCALED, |
[PIPE_FORMAT_R32G32B32_SSCALED] = GEN6_FORMAT_R32G32B32_SSCALED, |
[PIPE_FORMAT_R32G32B32A32_SSCALED] = GEN6_FORMAT_R32G32B32A32_SSCALED, |
[PIPE_FORMAT_R16_UNORM] = GEN6_FORMAT_R16_UNORM, |
[PIPE_FORMAT_R16G16_UNORM] = GEN6_FORMAT_R16G16_UNORM, |
[PIPE_FORMAT_R16G16B16_UNORM] = GEN6_FORMAT_R16G16B16_UNORM, |
[PIPE_FORMAT_R16G16B16A16_UNORM] = GEN6_FORMAT_R16G16B16A16_UNORM, |
[PIPE_FORMAT_R16_USCALED] = GEN6_FORMAT_R16_USCALED, |
[PIPE_FORMAT_R16G16_USCALED] = GEN6_FORMAT_R16G16_USCALED, |
[PIPE_FORMAT_R16G16B16_USCALED] = GEN6_FORMAT_R16G16B16_USCALED, |
[PIPE_FORMAT_R16G16B16A16_USCALED] = GEN6_FORMAT_R16G16B16A16_USCALED, |
[PIPE_FORMAT_R16_SNORM] = GEN6_FORMAT_R16_SNORM, |
[PIPE_FORMAT_R16G16_SNORM] = GEN6_FORMAT_R16G16_SNORM, |
[PIPE_FORMAT_R16G16B16_SNORM] = GEN6_FORMAT_R16G16B16_SNORM, |
[PIPE_FORMAT_R16G16B16A16_SNORM] = GEN6_FORMAT_R16G16B16A16_SNORM, |
[PIPE_FORMAT_R16_SSCALED] = GEN6_FORMAT_R16_SSCALED, |
[PIPE_FORMAT_R16G16_SSCALED] = GEN6_FORMAT_R16G16_SSCALED, |
[PIPE_FORMAT_R16G16B16_SSCALED] = GEN6_FORMAT_R16G16B16_SSCALED, |
[PIPE_FORMAT_R16G16B16A16_SSCALED] = GEN6_FORMAT_R16G16B16A16_SSCALED, |
[PIPE_FORMAT_R8_UNORM] = GEN6_FORMAT_R8_UNORM, |
[PIPE_FORMAT_R8G8_UNORM] = GEN6_FORMAT_R8G8_UNORM, |
[PIPE_FORMAT_R8G8B8_UNORM] = GEN6_FORMAT_R8G8B8_UNORM, |
[PIPE_FORMAT_R8G8B8A8_UNORM] = GEN6_FORMAT_R8G8B8A8_UNORM, |
[PIPE_FORMAT_X8B8G8R8_UNORM] = 0, |
[PIPE_FORMAT_R8_USCALED] = GEN6_FORMAT_R8_USCALED, |
[PIPE_FORMAT_R8G8_USCALED] = GEN6_FORMAT_R8G8_USCALED, |
[PIPE_FORMAT_R8G8B8_USCALED] = GEN6_FORMAT_R8G8B8_USCALED, |
[PIPE_FORMAT_R8G8B8A8_USCALED] = GEN6_FORMAT_R8G8B8A8_USCALED, |
[PIPE_FORMAT_R8_SNORM] = GEN6_FORMAT_R8_SNORM, |
[PIPE_FORMAT_R8G8_SNORM] = GEN6_FORMAT_R8G8_SNORM, |
[PIPE_FORMAT_R8G8B8_SNORM] = GEN6_FORMAT_R8G8B8_SNORM, |
[PIPE_FORMAT_R8G8B8A8_SNORM] = GEN6_FORMAT_R8G8B8A8_SNORM, |
[PIPE_FORMAT_R8_SSCALED] = GEN6_FORMAT_R8_SSCALED, |
[PIPE_FORMAT_R8G8_SSCALED] = GEN6_FORMAT_R8G8_SSCALED, |
[PIPE_FORMAT_R8G8B8_SSCALED] = GEN6_FORMAT_R8G8B8_SSCALED, |
[PIPE_FORMAT_R8G8B8A8_SSCALED] = GEN6_FORMAT_R8G8B8A8_SSCALED, |
[PIPE_FORMAT_R32_FIXED] = GEN6_FORMAT_R32_SFIXED, |
[PIPE_FORMAT_R32G32_FIXED] = GEN6_FORMAT_R32G32_SFIXED, |
[PIPE_FORMAT_R32G32B32_FIXED] = GEN6_FORMAT_R32G32B32_SFIXED, |
[PIPE_FORMAT_R32G32B32A32_FIXED] = GEN6_FORMAT_R32G32B32A32_SFIXED, |
[PIPE_FORMAT_R16_FLOAT] = GEN6_FORMAT_R16_FLOAT, |
[PIPE_FORMAT_R16G16_FLOAT] = GEN6_FORMAT_R16G16_FLOAT, |
[PIPE_FORMAT_R16G16B16_FLOAT] = GEN6_FORMAT_R16G16B16_FLOAT, |
[PIPE_FORMAT_R16G16B16A16_FLOAT] = GEN6_FORMAT_R16G16B16A16_FLOAT, |
[PIPE_FORMAT_L8_SRGB] = GEN6_FORMAT_L8_UNORM_SRGB, |
[PIPE_FORMAT_L8A8_SRGB] = GEN6_FORMAT_L8A8_UNORM_SRGB, |
[PIPE_FORMAT_R8G8B8_SRGB] = GEN6_FORMAT_R8G8B8_UNORM_SRGB, |
[PIPE_FORMAT_A8B8G8R8_SRGB] = 0, |
[PIPE_FORMAT_X8B8G8R8_SRGB] = 0, |
[PIPE_FORMAT_B8G8R8A8_SRGB] = GEN6_FORMAT_B8G8R8A8_UNORM_SRGB, |
[PIPE_FORMAT_B8G8R8X8_SRGB] = GEN6_FORMAT_B8G8R8X8_UNORM_SRGB, |
[PIPE_FORMAT_A8R8G8B8_SRGB] = 0, |
[PIPE_FORMAT_X8R8G8B8_SRGB] = 0, |
[PIPE_FORMAT_R8G8B8A8_SRGB] = GEN6_FORMAT_R8G8B8A8_UNORM_SRGB, |
[PIPE_FORMAT_DXT1_RGB] = GEN6_FORMAT_DXT1_RGB, |
[PIPE_FORMAT_DXT1_RGBA] = GEN6_FORMAT_BC1_UNORM, |
[PIPE_FORMAT_DXT3_RGBA] = GEN6_FORMAT_BC2_UNORM, |
[PIPE_FORMAT_DXT5_RGBA] = GEN6_FORMAT_BC3_UNORM, |
[PIPE_FORMAT_DXT1_SRGB] = GEN6_FORMAT_DXT1_RGB_SRGB, |
[PIPE_FORMAT_DXT1_SRGBA] = GEN6_FORMAT_BC1_UNORM_SRGB, |
[PIPE_FORMAT_DXT3_SRGBA] = GEN6_FORMAT_BC2_UNORM_SRGB, |
[PIPE_FORMAT_DXT5_SRGBA] = GEN6_FORMAT_BC3_UNORM_SRGB, |
[PIPE_FORMAT_RGTC1_UNORM] = GEN6_FORMAT_BC4_UNORM, |
[PIPE_FORMAT_RGTC1_SNORM] = GEN6_FORMAT_BC4_SNORM, |
[PIPE_FORMAT_RGTC2_UNORM] = GEN6_FORMAT_BC5_UNORM, |
[PIPE_FORMAT_RGTC2_SNORM] = GEN6_FORMAT_BC5_SNORM, |
[PIPE_FORMAT_R8G8_B8G8_UNORM] = 0, |
[PIPE_FORMAT_G8R8_G8B8_UNORM] = 0, |
[PIPE_FORMAT_R8SG8SB8UX8U_NORM] = 0, |
[PIPE_FORMAT_R5SG5SB6U_NORM] = 0, |
[PIPE_FORMAT_A8B8G8R8_UNORM] = 0, |
[PIPE_FORMAT_B5G5R5X1_UNORM] = GEN6_FORMAT_B5G5R5X1_UNORM, |
[PIPE_FORMAT_R10G10B10A2_USCALED] = GEN6_FORMAT_R10G10B10A2_USCALED, |
[PIPE_FORMAT_R11G11B10_FLOAT] = GEN6_FORMAT_R11G11B10_FLOAT, |
[PIPE_FORMAT_R9G9B9E5_FLOAT] = GEN6_FORMAT_R9G9B9E5_SHAREDEXP, |
[PIPE_FORMAT_Z32_FLOAT_S8X24_UINT] = 0, |
[PIPE_FORMAT_R1_UNORM] = GEN6_FORMAT_R1_UNORM, |
[PIPE_FORMAT_R10G10B10X2_USCALED] = GEN6_FORMAT_R10G10B10X2_USCALED, |
[PIPE_FORMAT_R10G10B10X2_SNORM] = 0, |
[PIPE_FORMAT_L4A4_UNORM] = 0, |
[PIPE_FORMAT_B10G10R10A2_UNORM] = GEN6_FORMAT_B10G10R10A2_UNORM, |
[PIPE_FORMAT_R10SG10SB10SA2U_NORM] = 0, |
[PIPE_FORMAT_R8G8Bx_SNORM] = 0, |
[PIPE_FORMAT_R8G8B8X8_UNORM] = GEN6_FORMAT_R8G8B8X8_UNORM, |
[PIPE_FORMAT_B4G4R4X4_UNORM] = 0, |
[PIPE_FORMAT_X24S8_UINT] = 0, |
[PIPE_FORMAT_S8X24_UINT] = 0, |
[PIPE_FORMAT_X32_S8X24_UINT] = 0, |
[PIPE_FORMAT_B2G3R3_UNORM] = 0, |
[PIPE_FORMAT_L16A16_UNORM] = GEN6_FORMAT_L16A16_UNORM, |
[PIPE_FORMAT_A16_UNORM] = GEN6_FORMAT_A16_UNORM, |
[PIPE_FORMAT_I16_UNORM] = GEN6_FORMAT_I16_UNORM, |
[PIPE_FORMAT_LATC1_UNORM] = 0, |
[PIPE_FORMAT_LATC1_SNORM] = 0, |
[PIPE_FORMAT_LATC2_UNORM] = 0, |
[PIPE_FORMAT_LATC2_SNORM] = 0, |
[PIPE_FORMAT_A8_SNORM] = 0, |
[PIPE_FORMAT_L8_SNORM] = 0, |
[PIPE_FORMAT_L8A8_SNORM] = 0, |
[PIPE_FORMAT_I8_SNORM] = 0, |
[PIPE_FORMAT_A16_SNORM] = 0, |
[PIPE_FORMAT_L16_SNORM] = 0, |
[PIPE_FORMAT_L16A16_SNORM] = 0, |
[PIPE_FORMAT_I16_SNORM] = 0, |
[PIPE_FORMAT_A16_FLOAT] = GEN6_FORMAT_A16_FLOAT, |
[PIPE_FORMAT_L16_FLOAT] = GEN6_FORMAT_L16_FLOAT, |
[PIPE_FORMAT_L16A16_FLOAT] = GEN6_FORMAT_L16A16_FLOAT, |
[PIPE_FORMAT_I16_FLOAT] = GEN6_FORMAT_I16_FLOAT, |
[PIPE_FORMAT_A32_FLOAT] = GEN6_FORMAT_A32_FLOAT, |
[PIPE_FORMAT_L32_FLOAT] = GEN6_FORMAT_L32_FLOAT, |
[PIPE_FORMAT_L32A32_FLOAT] = GEN6_FORMAT_L32A32_FLOAT, |
[PIPE_FORMAT_I32_FLOAT] = GEN6_FORMAT_I32_FLOAT, |
[PIPE_FORMAT_YV12] = 0, |
[PIPE_FORMAT_YV16] = 0, |
[PIPE_FORMAT_IYUV] = 0, |
[PIPE_FORMAT_NV12] = 0, |
[PIPE_FORMAT_NV21] = 0, |
[PIPE_FORMAT_A4R4_UNORM] = 0, |
[PIPE_FORMAT_R4A4_UNORM] = 0, |
[PIPE_FORMAT_R8A8_UNORM] = 0, |
[PIPE_FORMAT_A8R8_UNORM] = 0, |
[PIPE_FORMAT_R10G10B10A2_SSCALED] = GEN6_FORMAT_R10G10B10A2_SSCALED, |
[PIPE_FORMAT_R10G10B10A2_SNORM] = GEN6_FORMAT_R10G10B10A2_SNORM, |
[PIPE_FORMAT_B10G10R10A2_USCALED] = GEN6_FORMAT_B10G10R10A2_USCALED, |
[PIPE_FORMAT_B10G10R10A2_SSCALED] = GEN6_FORMAT_B10G10R10A2_SSCALED, |
[PIPE_FORMAT_B10G10R10A2_SNORM] = GEN6_FORMAT_B10G10R10A2_SNORM, |
[PIPE_FORMAT_R8_UINT] = GEN6_FORMAT_R8_UINT, |
[PIPE_FORMAT_R8G8_UINT] = GEN6_FORMAT_R8G8_UINT, |
[PIPE_FORMAT_R8G8B8_UINT] = GEN6_FORMAT_R8G8B8_UINT, |
[PIPE_FORMAT_R8G8B8A8_UINT] = GEN6_FORMAT_R8G8B8A8_UINT, |
[PIPE_FORMAT_R8_SINT] = GEN6_FORMAT_R8_SINT, |
[PIPE_FORMAT_R8G8_SINT] = GEN6_FORMAT_R8G8_SINT, |
[PIPE_FORMAT_R8G8B8_SINT] = GEN6_FORMAT_R8G8B8_SINT, |
[PIPE_FORMAT_R8G8B8A8_SINT] = GEN6_FORMAT_R8G8B8A8_SINT, |
[PIPE_FORMAT_R16_UINT] = GEN6_FORMAT_R16_UINT, |
[PIPE_FORMAT_R16G16_UINT] = GEN6_FORMAT_R16G16_UINT, |
[PIPE_FORMAT_R16G16B16_UINT] = GEN6_FORMAT_R16G16B16_UINT, |
[PIPE_FORMAT_R16G16B16A16_UINT] = GEN6_FORMAT_R16G16B16A16_UINT, |
[PIPE_FORMAT_R16_SINT] = GEN6_FORMAT_R16_SINT, |
[PIPE_FORMAT_R16G16_SINT] = GEN6_FORMAT_R16G16_SINT, |
[PIPE_FORMAT_R16G16B16_SINT] = GEN6_FORMAT_R16G16B16_SINT, |
[PIPE_FORMAT_R16G16B16A16_SINT] = GEN6_FORMAT_R16G16B16A16_SINT, |
[PIPE_FORMAT_R32_UINT] = GEN6_FORMAT_R32_UINT, |
[PIPE_FORMAT_R32G32_UINT] = GEN6_FORMAT_R32G32_UINT, |
[PIPE_FORMAT_R32G32B32_UINT] = GEN6_FORMAT_R32G32B32_UINT, |
[PIPE_FORMAT_R32G32B32A32_UINT] = GEN6_FORMAT_R32G32B32A32_UINT, |
[PIPE_FORMAT_R32_SINT] = GEN6_FORMAT_R32_SINT, |
[PIPE_FORMAT_R32G32_SINT] = GEN6_FORMAT_R32G32_SINT, |
[PIPE_FORMAT_R32G32B32_SINT] = GEN6_FORMAT_R32G32B32_SINT, |
[PIPE_FORMAT_R32G32B32A32_SINT] = GEN6_FORMAT_R32G32B32A32_SINT, |
[PIPE_FORMAT_A8_UINT] = 0, |
[PIPE_FORMAT_I8_UINT] = GEN6_FORMAT_I8_UINT, |
[PIPE_FORMAT_L8_UINT] = GEN6_FORMAT_L8_UINT, |
[PIPE_FORMAT_L8A8_UINT] = GEN6_FORMAT_L8A8_UINT, |
[PIPE_FORMAT_A8_SINT] = 0, |
[PIPE_FORMAT_I8_SINT] = GEN6_FORMAT_I8_SINT, |
[PIPE_FORMAT_L8_SINT] = GEN6_FORMAT_L8_SINT, |
[PIPE_FORMAT_L8A8_SINT] = GEN6_FORMAT_L8A8_SINT, |
[PIPE_FORMAT_A16_UINT] = 0, |
[PIPE_FORMAT_I16_UINT] = 0, |
[PIPE_FORMAT_L16_UINT] = 0, |
[PIPE_FORMAT_L16A16_UINT] = 0, |
[PIPE_FORMAT_A16_SINT] = 0, |
[PIPE_FORMAT_I16_SINT] = 0, |
[PIPE_FORMAT_L16_SINT] = 0, |
[PIPE_FORMAT_L16A16_SINT] = 0, |
[PIPE_FORMAT_A32_UINT] = 0, |
[PIPE_FORMAT_I32_UINT] = 0, |
[PIPE_FORMAT_L32_UINT] = 0, |
[PIPE_FORMAT_L32A32_UINT] = 0, |
[PIPE_FORMAT_A32_SINT] = 0, |
[PIPE_FORMAT_I32_SINT] = 0, |
[PIPE_FORMAT_L32_SINT] = 0, |
[PIPE_FORMAT_L32A32_SINT] = 0, |
[PIPE_FORMAT_B10G10R10A2_UINT] = GEN6_FORMAT_B10G10R10A2_UINT, |
[PIPE_FORMAT_ETC1_RGB8] = GEN6_FORMAT_ETC1_RGB8, |
[PIPE_FORMAT_R8G8_R8B8_UNORM] = 0, |
[PIPE_FORMAT_G8R8_B8R8_UNORM] = 0, |
[PIPE_FORMAT_R8G8B8X8_SNORM] = 0, |
[PIPE_FORMAT_R8G8B8X8_SRGB] = 0, |
[PIPE_FORMAT_R8G8B8X8_UINT] = 0, |
[PIPE_FORMAT_R8G8B8X8_SINT] = 0, |
[PIPE_FORMAT_B10G10R10X2_UNORM] = GEN6_FORMAT_B10G10R10X2_UNORM, |
[PIPE_FORMAT_R16G16B16X16_UNORM] = GEN6_FORMAT_R16G16B16X16_UNORM, |
[PIPE_FORMAT_R16G16B16X16_SNORM] = 0, |
[PIPE_FORMAT_R16G16B16X16_FLOAT] = GEN6_FORMAT_R16G16B16X16_FLOAT, |
[PIPE_FORMAT_R16G16B16X16_UINT] = 0, |
[PIPE_FORMAT_R16G16B16X16_SINT] = 0, |
[PIPE_FORMAT_R32G32B32X32_FLOAT] = GEN6_FORMAT_R32G32B32X32_FLOAT, |
[PIPE_FORMAT_R32G32B32X32_UINT] = 0, |
[PIPE_FORMAT_R32G32B32X32_SINT] = 0, |
[PIPE_FORMAT_R8A8_SNORM] = 0, |
[PIPE_FORMAT_R16A16_UNORM] = 0, |
[PIPE_FORMAT_R16A16_SNORM] = 0, |
[PIPE_FORMAT_R16A16_FLOAT] = 0, |
[PIPE_FORMAT_R32A32_FLOAT] = 0, |
[PIPE_FORMAT_R8A8_UINT] = 0, |
[PIPE_FORMAT_R8A8_SINT] = 0, |
[PIPE_FORMAT_R16A16_UINT] = 0, |
[PIPE_FORMAT_R16A16_SINT] = 0, |
[PIPE_FORMAT_R32A32_UINT] = 0, |
[PIPE_FORMAT_R32A32_SINT] = 0, |
[PIPE_FORMAT_R10G10B10A2_UINT] = GEN6_FORMAT_R10G10B10A2_UINT, |
[PIPE_FORMAT_B5G6R5_SRGB] = GEN6_FORMAT_B5G6R5_UNORM_SRGB, |
}; |
int sfmt = format_mapping[format]; |
/* GEN6_FORMAT_R32G32B32A32_FLOAT happens to be 0 */ |
if (!sfmt && format != PIPE_FORMAT_R32G32B32A32_FLOAT) |
sfmt = -1; |
return sfmt; |
} |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_format.h |
---|
0,0 → 1,168 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_FORMAT_H |
#define ILO_FORMAT_H |
#include "genhw/genhw.h" |
#include "ilo_core.h" |
#include "ilo_dev.h" |
/* Format capability queries; each one answers for a given device and pipe format. */
/* NOTE(review): ilo_format_support_vb() is defined outside this view; presumably the vertex-buffer counterpart of the queries below -- confirm. */
bool |
ilo_format_support_vb(const struct ilo_dev *dev, |
enum pipe_format format); |
/* True when the format, translated for PIPE_BIND_STREAM_OUTPUT, has a capability entry usable on dev. */
bool |
ilo_format_support_sol(const struct ilo_dev *dev, |
enum pipe_format format); |
/* True when the format can be used as a sampler view on dev. */
bool |
ilo_format_support_sampler(const struct ilo_dev *dev, |
enum pipe_format format); |
/* True when the format can be used as a render target on dev. */
bool |
ilo_format_support_rt(const struct ilo_dev *dev, |
enum pipe_format format); |
/* True when the format is one of the accepted depth/stencil formats. */
bool |
ilo_format_support_zs(const struct ilo_dev *dev, |
enum pipe_format format); |
/* Map a color pipe format to its hardware format; returns -1 when there is none. */
int |
ilo_format_translate_color(const struct ilo_dev *dev, |
enum pipe_format format); |
/** |
* Translate a pipe format to a hardware surface format suitable for |
* the given purpose. Return -1 on errors. |
* |
* This is an inline function not only for performance reasons. There are |
* caveats that the callers should be aware of before calling this function. |
*/ |
static inline int |
ilo_format_translate(const struct ilo_dev *dev, |
enum pipe_format format, unsigned bind) |
{ |
switch (bind) { |
case PIPE_BIND_RENDER_TARGET: |
/* |
* Some RGBX formats are not supported as render target formats. But we |
* can use their RGBA counterparts and force the destination alpha to be |
* one when blending is enabled. |
*/ |
switch (format) { |
case PIPE_FORMAT_B8G8R8X8_UNORM: |
return GEN6_FORMAT_B8G8R8A8_UNORM; |
default: |
return ilo_format_translate_color(dev, format); |
} |
break; |
case PIPE_BIND_SAMPLER_VIEW: |
/* |
* For depth formats, we want the depth values to be returned as R |
* values. But we assume in many places that the depth values are |
* returned as I values (util_make_fragment_tex_shader_writedepth() is |
* one such example). We have to live with that at least for now. |
* |
* For ETC1 format, the texture data will be decompressed before being |
* written to the bo. See tex_staging_sys_convert_write(). |
*/ |
switch (format) { |
case PIPE_FORMAT_Z16_UNORM: |
return GEN6_FORMAT_I16_UNORM; |
case PIPE_FORMAT_Z32_FLOAT: |
return GEN6_FORMAT_I32_FLOAT; |
case PIPE_FORMAT_Z24X8_UNORM: |
case PIPE_FORMAT_Z24_UNORM_S8_UINT: |
return GEN6_FORMAT_I24X8_UNORM; |
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: |
return GEN6_FORMAT_I32X32_FLOAT; |
case PIPE_FORMAT_ETC1_RGB8: |
return GEN6_FORMAT_R8G8B8X8_UNORM; |
default: |
return ilo_format_translate_color(dev, format); |
} |
break; |
case PIPE_BIND_VERTEX_BUFFER: |
if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) |
return ilo_format_translate_color(dev, format); |
/* |
* Some 3-component formats are not supported as vertex element formats. |
* But since we move between vertices using vb->stride, we should be |
* good to use their 4-component counterparts if we force the W |
* component to be one. The only exception is that the vb boundary |
* check for the last vertex may fail. |
*/ |
switch (format) { |
case PIPE_FORMAT_R16G16B16_FLOAT: |
return GEN6_FORMAT_R16G16B16A16_FLOAT; |
case PIPE_FORMAT_R16G16B16_UINT: |
return GEN6_FORMAT_R16G16B16A16_UINT; |
case PIPE_FORMAT_R16G16B16_SINT: |
return GEN6_FORMAT_R16G16B16A16_SINT; |
case PIPE_FORMAT_R8G8B8_UINT: |
return GEN6_FORMAT_R8G8B8A8_UINT; |
case PIPE_FORMAT_R8G8B8_SINT: |
return GEN6_FORMAT_R8G8B8A8_SINT; |
default: |
return ilo_format_translate_color(dev, format); |
} |
break; |
case PIPE_BIND_STREAM_OUTPUT: |
return ilo_format_translate_color(dev, format); |
break; |
default: |
assert(!"cannot translate format"); |
break; |
} |
return -1; |
} |
static inline int |
ilo_format_translate_render(const struct ilo_dev *dev, |
enum pipe_format format) |
{ |
return ilo_format_translate(dev, format, PIPE_BIND_RENDER_TARGET); |
} |
static inline int |
ilo_format_translate_texture(const struct ilo_dev *dev, |
enum pipe_format format) |
{ |
return ilo_format_translate(dev, format, PIPE_BIND_SAMPLER_VIEW); |
} |
static inline int |
ilo_format_translate_vertex(const struct ilo_dev *dev, |
enum pipe_format format) |
{ |
return ilo_format_translate(dev, format, PIPE_BIND_VERTEX_BUFFER); |
} |
#endif /* ILO_FORMAT_H */ |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_image.c |
---|
0,0 → 1,1437 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2014 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include "ilo_debug.h" |
#include "ilo_image.h" |
/* Tiling modes as bit flags so that a set of modes fits in one mask; each bit position is the matching GENx_TILING_* value. */
enum { |
IMAGE_TILING_NONE = 1 << GEN6_TILING_NONE, |
IMAGE_TILING_X = 1 << GEN6_TILING_X, |
IMAGE_TILING_Y = 1 << GEN6_TILING_Y, |
IMAGE_TILING_W = 1 << GEN8_TILING_W, |
/* union of every mode above */
IMAGE_TILING_ALL = (IMAGE_TILING_NONE | |
IMAGE_TILING_X | |
IMAGE_TILING_Y | |
IMAGE_TILING_W) |
}; |
/* Inputs and scratch values shared by the img_init_*() helpers below. */
struct ilo_image_params { |
/* device being targeted; the helpers query it with ilo_dev_gen() */
const struct ilo_dev *dev; |
/* resource template supplying bind flags, target, sample/level/layer counts */
const struct pipe_resource *templ; |
/* mask of IMAGE_TILING_* flags that img_init_tiling() chooses from */
unsigned valid_tilings; |
/* when true, img_init_alignments() takes align_i/align_j from the block size */
bool compressed; |
/* slice heights of LOD 0 and LOD 1, filled in by img_init_lods() for the layer walk */
unsigned h0, h1; |
/* running extents of the layout, grown by img_init_lods() and img_init_layer_height() */
unsigned max_x, max_y; |
}; |
static void |
img_get_slice_size(const struct ilo_image *img, |
const struct ilo_image_params *params, |
unsigned level, unsigned *width, unsigned *height) |
{ |
const struct pipe_resource *templ = params->templ; |
unsigned w, h; |
w = u_minify(img->width0, level); |
h = u_minify(img->height0, level); |
/* |
* From the Sandy Bridge PRM, volume 1 part 1, page 114: |
* |
* "The dimensions of the mip maps are first determined by applying the |
* sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then, |
* if necessary, they are padded out to compression block boundaries." |
*/ |
w = align(w, img->block_width); |
h = align(h, img->block_height); |
/* |
* From the Sandy Bridge PRM, volume 1 part 1, page 111: |
* |
* "If the surface is multisampled (4x), these values must be adjusted |
* as follows before proceeding: |
* |
* W_L = ceiling(W_L / 2) * 4 |
* H_L = ceiling(H_L / 2) * 4" |
* |
* From the Ivy Bridge PRM, volume 1 part 1, page 108: |
* |
* "If the surface is multisampled and it is a depth or stencil surface |
* or Multisampled Surface StorageFormat in SURFACE_STATE is |
* MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before |
* proceeding: |
* |
* #samples W_L = H_L = |
* 2 ceiling(W_L / 2) * 4 HL [no adjustment] |
* 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4 |
* 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4 |
* 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8" |
* |
* For interleaved samples (4x), where pixels |
* |
* (x, y ) (x+1, y ) |
* (x, y+1) (x+1, y+1) |
* |
* would be is occupied by |
* |
* (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1) |
* (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1) |
* (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3) |
* (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3) |
* |
* Thus the need to |
* |
* w = align(w, 2) * 2; |
* y = align(y, 2) * 2; |
*/ |
if (img->interleaved_samples) { |
switch (templ->nr_samples) { |
case 0: |
case 1: |
break; |
case 2: |
w = align(w, 2) * 2; |
break; |
case 4: |
w = align(w, 2) * 2; |
h = align(h, 2) * 2; |
break; |
case 8: |
w = align(w, 2) * 4; |
h = align(h, 2) * 2; |
break; |
case 16: |
w = align(w, 2) * 4; |
h = align(h, 2) * 4; |
break; |
default: |
assert(!"unsupported sample count"); |
break; |
} |
} |
/* |
* From the Ivy Bridge PRM, volume 1 part 1, page 108: |
* |
* "For separate stencil buffer, the width must be mutiplied by 2 and |
* height divided by 2..." |
* |
* To make things easier (for transfer), we will just double the stencil |
* stride in 3DSTATE_STENCIL_BUFFER. |
*/ |
w = align(w, img->align_i); |
h = align(h, img->align_j); |
*width = w; |
*height = h; |
} |
static unsigned |
img_get_num_layers(const struct ilo_image *img, |
const struct ilo_image_params *params) |
{ |
const struct pipe_resource *templ = params->templ; |
unsigned num_layers = templ->array_size; |
/* samples of the same index are stored in a layer */ |
if (templ->nr_samples > 1 && !img->interleaved_samples) |
num_layers *= templ->nr_samples; |
return num_layers; |
} |
static void |
img_init_layer_height(struct ilo_image *img, |
struct ilo_image_params *params) |
{ |
const struct pipe_resource *templ = params->templ; |
unsigned num_layers; |
if (img->walk != ILO_IMAGE_WALK_LAYER) |
return; |
num_layers = img_get_num_layers(img, params); |
if (num_layers <= 1) |
return; |
/* |
* From the Sandy Bridge PRM, volume 1 part 1, page 115: |
* |
* "The following equation is used for surface formats other than |
* compressed textures: |
* |
* QPitch = (h0 + h1 + 11j)" |
* |
* "The equation for compressed textures (BC* and FXT1 surface formats) |
* follows: |
* |
* QPitch = (h0 + h1 + 11j) / 4" |
* |
* "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the |
* value calculated in the equation above, for every other odd Surface |
* Height starting from 1 i.e. 1,5,9,13" |
* |
* From the Ivy Bridge PRM, volume 1 part 1, page 111-112: |
* |
* "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth |
* buffer and stencil buffer have an implied value of ARYSPC_FULL): |
* |
* QPitch = (h0 + h1 + 12j) |
* QPitch = (h0 + h1 + 12j) / 4 (compressed) |
* |
* (There are many typos or missing words here...)" |
* |
* To access the N-th slice, an offset of (Stride * QPitch * N) is added to |
* the base address. The PRM divides QPitch by 4 for compressed formats |
* because the block height for those formats are 4, and it wants QPitch to |
* mean the number of memory rows, as opposed to texel rows, between |
* slices. Since we use texel rows everywhere, we do not need to divide |
* QPitch by 4. |
*/ |
img->walk_layer_height = params->h0 + params->h1 + |
((ilo_dev_gen(params->dev) >= ILO_GEN(7)) ? 12 : 11) * img->align_j; |
if (ilo_dev_gen(params->dev) == ILO_GEN(6) && templ->nr_samples > 1 && |
img->height0 % 4 == 1) |
img->walk_layer_height += 4; |
params->max_y += img->walk_layer_height * (num_layers - 1); |
} |
static void |
img_init_lods(struct ilo_image *img, |
struct ilo_image_params *params) |
{ |
const struct pipe_resource *templ = params->templ; |
unsigned cur_x, cur_y; |
unsigned lv; |
cur_x = 0; |
cur_y = 0; |
for (lv = 0; lv <= templ->last_level; lv++) { |
unsigned lod_w, lod_h; |
img_get_slice_size(img, params, lv, &lod_w, &lod_h); |
img->lods[lv].x = cur_x; |
img->lods[lv].y = cur_y; |
img->lods[lv].slice_width = lod_w; |
img->lods[lv].slice_height = lod_h; |
switch (img->walk) { |
case ILO_IMAGE_WALK_LAYER: |
/* MIPLAYOUT_BELOW */ |
if (lv == 1) |
cur_x += lod_w; |
else |
cur_y += lod_h; |
break; |
case ILO_IMAGE_WALK_LOD: |
lod_h *= img_get_num_layers(img, params); |
if (lv == 1) |
cur_x += lod_w; |
else |
cur_y += lod_h; |
/* every LOD begins at tile boundaries */ |
if (templ->last_level > 0) { |
assert(img->format == PIPE_FORMAT_S8_UINT); |
cur_x = align(cur_x, 64); |
cur_y = align(cur_y, 64); |
} |
break; |
case ILO_IMAGE_WALK_3D: |
{ |
const unsigned num_slices = u_minify(templ->depth0, lv); |
const unsigned num_slices_per_row = 1 << lv; |
const unsigned num_rows = |
(num_slices + num_slices_per_row - 1) / num_slices_per_row; |
lod_w *= num_slices_per_row; |
lod_h *= num_rows; |
cur_y += lod_h; |
} |
break; |
} |
if (params->max_x < img->lods[lv].x + lod_w) |
params->max_x = img->lods[lv].x + lod_w; |
if (params->max_y < img->lods[lv].y + lod_h) |
params->max_y = img->lods[lv].y + lod_h; |
} |
if (img->walk == ILO_IMAGE_WALK_LAYER) { |
params->h0 = img->lods[0].slice_height; |
if (templ->last_level > 0) |
params->h1 = img->lods[1].slice_height; |
else |
img_get_slice_size(img, params, 1, &cur_x, ¶ms->h1); |
} |
} |
static void |
img_init_alignments(struct ilo_image *img, |
const struct ilo_image_params *params) |
{ |
const struct pipe_resource *templ = params->templ; |
/* |
* From the Sandy Bridge PRM, volume 1 part 1, page 113: |
* |
* "surface format align_i align_j |
* YUV 4:2:2 formats 4 *see below |
* BC1-5 4 4 |
* FXT1 8 4 |
* all other formats 4 *see below" |
* |
* "- align_j = 4 for any depth buffer |
* - align_j = 2 for separate stencil buffer |
* - align_j = 4 for any render target surface is multisampled (4x) |
* - align_j = 4 for any render target surface with Surface Vertical |
* Alignment = VALIGN_4 |
* - align_j = 2 for any render target surface with Surface Vertical |
* Alignment = VALIGN_2 |
* - align_j = 2 for all other render target surface |
* - align_j = 2 for any sampling engine surface with Surface Vertical |
* Alignment = VALIGN_2 |
* - align_j = 4 for any sampling engine surface with Surface Vertical |
* Alignment = VALIGN_4" |
* |
* From the Sandy Bridge PRM, volume 4 part 1, page 86: |
* |
* "This field (Surface Vertical Alignment) must be set to VALIGN_2 if |
* the Surface Format is 96 bits per element (BPE)." |
* |
* They can be rephrased as |
* |
* align_i align_j |
* compressed formats block width block height |
* PIPE_FORMAT_S8_UINT 4 2 |
* other depth/stencil formats 4 4 |
* 4x multisampled 4 4 |
* bpp 96 4 2 |
* others 4 2 or 4 |
*/ |
/* |
* From the Ivy Bridge PRM, volume 1 part 1, page 110: |
* |
* "surface defined by surface format align_i align_j |
* 3DSTATE_DEPTH_BUFFER D16_UNORM 8 4 |
* not D16_UNORM 4 4 |
* 3DSTATE_STENCIL_BUFFER N/A 8 8 |
* SURFACE_STATE BC*, ETC*, EAC* 4 4 |
* FXT1 8 4 |
* all others (set by SURFACE_STATE)" |
* |
* From the Ivy Bridge PRM, volume 4 part 1, page 63: |
* |
* "- This field (Surface Vertical Aligment) is intended to be set to |
* VALIGN_4 if the surface was rendered as a depth buffer, for a |
* multisampled (4x) render target, or for a multisampled (8x) |
* render target, since these surfaces support only alignment of 4. |
* - Use of VALIGN_4 for other surfaces is supported, but uses more |
* memory. |
* - This field must be set to VALIGN_4 for all tiled Y Render Target |
* surfaces. |
* - Value of 1 is not supported for format YCRCB_NORMAL (0x182), |
* YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190) |
* - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field |
* must be set to VALIGN_4." |
* - VALIGN_4 is not supported for surface format R32G32B32_FLOAT." |
* |
* "- This field (Surface Horizontal Aligment) is intended to be set to |
* HALIGN_8 only if the surface was rendered as a depth buffer with |
* Z16 format or a stencil buffer, since these surfaces support only |
* alignment of 8. |
* - Use of HALIGN_8 for other surfaces is supported, but uses more |
* memory. |
* - This field must be set to HALIGN_4 if the Surface Format is BC*. |
* - This field must be set to HALIGN_8 if the Surface Format is |
* FXT1." |
* |
* They can be rephrased as |
* |
* align_i align_j |
* compressed formats block width block height |
* PIPE_FORMAT_Z16_UNORM 8 4 |
* PIPE_FORMAT_S8_UINT 8 8 |
* other depth/stencil formats 4 4 |
* 2x or 4x multisampled 4 or 8 4 |
* tiled Y 4 or 8 4 (if rt) |
* PIPE_FORMAT_R32G32B32_FLOAT 4 or 8 2 |
* others 4 or 8 2 or 4 |
*/ |
if (params->compressed) { |
/* this happens to be the case */ |
img->align_i = img->block_width; |
img->align_j = img->block_height; |
} else if (templ->bind & PIPE_BIND_DEPTH_STENCIL) { |
if (ilo_dev_gen(params->dev) >= ILO_GEN(7)) { |
switch (img->format) { |
case PIPE_FORMAT_Z16_UNORM: |
img->align_i = 8; |
img->align_j = 4; |
break; |
case PIPE_FORMAT_S8_UINT: |
img->align_i = 8; |
img->align_j = 8; |
break; |
default: |
img->align_i = 4; |
img->align_j = 4; |
break; |
} |
} else { |
switch (img->format) { |
case PIPE_FORMAT_S8_UINT: |
img->align_i = 4; |
img->align_j = 2; |
break; |
default: |
img->align_i = 4; |
img->align_j = 4; |
break; |
} |
} |
} else { |
const bool valign_4 = |
(templ->nr_samples > 1) || |
(ilo_dev_gen(params->dev) >= ILO_GEN(8)) || |
(ilo_dev_gen(params->dev) >= ILO_GEN(7) && |
img->tiling == GEN6_TILING_Y && |
(templ->bind & PIPE_BIND_RENDER_TARGET)); |
if (ilo_dev_gen(params->dev) >= ILO_GEN(7) && |
ilo_dev_gen(params->dev) <= ILO_GEN(7.5) && valign_4) |
assert(img->format != PIPE_FORMAT_R32G32B32_FLOAT); |
img->align_i = 4; |
img->align_j = (valign_4) ? 4 : 2; |
} |
/* |
* the fact that align i and j are multiples of block width and height |
* respectively is what makes the size of the bo a multiple of the block |
* size, slices start at block boundaries, and many of the computations |
* work. |
*/ |
assert(img->align_i % img->block_width == 0); |
assert(img->align_j % img->block_height == 0); |
/* make sure align() works */ |
assert(util_is_power_of_two(img->align_i) && |
util_is_power_of_two(img->align_j)); |
assert(util_is_power_of_two(img->block_width) && |
util_is_power_of_two(img->block_height)); |
} |
static void |
img_init_tiling(struct ilo_image *img, |
const struct ilo_image_params *params) |
{ |
const struct pipe_resource *templ = params->templ; |
unsigned preferred_tilings = params->valid_tilings; |
/* no fencing nor BLT support */ |
if (preferred_tilings & ~IMAGE_TILING_W) |
preferred_tilings &= ~IMAGE_TILING_W; |
if (templ->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) { |
/* |
* heuristically set a minimum width/height for enabling tiling |
*/ |
if (img->width0 < 64 && (preferred_tilings & ~IMAGE_TILING_X)) |
preferred_tilings &= ~IMAGE_TILING_X; |
if ((img->width0 < 32 || img->height0 < 16) && |
(img->width0 < 16 || img->height0 < 32) && |
(preferred_tilings & ~IMAGE_TILING_Y)) |
preferred_tilings &= ~IMAGE_TILING_Y; |
} else { |
/* force linear if we are not sure where the texture is bound to */ |
if (preferred_tilings & IMAGE_TILING_NONE) |
preferred_tilings &= IMAGE_TILING_NONE; |
} |
/* prefer tiled over linear */ |
if (preferred_tilings & IMAGE_TILING_Y) |
img->tiling = GEN6_TILING_Y; |
else if (preferred_tilings & IMAGE_TILING_X) |
img->tiling = GEN6_TILING_X; |
else if (preferred_tilings & IMAGE_TILING_W) |
img->tiling = GEN8_TILING_W; |
else |
img->tiling = GEN6_TILING_NONE; |
} |
static void |
img_init_walk_gen7(struct ilo_image *img, |
const struct ilo_image_params *params) |
{ |
const struct pipe_resource *templ = params->templ; |
/* |
* It is not explicitly states, but render targets are expected to be |
* UMS/CMS (samples non-interleaved) and depth/stencil buffers are expected |
* to be IMS (samples interleaved). |
* |
* See "Multisampled Surface Storage Format" field of SURFACE_STATE. |
*/ |
if (templ->bind & PIPE_BIND_DEPTH_STENCIL) { |
/* |
* From the Ivy Bridge PRM, volume 1 part 1, page 111: |
* |
* "note that the depth buffer and stencil buffer have an implied |
* value of ARYSPC_FULL" |
*/ |
img->walk = (templ->target == PIPE_TEXTURE_3D) ? |
ILO_IMAGE_WALK_3D : ILO_IMAGE_WALK_LAYER; |
img->interleaved_samples = true; |
} else { |
/* |
* From the Ivy Bridge PRM, volume 4 part 1, page 66: |
* |
* "If Multisampled Surface Storage Format is MSFMT_MSS and Number |
* of Multisamples is not MULTISAMPLECOUNT_1, this field (Surface |
* Array Spacing) must be set to ARYSPC_LOD0." |
* |
* As multisampled resources are not mipmapped, we never use |
* ARYSPC_FULL for them. |
*/ |
if (templ->nr_samples > 1) |
assert(templ->last_level == 0); |
img->walk = |
(templ->target == PIPE_TEXTURE_3D) ? ILO_IMAGE_WALK_3D : |
(templ->last_level > 0) ? ILO_IMAGE_WALK_LAYER : |
ILO_IMAGE_WALK_LOD; |
img->interleaved_samples = false; |
} |
} |
static void |
img_init_walk_gen6(struct ilo_image *img, |
const struct ilo_image_params *params) |
{ |
/* |
* From the Sandy Bridge PRM, volume 1 part 1, page 115: |
* |
* "The separate stencil buffer does not support mip mapping, thus the |
* storage for LODs other than LOD 0 is not needed. The following |
* QPitch equation applies only to the separate stencil buffer: |
* |
* QPitch = h_0" |
* |
* GEN6 does not support compact spacing otherwise. |
*/ |
img->walk = |
(params->templ->target == PIPE_TEXTURE_3D) ? ILO_IMAGE_WALK_3D : |
(img->format == PIPE_FORMAT_S8_UINT) ? ILO_IMAGE_WALK_LOD : |
ILO_IMAGE_WALK_LAYER; |
/* GEN6 supports only interleaved samples */ |
img->interleaved_samples = true; |
} |
static void |
img_init_walk(struct ilo_image *img, |
const struct ilo_image_params *params) |
{ |
if (ilo_dev_gen(params->dev) >= ILO_GEN(7)) |
img_init_walk_gen7(img, params); |
else |
img_init_walk_gen6(img, params); |
} |
static unsigned |
img_get_valid_tilings(const struct ilo_image *img, |
const struct ilo_image_params *params) |
{ |
const struct pipe_resource *templ = params->templ; |
const enum pipe_format format = img->format; |
unsigned valid_tilings = params->valid_tilings; |
/* |
* From the Sandy Bridge PRM, volume 1 part 2, page 32: |
* |
* "Display/Overlay Y-Major not supported. |
* X-Major required for Async Flips" |
*/ |
if (unlikely(templ->bind & PIPE_BIND_SCANOUT)) |
valid_tilings &= IMAGE_TILING_X; |
/* |
* From the Sandy Bridge PRM, volume 3 part 2, page 158: |
* |
* "The cursor surface address must be 4K byte aligned. The cursor must |
* be in linear memory, it cannot be tiled." |
*/ |
if (unlikely(templ->bind & (PIPE_BIND_CURSOR | PIPE_BIND_LINEAR))) |
valid_tilings &= IMAGE_TILING_NONE; |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 318: |
* |
* "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear |
* Depth Buffer is not supported." |
* |
* "The Depth Buffer, if tiled, must use Y-Major tiling." |
* |
* From the Sandy Bridge PRM, volume 1 part 2, page 22: |
* |
* "W-Major Tile Format is used for separate stencil." |
*/ |
if (templ->bind & PIPE_BIND_DEPTH_STENCIL) { |
switch (format) { |
case PIPE_FORMAT_S8_UINT: |
valid_tilings &= IMAGE_TILING_W; |
break; |
default: |
valid_tilings &= IMAGE_TILING_Y; |
break; |
} |
} |
if (templ->bind & PIPE_BIND_RENDER_TARGET) { |
/* |
* From the Sandy Bridge PRM, volume 1 part 2, page 32: |
* |
* "NOTE: 128BPE Format Color buffer ( render target ) MUST be |
* either TileX or Linear." |
* |
* From the Haswell PRM, volume 5, page 32: |
* |
* "NOTE: 128 BPP format color buffer (render target) supports |
* Linear, TiledX and TiledY." |
*/ |
if (ilo_dev_gen(params->dev) < ILO_GEN(7.5) && img->block_size == 16) |
valid_tilings &= ~IMAGE_TILING_Y; |
/* |
* From the Ivy Bridge PRM, volume 4 part 1, page 63: |
* |
* "This field (Surface Vertical Aligment) must be set to VALIGN_4 |
* for all tiled Y Render Target surfaces." |
* |
* "VALIGN_4 is not supported for surface format R32G32B32_FLOAT." |
*/ |
if (ilo_dev_gen(params->dev) >= ILO_GEN(7) && |
ilo_dev_gen(params->dev) <= ILO_GEN(7.5) && |
img->format == PIPE_FORMAT_R32G32B32_FLOAT) |
valid_tilings &= ~IMAGE_TILING_Y; |
valid_tilings &= ~IMAGE_TILING_W; |
} |
if (templ->bind & PIPE_BIND_SAMPLER_VIEW) { |
if (ilo_dev_gen(params->dev) < ILO_GEN(8)) |
valid_tilings &= ~IMAGE_TILING_W; |
} |
/* no conflicting binding flags */ |
assert(valid_tilings); |
return valid_tilings; |
} |
static void |
img_init_size_and_format(struct ilo_image *img, |
struct ilo_image_params *params) |
{ |
const struct pipe_resource *templ = params->templ; |
enum pipe_format format = templ->format; |
bool require_separate_stencil = false; |
img->width0 = templ->width0; |
img->height0 = templ->height0; |
img->depth0 = templ->depth0; |
img->sample_count = (templ->nr_samples) ? templ->nr_samples : 1; |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 317: |
* |
* "This field (Separate Stencil Buffer Enable) must be set to the same |
* value (enabled or disabled) as Hierarchical Depth Buffer Enable." |
* |
* GEN7+ requires separate stencil buffers. |
*/ |
if (templ->bind & PIPE_BIND_DEPTH_STENCIL) { |
if (ilo_dev_gen(params->dev) >= ILO_GEN(7)) |
require_separate_stencil = true; |
else |
require_separate_stencil = (img->aux.type == ILO_IMAGE_AUX_HIZ); |
} |
switch (format) { |
case PIPE_FORMAT_ETC1_RGB8: |
format = PIPE_FORMAT_R8G8B8X8_UNORM; |
break; |
case PIPE_FORMAT_Z24_UNORM_S8_UINT: |
if (require_separate_stencil) { |
format = PIPE_FORMAT_Z24X8_UNORM; |
img->separate_stencil = true; |
} |
break; |
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: |
if (require_separate_stencil) { |
format = PIPE_FORMAT_Z32_FLOAT; |
img->separate_stencil = true; |
} |
break; |
default: |
break; |
} |
img->format = format; |
img->block_width = util_format_get_blockwidth(format); |
img->block_height = util_format_get_blockheight(format); |
img->block_size = util_format_get_blocksize(format); |
params->valid_tilings = img_get_valid_tilings(img, params); |
params->compressed = util_format_is_compressed(img->format); |
} |
static bool |
img_want_mcs(const struct ilo_image *img, |
const struct ilo_image_params *params) |
{ |
const struct pipe_resource *templ = params->templ; |
bool want_mcs = false; |
/* MCS is for RT on GEN7+ */ |
if (ilo_dev_gen(params->dev) < ILO_GEN(7)) |
return false; |
if (templ->target != PIPE_TEXTURE_2D || |
!(templ->bind & PIPE_BIND_RENDER_TARGET)) |
return false; |
/* |
* From the Ivy Bridge PRM, volume 4 part 1, page 77: |
* |
* "For Render Target and Sampling Engine Surfaces:If the surface is |
* multisampled (Number of Multisamples any value other than |
* MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled." |
* |
* "This field must be set to 0 for all SINT MSRTs when all RT channels |
* are not written" |
*/ |
if (templ->nr_samples > 1 && !util_format_is_pure_sint(templ->format)) { |
want_mcs = true; |
} else if (templ->nr_samples <= 1) { |
/* |
* From the Ivy Bridge PRM, volume 2 part 1, page 326: |
* |
* "When MCS is buffer is used for color clear of non-multisampler |
* render target, the following restrictions apply. |
* - Support is limited to tiled render targets. |
* - Support is for non-mip-mapped and non-array surface types |
* only. |
* - Clear is supported only on the full RT; i.e., no partial clear |
* or overlapping clears. |
* - MCS buffer for non-MSRT is supported only for RT formats |
* 32bpp, 64bpp and 128bpp. |
* ..." |
*/ |
if (img->tiling != GEN6_TILING_NONE && |
templ->last_level == 0 && templ->array_size == 1) { |
switch (img->block_size) { |
case 4: |
case 8: |
case 16: |
want_mcs = true; |
break; |
default: |
break; |
} |
} |
} |
return want_mcs; |
} |
static bool |
img_want_hiz(const struct ilo_image *img, |
const struct ilo_image_params *params) |
{ |
const struct pipe_resource *templ = params->templ; |
const struct util_format_description *desc = |
util_format_description(templ->format); |
if (ilo_debug & ILO_DEBUG_NOHIZ) |
return false; |
if (!(templ->bind & PIPE_BIND_DEPTH_STENCIL)) |
return false; |
if (!util_format_has_depth(desc)) |
return false; |
/* no point in having HiZ */ |
if (templ->usage == PIPE_USAGE_STAGING) |
return false; |
/* |
* As can be seen in img_calculate_hiz_size(), HiZ may not be enabled |
* for every level. This is generally fine except on GEN6, where HiZ and |
* separate stencil are enabled and disabled at the same time. When the |
* format is PIPE_FORMAT_Z32_FLOAT_S8X24_UINT, enabling and disabling HiZ |
* can result in incompatible formats. |
*/ |
if (ilo_dev_gen(params->dev) == ILO_GEN(6) && |
templ->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && |
templ->last_level) |
return false; |
return true; |
} |
static void |
img_init_aux(struct ilo_image *img, |
const struct ilo_image_params *params) |
{ |
if (img_want_hiz(img, params)) |
img->aux.type = ILO_IMAGE_AUX_HIZ; |
else if (img_want_mcs(img, params)) |
img->aux.type = ILO_IMAGE_AUX_MCS; |
} |
static void |
img_align(struct ilo_image *img, struct ilo_image_params *params) |
{ |
const struct pipe_resource *templ = params->templ; |
int align_w = 1, align_h = 1, pad_h = 0; |
/* |
* From the Sandy Bridge PRM, volume 1 part 1, page 118: |
* |
* "To determine the necessary padding on the bottom and right side of |
* the surface, refer to the table in Section 7.18.3.4 for the i and j |
* parameters for the surface format in use. The surface must then be |
* extended to the next multiple of the alignment unit size in each |
* dimension, and all texels contained in this extended surface must |
* have valid GTT entries." |
* |
* "For cube surfaces, an additional two rows of padding are required |
* at the bottom of the surface. This must be ensured regardless of |
* whether the surface is stored tiled or linear. This is due to the |
* potential rotation of cache line orientation from memory to cache." |
* |
* "For compressed textures (BC* and FXT1 surface formats), padding at |
* the bottom of the surface is to an even compressed row, which is |
* equal to a multiple of 8 uncompressed texel rows. Thus, for padding |
* purposes, these surfaces behave as if j = 8 only for surface |
* padding purposes. The value of 4 for j still applies for mip level |
* alignment and QPitch calculation." |
*/ |
if (templ->bind & PIPE_BIND_SAMPLER_VIEW) { |
align_w = MAX2(align_w, img->align_i); |
align_h = MAX2(align_h, img->align_j); |
if (templ->target == PIPE_TEXTURE_CUBE) |
pad_h += 2; |
if (params->compressed) |
align_h = MAX2(align_h, img->align_j * 2); |
} |
/* |
* From the Sandy Bridge PRM, volume 1 part 1, page 118: |
* |
* "If the surface contains an odd number of rows of data, a final row |
* below the surface must be allocated." |
*/ |
if (templ->bind & PIPE_BIND_RENDER_TARGET) |
align_h = MAX2(align_h, 2); |
/* |
* Depth Buffer Clear/Resolve works in 8x4 sample blocks. Pad to allow HiZ |
* for unaligned non-mipmapped and non-array images. |
*/ |
if (img->aux.type == ILO_IMAGE_AUX_HIZ && |
templ->last_level == 0 && |
templ->array_size == 1 && |
templ->depth0 == 1) { |
align_w = MAX2(align_w, 8); |
align_h = MAX2(align_h, 4); |
} |
params->max_x = align(params->max_x, align_w); |
params->max_y = align(params->max_y + pad_h, align_h); |
} |
/* note that this may force the texture to be linear */ |
static void |
img_calculate_bo_size(struct ilo_image *img, |
const struct ilo_image_params *params) |
{ |
assert(params->max_x % img->block_width == 0); |
assert(params->max_y % img->block_height == 0); |
assert(img->walk_layer_height % img->block_height == 0); |
img->bo_stride = |
(params->max_x / img->block_width) * img->block_size; |
img->bo_height = params->max_y / img->block_height; |
while (true) { |
unsigned w = img->bo_stride, h = img->bo_height; |
unsigned align_w, align_h; |
/* |
* From the Haswell PRM, volume 5, page 163: |
* |
* "For linear surfaces, additional padding of 64 bytes is required |
* at the bottom of the surface. This is in addition to the padding |
* required above." |
*/ |
if (ilo_dev_gen(params->dev) >= ILO_GEN(7.5) && |
(params->templ->bind & PIPE_BIND_SAMPLER_VIEW) && |
img->tiling == GEN6_TILING_NONE) |
h += (64 + img->bo_stride - 1) / img->bo_stride; |
/* |
* From the Sandy Bridge PRM, volume 4 part 1, page 81: |
* |
* "- For linear render target surfaces, the pitch must be a |
* multiple of the element size for non-YUV surface formats. |
* Pitch must be a multiple of 2 * element size for YUV surface |
* formats. |
* - For other linear surfaces, the pitch can be any multiple of |
* bytes. |
* - For tiled surfaces, the pitch must be a multiple of the tile |
* width." |
* |
* Different requirements may exist when the bo is used in different |
* places, but our alignments here should be good enough that we do not |
* need to check params->templ->bind. |
*/ |
switch (img->tiling) { |
case GEN6_TILING_X: |
align_w = 512; |
align_h = 8; |
break; |
case GEN6_TILING_Y: |
align_w = 128; |
align_h = 32; |
break; |
case GEN8_TILING_W: |
/* |
* From the Sandy Bridge PRM, volume 1 part 2, page 22: |
* |
* "A 4KB tile is subdivided into 8-high by 8-wide array of |
* Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8 |
* bytes." |
*/ |
align_w = 64; |
align_h = 64; |
break; |
default: |
assert(img->tiling == GEN6_TILING_NONE); |
/* some good enough values */ |
align_w = 64; |
align_h = 2; |
break; |
} |
w = align(w, align_w); |
h = align(h, align_h); |
/* make sure the bo is mappable */ |
if (img->tiling != GEN6_TILING_NONE) { |
/* |
* Usually only the first 256MB of the GTT is mappable. |
* |
* See also how intel_context::max_gtt_map_object_size is calculated. |
*/ |
const size_t mappable_gtt_size = 256 * 1024 * 1024; |
/* |
* Be conservative. We may be able to switch from VALIGN_4 to |
* VALIGN_2 if the image was Y-tiled, but let's keep it simple. |
*/ |
if (mappable_gtt_size / w / 4 < h) { |
if (params->valid_tilings & IMAGE_TILING_NONE) { |
img->tiling = GEN6_TILING_NONE; |
/* MCS support for non-MSRTs is limited to tiled RTs */ |
if (img->aux.type == ILO_IMAGE_AUX_MCS && |
params->templ->nr_samples <= 1) |
img->aux.type = ILO_IMAGE_AUX_NONE; |
continue; |
} else { |
ilo_warn("cannot force texture to be linear\n"); |
} |
} |
} |
img->bo_stride = w; |
img->bo_height = h; |
break; |
} |
} |
static void |
img_calculate_hiz_size(struct ilo_image *img, |
const struct ilo_image_params *params) |
{ |
const struct pipe_resource *templ = params->templ; |
const unsigned hz_align_j = 8; |
enum ilo_image_walk_type hz_walk; |
unsigned hz_width, hz_height, lv; |
unsigned hz_clear_w, hz_clear_h; |
assert(img->aux.type == ILO_IMAGE_AUX_HIZ); |
assert(img->walk == ILO_IMAGE_WALK_LAYER || |
img->walk == ILO_IMAGE_WALK_3D); |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 312: |
* |
* "The hierarchical depth buffer does not support the LOD field, it is |
* assumed by hardware to be zero. A separate hierarachical depth |
* buffer is required for each LOD used, and the corresponding |
* buffer's state delivered to hardware each time a new depth buffer |
* state with modified LOD is delivered." |
* |
* We will put all LODs in a single bo with ILO_IMAGE_WALK_LOD. |
*/ |
if (ilo_dev_gen(params->dev) >= ILO_GEN(7)) |
hz_walk = img->walk; |
else |
hz_walk = ILO_IMAGE_WALK_LOD; |
/* |
* See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge |
* PRM, volume 2 part 1, page 312-313. |
* |
* It seems HiZ buffer is aligned to 8x8, with every two rows packed into a |
* memory row. |
*/ |
switch (hz_walk) { |
case ILO_IMAGE_WALK_LAYER: |
{ |
const unsigned h0 = align(params->h0, hz_align_j); |
const unsigned h1 = align(params->h1, hz_align_j); |
const unsigned htail = |
((ilo_dev_gen(params->dev) >= ILO_GEN(7)) ? 12 : 11) * hz_align_j; |
const unsigned hz_qpitch = h0 + h1 + htail; |
hz_width = align(img->lods[0].slice_width, 16); |
hz_height = hz_qpitch * templ->array_size / 2; |
if (ilo_dev_gen(params->dev) >= ILO_GEN(7)) |
hz_height = align(hz_height, 8); |
img->aux.walk_layer_height = hz_qpitch; |
} |
break; |
case ILO_IMAGE_WALK_LOD: |
{ |
unsigned lod_tx[PIPE_MAX_TEXTURE_LEVELS]; |
unsigned lod_ty[PIPE_MAX_TEXTURE_LEVELS]; |
unsigned cur_tx, cur_ty; |
/* figure out the tile offsets of LODs */ |
hz_width = 0; |
hz_height = 0; |
cur_tx = 0; |
cur_ty = 0; |
for (lv = 0; lv <= templ->last_level; lv++) { |
unsigned tw, th; |
lod_tx[lv] = cur_tx; |
lod_ty[lv] = cur_ty; |
tw = align(img->lods[lv].slice_width, 16); |
th = align(img->lods[lv].slice_height, hz_align_j) * |
templ->array_size / 2; |
/* convert to Y-tiles */ |
tw = align(tw, 128) / 128; |
th = align(th, 32) / 32; |
if (hz_width < cur_tx + tw) |
hz_width = cur_tx + tw; |
if (hz_height < cur_ty + th) |
hz_height = cur_ty + th; |
if (lv == 1) |
cur_tx += tw; |
else |
cur_ty += th; |
} |
/* convert tile offsets to memory offsets */ |
for (lv = 0; lv <= templ->last_level; lv++) { |
img->aux.walk_lod_offsets[lv] = |
(lod_ty[lv] * hz_width + lod_tx[lv]) * 4096; |
} |
hz_width *= 128; |
hz_height *= 32; |
} |
break; |
case ILO_IMAGE_WALK_3D: |
hz_width = align(img->lods[0].slice_width, 16); |
hz_height = 0; |
for (lv = 0; lv <= templ->last_level; lv++) { |
const unsigned h = align(img->lods[lv].slice_height, hz_align_j); |
/* according to the formula, slices are packed together vertically */ |
hz_height += h * u_minify(templ->depth0, lv); |
} |
hz_height /= 2; |
break; |
default: |
assert(!"unknown HiZ walk"); |
hz_width = 0; |
hz_height = 0; |
break; |
} |
/* |
* In hiz_align_fb(), we will align the LODs to 8x4 sample blocks. |
* Experiments on Haswell show that aligning the RECTLIST primitive and |
* 3DSTATE_DRAWING_RECTANGLE alone are not enough. The LOD sizes must be |
* aligned. |
*/ |
hz_clear_w = 8; |
hz_clear_h = 4; |
switch (templ->nr_samples) { |
case 0: |
case 1: |
default: |
break; |
case 2: |
hz_clear_w /= 2; |
break; |
case 4: |
hz_clear_w /= 2; |
hz_clear_h /= 2; |
break; |
case 8: |
hz_clear_w /= 4; |
hz_clear_h /= 2; |
break; |
case 16: |
hz_clear_w /= 4; |
hz_clear_h /= 4; |
break; |
} |
for (lv = 0; lv <= templ->last_level; lv++) { |
if (u_minify(img->width0, lv) % hz_clear_w || |
u_minify(img->height0, lv) % hz_clear_h) |
break; |
img->aux.enables |= 1 << lv; |
} |
/* we padded to allow this in img_align() */ |
if (templ->last_level == 0 && templ->array_size == 1 && templ->depth0 == 1) |
img->aux.enables |= 0x1; |
/* align to Y-tile */ |
img->aux.bo_stride = align(hz_width, 128); |
img->aux.bo_height = align(hz_height, 32); |
} |
static void |
img_calculate_mcs_size(struct ilo_image *img, |
const struct ilo_image_params *params) |
{ |
const struct pipe_resource *templ = params->templ; |
int mcs_width, mcs_height, mcs_cpp; |
int downscale_x, downscale_y; |
assert(img->aux.type == ILO_IMAGE_AUX_MCS); |
if (templ->nr_samples > 1) { |
/* |
* From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear |
* rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA. The |
* need of scale down could be that the clear rectangle is used to clear |
* the MCS instead of the RT. |
* |
* For 8X MSAA, we need 32 bits in MCS for every pixel in the RT. The |
* 2x2 factor could come from that the hardware writes 128 bits (an |
* OWord) at a time, and the OWord in MCS maps to a 2x2 pixel block in |
* the RT. For 4X MSAA, we need 8 bits in MCS for every pixel in the |
* RT. Similarly, we could reason that an OWord in 4X MCS maps to a 8x2 |
* pixel block in the RT. |
*/ |
switch (templ->nr_samples) { |
case 2: |
case 4: |
downscale_x = 8; |
downscale_y = 2; |
mcs_cpp = 1; |
break; |
case 8: |
downscale_x = 2; |
downscale_y = 2; |
mcs_cpp = 4; |
break; |
case 16: |
downscale_x = 2; |
downscale_y = 1; |
mcs_cpp = 8; |
break; |
default: |
assert(!"unsupported sample count"); |
return; |
break; |
} |
/* |
* It also appears that the 2x2 subspans generated by the scaled-down |
* clear rectangle cannot be masked. The scale-down clear rectangle |
* thus must be aligned to 2x2, and we need to pad. |
*/ |
mcs_width = align(img->width0, downscale_x * 2); |
mcs_height = align(img->height0, downscale_y * 2); |
} else { |
/* |
* From the Ivy Bridge PRM, volume 2 part 1, page 327: |
* |
* " Pixels Lines |
* TiledY RT CL |
* bpp |
* 32 8 4 |
* 64 4 4 |
* 128 2 4 |
* |
* TiledX RT CL |
* bpp |
* 32 16 2 |
* 64 8 2 |
* 128 4 2" |
* |
* This table and the two following tables define the RT alignments, the |
* clear rectangle alignments, and the clear rectangle scale factors. |
* Viewing the RT alignments as the sizes of 128-byte blocks, we can see |
* that the clear rectangle alignments are 16x32 blocks, and the clear |
* rectangle scale factors are 8x16 blocks. |
* |
* For non-MSAA RT, we need 1 bit in MCS for every 128-byte block in the |
* RT. Similar to the MSAA cases, we can argue that an OWord maps to |
* 8x16 blocks. |
* |
* One problem with this reasoning is that a Y-tile in MCS has 8x32 |
* OWords and maps to 64x512 128-byte blocks. This differs from i965, |
* which says that a Y-tile maps to 128x256 blocks (\see |
* intel_get_non_msrt_mcs_alignment). It does not really change |
* anything except for the size of the allocated MCS. Let's see if we |
* hit out-of-bound access. |
*/ |
switch (img->tiling) { |
case GEN6_TILING_X: |
downscale_x = 64 / img->block_size; |
downscale_y = 2; |
break; |
case GEN6_TILING_Y: |
downscale_x = 32 / img->block_size; |
downscale_y = 4; |
break; |
default: |
assert(!"unsupported tiling mode"); |
return; |
break; |
} |
downscale_x *= 8; |
downscale_y *= 16; |
/* |
* From the Haswell PRM, volume 7, page 652: |
* |
* "Clear rectangle must be aligned to two times the number of |
* pixels in the table shown below due to 16X16 hashing across the |
* slice." |
* |
* The scaled-down clear rectangle must be aligned to 4x4 instead of |
* 2x2, and we need to pad. |
*/ |
mcs_width = align(img->width0, downscale_x * 4) / downscale_x; |
mcs_height = align(img->height0, downscale_y * 4) / downscale_y; |
mcs_cpp = 16; /* an OWord */ |
} |
img->aux.enables = (1 << (templ->last_level + 1)) - 1; |
/* align to Y-tile */ |
img->aux.bo_stride = align(mcs_width * mcs_cpp, 128); |
img->aux.bo_height = align(mcs_height, 32); |
} |
static void |
img_init(struct ilo_image *img, |
struct ilo_image_params *params) |
{ |
/* there are hard dependencies between every function here */ |
img_init_aux(img, params); |
img_init_size_and_format(img, params); |
img_init_walk(img, params); |
img_init_tiling(img, params); |
img_init_alignments(img, params); |
img_init_lods(img, params); |
img_init_layer_height(img, params); |
img_align(img, params); |
img_calculate_bo_size(img, params); |
img->scanout = (params->templ->bind & PIPE_BIND_SCANOUT); |
switch (img->aux.type) { |
case ILO_IMAGE_AUX_HIZ: |
img_calculate_hiz_size(img, params); |
break; |
case ILO_IMAGE_AUX_MCS: |
img_calculate_mcs_size(img, params); |
break; |
default: |
break; |
} |
} |
/** |
* The texutre is for transfer only. We can define our own layout to save |
* space. |
*/ |
static void |
img_init_for_transfer(struct ilo_image *img, |
const struct ilo_dev *dev, |
const struct pipe_resource *templ) |
{ |
const unsigned num_layers = (templ->target == PIPE_TEXTURE_3D) ? |
templ->depth0 : templ->array_size; |
unsigned layer_width, layer_height; |
assert(templ->last_level == 0); |
assert(templ->nr_samples <= 1); |
img->aux.type = ILO_IMAGE_AUX_NONE; |
img->width0 = templ->width0; |
img->height0 = templ->height0; |
img->depth0 = templ->depth0; |
img->sample_count = 1; |
img->format = templ->format; |
img->block_width = util_format_get_blockwidth(templ->format); |
img->block_height = util_format_get_blockheight(templ->format); |
img->block_size = util_format_get_blocksize(templ->format); |
img->walk = ILO_IMAGE_WALK_LOD; |
img->tiling = GEN6_TILING_NONE; |
img->align_i = img->block_width; |
img->align_j = img->block_height; |
assert(util_is_power_of_two(img->block_width) && |
util_is_power_of_two(img->block_height)); |
/* use packed layout */ |
layer_width = align(templ->width0, img->align_i); |
layer_height = align(templ->height0, img->align_j); |
img->lods[0].slice_width = layer_width; |
img->lods[0].slice_height = layer_height; |
img->bo_stride = (layer_width / img->block_width) * img->block_size; |
img->bo_stride = align(img->bo_stride, 64); |
img->bo_height = (layer_height / img->block_height) * num_layers; |
} |
/** |
* Initialize the image. Callers should zero-initialize \p img first. |
*/ |
void ilo_image_init(struct ilo_image *img, |
const struct ilo_dev *dev, |
const struct pipe_resource *templ) |
{ |
struct ilo_image_params params; |
bool transfer_only; |
/* use transfer layout when the texture is never bound to GPU */ |
transfer_only = !(templ->bind & ~(PIPE_BIND_TRANSFER_WRITE | |
PIPE_BIND_TRANSFER_READ)); |
if (transfer_only && templ->last_level == 0 && templ->nr_samples <= 1) { |
img_init_for_transfer(img, dev, templ); |
return; |
} |
memset(¶ms, 0, sizeof(params)); |
params.dev = dev; |
params.templ = templ; |
params.valid_tilings = IMAGE_TILING_ALL; |
img_init(img, ¶ms); |
} |
bool |
ilo_image_init_for_imported(struct ilo_image *img, |
const struct ilo_dev *dev, |
const struct pipe_resource *templ, |
enum gen_surface_tiling tiling, |
unsigned bo_stride) |
{ |
struct ilo_image_params params; |
if ((tiling == GEN6_TILING_X && bo_stride % 512) || |
(tiling == GEN6_TILING_Y && bo_stride % 128) || |
(tiling == GEN8_TILING_W && bo_stride % 64)) |
return false; |
memset(¶ms, 0, sizeof(params)); |
params.dev = dev; |
params.templ = templ; |
params.valid_tilings = 1 << tiling; |
img_init(img, ¶ms); |
assert(img->tiling == tiling); |
if (img->bo_stride > bo_stride) |
return false; |
img->bo_stride = bo_stride; |
/* assume imported RTs are also scanouts */ |
if (!img->scanout) |
img->scanout = (templ->bind & PIPE_BIND_RENDER_TARGET); |
return true; |
} |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_image.h |
---|
0,0 → 1,341 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2014 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_IMAGE_H |
#define ILO_IMAGE_H |
#include "genhw/genhw.h" |
#include "intel_winsys.h" |
#include "ilo_core.h" |
#include "ilo_dev.h" |
/**
 * Kind of auxiliary buffer attached to an image.
 */
enum ilo_image_aux_type {
   /* no auxiliary buffer; also the value of a zero-initialized image */
   ILO_IMAGE_AUX_NONE = 0,
   /* HiZ buffer */
   ILO_IMAGE_AUX_HIZ,
   /* MCS buffer */
   ILO_IMAGE_AUX_MCS,
};
/**
 * How the slices of an image are laid out in memory.
 */
enum ilo_image_walk_type {
   /**
    * LODs of each array layer are first packed together in MIPLAYOUT_BELOW.
    * Array layers are then stacked together vertically.
    *
    * This can be used for mipmapped 2D textures.
    */
   ILO_IMAGE_WALK_LAYER = 0,

   /**
    * Array layers of each LOD are first stacked together vertically and
    * tightly.  LODs are then packed together in MIPLAYOUT_BELOW with each LOD
    * starting at page boundaries.
    *
    * This is usually used for non-mipmapped 2D textures, as multiple LODs are
    * not supported natively.
    */
   ILO_IMAGE_WALK_LOD,

   /**
    * 3D slices of each LOD are first packed together horizontally and tightly
    * with wrapping.  LODs are then stacked together vertically and tightly.
    *
    * This is used for 3D textures.
    */
   ILO_IMAGE_WALK_3D,
};
/**
 * Position and slice size of one LOD.
 *
 * When the walk type is ILO_IMAGE_WALK_LAYER, there is only a slice in each
 * LOD and this is used to describe LODs in the first array layer.  Otherwise,
 * there can be multiple slices in each LOD and this is used to describe the
 * first slice in each LOD.
 */
struct ilo_image_lod {
   /* physical position in pixels */
   unsigned x, y;

   /* physical size of a slice in pixels */
   unsigned slice_width, slice_height;
};
/** |
* Texture layout. |
*/ |
struct ilo_image { |
/* size, format, etc for programming hardware states */ |
unsigned width0; |
unsigned height0; |
unsigned depth0; |
unsigned sample_count; |
enum pipe_format format; |
bool separate_stencil; |
/* |
* width, height, and size of pixel blocks for conversion between pixel |
* positions and memory offsets |
*/ |
unsigned block_width; |
unsigned block_height; |
unsigned block_size; |
enum ilo_image_walk_type walk; |
bool interleaved_samples; |
enum gen_surface_tiling tiling; |
/* physical LOD slice alignments */ |
unsigned align_i; |
unsigned align_j; |
struct ilo_image_lod lods[PIPE_MAX_TEXTURE_LEVELS]; |
/* physical layer height for ILO_IMAGE_WALK_LAYER */ |
unsigned walk_layer_height; |
/* distance in bytes between two pixel block rows */ |
unsigned bo_stride; |
/* number of pixel block rows */ |
unsigned bo_height; |
bool scanout; |
struct intel_bo *bo; |
struct { |
enum ilo_image_aux_type type; |
/* bitmask of levels that can use aux */ |
unsigned enables; |
/* LOD offsets for ILO_IMAGE_WALK_LOD */ |
unsigned walk_lod_offsets[PIPE_MAX_TEXTURE_LEVELS]; |
unsigned walk_layer_height; |
unsigned bo_stride; |
unsigned bo_height; |
struct intel_bo *bo; |
} aux; |
}; |
struct pipe_resource;

/* Initialize \p img from \p templ; \p img should be zero-initialized first. */
void ilo_image_init(struct ilo_image *img,
                    const struct ilo_dev *dev,
                    const struct pipe_resource *templ);

/* Initialize \p img for a bo imported with a fixed tiling and stride. */
bool ilo_image_init_for_imported(struct ilo_image *img,
                                 const struct ilo_dev *dev,
                                 const struct pipe_resource *templ,
                                 enum gen_surface_tiling tiling,
                                 unsigned bo_stride);
static inline void |
ilo_image_cleanup(struct ilo_image *img) |
{ |
intel_bo_unref(img->bo); |
intel_bo_unref(img->aux.bo); |
} |
static inline void |
ilo_image_set_bo(struct ilo_image *img, struct intel_bo *bo) |
{ |
intel_bo_unref(img->bo); |
img->bo = intel_bo_ref(bo); |
} |
static inline void |
ilo_image_set_aux_bo(struct ilo_image *img, struct intel_bo *bo) |
{ |
intel_bo_unref(img->aux.bo); |
img->aux.bo = intel_bo_ref(bo); |
} |
static inline bool |
ilo_image_can_enable_aux(const struct ilo_image *img, unsigned level) |
{ |
return (img->aux.bo && (img->aux.enables & (1 << level))); |
} |
/** |
* Convert from pixel position to 2D memory offset. |
*/ |
static inline void |
ilo_image_pos_to_mem(const struct ilo_image *img, |
unsigned pos_x, unsigned pos_y, |
unsigned *mem_x, unsigned *mem_y) |
{ |
assert(pos_x % img->block_width == 0); |
assert(pos_y % img->block_height == 0); |
*mem_x = pos_x / img->block_width * img->block_size; |
*mem_y = pos_y / img->block_height; |
} |
/** |
* Convert from 2D memory offset to linear offset. |
*/ |
static inline unsigned |
ilo_image_mem_to_linear(const struct ilo_image *img, |
unsigned mem_x, unsigned mem_y) |
{ |
return mem_y * img->bo_stride + mem_x; |
} |
/** |
* Convert from 2D memory offset to raw offset. |
*/ |
static inline unsigned |
ilo_image_mem_to_raw(const struct ilo_image *img, |
unsigned mem_x, unsigned mem_y) |
{ |
unsigned tile_w, tile_h; |
switch (img->tiling) { |
case GEN6_TILING_NONE: |
tile_w = 1; |
tile_h = 1; |
break; |
case GEN6_TILING_X: |
tile_w = 512; |
tile_h = 8; |
break; |
case GEN6_TILING_Y: |
tile_w = 128; |
tile_h = 32; |
break; |
case GEN8_TILING_W: |
tile_w = 64; |
tile_h = 64; |
break; |
default: |
assert(!"unknown tiling"); |
tile_w = 1; |
tile_h = 1; |
break; |
} |
assert(mem_x % tile_w == 0); |
assert(mem_y % tile_h == 0); |
return mem_y * img->bo_stride + mem_x * tile_h; |
} |
/** |
* Return the stride, in bytes, between slices within a level. |
*/ |
static inline unsigned |
ilo_image_get_slice_stride(const struct ilo_image *img, unsigned level) |
{ |
unsigned h; |
switch (img->walk) { |
case ILO_IMAGE_WALK_LAYER: |
h = img->walk_layer_height; |
break; |
case ILO_IMAGE_WALK_LOD: |
h = img->lods[level].slice_height; |
break; |
case ILO_IMAGE_WALK_3D: |
if (level == 0) { |
h = img->lods[0].slice_height; |
break; |
} |
/* fall through */ |
default: |
assert(!"no single stride to walk across slices"); |
h = 0; |
break; |
} |
assert(h % img->block_height == 0); |
return (h / img->block_height) * img->bo_stride; |
} |
/** |
* Return the physical size, in bytes, of a slice in a level. |
*/ |
static inline unsigned |
ilo_image_get_slice_size(const struct ilo_image *img, unsigned level) |
{ |
const unsigned w = img->lods[level].slice_width; |
const unsigned h = img->lods[level].slice_height; |
assert(w % img->block_width == 0); |
assert(h % img->block_height == 0); |
return (w / img->block_width * img->block_size) * |
(h / img->block_height); |
} |
/** |
* Return the pixel position of a slice. |
*/ |
static inline void |
ilo_image_get_slice_pos(const struct ilo_image *img, |
unsigned level, unsigned slice, |
unsigned *x, unsigned *y) |
{ |
switch (img->walk) { |
case ILO_IMAGE_WALK_LAYER: |
*x = img->lods[level].x; |
*y = img->lods[level].y + img->walk_layer_height * slice; |
break; |
case ILO_IMAGE_WALK_LOD: |
*x = img->lods[level].x; |
*y = img->lods[level].y + img->lods[level].slice_height * slice; |
break; |
case ILO_IMAGE_WALK_3D: |
{ |
/* slices are packed horizontally with wrapping */ |
const unsigned sx = slice & ((1 << level) - 1); |
const unsigned sy = slice >> level; |
assert(slice < u_minify(img->depth0, level)); |
*x = img->lods[level].x + img->lods[level].slice_width * sx; |
*y = img->lods[level].y + img->lods[level].slice_height * sy; |
} |
break; |
default: |
assert(!"unknown img walk type"); |
*x = 0; |
*y = 0; |
break; |
} |
/* should not exceed the bo size */ |
assert(*y + img->lods[level].slice_height <= |
img->bo_height * img->block_height); |
} |
#endif /* ILO_IMAGE_H */ |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_state_3d.h |
---|
0,0 → 1,427 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2014 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_STATE_3D_H |
#define ILO_STATE_3D_H |
#include "genhw/genhw.h" |
#include "pipe/p_state.h" |
#include "ilo_core.h" |
#include "ilo_dev.h" |
/*
 * Upper bounds on pipeline state counts.  Several of them size the
 * fixed-length arrays in the state structures declared in this header
 * (draw buffers, constant buffers, sampler views, samplers, stream output
 * buffers and viewports).
 */
/** |
* \see brw_context.h |
*/ |
#define ILO_MAX_DRAW_BUFFERS 8 |
/* presumably one extra slot on top of twelve regular ones -- TODO confirm */
#define ILO_MAX_CONST_BUFFERS (1 + 12) |
#define ILO_MAX_SAMPLER_VIEWS 16 |
#define ILO_MAX_SAMPLERS 16 |
#define ILO_MAX_SO_BINDINGS 64 |
#define ILO_MAX_SO_BUFFERS 4 |
#define ILO_MAX_VIEWPORTS 1 |
#define ILO_MAX_SURFACES 256 |
/* Forward declarations; only pointers to these types are used in this header. */
struct intel_bo; |
struct ilo_buffer; |
struct ilo_image; |
struct ilo_shader_state; |
/*
 * Vertex buffer state: one pipe_vertex_buffer per slot plus a 32-bit mask
 * (presumably one bit per enabled slot -- TODO confirm).
 */
struct ilo_vb_state { |
struct pipe_vertex_buffer states[PIPE_MAX_ATTRIBS]; |
uint32_t enabled_mask; |
}; |
/*
 * Index buffer state.  The first four members describe the index source;
 * the hw_* members and draw_start_offset are derived at finalize time.
 */
struct ilo_ib_state { |
struct pipe_resource *buffer; |
const void *user_buffer; |
unsigned offset; |
unsigned index_size; |
/* these are not valid until the state is finalized */ |
struct pipe_resource *hw_resource; |
unsigned hw_index_size; |
/* an offset to be added to pipe_draw_info::start */ |
int64_t draw_start_offset; |
}; |
/* Two-dword hardware payload for a single vertex element. */
struct ilo_ve_cso { |
/* VERTEX_ELEMENT_STATE */ |
uint32_t payload[2]; |
}; |
/*
 * Vertex element state: per-element payloads, instance divisors and vertex
 * buffer mapping, followed by members that are filled in at finalize time.
 */
struct ilo_ve_state { |
struct ilo_ve_cso cso[PIPE_MAX_ATTRIBS]; |
unsigned count; |
unsigned instance_divisors[PIPE_MAX_ATTRIBS]; |
unsigned vb_mapping[PIPE_MAX_ATTRIBS]; |
unsigned vb_count; |
/* these are not valid until the state is finalized */ |
struct ilo_ve_cso edgeflag_cso; |
bool last_cso_edgeflag; |
struct ilo_ve_cso nosrc_cso; |
bool prepend_nosrc_cso; |
}; |
/* Stream output state: up to ILO_MAX_SO_BUFFERS target pointers. */
struct ilo_so_state { |
struct pipe_stream_output_target *states[ILO_MAX_SO_BUFFERS]; |
unsigned count; |
/* presumably one bit per target that is appended to -- TODO confirm */
unsigned append_bitmask; |
bool enabled; |
}; |
/* Precomputed values for one viewport, in the three forms noted below. */
struct ilo_viewport_cso { |
/* matrix form */ |
float m00, m11, m22, m30, m31, m32; |
/* guardband in NDC space */ |
float min_gbx, min_gby, max_gbx, max_gby; |
/* viewport in screen space */ |
float min_x, min_y, min_z; |
float max_x, max_y, max_z; |
}; |
/*
 * Viewport state: ILO_MAX_VIEWPORTS precomputed entries, a count, and a
 * pipe_viewport_state named viewport0.
 */
struct ilo_viewport_state { |
struct ilo_viewport_cso cso[ILO_MAX_VIEWPORTS]; |
unsigned count; |
struct pipe_viewport_state viewport0; |
}; |
/* Scissor state: two payload dwords per viewport, and a pipe_scissor_state named scissor0. */
struct ilo_scissor_state { |
/* SCISSOR_RECT */ |
uint32_t payload[ILO_MAX_VIEWPORTS * 2]; |
struct pipe_scissor_state scissor0; |
}; |
/* Rasterizer-derived 3DSTATE_CLIP dwords; filled by rasterizer_init_clip(). */
struct ilo_rasterizer_clip { |
/* 3DSTATE_CLIP */ |
uint32_t payload[3]; |
/* written as true/false depending on point and line state */
uint32_t can_enable_guardband; |
}; |
/* Rasterizer-derived 3DSTATE_SF dwords, plus the Gen8+ 3DSTATE_RASTER dword. */
struct ilo_rasterizer_sf { |
/* 3DSTATE_SF */ |
uint32_t payload[3]; |
/* zero unless multisample rasterization is requested */
uint32_t dw_msaa; |
/* Global Depth Offset Constant/Scale/Clamp */ |
uint32_t dw_depth_offset_const; |
uint32_t dw_depth_offset_scale; |
uint32_t dw_depth_offset_clamp; |
/* Gen8+ 3DSTATE_RASTER */ |
uint32_t dw_raster; |
}; |
/* Rasterizer-derived 3DSTATE_WM dwords and the multisample mode bits. */
struct ilo_rasterizer_wm { |
/* 3DSTATE_WM */ |
uint32_t payload[2]; |
uint32_t dw_msaa_rast; |
uint32_t dw_msaa_disp; |
}; |
/*
 * Rasterizer state: a pipe_rasterizer_state together with the CLIP, SF and
 * WM values derived from it.
 */
struct ilo_rasterizer_state { |
struct pipe_rasterizer_state state; |
struct ilo_rasterizer_clip clip; |
struct ilo_rasterizer_sf sf; |
struct ilo_rasterizer_wm wm; |
}; |
/* Depth/stencil/alpha state: hardware payload, alpha-related dwords and the alpha reference. */
struct ilo_dsa_state { |
/* DEPTH_STENCIL_STATE or Gen8+ 3DSTATE_WM_DEPTH_STENCIL */ |
uint32_t payload[3]; |
uint32_t dw_blend_alpha; |
uint32_t dw_ps_blend_alpha; |
ubyte alpha_ref; |
}; |
/* Blend values for a single draw buffer. */
struct ilo_blend_cso { |
/* BLEND_STATE */ |
uint32_t payload[2]; |
uint32_t dw_blend; |
/* presumably the variant of dw_blend for targets whose alpha reads as one -- TODO confirm */
uint32_t dw_blend_dst_alpha_forced_one; |
}; |
/* Blend state: one ilo_blend_cso per draw buffer plus values shared by all of them. */
struct ilo_blend_state { |
struct ilo_blend_cso cso[ILO_MAX_DRAW_BUFFERS]; |
bool dual_blend; |
bool alpha_to_coverage; |
uint32_t dw_shared; |
uint32_t dw_alpha_mod; |
uint32_t dw_logicop; |
/* a part of 3DSTATE_PS_BLEND */ |
uint32_t dw_ps_blend; |
uint32_t dw_ps_blend_dst_alpha_forced_one; |
}; |
/* Sampler values: hardware payload, alternative filter/wrap dwords and a few flags. */
struct ilo_sampler_cso { |
/* SAMPLER_STATE and SAMPLER_BORDER_COLOR_STATE */ |
uint32_t payload[15]; |
uint32_t dw_filter; |
uint32_t dw_filter_aniso; |
uint32_t dw_wrap; |
uint32_t dw_wrap_1d; |
uint32_t dw_wrap_cube; |
bool anisotropic; |
bool saturate_r; |
bool saturate_s; |
bool saturate_t; |
}; |
/* Sampler bindings: pointers to up to ILO_MAX_SAMPLERS sampler CSOs. */
struct ilo_sampler_state { |
const struct ilo_sampler_cso *cso[ILO_MAX_SAMPLERS]; |
}; |
/* SURFACE_STATE payload together with the bo pointer it goes with. */
struct ilo_view_surface { |
/* SURFACE_STATE */ |
uint32_t payload[13]; |
struct intel_bo *bo; |
uint32_t scanout; |
}; |
/* Sampler view object: the pipe_sampler_view base followed by its surface. */
struct ilo_view_cso { |
struct pipe_sampler_view base; |
struct ilo_view_surface surface; |
}; |
/* Sampler view bindings: up to ILO_MAX_SAMPLER_VIEWS view pointers. */
struct ilo_view_state { |
struct pipe_sampler_view *states[ILO_MAX_SAMPLER_VIEWS]; |
unsigned count; |
}; |
/* One constant buffer binding: a resource or a user buffer, plus its surface. */
struct ilo_cbuf_cso { |
struct pipe_resource *resource; |
struct ilo_view_surface surface; |
/* |
* this CSO is not so constant because user buffer needs to be uploaded in |
* finalize_constant_buffers() |
*/ |
const void *user_buffer; |
unsigned user_buffer_size; |
}; |
/* Constant buffer bindings with a 32-bit mask (presumably one bit per bound slot -- TODO confirm). */
struct ilo_cbuf_state { |
struct ilo_cbuf_cso cso[ILO_MAX_CONST_BUFFERS]; |
uint32_t enabled_mask; |
}; |
/* Shader resource bindings: up to PIPE_MAX_SHADER_RESOURCES surface pointers. */
struct ilo_resource_state { |
struct pipe_surface *states[PIPE_MAX_SHADER_RESOURCES]; |
unsigned count; |
}; |
/*
 * Surface object: the pipe_surface base followed by either a render target
 * surface or a depth/stencil surface; is_rt presumably tells which union
 * member is in use -- TODO confirm.
 */
struct ilo_surface_cso { |
struct pipe_surface base; |
bool is_rt; |
union { |
struct ilo_view_surface rt; |
struct ilo_zs_surface { |
uint32_t payload[12]; |
uint32_t dw_aligned_8x4; |
struct intel_bo *bo; |
struct intel_bo *hiz_bo; |
struct intel_bo *separate_s8_bo; |
} zs; |
} u; |
}; |
/*
 * Framebuffer state: the pipe_framebuffer_state, null render target and
 * null depth/stencil surfaces, per-color-buffer blend capabilities and the
 * sample count.
 */
struct ilo_fb_state { |
struct pipe_framebuffer_state state; |
struct ilo_view_surface null_rt; |
struct ilo_zs_surface null_zs; |
struct ilo_fb_blend_caps { |
bool can_logicop; |
bool can_blend; |
bool can_alpha_test; |
bool dst_alpha_forced_one; |
} blend_caps[PIPE_MAX_COLOR_BUFS]; |
unsigned num_samples; |
}; |
/* Shader-derived dwords; fs_init_cso_gen6() fills the first four entries. */
struct ilo_shader_cso { |
uint32_t payload[5]; |
}; |
/** |
* Translate a pipe texture target to the matching hardware surface type. |
*/ |
static inline int |
ilo_gpe_gen6_translate_texture(enum pipe_texture_target target) |
{ |
switch (target) { |
case PIPE_BUFFER: |
return GEN6_SURFTYPE_BUFFER; |
case PIPE_TEXTURE_1D: |
case PIPE_TEXTURE_1D_ARRAY: |
return GEN6_SURFTYPE_1D; |
case PIPE_TEXTURE_2D: |
case PIPE_TEXTURE_RECT: |
case PIPE_TEXTURE_2D_ARRAY: |
return GEN6_SURFTYPE_2D; |
case PIPE_TEXTURE_3D: |
return GEN6_SURFTYPE_3D; |
case PIPE_TEXTURE_CUBE: |
case PIPE_TEXTURE_CUBE_ARRAY: |
return GEN6_SURFTYPE_CUBE; |
default: |
assert(!"unknown texture target"); |
return GEN6_SURFTYPE_BUFFER; |
} |
} |
/*
 * State construction entry points.  Each takes the device, the source
 * description(s), and a non-const pointer to the structure it fills as the
 * last parameter.  Only ilo_gpe_init_rasterizer() is defined in the part of
 * the sources visible alongside this header; the behavior of the others
 * should be confirmed against their definitions.
 */
/* vertex elements */
void |
ilo_gpe_init_ve(const struct ilo_dev *dev, |
unsigned num_states, |
const struct pipe_vertex_element *states, |
struct ilo_ve_state *ve); |
void |
ilo_gpe_set_ve_edgeflag(const struct ilo_dev *dev, |
struct ilo_ve_cso *cso); |
void |
ilo_gpe_init_ve_nosrc(const struct ilo_dev *dev, |
int comp0, int comp1, int comp2, int comp3, |
struct ilo_ve_cso *cso); |
/* viewport and scissor */
void |
ilo_gpe_set_viewport_cso(const struct ilo_dev *dev, |
const struct pipe_viewport_state *state, |
struct ilo_viewport_cso *vp); |
void |
ilo_gpe_set_scissor(const struct ilo_dev *dev, |
unsigned start_slot, |
unsigned num_states, |
const struct pipe_scissor_state *states, |
struct ilo_scissor_state *scissor); |
void |
ilo_gpe_set_scissor_null(const struct ilo_dev *dev, |
struct ilo_scissor_state *scissor); |
/* rasterizer, depth/stencil/alpha, blend and sampler objects */
void |
ilo_gpe_init_rasterizer(const struct ilo_dev *dev, |
const struct pipe_rasterizer_state *state, |
struct ilo_rasterizer_state *rasterizer); |
void |
ilo_gpe_init_dsa(const struct ilo_dev *dev, |
const struct pipe_depth_stencil_alpha_state *state, |
struct ilo_dsa_state *dsa); |
void |
ilo_gpe_init_blend(const struct ilo_dev *dev, |
const struct pipe_blend_state *state, |
struct ilo_blend_state *blend); |
void |
ilo_gpe_init_sampler_cso(const struct ilo_dev *dev, |
const struct pipe_sampler_state *state, |
struct ilo_sampler_cso *sampler); |
/* view surfaces: null, buffer-backed and image-backed */
void |
ilo_gpe_init_view_surface_null(const struct ilo_dev *dev, |
unsigned width, unsigned height, |
unsigned depth, unsigned level, |
struct ilo_view_surface *surf); |
void |
ilo_gpe_init_view_surface_for_buffer(const struct ilo_dev *dev, |
const struct ilo_buffer *buf, |
unsigned offset, unsigned size, |
unsigned struct_size, |
enum pipe_format elem_format, |
bool is_rt, bool render_cache_rw, |
struct ilo_view_surface *surf); |
void |
ilo_gpe_init_view_surface_for_image(const struct ilo_dev *dev, |
const struct ilo_image *img, |
enum pipe_texture_target target, |
enum pipe_format format, |
unsigned first_level, |
unsigned num_levels, |
unsigned first_layer, |
unsigned num_layers, |
bool is_rt, |
struct ilo_view_surface *surf); |
/* depth/stencil surface; takes a second image pointer, s8_img, in addition to img */
void |
ilo_gpe_init_zs_surface(const struct ilo_dev *dev, |
const struct ilo_image *img, |
const struct ilo_image *s8_img, |
enum pipe_texture_target target, |
enum pipe_format format, unsigned level, |
unsigned first_layer, unsigned num_layers, |
struct ilo_zs_surface *zs); |
/* per-stage shader objects */
void |
ilo_gpe_init_vs_cso(const struct ilo_dev *dev, |
const struct ilo_shader_state *vs, |
struct ilo_shader_cso *cso); |
void |
ilo_gpe_init_gs_cso(const struct ilo_dev *dev, |
const struct ilo_shader_state *gs, |
struct ilo_shader_cso *cso); |
void |
ilo_gpe_init_fs_cso(const struct ilo_dev *dev, |
const struct ilo_shader_state *fs, |
struct ilo_shader_cso *cso); |
/* framebuffer */
void |
ilo_gpe_set_fb(const struct ilo_dev *dev, |
const struct pipe_framebuffer_state *state, |
struct ilo_fb_state *fb); |
#endif /* ILO_STATE_3D_H */ |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c |
---|
0,0 → 1,2222 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2014 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include "genhw/genhw.h" |
#include "util/u_dual_blend.h" |
#include "util/u_framebuffer.h" |
#include "util/u_half.h" |
#include "ilo_format.h" |
#include "ilo_image.h" |
#include "ilo_state_3d.h" |
#include "../ilo_shader.h" |
static void |
rasterizer_init_clip(const struct ilo_dev *dev, |
const struct pipe_rasterizer_state *state, |
struct ilo_rasterizer_clip *clip) |
{ |
uint32_t dw1, dw2, dw3; |
ILO_DEV_ASSERT(dev, 6, 8); |
dw1 = GEN6_CLIP_DW1_STATISTICS; |
if (ilo_dev_gen(dev) >= ILO_GEN(7)) { |
/* |
* From the Ivy Bridge PRM, volume 2 part 1, page 219: |
* |
* "Workaround : Due to Hardware issue "EarlyCull" needs to be |
* enabled only for the cases where the incoming primitive topology |
* into the clipper guaranteed to be Trilist." |
* |
* What does this mean? |
*/ |
dw1 |= 0 << 19 | |
GEN7_CLIP_DW1_EARLY_CULL_ENABLE; |
if (ilo_dev_gen(dev) < ILO_GEN(8)) { |
if (state->front_ccw) |
dw1 |= GEN7_CLIP_DW1_FRONTWINDING_CCW; |
switch (state->cull_face) { |
case PIPE_FACE_NONE: |
dw1 |= GEN7_CLIP_DW1_CULLMODE_NONE; |
break; |
case PIPE_FACE_FRONT: |
dw1 |= GEN7_CLIP_DW1_CULLMODE_FRONT; |
break; |
case PIPE_FACE_BACK: |
dw1 |= GEN7_CLIP_DW1_CULLMODE_BACK; |
break; |
case PIPE_FACE_FRONT_AND_BACK: |
dw1 |= GEN7_CLIP_DW1_CULLMODE_BOTH; |
break; |
} |
} |
} |
dw2 = GEN6_CLIP_DW2_CLIP_ENABLE | |
GEN6_CLIP_DW2_XY_TEST_ENABLE | |
state->clip_plane_enable << GEN6_CLIP_DW2_UCP_CLIP_ENABLES__SHIFT | |
GEN6_CLIP_DW2_CLIPMODE_NORMAL; |
if (state->clip_halfz) |
dw2 |= GEN6_CLIP_DW2_APIMODE_D3D; |
else |
dw2 |= GEN6_CLIP_DW2_APIMODE_OGL; |
if (ilo_dev_gen(dev) < ILO_GEN(8) && state->depth_clip) |
dw2 |= GEN6_CLIP_DW2_Z_TEST_ENABLE; |
if (state->flatshade_first) { |
dw2 |= 0 << GEN6_CLIP_DW2_TRI_PROVOKE__SHIFT | |
0 << GEN6_CLIP_DW2_LINE_PROVOKE__SHIFT | |
1 << GEN6_CLIP_DW2_TRIFAN_PROVOKE__SHIFT; |
} |
else { |
dw2 |= 2 << GEN6_CLIP_DW2_TRI_PROVOKE__SHIFT | |
1 << GEN6_CLIP_DW2_LINE_PROVOKE__SHIFT | |
2 << GEN6_CLIP_DW2_TRIFAN_PROVOKE__SHIFT; |
} |
dw3 = 0x1 << GEN6_CLIP_DW3_MIN_POINT_WIDTH__SHIFT | |
0x7ff << GEN6_CLIP_DW3_MAX_POINT_WIDTH__SHIFT; |
clip->payload[0] = dw1; |
clip->payload[1] = dw2; |
clip->payload[2] = dw3; |
clip->can_enable_guardband = true; |
/* |
* There are several reasons that guard band test should be disabled |
* |
* - GL wide points (to avoid partially visibie object) |
* - GL wide or AA lines (to avoid partially visibie object) |
*/ |
if (state->point_size_per_vertex || state->point_size > 1.0f) |
clip->can_enable_guardband = false; |
if (state->line_smooth || state->line_width > 1.0f) |
clip->can_enable_guardband = false; |
} |
static void |
rasterizer_init_sf_depth_offset_gen6(const struct ilo_dev *dev, |
const struct pipe_rasterizer_state *state, |
struct ilo_rasterizer_sf *sf) |
{ |
ILO_DEV_ASSERT(dev, 6, 8); |
/* |
* Scale the constant term. The minimum representable value used by the HW |
* is not large enouch to be the minimum resolvable difference. |
*/ |
sf->dw_depth_offset_const = fui(state->offset_units * 2.0f); |
sf->dw_depth_offset_scale = fui(state->offset_scale); |
sf->dw_depth_offset_clamp = fui(state->offset_clamp); |
} |
static void |
rasterizer_init_sf_gen6(const struct ilo_dev *dev, |
const struct pipe_rasterizer_state *state, |
struct ilo_rasterizer_sf *sf) |
{ |
int line_width, point_width; |
uint32_t dw1, dw2, dw3; |
ILO_DEV_ASSERT(dev, 6, 7.5); |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 248: |
* |
* "This bit (Statistics Enable) should be set whenever clipping is |
* enabled and the Statistics Enable bit is set in CLIP_STATE. It |
* should be cleared if clipping is disabled or Statistics Enable in |
* CLIP_STATE is clear." |
*/ |
dw1 = GEN7_SF_DW1_STATISTICS | |
GEN7_SF_DW1_VIEWPORT_ENABLE; |
/* XXX GEN6 path seems to work fine for GEN7 */ |
if (false && ilo_dev_gen(dev) >= ILO_GEN(7)) { |
/* |
* From the Ivy Bridge PRM, volume 2 part 1, page 258: |
* |
* "This bit (Legacy Global Depth Bias Enable, Global Depth Offset |
* Enable Solid , Global Depth Offset Enable Wireframe, and Global |
* Depth Offset Enable Point) should be set whenever non zero depth |
* bias (Slope, Bias) values are used. Setting this bit may have |
* some degradation of performance for some workloads." |
*/ |
if (state->offset_tri || state->offset_line || state->offset_point) { |
/* XXX need to scale offset_const according to the depth format */ |
dw1 |= GEN7_SF_DW1_LEGACY_DEPTH_OFFSET; |
dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_SOLID | |
GEN7_SF_DW1_DEPTH_OFFSET_WIREFRAME | |
GEN7_SF_DW1_DEPTH_OFFSET_POINT; |
} |
} else { |
if (state->offset_tri) |
dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_SOLID; |
if (state->offset_line) |
dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_WIREFRAME; |
if (state->offset_point) |
dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_POINT; |
} |
switch (state->fill_front) { |
case PIPE_POLYGON_MODE_FILL: |
dw1 |= GEN7_SF_DW1_FRONTFACE_SOLID; |
break; |
case PIPE_POLYGON_MODE_LINE: |
dw1 |= GEN7_SF_DW1_FRONTFACE_WIREFRAME; |
break; |
case PIPE_POLYGON_MODE_POINT: |
dw1 |= GEN7_SF_DW1_FRONTFACE_POINT; |
break; |
} |
switch (state->fill_back) { |
case PIPE_POLYGON_MODE_FILL: |
dw1 |= GEN7_SF_DW1_BACKFACE_SOLID; |
break; |
case PIPE_POLYGON_MODE_LINE: |
dw1 |= GEN7_SF_DW1_BACKFACE_WIREFRAME; |
break; |
case PIPE_POLYGON_MODE_POINT: |
dw1 |= GEN7_SF_DW1_BACKFACE_POINT; |
break; |
} |
if (state->front_ccw) |
dw1 |= GEN7_SF_DW1_FRONTWINDING_CCW; |
dw2 = 0; |
if (state->line_smooth) { |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 251: |
* |
* "This field (Anti-aliasing Enable) must be disabled if any of the |
* render targets have integer (UINT or SINT) surface format." |
* |
* From the Sandy Bridge PRM, volume 2 part 1, page 317: |
* |
* "This field (Hierarchical Depth Buffer Enable) must be disabled |
* if Anti-aliasing Enable in 3DSTATE_SF is enabled. |
* |
* TODO We do not check those yet. |
*/ |
dw2 |= GEN7_SF_DW2_AA_LINE_ENABLE | |
GEN7_SF_DW2_AA_LINE_CAP_1_0; |
} |
switch (state->cull_face) { |
case PIPE_FACE_NONE: |
dw2 |= GEN7_SF_DW2_CULLMODE_NONE; |
break; |
case PIPE_FACE_FRONT: |
dw2 |= GEN7_SF_DW2_CULLMODE_FRONT; |
break; |
case PIPE_FACE_BACK: |
dw2 |= GEN7_SF_DW2_CULLMODE_BACK; |
break; |
case PIPE_FACE_FRONT_AND_BACK: |
dw2 |= GEN7_SF_DW2_CULLMODE_BOTH; |
break; |
} |
/* |
* Smooth lines should intersect ceil(line_width) or (ceil(line_width) + 1) |
* pixels in the minor direction. We have to make the lines slightly |
* thicker, 0.5 pixel on both sides, so that they intersect that many |
* pixels are considered into the lines. |
* |
* Line width is in U3.7. |
*/ |
line_width = (int) |
((state->line_width + (float) state->line_smooth) * 128.0f + 0.5f); |
line_width = CLAMP(line_width, 0, 1023); |
/* use GIQ rules */ |
if (line_width == 128 && !state->line_smooth) |
line_width = 0; |
dw2 |= line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT; |
if (ilo_dev_gen(dev) == ILO_GEN(7.5) && state->line_stipple_enable) |
dw2 |= GEN75_SF_DW2_LINE_STIPPLE_ENABLE; |
if (state->scissor) |
dw2 |= GEN7_SF_DW2_SCISSOR_ENABLE; |
dw3 = GEN7_SF_DW3_TRUE_AA_LINE_DISTANCE | |
GEN7_SF_DW3_SUBPIXEL_8BITS; |
if (state->line_last_pixel) |
dw3 |= GEN7_SF_DW3_LINE_LAST_PIXEL_ENABLE; |
if (state->flatshade_first) { |
dw3 |= 0 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT | |
0 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT | |
1 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT; |
} else { |
dw3 |= 2 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT | |
1 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT | |
2 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT; |
} |
if (!state->point_size_per_vertex) |
dw3 |= GEN7_SF_DW3_USE_POINT_WIDTH; |
/* in U8.3 */ |
point_width = (int) (state->point_size * 8.0f + 0.5f); |
point_width = CLAMP(point_width, 1, 2047); |
dw3 |= point_width; |
STATIC_ASSERT(Elements(sf->payload) >= 3); |
sf->payload[0] = dw1; |
sf->payload[1] = dw2; |
sf->payload[2] = dw3; |
if (state->multisample) { |
sf->dw_msaa = GEN7_SF_DW2_MSRASTMODE_ON_PATTERN; |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 251: |
* |
* "Software must not program a value of 0.0 when running in |
* MSRASTMODE_ON_xxx modes - zero-width lines are not available |
* when multisampling rasterization is enabled." |
*/ |
if (!line_width) { |
line_width = 128; /* 1.0f */ |
sf->dw_msaa |= line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT; |
} |
} else { |
sf->dw_msaa = 0; |
} |
rasterizer_init_sf_depth_offset_gen6(dev, state, sf); |
/* 3DSTATE_RASTER is Gen8+ only */ |
sf->dw_raster = 0; |
} |
static uint32_t |
rasterizer_get_sf_raster_gen8(const struct ilo_dev *dev, |
const struct pipe_rasterizer_state *state) |
{ |
uint32_t dw = 0; |
ILO_DEV_ASSERT(dev, 8, 8); |
if (state->front_ccw) |
dw |= GEN8_RASTER_DW1_FRONTWINDING_CCW; |
switch (state->cull_face) { |
case PIPE_FACE_NONE: |
dw |= GEN8_RASTER_DW1_CULLMODE_NONE; |
break; |
case PIPE_FACE_FRONT: |
dw |= GEN8_RASTER_DW1_CULLMODE_FRONT; |
break; |
case PIPE_FACE_BACK: |
dw |= GEN8_RASTER_DW1_CULLMODE_BACK; |
break; |
case PIPE_FACE_FRONT_AND_BACK: |
dw |= GEN8_RASTER_DW1_CULLMODE_BOTH; |
break; |
} |
if (state->point_smooth) |
dw |= GEN8_RASTER_DW1_SMOOTH_POINT_ENABLE; |
if (state->multisample) |
dw |= GEN8_RASTER_DW1_API_MULTISAMPLE_ENABLE; |
if (state->offset_tri) |
dw|= GEN8_RASTER_DW1_DEPTH_OFFSET_SOLID; |
if (state->offset_line) |
dw|= GEN8_RASTER_DW1_DEPTH_OFFSET_WIREFRAME; |
if (state->offset_point) |
dw|= GEN8_RASTER_DW1_DEPTH_OFFSET_POINT; |
switch (state->fill_front) { |
case PIPE_POLYGON_MODE_FILL: |
dw |= GEN8_RASTER_DW1_FRONTFACE_SOLID; |
break; |
case PIPE_POLYGON_MODE_LINE: |
dw |= GEN8_RASTER_DW1_FRONTFACE_WIREFRAME; |
break; |
case PIPE_POLYGON_MODE_POINT: |
dw |= GEN8_RASTER_DW1_FRONTFACE_POINT; |
break; |
} |
switch (state->fill_back) { |
case PIPE_POLYGON_MODE_FILL: |
dw |= GEN8_RASTER_DW1_BACKFACE_SOLID; |
break; |
case PIPE_POLYGON_MODE_LINE: |
dw |= GEN8_RASTER_DW1_BACKFACE_WIREFRAME; |
break; |
case PIPE_POLYGON_MODE_POINT: |
dw |= GEN8_RASTER_DW1_BACKFACE_POINT; |
break; |
} |
if (state->line_smooth) |
dw |= GEN8_RASTER_DW1_AA_LINE_ENABLE; |
if (state->scissor) |
dw |= GEN8_RASTER_DW1_SCISSOR_ENABLE; |
if (state->depth_clip) |
dw |= GEN8_RASTER_DW1_Z_TEST_ENABLE; |
return dw; |
} |
static void |
rasterizer_init_sf_gen8(const struct ilo_dev *dev, |
const struct pipe_rasterizer_state *state, |
struct ilo_rasterizer_sf *sf) |
{ |
int line_width, point_width; |
uint32_t dw1, dw2, dw3; |
ILO_DEV_ASSERT(dev, 8, 8); |
/* in U3.7 */ |
line_width = (int) |
((state->line_width + (float) state->line_smooth) * 128.0f + 0.5f); |
line_width = CLAMP(line_width, 0, 1023); |
/* use GIQ rules */ |
if (line_width == 128 && !state->line_smooth) |
line_width = 0; |
/* in U8.3 */ |
point_width = (int) (state->point_size * 8.0f + 0.5f); |
point_width = CLAMP(point_width, 1, 2047); |
dw1 = GEN7_SF_DW1_STATISTICS | |
GEN7_SF_DW1_VIEWPORT_ENABLE; |
dw2 = line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT; |
if (state->line_smooth) |
dw2 |= GEN7_SF_DW2_AA_LINE_CAP_1_0; |
dw3 = GEN7_SF_DW3_TRUE_AA_LINE_DISTANCE | |
GEN7_SF_DW3_SUBPIXEL_8BITS | |
point_width; |
if (state->line_last_pixel) |
dw3 |= GEN7_SF_DW3_LINE_LAST_PIXEL_ENABLE; |
if (state->flatshade_first) { |
dw3 |= 0 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT | |
0 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT | |
1 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT; |
} else { |
dw3 |= 2 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT | |
1 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT | |
2 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT; |
} |
if (!state->point_size_per_vertex) |
dw3 |= GEN7_SF_DW3_USE_POINT_WIDTH; |
dw3 |= point_width; |
STATIC_ASSERT(Elements(sf->payload) >= 3); |
sf->payload[0] = dw1; |
sf->payload[1] = dw2; |
sf->payload[2] = dw3; |
rasterizer_init_sf_depth_offset_gen6(dev, state, sf); |
sf->dw_msaa = 0; |
sf->dw_raster = rasterizer_get_sf_raster_gen8(dev, state); |
} |
static void |
rasterizer_init_wm_gen6(const struct ilo_dev *dev, |
const struct pipe_rasterizer_state *state, |
struct ilo_rasterizer_wm *wm) |
{ |
uint32_t dw5, dw6; |
ILO_DEV_ASSERT(dev, 6, 6); |
/* only the FF unit states are set, as in GEN7 */ |
dw5 = GEN6_WM_DW5_AA_LINE_WIDTH_2_0; |
/* same value as in 3DSTATE_SF */ |
if (state->line_smooth) |
dw5 |= GEN6_WM_DW5_AA_LINE_CAP_1_0; |
if (state->poly_stipple_enable) |
dw5 |= GEN6_WM_DW5_POLY_STIPPLE_ENABLE; |
if (state->line_stipple_enable) |
dw5 |= GEN6_WM_DW5_LINE_STIPPLE_ENABLE; |
/* |
* assertion that makes sure |
* |
* dw6 |= wm->dw_msaa_rast | wm->dw_msaa_disp; |
* |
* is valid |
*/ |
STATIC_ASSERT(GEN6_WM_DW6_MSRASTMODE_OFF_PIXEL == 0 && |
GEN6_WM_DW6_MSDISPMODE_PERSAMPLE == 0); |
dw6 = GEN6_WM_DW6_ZW_INTERP_PIXEL; |
if (state->bottom_edge_rule) |
dw6 |= GEN6_WM_DW6_POINT_RASTRULE_UPPER_RIGHT; |
wm->dw_msaa_rast = |
(state->multisample) ? GEN6_WM_DW6_MSRASTMODE_ON_PATTERN : 0; |
wm->dw_msaa_disp = GEN6_WM_DW6_MSDISPMODE_PERPIXEL; |
STATIC_ASSERT(Elements(wm->payload) >= 2); |
wm->payload[0] = dw5; |
wm->payload[1] = dw6; |
} |
static void |
rasterizer_init_wm_gen7(const struct ilo_dev *dev, |
const struct pipe_rasterizer_state *state, |
struct ilo_rasterizer_wm *wm) |
{ |
uint32_t dw1, dw2; |
ILO_DEV_ASSERT(dev, 7, 7.5); |
/* |
* assertion that makes sure |
* |
* dw1 |= wm->dw_msaa_rast; |
* dw2 |= wm->dw_msaa_disp; |
* |
* is valid |
*/ |
STATIC_ASSERT(GEN7_WM_DW1_MSRASTMODE_OFF_PIXEL == 0 && |
GEN7_WM_DW2_MSDISPMODE_PERSAMPLE == 0); |
dw1 = GEN7_WM_DW1_ZW_INTERP_PIXEL | |
GEN7_WM_DW1_AA_LINE_WIDTH_2_0; |
dw2 = 0; |
/* same value as in 3DSTATE_SF */ |
if (state->line_smooth) |
dw1 |= GEN7_WM_DW1_AA_LINE_CAP_1_0; |
if (state->poly_stipple_enable) |
dw1 |= GEN7_WM_DW1_POLY_STIPPLE_ENABLE; |
if (state->line_stipple_enable) |
dw1 |= GEN7_WM_DW1_LINE_STIPPLE_ENABLE; |
if (state->bottom_edge_rule) |
dw1 |= GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT; |
wm->dw_msaa_rast = |
(state->multisample) ? GEN7_WM_DW1_MSRASTMODE_ON_PATTERN : 0; |
wm->dw_msaa_disp = GEN7_WM_DW2_MSDISPMODE_PERPIXEL; |
STATIC_ASSERT(Elements(wm->payload) >= 2); |
wm->payload[0] = dw1; |
wm->payload[1] = dw2; |
} |
static uint32_t |
rasterizer_get_wm_gen8(const struct ilo_dev *dev, |
const struct pipe_rasterizer_state *state) |
{ |
uint32_t dw; |
ILO_DEV_ASSERT(dev, 8, 8); |
dw = GEN7_WM_DW1_ZW_INTERP_PIXEL | |
GEN7_WM_DW1_AA_LINE_WIDTH_2_0; |
/* same value as in 3DSTATE_SF */ |
if (state->line_smooth) |
dw |= GEN7_WM_DW1_AA_LINE_CAP_1_0; |
if (state->poly_stipple_enable) |
dw |= GEN7_WM_DW1_POLY_STIPPLE_ENABLE; |
if (state->line_stipple_enable) |
dw |= GEN7_WM_DW1_LINE_STIPPLE_ENABLE; |
if (state->bottom_edge_rule) |
dw |= GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT; |
return dw; |
} |
void |
ilo_gpe_init_rasterizer(const struct ilo_dev *dev, |
const struct pipe_rasterizer_state *state, |
struct ilo_rasterizer_state *rasterizer) |
{ |
rasterizer_init_clip(dev, state, &rasterizer->clip); |
if (ilo_dev_gen(dev) >= ILO_GEN(8)) { |
memset(&rasterizer->wm, 0, sizeof(rasterizer->wm)); |
rasterizer->wm.payload[0] = rasterizer_get_wm_gen8(dev, state); |
rasterizer_init_sf_gen8(dev, state, &rasterizer->sf); |
} else if (ilo_dev_gen(dev) >= ILO_GEN(7)) { |
rasterizer_init_wm_gen7(dev, state, &rasterizer->wm); |
rasterizer_init_sf_gen6(dev, state, &rasterizer->sf); |
} else { |
rasterizer_init_wm_gen6(dev, state, &rasterizer->wm); |
rasterizer_init_sf_gen6(dev, state, &rasterizer->sf); |
} |
} |
static void |
fs_init_cso_gen6(const struct ilo_dev *dev, |
const struct ilo_shader_state *fs, |
struct ilo_shader_cso *cso) |
{ |
int start_grf, input_count, sampler_count, interps, max_threads; |
uint32_t dw2, dw4, dw5, dw6; |
ILO_DEV_ASSERT(dev, 6, 6); |
start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG); |
input_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT); |
sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT); |
interps = ilo_shader_get_kernel_param(fs, |
ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS); |
/* see brwCreateContext() */ |
max_threads = (dev->gt == 2) ? 80 : 40; |
dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT; |
dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; |
dw4 = start_grf << GEN6_WM_DW4_URB_GRF_START0__SHIFT | |
0 << GEN6_WM_DW4_URB_GRF_START1__SHIFT | |
0 << GEN6_WM_DW4_URB_GRF_START2__SHIFT; |
dw5 = (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT; |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 275: |
* |
* "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the |
* PS kernel or color calculator has the ability to kill (discard) |
* pixels or samples, other than due to depth or stencil testing. |
* This bit is required to be ENABLED in the following situations: |
* |
* The API pixel shader program contains "killpix" or "discard" |
* instructions, or other code in the pixel shader kernel that can |
* cause the final pixel mask to differ from the pixel mask received |
* on dispatch. |
* |
* A sampler with chroma key enabled with kill pixel mode is used by |
* the pixel shader. |
* |
* Any render target has Alpha Test Enable or AlphaToCoverage Enable |
* enabled. |
* |
* The pixel shader kernel generates and outputs oMask. |
* |
* Note: As ClipDistance clipping is fully supported in hardware and |
* therefore not via PS instructions, there should be no need to |
* ENABLE this bit due to ClipDistance clipping." |
*/ |
if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL)) |
dw5 |= GEN6_WM_DW5_PS_KILL_PIXEL; |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 275: |
* |
* "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth |
* field must be set to disabled." |
* |
* TODO This is not checked yet. |
*/ |
if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z)) |
dw5 |= GEN6_WM_DW5_PS_COMPUTE_DEPTH; |
if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z)) |
dw5 |= GEN6_WM_DW5_PS_USE_DEPTH; |
if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W)) |
dw5 |= GEN6_WM_DW5_PS_USE_W; |
/* |
* TODO set this bit only when |
* |
* a) fs writes colors and color is not masked, or |
* b) fs writes depth, or |
* c) fs or cc kills |
*/ |
if (true) |
dw5 |= GEN6_WM_DW5_PS_DISPATCH_ENABLE; |
assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET)); |
dw5 |= GEN6_PS_DISPATCH_8 << GEN6_WM_DW5_PS_DISPATCH_MODE__SHIFT; |
dw6 = input_count << GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT | |
GEN6_WM_DW6_PS_POSOFFSET_NONE | |
interps << GEN6_WM_DW6_BARYCENTRIC_INTERP__SHIFT; |
STATIC_ASSERT(Elements(cso->payload) >= 4); |
cso->payload[0] = dw2; |
cso->payload[1] = dw4; |
cso->payload[2] = dw5; |
cso->payload[3] = dw6; |
} |
static uint32_t |
fs_get_wm_gen7(const struct ilo_dev *dev, |
const struct ilo_shader_state *fs) |
{ |
uint32_t dw; |
ILO_DEV_ASSERT(dev, 7, 7.5); |
dw = ilo_shader_get_kernel_param(fs, |
ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) << |
GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT; |
/* |
* TODO set this bit only when |
* |
* a) fs writes colors and color is not masked, or |
* b) fs writes depth, or |
* c) fs or cc kills |
*/ |
dw |= GEN7_WM_DW1_PS_DISPATCH_ENABLE; |
/* |
* From the Ivy Bridge PRM, volume 2 part 1, page 278: |
* |
* "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that |
* the PS kernel or color calculator has the ability to kill |
* (discard) pixels or samples, other than due to depth or stencil |
* testing. This bit is required to be ENABLED in the following |
* situations: |
* |
* - The API pixel shader program contains "killpix" or "discard" |
* instructions, or other code in the pixel shader kernel that |
* can cause the final pixel mask to differ from the pixel mask |
* received on dispatch. |
* |
* - A sampler with chroma key enabled with kill pixel mode is used |
* by the pixel shader. |
* |
* - Any render target has Alpha Test Enable or AlphaToCoverage |
* Enable enabled. |
* |
* - The pixel shader kernel generates and outputs oMask. |
* |
* Note: As ClipDistance clipping is fully supported in hardware |
* and therefore not via PS instructions, there should be no need |
* to ENABLE this bit due to ClipDistance clipping." |
*/ |
if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL)) |
dw |= GEN7_WM_DW1_PS_KILL_PIXEL; |
if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z)) |
dw |= GEN7_WM_DW1_PSCDEPTH_ON; |
if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z)) |
dw |= GEN7_WM_DW1_PS_USE_DEPTH; |
if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W)) |
dw |= GEN7_WM_DW1_PS_USE_W; |
return dw; |
} |
static void |
fs_init_cso_gen7(const struct ilo_dev *dev, |
const struct ilo_shader_state *fs, |
struct ilo_shader_cso *cso) |
{ |
int start_grf, sampler_count, max_threads; |
uint32_t dw2, dw4, dw5; |
ILO_DEV_ASSERT(dev, 7, 7.5); |
start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG); |
sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT); |
dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT; |
dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; |
dw4 = GEN7_PS_DW4_POSOFFSET_NONE; |
/* see brwCreateContext() */ |
switch (ilo_dev_gen(dev)) { |
case ILO_GEN(7.5): |
max_threads = (dev->gt == 3) ? 408 : (dev->gt == 2) ? 204 : 102; |
dw4 |= (max_threads - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT; |
dw4 |= 1 << GEN75_PS_DW4_SAMPLE_MASK__SHIFT; |
break; |
case ILO_GEN(7): |
default: |
max_threads = (dev->gt == 2) ? 172 : 48; |
dw4 |= (max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT; |
break; |
} |
if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_PCB_CBUF0_SIZE)) |
dw4 |= GEN7_PS_DW4_PUSH_CONSTANT_ENABLE; |
if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT)) |
dw4 |= GEN7_PS_DW4_ATTR_ENABLE; |
assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET)); |
dw4 |= GEN6_PS_DISPATCH_8 << GEN7_PS_DW4_DISPATCH_MODE__SHIFT; |
dw5 = start_grf << GEN7_PS_DW5_URB_GRF_START0__SHIFT | |
0 << GEN7_PS_DW5_URB_GRF_START1__SHIFT | |
0 << GEN7_PS_DW5_URB_GRF_START2__SHIFT; |
STATIC_ASSERT(Elements(cso->payload) >= 4); |
cso->payload[0] = dw2; |
cso->payload[1] = dw4; |
cso->payload[2] = dw5; |
cso->payload[3] = fs_get_wm_gen7(dev, fs); |
} |
static uint32_t |
fs_get_psx_gen8(const struct ilo_dev *dev, |
const struct ilo_shader_state *fs) |
{ |
uint32_t dw; |
ILO_DEV_ASSERT(dev, 8, 8); |
dw = GEN8_PSX_DW1_DISPATCH_ENABLE; |
if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL)) |
dw |= GEN8_PSX_DW1_KILL_PIXEL; |
if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z)) |
dw |= GEN8_PSX_DW1_PSCDEPTH_ON; |
if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z)) |
dw |= GEN8_PSX_DW1_USE_DEPTH; |
if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W)) |
dw |= GEN8_PSX_DW1_USE_W; |
if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT)) |
dw |= GEN8_PSX_DW1_ATTR_ENABLE; |
return dw; |
} |
static uint32_t |
fs_get_wm_gen8(const struct ilo_dev *dev, |
const struct ilo_shader_state *fs) |
{ |
ILO_DEV_ASSERT(dev, 8, 8); |
return ilo_shader_get_kernel_param(fs, |
ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) << |
GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT; |
} |
static void |
fs_init_cso_gen8(const struct ilo_dev *dev, |
const struct ilo_shader_state *fs, |
struct ilo_shader_cso *cso) |
{ |
int start_grf, sampler_count; |
uint32_t dw3, dw6, dw7; |
ILO_DEV_ASSERT(dev, 8, 8); |
start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG); |
sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT); |
dw3 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT; |
dw3 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; |
/* always 64? */ |
dw6 = (64 - 2) << GEN8_PS_DW6_MAX_THREADS__SHIFT | |
GEN8_PS_DW6_POSOFFSET_NONE; |
if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_PCB_CBUF0_SIZE)) |
dw6 |= GEN8_PS_DW6_PUSH_CONSTANT_ENABLE; |
assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET)); |
dw6 |= GEN6_PS_DISPATCH_8 << GEN8_PS_DW6_DISPATCH_MODE__SHIFT; |
dw7 = start_grf << GEN8_PS_DW7_URB_GRF_START0__SHIFT | |
0 << GEN8_PS_DW7_URB_GRF_START1__SHIFT | |
0 << GEN8_PS_DW7_URB_GRF_START2__SHIFT; |
STATIC_ASSERT(Elements(cso->payload) >= 5); |
cso->payload[0] = dw3; |
cso->payload[1] = dw6; |
cso->payload[2] = dw7; |
cso->payload[3] = fs_get_psx_gen8(dev, fs); |
cso->payload[4] = fs_get_wm_gen8(dev, fs); |
} |
/**
 * Initialize the fragment shader CSO, dispatching to the initializer that
 * matches the device generation (GEN8+, GEN7+, or older).
 */
void
ilo_gpe_init_fs_cso(const struct ilo_dev *dev,
                    const struct ilo_shader_state *fs,
                    struct ilo_shader_cso *cso)
{
   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
      fs_init_cso_gen8(dev, fs, cso);
      return;
   }

   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
      fs_init_cso_gen7(dev, fs, cso);
      return;
   }

   fs_init_cso_gen6(dev, fs, cso);
}
struct ilo_zs_surface_info { |
int surface_type; |
int format; |
struct { |
struct intel_bo *bo; |
unsigned stride; |
unsigned qpitch; |
enum gen_surface_tiling tiling; |
uint32_t offset; |
} zs, stencil, hiz; |
unsigned width, height, depth; |
unsigned lod, first_layer, num_layers; |
}; |
static void |
zs_init_info_null(const struct ilo_dev *dev, |
struct ilo_zs_surface_info *info) |
{ |
ILO_DEV_ASSERT(dev, 6, 8); |
memset(info, 0, sizeof(*info)); |
info->surface_type = GEN6_SURFTYPE_NULL; |
info->format = GEN6_ZFORMAT_D32_FLOAT; |
info->width = 1; |
info->height = 1; |
info->depth = 1; |
info->num_layers = 1; |
} |
static void |
zs_init_info(const struct ilo_dev *dev, |
const struct ilo_image *img, |
const struct ilo_image *s8_img, |
enum pipe_texture_target target, |
enum pipe_format format, unsigned level, |
unsigned first_layer, unsigned num_layers, |
struct ilo_zs_surface_info *info) |
{ |
bool separate_stencil; |
ILO_DEV_ASSERT(dev, 6, 8); |
memset(info, 0, sizeof(*info)); |
info->surface_type = ilo_gpe_gen6_translate_texture(target); |
if (info->surface_type == GEN6_SURFTYPE_CUBE) { |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 325-326: |
* |
* "For Other Surfaces (Cube Surfaces): |
* This field (Minimum Array Element) is ignored." |
* |
* "For Other Surfaces (Cube Surfaces): |
* This field (Render Target View Extent) is ignored." |
* |
* As such, we cannot set first_layer and num_layers on cube surfaces. |
* To work around that, treat it as a 2D surface. |
*/ |
info->surface_type = GEN6_SURFTYPE_2D; |
} |
if (ilo_dev_gen(dev) >= ILO_GEN(7)) { |
separate_stencil = true; |
} else { |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 317: |
* |
* "This field (Separate Stencil Buffer Enable) must be set to the |
* same value (enabled or disabled) as Hierarchical Depth Buffer |
* Enable." |
*/ |
separate_stencil = ilo_image_can_enable_aux(img, level); |
} |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 317: |
* |
* "If this field (Hierarchical Depth Buffer Enable) is enabled, the |
* Surface Format of the depth buffer cannot be |
* D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT. Use of stencil |
* requires the separate stencil buffer." |
* |
* From the Ironlake PRM, volume 2 part 1, page 330: |
* |
* "If this field (Separate Stencil Buffer Enable) is disabled, the |
* Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT." |
* |
* There is no similar restriction for GEN6. But when D24_UNORM_X8_UINT |
* is indeed used, the depth values output by the fragment shaders will |
* be different when read back. |
* |
* As for GEN7+, separate_stencil is always true. |
*/ |
switch (format) { |
case PIPE_FORMAT_Z16_UNORM: |
info->format = GEN6_ZFORMAT_D16_UNORM; |
break; |
case PIPE_FORMAT_Z32_FLOAT: |
info->format = GEN6_ZFORMAT_D32_FLOAT; |
break; |
case PIPE_FORMAT_Z24X8_UNORM: |
case PIPE_FORMAT_Z24_UNORM_S8_UINT: |
info->format = (separate_stencil) ? |
GEN6_ZFORMAT_D24_UNORM_X8_UINT : |
GEN6_ZFORMAT_D24_UNORM_S8_UINT; |
break; |
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: |
info->format = (separate_stencil) ? |
GEN6_ZFORMAT_D32_FLOAT : |
GEN6_ZFORMAT_D32_FLOAT_S8X24_UINT; |
break; |
case PIPE_FORMAT_S8_UINT: |
if (separate_stencil) { |
info->format = GEN6_ZFORMAT_D32_FLOAT; |
break; |
} |
/* fall through */ |
default: |
assert(!"unsupported depth/stencil format"); |
zs_init_info_null(dev, info); |
return; |
break; |
} |
if (format != PIPE_FORMAT_S8_UINT) { |
info->zs.bo = img->bo; |
info->zs.stride = img->bo_stride; |
assert(img->walk_layer_height % 4 == 0); |
info->zs.qpitch = img->walk_layer_height / 4; |
info->zs.tiling = img->tiling; |
info->zs.offset = 0; |
} |
if (s8_img || format == PIPE_FORMAT_S8_UINT) { |
info->stencil.bo = s8_img->bo; |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 329: |
* |
* "The pitch must be set to 2x the value computed based on width, |
* as the stencil buffer is stored with two rows interleaved." |
* |
* For GEN7, we still dobule the stride because we did not double the |
* slice widths when initializing the layout. |
*/ |
info->stencil.stride = s8_img->bo_stride * 2; |
assert(s8_img->walk_layer_height % 4 == 0); |
info->stencil.qpitch = s8_img->walk_layer_height / 4; |
info->stencil.tiling = s8_img->tiling; |
if (ilo_dev_gen(dev) == ILO_GEN(6)) { |
unsigned x, y; |
assert(s8_img->walk == ILO_IMAGE_WALK_LOD); |
/* offset to the level */ |
ilo_image_get_slice_pos(s8_img, level, 0, &x, &y); |
ilo_image_pos_to_mem(s8_img, x, y, &x, &y); |
info->stencil.offset = ilo_image_mem_to_raw(s8_img, x, y); |
} |
} |
if (ilo_image_can_enable_aux(img, level)) { |
info->hiz.bo = img->aux.bo; |
info->hiz.stride = img->aux.bo_stride; |
assert(img->aux.walk_layer_height % 4 == 0); |
info->hiz.qpitch = img->aux.walk_layer_height / 4; |
info->hiz.tiling = GEN6_TILING_Y; |
/* offset to the level */ |
if (ilo_dev_gen(dev) == ILO_GEN(6)) |
info->hiz.offset = img->aux.walk_lod_offsets[level]; |
} |
info->width = img->width0; |
info->height = img->height0; |
info->depth = (target == PIPE_TEXTURE_3D) ? img->depth0 : num_layers; |
info->lod = level; |
info->first_layer = first_layer; |
info->num_layers = num_layers; |
} |
void |
ilo_gpe_init_zs_surface(const struct ilo_dev *dev, |
const struct ilo_image *img, |
const struct ilo_image *s8_img, |
enum pipe_texture_target target, |
enum pipe_format format, unsigned level, |
unsigned first_layer, unsigned num_layers, |
struct ilo_zs_surface *zs) |
{ |
const int max_2d_size = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 16384 : 8192; |
const int max_array_size = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 2048 : 512; |
struct ilo_zs_surface_info info; |
uint32_t dw1, dw2, dw3, dw4, dw5, dw6; |
int align_w = 8, align_h = 4; |
ILO_DEV_ASSERT(dev, 6, 8); |
if (img) { |
zs_init_info(dev, img, s8_img, target, format, |
level, first_layer, num_layers, &info); |
switch (img->sample_count) { |
case 2: |
align_w /= 2; |
break; |
case 4: |
align_w /= 2; |
align_h /= 2; |
break; |
case 8: |
align_w /= 4; |
align_h /= 2; |
break; |
case 16: |
align_w /= 4; |
align_h /= 4; |
break; |
default: |
break; |
} |
} else { |
zs_init_info_null(dev, &info); |
} |
switch (info.surface_type) { |
case GEN6_SURFTYPE_NULL: |
break; |
case GEN6_SURFTYPE_1D: |
assert(info.width <= max_2d_size && info.height == 1 && |
info.depth <= max_array_size); |
assert(info.first_layer < max_array_size - 1 && |
info.num_layers <= max_array_size); |
break; |
case GEN6_SURFTYPE_2D: |
assert(info.width <= max_2d_size && info.height <= max_2d_size && |
info.depth <= max_array_size); |
assert(info.first_layer < max_array_size - 1 && |
info.num_layers <= max_array_size); |
break; |
case GEN6_SURFTYPE_3D: |
assert(info.width <= 2048 && info.height <= 2048 && info.depth <= 2048); |
assert(info.first_layer < 2048 && info.num_layers <= max_array_size); |
break; |
case GEN6_SURFTYPE_CUBE: |
assert(info.width <= max_2d_size && info.height <= max_2d_size && |
info.depth == 1); |
assert(info.first_layer == 0 && info.num_layers == 1); |
assert(info.width == info.height); |
break; |
default: |
assert(!"unexpected depth surface type"); |
break; |
} |
dw1 = info.surface_type << GEN6_DEPTH_DW1_TYPE__SHIFT | |
info.format << GEN6_DEPTH_DW1_FORMAT__SHIFT; |
if (info.zs.bo) { |
/* required for GEN6+ */ |
assert(info.zs.tiling == GEN6_TILING_Y); |
assert(info.zs.stride > 0 && info.zs.stride < 128 * 1024 && |
info.zs.stride % 128 == 0); |
assert(info.width <= info.zs.stride); |
dw1 |= (info.zs.stride - 1); |
dw2 = info.zs.offset; |
} else { |
dw2 = 0; |
} |
if (ilo_dev_gen(dev) >= ILO_GEN(7)) { |
if (info.zs.bo) |
dw1 |= GEN7_DEPTH_DW1_DEPTH_WRITE_ENABLE; |
if (info.stencil.bo) |
dw1 |= GEN7_DEPTH_DW1_STENCIL_WRITE_ENABLE; |
if (info.hiz.bo) |
dw1 |= GEN7_DEPTH_DW1_HIZ_ENABLE; |
dw3 = (info.height - 1) << GEN7_DEPTH_DW3_HEIGHT__SHIFT | |
(info.width - 1) << GEN7_DEPTH_DW3_WIDTH__SHIFT | |
info.lod << GEN7_DEPTH_DW3_LOD__SHIFT; |
zs->dw_aligned_8x4 = |
(align(info.height, align_h) - 1) << GEN7_DEPTH_DW3_HEIGHT__SHIFT | |
(align(info.width, align_w) - 1) << GEN7_DEPTH_DW3_WIDTH__SHIFT | |
info.lod << GEN7_DEPTH_DW3_LOD__SHIFT; |
dw4 = (info.depth - 1) << GEN7_DEPTH_DW4_DEPTH__SHIFT | |
info.first_layer << GEN7_DEPTH_DW4_MIN_ARRAY_ELEMENT__SHIFT; |
dw5 = 0; |
dw6 = (info.num_layers - 1) << GEN7_DEPTH_DW6_RT_VIEW_EXTENT__SHIFT; |
if (ilo_dev_gen(dev) >= ILO_GEN(8)) |
dw6 |= info.zs.qpitch; |
} else { |
/* always Y-tiled */ |
dw1 |= GEN6_TILING_Y << GEN6_DEPTH_DW1_TILING__SHIFT; |
if (info.hiz.bo) { |
dw1 |= GEN6_DEPTH_DW1_HIZ_ENABLE | |
GEN6_DEPTH_DW1_SEPARATE_STENCIL; |
} |
dw3 = (info.height - 1) << GEN6_DEPTH_DW3_HEIGHT__SHIFT | |
(info.width - 1) << GEN6_DEPTH_DW3_WIDTH__SHIFT | |
info.lod << GEN6_DEPTH_DW3_LOD__SHIFT | |
GEN6_DEPTH_DW3_MIPLAYOUT_BELOW; |
zs->dw_aligned_8x4 = |
(align(info.height, align_h) - 1) << GEN6_DEPTH_DW3_HEIGHT__SHIFT | |
(align(info.width, align_w) - 1) << GEN6_DEPTH_DW3_WIDTH__SHIFT | |
info.lod << GEN6_DEPTH_DW3_LOD__SHIFT | |
GEN6_DEPTH_DW3_MIPLAYOUT_BELOW; |
dw4 = (info.depth - 1) << GEN6_DEPTH_DW4_DEPTH__SHIFT | |
info.first_layer << GEN6_DEPTH_DW4_MIN_ARRAY_ELEMENT__SHIFT | |
(info.num_layers - 1) << GEN6_DEPTH_DW4_RT_VIEW_EXTENT__SHIFT; |
dw5 = 0; |
dw6 = 0; |
} |
STATIC_ASSERT(Elements(zs->payload) >= 12); |
zs->payload[0] = dw1; |
zs->payload[1] = dw2; |
zs->payload[2] = dw3; |
zs->payload[3] = dw4; |
zs->payload[4] = dw5; |
zs->payload[5] = dw6; |
/* do not increment reference count */ |
zs->bo = info.zs.bo; |
/* separate stencil */ |
if (info.stencil.bo) { |
assert(info.stencil.stride > 0 && info.stencil.stride < 128 * 1024 && |
info.stencil.stride % 128 == 0); |
dw1 = (info.stencil.stride - 1) << GEN6_STENCIL_DW1_PITCH__SHIFT; |
if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) |
dw1 |= GEN75_STENCIL_DW1_STENCIL_BUFFER_ENABLE; |
dw2 = info.stencil.offset; |
dw4 = info.stencil.qpitch; |
} else { |
dw1 = 0; |
dw2 = 0; |
dw4 = 0; |
} |
zs->payload[6] = dw1; |
zs->payload[7] = dw2; |
zs->payload[8] = dw4; |
/* do not increment reference count */ |
zs->separate_s8_bo = info.stencil.bo; |
/* hiz */ |
if (info.hiz.bo) { |
dw1 = (info.hiz.stride - 1) << GEN6_HIZ_DW1_PITCH__SHIFT; |
dw2 = info.hiz.offset; |
dw4 = info.hiz.qpitch; |
} else { |
dw1 = 0; |
dw2 = 0; |
dw4 = 0; |
} |
zs->payload[9] = dw1; |
zs->payload[10] = dw2; |
zs->payload[11] = dw4; |
/* do not increment reference count */ |
zs->hiz_bo = info.hiz.bo; |
} |
/**
 * Compute the guardband, in screen space, for a viewport centered at
 * (\p center_x, \p center_y).
 *
 * The guardband is a square of length 8K centered at the viewport.  The
 * center is moved when needed so that the whole square stays within the
 * supported extent of the device.
 *
 * \param dev                device info, selects the supported extent
 * \param center_x,center_y  viewport center in screen space
 * \param min_gbx,max_gbx    receive the horizontal guardband bounds
 * \param min_gby,max_gby    receive the vertical guardband bounds
 */
static void
viewport_get_guardband(const struct ilo_dev *dev,
                       int center_x, int center_y,
                       int *min_gbx, int *max_gbx,
                       int *min_gby, int *max_gby)
{
   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 234:
    *
    *     "Per-Device Guardband Extents
    *
    *       - Supported X,Y ScreenSpace "Guardband" Extent: [-16K,16K-1]
    *       - Maximum Post-Clamp Delta (X or Y): 16K"
    *
    *     "In addition, in order to be correctly rendered, objects must have a
    *      screenspace bounding box not exceeding 8K in the X or Y direction.
    *      This additional restriction must also be comprehended by software,
    *      i.e., enforced by use of clipping."
    *
    * From the Ivy Bridge PRM, volume 2 part 1, page 248:
    *
    *     "Per-Device Guardband Extents
    *
    *       - Supported X,Y ScreenSpace "Guardband" Extent: [-32K,32K-1]
    *       - Maximum Post-Clamp Delta (X or Y): N/A"
    *
    *     "In addition, in order to be correctly rendered, objects must have a
    *      screenspace bounding box not exceeding 8K in the X or Y direction.
    *      This additional restriction must also be comprehended by software,
    *      i.e., enforced by use of clipping."
    *
    * Combined, the bounding box of any object can not exceed 8K in both
    * width and height.
    *
    * Below we set the guardband as a square of length 8K, centered at where
    * the viewport is.  This makes sure all objects passing the GB test are
    * valid to the renderer, and those failing the XY clipping have a
    * better chance of passing the GB test.
    */
   const int max_extent = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 32768 : 16384;
   const int half_len = 8192 / 2;
   /* valid centers keep the square within [-max_extent, max_extent - 1] */
   const int min_center = -max_extent + half_len;
   const int max_center = max_extent - 1 - half_len;

   /*
    * Make sure the guardband is within the valid range.  The center is
    * compared against precomputed limits so that an extreme center cannot
    * overflow, and the upper limit keeps the maximum at max_extent - 1
    * rather than one past it.
    */
   if (center_x < min_center)
      center_x = min_center;
   else if (center_x > max_center)
      center_x = max_center;

   if (center_y < min_center)
      center_y = min_center;
   else if (center_y > max_center)
      center_y = max_center;

   *min_gbx = center_x - half_len;
   *max_gbx = center_x + half_len;
   *min_gby = center_y - half_len;
   *max_gby = center_y + half_len;
}
void |
ilo_gpe_set_viewport_cso(const struct ilo_dev *dev, |
const struct pipe_viewport_state *state, |
struct ilo_viewport_cso *vp) |
{ |
const float scale_x = fabs(state->scale[0]); |
const float scale_y = fabs(state->scale[1]); |
const float scale_z = fabs(state->scale[2]); |
int min_gbx, max_gbx, min_gby, max_gby; |
ILO_DEV_ASSERT(dev, 6, 8); |
viewport_get_guardband(dev, |
(int) state->translate[0], |
(int) state->translate[1], |
&min_gbx, &max_gbx, &min_gby, &max_gby); |
/* matrix form */ |
vp->m00 = state->scale[0]; |
vp->m11 = state->scale[1]; |
vp->m22 = state->scale[2]; |
vp->m30 = state->translate[0]; |
vp->m31 = state->translate[1]; |
vp->m32 = state->translate[2]; |
/* guardband in NDC space */ |
vp->min_gbx = ((float) min_gbx - state->translate[0]) / scale_x; |
vp->max_gbx = ((float) max_gbx - state->translate[0]) / scale_x; |
vp->min_gby = ((float) min_gby - state->translate[1]) / scale_y; |
vp->max_gby = ((float) max_gby - state->translate[1]) / scale_y; |
/* viewport in screen space */ |
vp->min_x = scale_x * -1.0f + state->translate[0]; |
vp->max_x = scale_x * 1.0f + state->translate[0]; |
vp->min_y = scale_y * -1.0f + state->translate[1]; |
vp->max_y = scale_y * 1.0f + state->translate[1]; |
vp->min_z = scale_z * -1.0f + state->translate[2]; |
vp->max_z = scale_z * 1.0f + state->translate[2]; |
} |
/** |
* Translate a pipe logicop to the matching hardware logicop. |
*/ |
static int |
gen6_translate_pipe_logicop(unsigned logicop) |
{ |
switch (logicop) { |
case PIPE_LOGICOP_CLEAR: return GEN6_LOGICOP_CLEAR; |
case PIPE_LOGICOP_NOR: return GEN6_LOGICOP_NOR; |
case PIPE_LOGICOP_AND_INVERTED: return GEN6_LOGICOP_AND_INVERTED; |
case PIPE_LOGICOP_COPY_INVERTED: return GEN6_LOGICOP_COPY_INVERTED; |
case PIPE_LOGICOP_AND_REVERSE: return GEN6_LOGICOP_AND_REVERSE; |
case PIPE_LOGICOP_INVERT: return GEN6_LOGICOP_INVERT; |
case PIPE_LOGICOP_XOR: return GEN6_LOGICOP_XOR; |
case PIPE_LOGICOP_NAND: return GEN6_LOGICOP_NAND; |
case PIPE_LOGICOP_AND: return GEN6_LOGICOP_AND; |
case PIPE_LOGICOP_EQUIV: return GEN6_LOGICOP_EQUIV; |
case PIPE_LOGICOP_NOOP: return GEN6_LOGICOP_NOOP; |
case PIPE_LOGICOP_OR_INVERTED: return GEN6_LOGICOP_OR_INVERTED; |
case PIPE_LOGICOP_COPY: return GEN6_LOGICOP_COPY; |
case PIPE_LOGICOP_OR_REVERSE: return GEN6_LOGICOP_OR_REVERSE; |
case PIPE_LOGICOP_OR: return GEN6_LOGICOP_OR; |
case PIPE_LOGICOP_SET: return GEN6_LOGICOP_SET; |
default: |
assert(!"unknown logicop function"); |
return GEN6_LOGICOP_CLEAR; |
} |
} |
/** |
* Translate a pipe blend function to the matching hardware blend function. |
*/ |
static int |
gen6_translate_pipe_blend(unsigned blend) |
{ |
switch (blend) { |
case PIPE_BLEND_ADD: return GEN6_BLENDFUNCTION_ADD; |
case PIPE_BLEND_SUBTRACT: return GEN6_BLENDFUNCTION_SUBTRACT; |
case PIPE_BLEND_REVERSE_SUBTRACT: return GEN6_BLENDFUNCTION_REVERSE_SUBTRACT; |
case PIPE_BLEND_MIN: return GEN6_BLENDFUNCTION_MIN; |
case PIPE_BLEND_MAX: return GEN6_BLENDFUNCTION_MAX; |
default: |
assert(!"unknown blend function"); |
return GEN6_BLENDFUNCTION_ADD; |
}; |
} |
/** |
* Translate a pipe blend factor to the matching hardware blend factor. |
*/ |
static int |
gen6_translate_pipe_blendfactor(unsigned blendfactor) |
{ |
switch (blendfactor) { |
case PIPE_BLENDFACTOR_ONE: return GEN6_BLENDFACTOR_ONE; |
case PIPE_BLENDFACTOR_SRC_COLOR: return GEN6_BLENDFACTOR_SRC_COLOR; |
case PIPE_BLENDFACTOR_SRC_ALPHA: return GEN6_BLENDFACTOR_SRC_ALPHA; |
case PIPE_BLENDFACTOR_DST_ALPHA: return GEN6_BLENDFACTOR_DST_ALPHA; |
case PIPE_BLENDFACTOR_DST_COLOR: return GEN6_BLENDFACTOR_DST_COLOR; |
case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE; |
case PIPE_BLENDFACTOR_CONST_COLOR: return GEN6_BLENDFACTOR_CONST_COLOR; |
case PIPE_BLENDFACTOR_CONST_ALPHA: return GEN6_BLENDFACTOR_CONST_ALPHA; |
case PIPE_BLENDFACTOR_SRC1_COLOR: return GEN6_BLENDFACTOR_SRC1_COLOR; |
case PIPE_BLENDFACTOR_SRC1_ALPHA: return GEN6_BLENDFACTOR_SRC1_ALPHA; |
case PIPE_BLENDFACTOR_ZERO: return GEN6_BLENDFACTOR_ZERO; |
case PIPE_BLENDFACTOR_INV_SRC_COLOR: return GEN6_BLENDFACTOR_INV_SRC_COLOR; |
case PIPE_BLENDFACTOR_INV_SRC_ALPHA: return GEN6_BLENDFACTOR_INV_SRC_ALPHA; |
case PIPE_BLENDFACTOR_INV_DST_ALPHA: return GEN6_BLENDFACTOR_INV_DST_ALPHA; |
case PIPE_BLENDFACTOR_INV_DST_COLOR: return GEN6_BLENDFACTOR_INV_DST_COLOR; |
case PIPE_BLENDFACTOR_INV_CONST_COLOR: return GEN6_BLENDFACTOR_INV_CONST_COLOR; |
case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return GEN6_BLENDFACTOR_INV_CONST_ALPHA; |
case PIPE_BLENDFACTOR_INV_SRC1_COLOR: return GEN6_BLENDFACTOR_INV_SRC1_COLOR; |
case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: return GEN6_BLENDFACTOR_INV_SRC1_ALPHA; |
default: |
assert(!"unknown blend factor"); |
return GEN6_BLENDFACTOR_ONE; |
}; |
} |
/** |
* Translate a pipe stencil op to the matching hardware stencil op. |
*/ |
static int |
gen6_translate_pipe_stencil_op(unsigned stencil_op) |
{ |
switch (stencil_op) { |
case PIPE_STENCIL_OP_KEEP: return GEN6_STENCILOP_KEEP; |
case PIPE_STENCIL_OP_ZERO: return GEN6_STENCILOP_ZERO; |
case PIPE_STENCIL_OP_REPLACE: return GEN6_STENCILOP_REPLACE; |
case PIPE_STENCIL_OP_INCR: return GEN6_STENCILOP_INCRSAT; |
case PIPE_STENCIL_OP_DECR: return GEN6_STENCILOP_DECRSAT; |
case PIPE_STENCIL_OP_INCR_WRAP: return GEN6_STENCILOP_INCR; |
case PIPE_STENCIL_OP_DECR_WRAP: return GEN6_STENCILOP_DECR; |
case PIPE_STENCIL_OP_INVERT: return GEN6_STENCILOP_INVERT; |
default: |
assert(!"unknown stencil op"); |
return GEN6_STENCILOP_KEEP; |
} |
} |
static int |
gen6_blend_factor_dst_alpha_forced_one(int factor) |
{ |
switch (factor) { |
case GEN6_BLENDFACTOR_DST_ALPHA: |
return GEN6_BLENDFACTOR_ONE; |
case GEN6_BLENDFACTOR_INV_DST_ALPHA: |
case GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE: |
return GEN6_BLENDFACTOR_ZERO; |
default: |
return factor; |
} |
} |
static uint32_t |
blend_get_rt_blend_enable_gen6(const struct ilo_dev *dev, |
const struct pipe_rt_blend_state *rt, |
bool dst_alpha_forced_one) |
{ |
int rgb_src, rgb_dst, a_src, a_dst; |
uint32_t dw; |
ILO_DEV_ASSERT(dev, 6, 7.5); |
if (!rt->blend_enable) |
return 0; |
rgb_src = gen6_translate_pipe_blendfactor(rt->rgb_src_factor); |
rgb_dst = gen6_translate_pipe_blendfactor(rt->rgb_dst_factor); |
a_src = gen6_translate_pipe_blendfactor(rt->alpha_src_factor); |
a_dst = gen6_translate_pipe_blendfactor(rt->alpha_dst_factor); |
if (dst_alpha_forced_one) { |
rgb_src = gen6_blend_factor_dst_alpha_forced_one(rgb_src); |
rgb_dst = gen6_blend_factor_dst_alpha_forced_one(rgb_dst); |
a_src = gen6_blend_factor_dst_alpha_forced_one(a_src); |
a_dst = gen6_blend_factor_dst_alpha_forced_one(a_dst); |
} |
dw = GEN6_RT_DW0_BLEND_ENABLE | |
gen6_translate_pipe_blend(rt->alpha_func) << 26 | |
a_src << 20 | |
a_dst << 15 | |
gen6_translate_pipe_blend(rt->rgb_func) << 11 | |
rgb_src << 5 | |
rgb_dst; |
if (rt->rgb_func != rt->alpha_func || |
rgb_src != a_src || rgb_dst != a_dst) |
dw |= GEN6_RT_DW0_INDEPENDENT_ALPHA_ENABLE; |
return dw; |
} |
static uint32_t |
blend_get_rt_blend_enable_gen8(const struct ilo_dev *dev, |
const struct pipe_rt_blend_state *rt, |
bool dst_alpha_forced_one, |
bool *independent_alpha) |
{ |
int rgb_src, rgb_dst, a_src, a_dst; |
uint32_t dw; |
ILO_DEV_ASSERT(dev, 8, 8); |
if (!rt->blend_enable) { |
*independent_alpha = false; |
return 0; |
} |
rgb_src = gen6_translate_pipe_blendfactor(rt->rgb_src_factor); |
rgb_dst = gen6_translate_pipe_blendfactor(rt->rgb_dst_factor); |
a_src = gen6_translate_pipe_blendfactor(rt->alpha_src_factor); |
a_dst = gen6_translate_pipe_blendfactor(rt->alpha_dst_factor); |
if (dst_alpha_forced_one) { |
rgb_src = gen6_blend_factor_dst_alpha_forced_one(rgb_src); |
rgb_dst = gen6_blend_factor_dst_alpha_forced_one(rgb_dst); |
a_src = gen6_blend_factor_dst_alpha_forced_one(a_src); |
a_dst = gen6_blend_factor_dst_alpha_forced_one(a_dst); |
} |
dw = GEN8_RT_DW0_BLEND_ENABLE | |
rgb_src << 26 | |
rgb_dst << 21 | |
gen6_translate_pipe_blend(rt->rgb_func) << 18 | |
a_src << 13 | |
a_dst << 8 | |
gen6_translate_pipe_blend(rt->alpha_func) << 5; |
*independent_alpha = (rt->rgb_func != rt->alpha_func || |
rgb_src != a_src || |
rgb_dst != a_dst); |
return dw; |
} |
static void |
blend_init_cso_gen6(const struct ilo_dev *dev, |
const struct pipe_blend_state *state, |
struct ilo_blend_state *blend, |
unsigned index) |
{ |
const struct pipe_rt_blend_state *rt = &state->rt[index]; |
struct ilo_blend_cso *cso = &blend->cso[index]; |
ILO_DEV_ASSERT(dev, 6, 7.5); |
cso->payload[0] = 0; |
cso->payload[1] = GEN6_RT_DW1_COLORCLAMP_RTFORMAT | |
GEN6_RT_DW1_PRE_BLEND_CLAMP | |
GEN6_RT_DW1_POST_BLEND_CLAMP; |
if (!(rt->colormask & PIPE_MASK_A)) |
cso->payload[1] |= GEN6_RT_DW1_WRITE_DISABLE_A; |
if (!(rt->colormask & PIPE_MASK_R)) |
cso->payload[1] |= GEN6_RT_DW1_WRITE_DISABLE_R; |
if (!(rt->colormask & PIPE_MASK_G)) |
cso->payload[1] |= GEN6_RT_DW1_WRITE_DISABLE_G; |
if (!(rt->colormask & PIPE_MASK_B)) |
cso->payload[1] |= GEN6_RT_DW1_WRITE_DISABLE_B; |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 365: |
* |
* "Color Buffer Blending and Logic Ops must not be enabled |
* simultaneously, or behavior is UNDEFINED." |
* |
* Since state->logicop_enable takes precedence over rt->blend_enable, |
* no special care is needed. |
*/ |
if (state->logicop_enable) { |
cso->dw_blend = 0; |
cso->dw_blend_dst_alpha_forced_one = 0; |
} else { |
cso->dw_blend = blend_get_rt_blend_enable_gen6(dev, rt, false); |
cso->dw_blend_dst_alpha_forced_one = |
blend_get_rt_blend_enable_gen6(dev, rt, true); |
} |
} |
static bool |
blend_init_cso_gen8(const struct ilo_dev *dev, |
const struct pipe_blend_state *state, |
struct ilo_blend_state *blend, |
unsigned index) |
{ |
const struct pipe_rt_blend_state *rt = &state->rt[index]; |
struct ilo_blend_cso *cso = &blend->cso[index]; |
bool independent_alpha = false; |
ILO_DEV_ASSERT(dev, 8, 8); |
cso->payload[0] = 0; |
cso->payload[1] = GEN8_RT_DW1_COLORCLAMP_RTFORMAT | |
GEN8_RT_DW1_PRE_BLEND_CLAMP | |
GEN8_RT_DW1_POST_BLEND_CLAMP; |
if (!(rt->colormask & PIPE_MASK_A)) |
cso->payload[0] |= GEN8_RT_DW0_WRITE_DISABLE_A; |
if (!(rt->colormask & PIPE_MASK_R)) |
cso->payload[0] |= GEN8_RT_DW0_WRITE_DISABLE_R; |
if (!(rt->colormask & PIPE_MASK_G)) |
cso->payload[0] |= GEN8_RT_DW0_WRITE_DISABLE_G; |
if (!(rt->colormask & PIPE_MASK_B)) |
cso->payload[0] |= GEN8_RT_DW0_WRITE_DISABLE_B; |
if (state->logicop_enable) { |
cso->dw_blend = 0; |
cso->dw_blend_dst_alpha_forced_one = 0; |
} else { |
bool tmp[2]; |
cso->dw_blend = blend_get_rt_blend_enable_gen8(dev, rt, false, &tmp[0]); |
cso->dw_blend_dst_alpha_forced_one = |
blend_get_rt_blend_enable_gen8(dev, rt, true, &tmp[1]); |
if (tmp[0] || tmp[1]) |
independent_alpha = true; |
} |
return independent_alpha; |
} |
static uint32_t |
blend_get_logicop_enable_gen6(const struct ilo_dev *dev, |
const struct pipe_blend_state *state) |
{ |
ILO_DEV_ASSERT(dev, 6, 7.5); |
if (!state->logicop_enable) |
return 0; |
return GEN6_RT_DW1_LOGICOP_ENABLE | |
gen6_translate_pipe_logicop(state->logicop_func) << 18; |
} |
static uint32_t |
blend_get_logicop_enable_gen8(const struct ilo_dev *dev, |
const struct pipe_blend_state *state) |
{ |
ILO_DEV_ASSERT(dev, 8, 8); |
if (!state->logicop_enable) |
return 0; |
return GEN8_RT_DW1_LOGICOP_ENABLE | |
gen6_translate_pipe_logicop(state->logicop_func) << 27; |
} |
static uint32_t |
blend_get_alpha_mod_gen6(const struct ilo_dev *dev, |
const struct pipe_blend_state *state, |
bool dual_blend) |
{ |
uint32_t dw = 0; |
ILO_DEV_ASSERT(dev, 6, 7.5); |
if (state->alpha_to_coverage) { |
dw |= GEN6_RT_DW1_ALPHA_TO_COVERAGE; |
if (ilo_dev_gen(dev) >= ILO_GEN(7)) |
dw |= GEN6_RT_DW1_ALPHA_TO_COVERAGE_DITHER; |
} |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 378: |
* |
* "If Dual Source Blending is enabled, this bit (AlphaToOne Enable) |
* must be disabled." |
*/ |
if (state->alpha_to_one && !dual_blend) |
dw |= GEN6_RT_DW1_ALPHA_TO_ONE; |
return dw; |
} |
static uint32_t |
blend_get_alpha_mod_gen8(const struct ilo_dev *dev, |
const struct pipe_blend_state *state, |
bool dual_blend) |
{ |
uint32_t dw = 0; |
ILO_DEV_ASSERT(dev, 8, 8); |
if (state->alpha_to_coverage) { |
dw |= GEN8_BLEND_DW0_ALPHA_TO_COVERAGE | |
GEN8_BLEND_DW0_ALPHA_TO_COVERAGE_DITHER; |
} |
if (state->alpha_to_one && !dual_blend) |
dw |= GEN8_BLEND_DW0_ALPHA_TO_ONE; |
return dw; |
} |
static uint32_t |
blend_get_ps_blend_gen8(const struct ilo_dev *dev, uint32_t rt_dw0) |
{ |
int rgb_src, rgb_dst, a_src, a_dst; |
uint32_t dw; |
ILO_DEV_ASSERT(dev, 8, 8); |
if (!(rt_dw0 & GEN8_RT_DW0_BLEND_ENABLE)) |
return 0; |
a_src = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_SRC_ALPHA_FACTOR); |
a_dst = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_DST_ALPHA_FACTOR); |
rgb_src = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_SRC_COLOR_FACTOR); |
rgb_dst = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_DST_COLOR_FACTOR); |
dw = GEN8_PS_BLEND_DW1_BLEND_ENABLE; |
dw |= GEN_SHIFT32(a_src, GEN8_PS_BLEND_DW1_SRC_ALPHA_FACTOR); |
dw |= GEN_SHIFT32(a_dst, GEN8_PS_BLEND_DW1_DST_ALPHA_FACTOR); |
dw |= GEN_SHIFT32(rgb_src, GEN8_PS_BLEND_DW1_SRC_COLOR_FACTOR); |
dw |= GEN_SHIFT32(rgb_dst, GEN8_PS_BLEND_DW1_DST_COLOR_FACTOR); |
if (a_src != rgb_src || a_dst != rgb_dst) |
dw |= GEN8_PS_BLEND_DW1_INDEPENDENT_ALPHA_ENABLE; |
return dw; |
} |
void |
ilo_gpe_init_blend(const struct ilo_dev *dev, |
const struct pipe_blend_state *state, |
struct ilo_blend_state *blend) |
{ |
unsigned i; |
ILO_DEV_ASSERT(dev, 6, 8); |
blend->dual_blend = (util_blend_state_is_dual(state, 0) && |
state->rt[0].blend_enable && |
!state->logicop_enable); |
blend->alpha_to_coverage = state->alpha_to_coverage; |
if (ilo_dev_gen(dev) >= ILO_GEN(8)) { |
bool independent_alpha; |
blend->dw_alpha_mod = |
blend_get_alpha_mod_gen8(dev, state, blend->dual_blend); |
blend->dw_logicop = blend_get_logicop_enable_gen8(dev, state); |
blend->dw_shared = (state->dither) ? GEN8_BLEND_DW0_DITHER_ENABLE : 0; |
independent_alpha = blend_init_cso_gen8(dev, state, blend, 0); |
if (independent_alpha) |
blend->dw_shared |= GEN8_BLEND_DW0_INDEPENDENT_ALPHA_ENABLE; |
blend->dw_ps_blend = blend_get_ps_blend_gen8(dev, |
blend->cso[0].dw_blend); |
blend->dw_ps_blend_dst_alpha_forced_one = blend_get_ps_blend_gen8(dev, |
blend->cso[0].dw_blend_dst_alpha_forced_one); |
if (state->independent_blend_enable) { |
for (i = 1; i < Elements(blend->cso); i++) { |
independent_alpha = blend_init_cso_gen8(dev, state, blend, i); |
if (independent_alpha) |
blend->dw_shared |= GEN8_BLEND_DW0_INDEPENDENT_ALPHA_ENABLE; |
} |
} else { |
for (i = 1; i < Elements(blend->cso); i++) |
blend->cso[i] = blend->cso[0]; |
} |
} else { |
blend->dw_alpha_mod = |
blend_get_alpha_mod_gen6(dev, state, blend->dual_blend); |
blend->dw_logicop = blend_get_logicop_enable_gen6(dev, state); |
blend->dw_shared = (state->dither) ? GEN6_RT_DW1_DITHER_ENABLE : 0; |
blend->dw_ps_blend = 0; |
blend->dw_ps_blend_dst_alpha_forced_one = 0; |
blend_init_cso_gen6(dev, state, blend, 0); |
if (state->independent_blend_enable) { |
for (i = 1; i < Elements(blend->cso); i++) |
blend_init_cso_gen6(dev, state, blend, i); |
} else { |
for (i = 1; i < Elements(blend->cso); i++) |
blend->cso[i] = blend->cso[0]; |
} |
} |
} |
/** |
* Translate a pipe DSA test function to the matching hardware compare |
* function. |
*/ |
static int |
gen6_translate_dsa_func(unsigned func) |
{ |
switch (func) { |
case PIPE_FUNC_NEVER: return GEN6_COMPAREFUNCTION_NEVER; |
case PIPE_FUNC_LESS: return GEN6_COMPAREFUNCTION_LESS; |
case PIPE_FUNC_EQUAL: return GEN6_COMPAREFUNCTION_EQUAL; |
case PIPE_FUNC_LEQUAL: return GEN6_COMPAREFUNCTION_LEQUAL; |
case PIPE_FUNC_GREATER: return GEN6_COMPAREFUNCTION_GREATER; |
case PIPE_FUNC_NOTEQUAL: return GEN6_COMPAREFUNCTION_NOTEQUAL; |
case PIPE_FUNC_GEQUAL: return GEN6_COMPAREFUNCTION_GEQUAL; |
case PIPE_FUNC_ALWAYS: return GEN6_COMPAREFUNCTION_ALWAYS; |
default: |
assert(!"unknown depth/stencil/alpha test function"); |
return GEN6_COMPAREFUNCTION_NEVER; |
} |
} |
static uint32_t |
dsa_get_stencil_enable_gen6(const struct ilo_dev *dev, |
const struct pipe_stencil_state *stencil0, |
const struct pipe_stencil_state *stencil1) |
{ |
uint32_t dw; |
ILO_DEV_ASSERT(dev, 6, 7.5); |
if (!stencil0->enabled) |
return 0; |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 359: |
* |
* "If the Depth Buffer is either undefined or does not have a surface |
* format of D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT and separate |
* stencil buffer is disabled, Stencil Test Enable must be DISABLED" |
* |
* From the Sandy Bridge PRM, volume 2 part 1, page 370: |
* |
* "This field (Stencil Test Enable) cannot be enabled if |
* Surface Format in 3DSTATE_DEPTH_BUFFER is set to D16_UNORM." |
* |
* TODO We do not check these yet. |
*/ |
dw = GEN6_ZS_DW0_STENCIL_TEST_ENABLE | |
gen6_translate_dsa_func(stencil0->func) << 28 | |
gen6_translate_pipe_stencil_op(stencil0->fail_op) << 25 | |
gen6_translate_pipe_stencil_op(stencil0->zfail_op) << 22 | |
gen6_translate_pipe_stencil_op(stencil0->zpass_op) << 19; |
if (stencil0->writemask) |
dw |= GEN6_ZS_DW0_STENCIL_WRITE_ENABLE; |
if (stencil1->enabled) { |
dw |= GEN6_ZS_DW0_STENCIL1_ENABLE | |
gen6_translate_dsa_func(stencil1->func) << 12 | |
gen6_translate_pipe_stencil_op(stencil1->fail_op) << 9 | |
gen6_translate_pipe_stencil_op(stencil1->zfail_op) << 6 | |
gen6_translate_pipe_stencil_op(stencil1->zpass_op) << 3; |
if (stencil1->writemask) |
dw |= GEN6_ZS_DW0_STENCIL_WRITE_ENABLE; |
} |
return dw; |
} |
static uint32_t |
dsa_get_stencil_enable_gen8(const struct ilo_dev *dev, |
const struct pipe_stencil_state *stencil0, |
const struct pipe_stencil_state *stencil1) |
{ |
uint32_t dw; |
ILO_DEV_ASSERT(dev, 8, 8); |
if (!stencil0->enabled) |
return 0; |
dw = gen6_translate_pipe_stencil_op(stencil0->fail_op) << 29 | |
gen6_translate_pipe_stencil_op(stencil0->zfail_op) << 26 | |
gen6_translate_pipe_stencil_op(stencil0->zpass_op) << 23 | |
gen6_translate_dsa_func(stencil0->func) << 8 | |
GEN8_ZS_DW1_STENCIL_TEST_ENABLE; |
if (stencil0->writemask) |
dw |= GEN8_ZS_DW1_STENCIL_WRITE_ENABLE; |
if (stencil1->enabled) { |
dw |= gen6_translate_dsa_func(stencil1->func) << 20 | |
gen6_translate_pipe_stencil_op(stencil1->fail_op) << 17 | |
gen6_translate_pipe_stencil_op(stencil1->zfail_op) << 14 | |
gen6_translate_pipe_stencil_op(stencil1->zpass_op) << 11 | |
GEN8_ZS_DW1_STENCIL1_ENABLE; |
if (stencil1->writemask) |
dw |= GEN8_ZS_DW1_STENCIL_WRITE_ENABLE; |
} |
return dw; |
} |
static uint32_t |
dsa_get_depth_enable_gen6(const struct ilo_dev *dev, |
const struct pipe_depth_state *state) |
{ |
uint32_t dw; |
ILO_DEV_ASSERT(dev, 6, 7.5); |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 360: |
* |
* "Enabling the Depth Test function without defining a Depth Buffer is |
* UNDEFINED." |
* |
* From the Sandy Bridge PRM, volume 2 part 1, page 375: |
* |
* "A Depth Buffer must be defined before enabling writes to it, or |
* operation is UNDEFINED." |
* |
* TODO We do not check these yet. |
*/ |
if (state->enabled) { |
dw = GEN6_ZS_DW2_DEPTH_TEST_ENABLE | |
gen6_translate_dsa_func(state->func) << 27; |
} else { |
dw = GEN6_COMPAREFUNCTION_ALWAYS << 27; |
} |
if (state->writemask) |
dw |= GEN6_ZS_DW2_DEPTH_WRITE_ENABLE; |
return dw; |
} |
static uint32_t |
dsa_get_depth_enable_gen8(const struct ilo_dev *dev, |
const struct pipe_depth_state *state) |
{ |
uint32_t dw; |
ILO_DEV_ASSERT(dev, 8, 8); |
if (state->enabled) { |
dw = GEN8_ZS_DW1_DEPTH_TEST_ENABLE | |
gen6_translate_dsa_func(state->func) << 5; |
} else { |
dw = GEN6_COMPAREFUNCTION_ALWAYS << 5; |
} |
if (state->writemask) |
dw |= GEN8_ZS_DW1_DEPTH_WRITE_ENABLE; |
return dw; |
} |
static uint32_t |
dsa_get_alpha_enable_gen6(const struct ilo_dev *dev, |
const struct pipe_alpha_state *state) |
{ |
uint32_t dw; |
ILO_DEV_ASSERT(dev, 6, 7.5); |
if (!state->enabled) |
return 0; |
/* this will be ORed to BLEND_STATE */ |
dw = GEN6_RT_DW1_ALPHA_TEST_ENABLE | |
gen6_translate_dsa_func(state->func) << 13; |
return dw; |
} |
static uint32_t |
dsa_get_alpha_enable_gen8(const struct ilo_dev *dev, |
const struct pipe_alpha_state *state) |
{ |
uint32_t dw; |
ILO_DEV_ASSERT(dev, 8, 8); |
if (!state->enabled) |
return 0; |
/* this will be ORed to BLEND_STATE */ |
dw = GEN8_BLEND_DW0_ALPHA_TEST_ENABLE | |
gen6_translate_dsa_func(state->func) << 24; |
return dw; |
} |
void |
ilo_gpe_init_dsa(const struct ilo_dev *dev, |
const struct pipe_depth_stencil_alpha_state *state, |
struct ilo_dsa_state *dsa) |
{ |
ILO_DEV_ASSERT(dev, 6, 8); |
STATIC_ASSERT(Elements(dsa->payload) >= 3); |
if (ilo_dev_gen(dev) >= ILO_GEN(8)) { |
const uint32_t dw_stencil = dsa_get_stencil_enable_gen8(dev, |
&state->stencil[0], &state->stencil[1]); |
const uint32_t dw_depth = dsa_get_depth_enable_gen8(dev, &state->depth); |
assert(!(dw_stencil & dw_depth)); |
dsa->payload[0] = dw_stencil | dw_depth; |
dsa->dw_blend_alpha = dsa_get_alpha_enable_gen8(dev, &state->alpha); |
dsa->dw_ps_blend_alpha = (state->alpha.enabled) ? |
GEN8_PS_BLEND_DW1_ALPHA_TEST_ENABLE : 0; |
} else { |
dsa->payload[0] = dsa_get_stencil_enable_gen6(dev, |
&state->stencil[0], &state->stencil[1]); |
dsa->payload[2] = dsa_get_depth_enable_gen6(dev, &state->depth); |
dsa->dw_blend_alpha = dsa_get_alpha_enable_gen6(dev, &state->alpha); |
dsa->dw_ps_blend_alpha = 0; |
} |
dsa->payload[1] = state->stencil[0].valuemask << 24 | |
state->stencil[0].writemask << 16 | |
state->stencil[1].valuemask << 8 | |
state->stencil[1].writemask; |
dsa->alpha_ref = float_to_ubyte(state->alpha.ref_value); |
} |
void |
ilo_gpe_set_scissor(const struct ilo_dev *dev, |
unsigned start_slot, |
unsigned num_states, |
const struct pipe_scissor_state *states, |
struct ilo_scissor_state *scissor) |
{ |
unsigned i; |
ILO_DEV_ASSERT(dev, 6, 8); |
for (i = 0; i < num_states; i++) { |
uint16_t min_x, min_y, max_x, max_y; |
/* both max and min are inclusive in SCISSOR_RECT */ |
if (states[i].minx < states[i].maxx && |
states[i].miny < states[i].maxy) { |
min_x = states[i].minx; |
min_y = states[i].miny; |
max_x = states[i].maxx - 1; |
max_y = states[i].maxy - 1; |
} |
else { |
/* we have to make min greater than max */ |
min_x = 1; |
min_y = 1; |
max_x = 0; |
max_y = 0; |
} |
scissor->payload[(start_slot + i) * 2 + 0] = min_y << 16 | min_x; |
scissor->payload[(start_slot + i) * 2 + 1] = max_y << 16 | max_x; |
} |
if (!start_slot && num_states) |
scissor->scissor0 = states[0]; |
} |
void |
ilo_gpe_set_scissor_null(const struct ilo_dev *dev, |
struct ilo_scissor_state *scissor) |
{ |
unsigned i; |
for (i = 0; i < Elements(scissor->payload); i += 2) { |
scissor->payload[i + 0] = 1 << 16 | 1; |
scissor->payload[i + 1] = 0; |
} |
} |
static void |
fb_set_blend_caps(const struct ilo_dev *dev, |
enum pipe_format format, |
struct ilo_fb_blend_caps *caps) |
{ |
const struct util_format_description *desc = |
util_format_description(format); |
const int ch = util_format_get_first_non_void_channel(format); |
memset(caps, 0, sizeof(*caps)); |
if (format == PIPE_FORMAT_NONE || desc->is_mixed) |
return; |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 365: |
* |
* "Logic Ops are only supported on *_UNORM surfaces (excluding _SRGB |
* variants), otherwise Logic Ops must be DISABLED." |
* |
* According to the classic driver, this is lifted on Gen8+. |
*/ |
if (ilo_dev_gen(dev) >= ILO_GEN(8)) { |
caps->can_logicop = true; |
} else { |
caps->can_logicop = (ch >= 0 && desc->channel[ch].normalized && |
desc->channel[ch].type == UTIL_FORMAT_TYPE_UNSIGNED && |
desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB); |
} |
/* no blending for pure integer formats */ |
caps->can_blend = !util_format_is_pure_integer(format); |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 382: |
* |
* "Alpha Test can only be enabled if Pixel Shader outputs a float |
* alpha value." |
*/ |
caps->can_alpha_test = !util_format_is_pure_integer(format); |
caps->dst_alpha_forced_one = |
(ilo_format_translate_render(dev, format) != |
ilo_format_translate_color(dev, format)); |
/* sanity check */ |
if (caps->dst_alpha_forced_one) { |
enum pipe_format render_format; |
switch (format) { |
case PIPE_FORMAT_B8G8R8X8_UNORM: |
render_format = PIPE_FORMAT_B8G8R8A8_UNORM; |
break; |
default: |
render_format = PIPE_FORMAT_NONE; |
break; |
} |
assert(ilo_format_translate_render(dev, format) == |
ilo_format_translate_color(dev, render_format)); |
} |
} |
void |
ilo_gpe_set_fb(const struct ilo_dev *dev, |
const struct pipe_framebuffer_state *state, |
struct ilo_fb_state *fb) |
{ |
const struct pipe_surface *first_surf = NULL; |
int i; |
ILO_DEV_ASSERT(dev, 6, 8); |
util_copy_framebuffer_state(&fb->state, state); |
ilo_gpe_init_view_surface_null(dev, |
(state->width) ? state->width : 1, |
(state->height) ? state->height : 1, |
1, 0, &fb->null_rt); |
for (i = 0; i < state->nr_cbufs; i++) { |
if (state->cbufs[i]) { |
fb_set_blend_caps(dev, state->cbufs[i]->format, &fb->blend_caps[i]); |
if (!first_surf) |
first_surf = state->cbufs[i]; |
} else { |
fb_set_blend_caps(dev, PIPE_FORMAT_NONE, &fb->blend_caps[i]); |
} |
} |
if (!first_surf && state->zsbuf) |
first_surf = state->zsbuf; |
fb->num_samples = (first_surf) ? first_surf->texture->nr_samples : 1; |
if (!fb->num_samples) |
fb->num_samples = 1; |
/* |
* The PRMs list several restrictions when the framebuffer has more than |
* one surface. It seems they are actually lifted on GEN6+. |
*/ |
} |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_state_3d_top.c |
---|
0,0 → 1,1716 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2014 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include "genhw/genhw.h" |
#include "util/u_dual_blend.h" |
#include "util/u_framebuffer.h" |
#include "util/u_half.h" |
#include "util/u_resource.h" |
#include "ilo_buffer.h" |
#include "ilo_format.h" |
#include "ilo_image.h" |
#include "ilo_state_3d.h" |
#include "../ilo_shader.h" |
static void |
ve_init_cso(const struct ilo_dev *dev, |
const struct pipe_vertex_element *state, |
unsigned vb_index, |
struct ilo_ve_cso *cso) |
{ |
int comp[4] = { |
GEN6_VFCOMP_STORE_SRC, |
GEN6_VFCOMP_STORE_SRC, |
GEN6_VFCOMP_STORE_SRC, |
GEN6_VFCOMP_STORE_SRC, |
}; |
int format; |
ILO_DEV_ASSERT(dev, 6, 8); |
switch (util_format_get_nr_components(state->src_format)) { |
case 1: comp[1] = GEN6_VFCOMP_STORE_0; |
case 2: comp[2] = GEN6_VFCOMP_STORE_0; |
case 3: comp[3] = (util_format_is_pure_integer(state->src_format)) ? |
GEN6_VFCOMP_STORE_1_INT : |
GEN6_VFCOMP_STORE_1_FP; |
} |
format = ilo_format_translate_vertex(dev, state->src_format); |
STATIC_ASSERT(Elements(cso->payload) >= 2); |
cso->payload[0] = |
vb_index << GEN6_VE_DW0_VB_INDEX__SHIFT | |
GEN6_VE_DW0_VALID | |
format << GEN6_VE_DW0_FORMAT__SHIFT | |
state->src_offset << GEN6_VE_DW0_VB_OFFSET__SHIFT; |
cso->payload[1] = |
comp[0] << GEN6_VE_DW1_COMP0__SHIFT | |
comp[1] << GEN6_VE_DW1_COMP1__SHIFT | |
comp[2] << GEN6_VE_DW1_COMP2__SHIFT | |
comp[3] << GEN6_VE_DW1_COMP3__SHIFT; |
} |
void |
ilo_gpe_init_ve(const struct ilo_dev *dev, |
unsigned num_states, |
const struct pipe_vertex_element *states, |
struct ilo_ve_state *ve) |
{ |
unsigned i; |
ILO_DEV_ASSERT(dev, 6, 8); |
ve->count = num_states; |
ve->vb_count = 0; |
for (i = 0; i < num_states; i++) { |
const unsigned pipe_idx = states[i].vertex_buffer_index; |
const unsigned instance_divisor = states[i].instance_divisor; |
unsigned hw_idx; |
/* |
* map the pipe vb to the hardware vb, which has a fixed instance |
* divisor |
*/ |
for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) { |
if (ve->vb_mapping[hw_idx] == pipe_idx && |
ve->instance_divisors[hw_idx] == instance_divisor) |
break; |
} |
/* create one if there is no matching hardware vb */ |
if (hw_idx >= ve->vb_count) { |
hw_idx = ve->vb_count++; |
ve->vb_mapping[hw_idx] = pipe_idx; |
ve->instance_divisors[hw_idx] = instance_divisor; |
} |
ve_init_cso(dev, &states[i], hw_idx, &ve->cso[i]); |
} |
} |
void |
ilo_gpe_set_ve_edgeflag(const struct ilo_dev *dev, |
struct ilo_ve_cso *cso) |
{ |
int format; |
ILO_DEV_ASSERT(dev, 6, 8); |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 94: |
* |
* "- This bit (Edge Flag Enable) must only be ENABLED on the last |
* valid VERTEX_ELEMENT structure. |
* |
* - When set, Component 0 Control must be set to VFCOMP_STORE_SRC, |
* and Component 1-3 Control must be set to VFCOMP_NOSTORE. |
* |
* - The Source Element Format must be set to the UINT format. |
* |
* - [DevSNB]: Edge Flags are not supported for QUADLIST |
* primitives. Software may elect to convert QUADLIST primitives |
* to some set of corresponding edge-flag-supported primitive |
* types (e.g., POLYGONs) prior to submission to the 3D pipeline." |
*/ |
cso->payload[0] |= GEN6_VE_DW0_EDGE_FLAG_ENABLE; |
/* |
* Edge flags have format GEN6_FORMAT_R8_USCALED when defined via |
* glEdgeFlagPointer(), and format GEN6_FORMAT_R32_FLOAT when defined |
* via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h. |
* |
* Since all the hardware cares about is whether the flags are zero or not, |
* we can treat them as the corresponding _UINT formats. |
*/ |
format = GEN_EXTRACT(cso->payload[0], GEN6_VE_DW0_FORMAT); |
cso->payload[0] &= ~GEN6_VE_DW0_FORMAT__MASK; |
switch (format) { |
case GEN6_FORMAT_R32_FLOAT: |
format = GEN6_FORMAT_R32_UINT; |
break; |
case GEN6_FORMAT_R8_USCALED: |
format = GEN6_FORMAT_R8_UINT; |
break; |
default: |
break; |
} |
cso->payload[0] |= GEN_SHIFT32(format, GEN6_VE_DW0_FORMAT); |
cso->payload[1] = |
GEN6_VFCOMP_STORE_SRC << GEN6_VE_DW1_COMP0__SHIFT | |
GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP1__SHIFT | |
GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP2__SHIFT | |
GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP3__SHIFT; |
} |
void |
ilo_gpe_init_ve_nosrc(const struct ilo_dev *dev, |
int comp0, int comp1, int comp2, int comp3, |
struct ilo_ve_cso *cso) |
{ |
ILO_DEV_ASSERT(dev, 6, 8); |
STATIC_ASSERT(Elements(cso->payload) >= 2); |
assert(comp0 != GEN6_VFCOMP_STORE_SRC && |
comp1 != GEN6_VFCOMP_STORE_SRC && |
comp2 != GEN6_VFCOMP_STORE_SRC && |
comp3 != GEN6_VFCOMP_STORE_SRC); |
cso->payload[0] = GEN6_VE_DW0_VALID; |
cso->payload[1] = |
comp0 << GEN6_VE_DW1_COMP0__SHIFT | |
comp1 << GEN6_VE_DW1_COMP1__SHIFT | |
comp2 << GEN6_VE_DW1_COMP2__SHIFT | |
comp3 << GEN6_VE_DW1_COMP3__SHIFT; |
} |
void |
ilo_gpe_init_vs_cso(const struct ilo_dev *dev, |
const struct ilo_shader_state *vs, |
struct ilo_shader_cso *cso) |
{ |
int start_grf, vue_read_len, sampler_count, max_threads; |
uint32_t dw2, dw4, dw5; |
ILO_DEV_ASSERT(dev, 6, 8); |
start_grf = ilo_shader_get_kernel_param(vs, ILO_KERNEL_URB_DATA_START_REG); |
vue_read_len = ilo_shader_get_kernel_param(vs, ILO_KERNEL_INPUT_COUNT); |
sampler_count = ilo_shader_get_kernel_param(vs, ILO_KERNEL_SAMPLER_COUNT); |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 135: |
* |
* "(Vertex URB Entry Read Length) Specifies the number of pairs of |
* 128-bit vertex elements to be passed into the payload for each |
* vertex." |
* |
* "It is UNDEFINED to set this field to 0 indicating no Vertex URB |
* data to be read and passed to the thread." |
*/ |
vue_read_len = (vue_read_len + 1) / 2; |
if (!vue_read_len) |
vue_read_len = 1; |
max_threads = dev->thread_count; |
if (ilo_dev_gen(dev) == ILO_GEN(7.5) && dev->gt == 2) |
max_threads *= 2; |
dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT; |
dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; |
dw4 = start_grf << GEN6_VS_DW4_URB_GRF_START__SHIFT | |
vue_read_len << GEN6_VS_DW4_URB_READ_LEN__SHIFT | |
0 << GEN6_VS_DW4_URB_READ_OFFSET__SHIFT; |
dw5 = GEN6_VS_DW5_STATISTICS | |
GEN6_VS_DW5_VS_ENABLE; |
if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) |
dw5 |= (max_threads - 1) << GEN75_VS_DW5_MAX_THREADS__SHIFT; |
else |
dw5 |= (max_threads - 1) << GEN6_VS_DW5_MAX_THREADS__SHIFT; |
STATIC_ASSERT(Elements(cso->payload) >= 3); |
cso->payload[0] = dw2; |
cso->payload[1] = dw4; |
cso->payload[2] = dw5; |
} |
static void |
gs_init_cso_gen6(const struct ilo_dev *dev, |
const struct ilo_shader_state *gs, |
struct ilo_shader_cso *cso) |
{ |
int start_grf, vue_read_len, max_threads; |
uint32_t dw2, dw4, dw5, dw6; |
ILO_DEV_ASSERT(dev, 6, 6); |
if (ilo_shader_get_type(gs) == PIPE_SHADER_GEOMETRY) { |
start_grf = ilo_shader_get_kernel_param(gs, |
ILO_KERNEL_URB_DATA_START_REG); |
vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT); |
} |
else { |
start_grf = ilo_shader_get_kernel_param(gs, |
ILO_KERNEL_VS_GEN6_SO_START_REG); |
vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_OUTPUT_COUNT); |
} |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 153: |
* |
* "Specifies the amount of URB data read and passed in the thread |
* payload for each Vertex URB entry, in 256-bit register increments. |
* |
* It is UNDEFINED to set this field (Vertex URB Entry Read Length) to |
* 0 indicating no Vertex URB data to be read and passed to the |
* thread." |
*/ |
vue_read_len = (vue_read_len + 1) / 2; |
if (!vue_read_len) |
vue_read_len = 1; |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 154: |
* |
* "Maximum Number of Threads valid range is [0,27] when Rendering |
* Enabled bit is set." |
* |
* From the Sandy Bridge PRM, volume 2 part 1, page 173: |
* |
* "Programming Note: If the GS stage is enabled, software must always |
* allocate at least one GS URB Entry. This is true even if the GS |
* thread never needs to output vertices to the pipeline, e.g., when |
* only performing stream output. This is an artifact of the need to |
* pass the GS thread an initial destination URB handle." |
* |
* As such, we always enable rendering, and limit the number of threads. |
*/ |
if (dev->gt == 2) { |
/* maximum is 60, but limited to 28 */ |
max_threads = 28; |
} |
else { |
/* maximum is 24, but limited to 21 (see brwCreateContext()) */ |
max_threads = 21; |
} |
dw2 = GEN6_THREADDISP_SPF; |
dw4 = vue_read_len << GEN6_GS_DW4_URB_READ_LEN__SHIFT | |
0 << GEN6_GS_DW4_URB_READ_OFFSET__SHIFT | |
start_grf << GEN6_GS_DW4_URB_GRF_START__SHIFT; |
dw5 = (max_threads - 1) << GEN6_GS_DW5_MAX_THREADS__SHIFT | |
GEN6_GS_DW5_STATISTICS | |
GEN6_GS_DW5_SO_STATISTICS | |
GEN6_GS_DW5_RENDER_ENABLE; |
/* |
* we cannot make use of GEN6_GS_REORDER because it will reorder |
* triangle strips according to D3D rules (triangle 2N+1 uses vertices |
* (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices |
* (2N+2, 2N+1, 2N+3)). |
*/ |
dw6 = GEN6_GS_DW6_GS_ENABLE; |
if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_DISCARD_ADJACENCY)) |
dw6 |= GEN6_GS_DW6_DISCARD_ADJACENCY; |
if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_VS_GEN6_SO)) { |
const uint32_t svbi_post_inc = |
ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_GEN6_SVBI_POST_INC); |
dw6 |= GEN6_GS_DW6_SVBI_PAYLOAD_ENABLE; |
if (svbi_post_inc) { |
dw6 |= GEN6_GS_DW6_SVBI_POST_INC_ENABLE | |
svbi_post_inc << GEN6_GS_DW6_SVBI_POST_INC_VAL__SHIFT; |
} |
} |
STATIC_ASSERT(Elements(cso->payload) >= 4); |
cso->payload[0] = dw2; |
cso->payload[1] = dw4; |
cso->payload[2] = dw5; |
cso->payload[3] = dw6; |
} |
static void |
gs_init_cso_gen7(const struct ilo_dev *dev, |
const struct ilo_shader_state *gs, |
struct ilo_shader_cso *cso) |
{ |
int start_grf, vue_read_len, sampler_count, max_threads; |
uint32_t dw2, dw4, dw5; |
ILO_DEV_ASSERT(dev, 7, 7.5); |
start_grf = ilo_shader_get_kernel_param(gs, ILO_KERNEL_URB_DATA_START_REG); |
vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT); |
sampler_count = ilo_shader_get_kernel_param(gs, ILO_KERNEL_SAMPLER_COUNT); |
/* in pairs */ |
vue_read_len = (vue_read_len + 1) / 2; |
switch (ilo_dev_gen(dev)) { |
case ILO_GEN(7.5): |
max_threads = (dev->gt >= 2) ? 256 : 70; |
break; |
case ILO_GEN(7): |
max_threads = (dev->gt == 2) ? 128 : 36; |
break; |
default: |
max_threads = 1; |
break; |
} |
dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT; |
dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; |
dw4 = vue_read_len << GEN7_GS_DW4_URB_READ_LEN__SHIFT | |
GEN7_GS_DW4_INCLUDE_VERTEX_HANDLES | |
0 << GEN7_GS_DW4_URB_READ_OFFSET__SHIFT | |
start_grf << GEN7_GS_DW4_URB_GRF_START__SHIFT; |
dw5 = (max_threads - 1) << GEN7_GS_DW5_MAX_THREADS__SHIFT | |
GEN7_GS_DW5_STATISTICS | |
GEN7_GS_DW5_GS_ENABLE; |
STATIC_ASSERT(Elements(cso->payload) >= 3); |
cso->payload[0] = dw2; |
cso->payload[1] = dw4; |
cso->payload[2] = dw5; |
} |
/**
 * Initialize the geometry shader CSO, dispatching to the Gen6 or the Gen7+
 * implementation according to the device generation.
 */
void
ilo_gpe_init_gs_cso(const struct ilo_dev *dev,
                    const struct ilo_shader_state *gs,
                    struct ilo_shader_cso *cso)
{
   if (ilo_dev_gen(dev) < ILO_GEN(7)) {
      gs_init_cso_gen6(dev, gs, cso);
      return;
   }

   gs_init_cso_gen7(dev, gs, cso);
}
static void |
view_init_null_gen6(const struct ilo_dev *dev, |
unsigned width, unsigned height, |
unsigned depth, unsigned level, |
struct ilo_view_surface *surf) |
{ |
uint32_t *dw; |
ILO_DEV_ASSERT(dev, 6, 6); |
assert(width >= 1 && height >= 1 && depth >= 1); |
/* |
* From the Sandy Bridge PRM, volume 4 part 1, page 71: |
* |
* "A null surface will be used in instances where an actual surface is |
* not bound. When a write message is generated to a null surface, no |
* actual surface is written to. When a read message (including any |
* sampling engine message) is generated to a null surface, the result |
* is all zeros. Note that a null surface type is allowed to be used |
* with all messages, even if it is not specificially indicated as |
* supported. All of the remaining fields in surface state are ignored |
* for null surfaces, with the following exceptions: |
* |
* * [DevSNB+]: Width, Height, Depth, and LOD fields must match the |
* depth buffer's corresponding state for all render target |
* surfaces, including null. |
* * Surface Format must be R8G8B8A8_UNORM." |
* |
* From the Sandy Bridge PRM, volume 4 part 1, page 82: |
* |
* "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be |
* true" |
*/ |
STATIC_ASSERT(Elements(surf->payload) >= 6); |
dw = surf->payload; |
dw[0] = GEN6_SURFTYPE_NULL << GEN6_SURFACE_DW0_TYPE__SHIFT | |
GEN6_FORMAT_B8G8R8A8_UNORM << GEN6_SURFACE_DW0_FORMAT__SHIFT; |
dw[1] = 0; |
dw[2] = (height - 1) << GEN6_SURFACE_DW2_HEIGHT__SHIFT | |
(width - 1) << GEN6_SURFACE_DW2_WIDTH__SHIFT | |
level << GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT; |
dw[3] = (depth - 1) << GEN6_SURFACE_DW3_DEPTH__SHIFT | |
GEN6_TILING_X; |
dw[4] = 0; |
dw[5] = 0; |
} |
static void |
view_init_for_buffer_gen6(const struct ilo_dev *dev, |
const struct ilo_buffer *buf, |
unsigned offset, unsigned size, |
unsigned struct_size, |
enum pipe_format elem_format, |
bool is_rt, bool render_cache_rw, |
struct ilo_view_surface *surf) |
{ |
const int elem_size = util_format_get_blocksize(elem_format); |
int width, height, depth, pitch; |
int surface_format, num_entries; |
uint32_t *dw; |
ILO_DEV_ASSERT(dev, 6, 6); |
/* |
* For SURFTYPE_BUFFER, a SURFACE_STATE specifies an element of a |
* structure in a buffer. |
*/ |
surface_format = ilo_format_translate_color(dev, elem_format); |
num_entries = size / struct_size; |
/* see if there is enough space to fit another element */ |
if (size % struct_size >= elem_size) |
num_entries++; |
/* |
* From the Sandy Bridge PRM, volume 4 part 1, page 76: |
* |
* "For SURFTYPE_BUFFER render targets, this field (Surface Base |
* Address) specifies the base address of first element of the |
* surface. The surface is interpreted as a simple array of that |
* single element type. The address must be naturally-aligned to the |
* element size (e.g., a buffer containing R32G32B32A32_FLOAT elements |
* must be 16-byte aligned). |
* |
* For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies |
* the base address of the first element of the surface, computed in |
* software by adding the surface base address to the byte offset of |
* the element in the buffer." |
*/ |
if (is_rt) |
assert(offset % elem_size == 0); |
/* |
* From the Sandy Bridge PRM, volume 4 part 1, page 77: |
* |
* "For buffer surfaces, the number of entries in the buffer ranges |
* from 1 to 2^27." |
*/ |
assert(num_entries >= 1 && num_entries <= 1 << 27); |
/* |
* From the Sandy Bridge PRM, volume 4 part 1, page 81: |
* |
* "For surfaces of type SURFTYPE_BUFFER, this field (Surface Pitch) |
* indicates the size of the structure." |
*/ |
pitch = struct_size; |
pitch--; |
num_entries--; |
/* bits [6:0] */ |
width = (num_entries & 0x0000007f); |
/* bits [19:7] */ |
height = (num_entries & 0x000fff80) >> 7; |
/* bits [26:20] */ |
depth = (num_entries & 0x07f00000) >> 20; |
STATIC_ASSERT(Elements(surf->payload) >= 6); |
dw = surf->payload; |
dw[0] = GEN6_SURFTYPE_BUFFER << GEN6_SURFACE_DW0_TYPE__SHIFT | |
surface_format << GEN6_SURFACE_DW0_FORMAT__SHIFT; |
if (render_cache_rw) |
dw[0] |= GEN6_SURFACE_DW0_RENDER_CACHE_RW; |
dw[1] = offset; |
dw[2] = height << GEN6_SURFACE_DW2_HEIGHT__SHIFT | |
width << GEN6_SURFACE_DW2_WIDTH__SHIFT; |
dw[3] = depth << GEN6_SURFACE_DW3_DEPTH__SHIFT | |
pitch << GEN6_SURFACE_DW3_PITCH__SHIFT; |
dw[4] = 0; |
dw[5] = 0; |
} |
static void |
view_init_for_image_gen6(const struct ilo_dev *dev, |
const struct ilo_image *img, |
enum pipe_texture_target target, |
enum pipe_format format, |
unsigned first_level, |
unsigned num_levels, |
unsigned first_layer, |
unsigned num_layers, |
bool is_rt, |
struct ilo_view_surface *surf) |
{ |
int surface_type, surface_format; |
int width, height, depth, pitch, lod; |
uint32_t *dw; |
ILO_DEV_ASSERT(dev, 6, 6); |
surface_type = ilo_gpe_gen6_translate_texture(target); |
assert(surface_type != GEN6_SURFTYPE_BUFFER); |
if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && img->separate_stencil) |
format = PIPE_FORMAT_Z32_FLOAT; |
if (is_rt) |
surface_format = ilo_format_translate_render(dev, format); |
else |
surface_format = ilo_format_translate_texture(dev, format); |
assert(surface_format >= 0); |
width = img->width0; |
height = img->height0; |
depth = (target == PIPE_TEXTURE_3D) ? img->depth0 : num_layers; |
pitch = img->bo_stride; |
if (surface_type == GEN6_SURFTYPE_CUBE) { |
/* |
* From the Sandy Bridge PRM, volume 4 part 1, page 81: |
* |
* "For SURFTYPE_CUBE: [DevSNB+]: for Sampling Engine Surfaces, the |
* range of this field (Depth) is [0,84], indicating the number of |
* cube array elements (equal to the number of underlying 2D array |
* elements divided by 6). For other surfaces, this field must be |
* zero." |
* |
* When is_rt is true, we treat the texture as a 2D one to avoid the |
* restriction. |
*/ |
if (is_rt) { |
surface_type = GEN6_SURFTYPE_2D; |
} |
else { |
assert(num_layers % 6 == 0); |
depth = num_layers / 6; |
} |
} |
/* sanity check the size */ |
assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1); |
switch (surface_type) { |
case GEN6_SURFTYPE_1D: |
assert(width <= 8192 && height == 1 && depth <= 512); |
assert(first_layer < 512 && num_layers <= 512); |
break; |
case GEN6_SURFTYPE_2D: |
assert(width <= 8192 && height <= 8192 && depth <= 512); |
assert(first_layer < 512 && num_layers <= 512); |
break; |
case GEN6_SURFTYPE_3D: |
assert(width <= 2048 && height <= 2048 && depth <= 2048); |
assert(first_layer < 2048 && num_layers <= 512); |
if (!is_rt) |
assert(first_layer == 0); |
break; |
case GEN6_SURFTYPE_CUBE: |
assert(width <= 8192 && height <= 8192 && depth <= 85); |
assert(width == height); |
assert(first_layer < 512 && num_layers <= 512); |
if (is_rt) |
assert(first_layer == 0); |
break; |
default: |
assert(!"unexpected surface type"); |
break; |
} |
/* non-full array spacing is supported only on GEN7+ */ |
assert(img->walk != ILO_IMAGE_WALK_LOD); |
/* non-interleaved samples are supported only on GEN7+ */ |
if (img->sample_count > 1) |
assert(img->interleaved_samples); |
if (is_rt) { |
assert(num_levels == 1); |
lod = first_level; |
} |
else { |
lod = num_levels - 1; |
} |
/* |
* From the Sandy Bridge PRM, volume 4 part 1, page 76: |
* |
* "Linear render target surface base addresses must be element-size |
* aligned, for non-YUV surface formats, or a multiple of 2 |
* element-sizes for YUV surface formats. Other linear surfaces have |
* no alignment requirements (byte alignment is sufficient.)" |
* |
* From the Sandy Bridge PRM, volume 4 part 1, page 81: |
* |
* "For linear render target surfaces, the pitch must be a multiple |
* of the element size for non-YUV surface formats. Pitch must be a |
* multiple of 2 * element size for YUV surface formats." |
* |
* From the Sandy Bridge PRM, volume 4 part 1, page 86: |
* |
* "For linear surfaces, this field (X Offset) must be zero" |
*/ |
if (img->tiling == GEN6_TILING_NONE) { |
if (is_rt) { |
const int elem_size = util_format_get_blocksize(format); |
assert(pitch % elem_size == 0); |
} |
} |
STATIC_ASSERT(Elements(surf->payload) >= 6); |
dw = surf->payload; |
dw[0] = surface_type << GEN6_SURFACE_DW0_TYPE__SHIFT | |
surface_format << GEN6_SURFACE_DW0_FORMAT__SHIFT | |
GEN6_SURFACE_DW0_MIPLAYOUT_BELOW; |
if (surface_type == GEN6_SURFTYPE_CUBE && !is_rt) { |
dw[0] |= 1 << 9 | |
GEN6_SURFACE_DW0_CUBE_FACE_ENABLES__MASK; |
} |
if (is_rt) |
dw[0] |= GEN6_SURFACE_DW0_RENDER_CACHE_RW; |
dw[1] = 0; |
dw[2] = (height - 1) << GEN6_SURFACE_DW2_HEIGHT__SHIFT | |
(width - 1) << GEN6_SURFACE_DW2_WIDTH__SHIFT | |
lod << GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT; |
assert(img->tiling != GEN8_TILING_W); |
dw[3] = (depth - 1) << GEN6_SURFACE_DW3_DEPTH__SHIFT | |
(pitch - 1) << GEN6_SURFACE_DW3_PITCH__SHIFT | |
img->tiling; |
dw[4] = first_level << GEN6_SURFACE_DW4_MIN_LOD__SHIFT | |
first_layer << 17 | |
(num_layers - 1) << 8 | |
((img->sample_count > 1) ? GEN6_SURFACE_DW4_MULTISAMPLECOUNT_4 : |
GEN6_SURFACE_DW4_MULTISAMPLECOUNT_1); |
dw[5] = 0; |
assert(img->align_j == 2 || img->align_j == 4); |
if (img->align_j == 4) |
dw[5] |= GEN6_SURFACE_DW5_VALIGN_4; |
} |
static void |
view_init_null_gen7(const struct ilo_dev *dev, |
unsigned width, unsigned height, |
unsigned depth, unsigned level, |
struct ilo_view_surface *surf) |
{ |
uint32_t *dw; |
ILO_DEV_ASSERT(dev, 7, 8); |
assert(width >= 1 && height >= 1 && depth >= 1); |
/* |
* From the Ivy Bridge PRM, volume 4 part 1, page 62: |
* |
* "A null surface is used in instances where an actual surface is not |
* bound. When a write message is generated to a null surface, no |
* actual surface is written to. When a read message (including any |
* sampling engine message) is generated to a null surface, the result |
* is all zeros. Note that a null surface type is allowed to be used |
* with all messages, even if it is not specificially indicated as |
* supported. All of the remaining fields in surface state are ignored |
* for null surfaces, with the following exceptions: |
* |
* * Width, Height, Depth, LOD, and Render Target View Extent fields |
* must match the depth buffer's corresponding state for all render |
* target surfaces, including null. |
* * All sampling engine and data port messages support null surfaces |
* with the above behavior, even if not mentioned as specifically |
* supported, except for the following: |
* * Data Port Media Block Read/Write messages. |
* * The Surface Type of a surface used as a render target (accessed |
* via the Data Port's Render Target Write message) must be the same |
* as the Surface Type of all other render targets and of the depth |
* buffer (defined in 3DSTATE_DEPTH_BUFFER), unless either the depth |
* buffer or render targets are SURFTYPE_NULL." |
* |
* From the Ivy Bridge PRM, volume 4 part 1, page 65: |
* |
* "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be |
* true" |
*/ |
STATIC_ASSERT(Elements(surf->payload) >= 13); |
dw = surf->payload; |
dw[0] = GEN6_SURFTYPE_NULL << GEN7_SURFACE_DW0_TYPE__SHIFT | |
GEN6_FORMAT_B8G8R8A8_UNORM << GEN7_SURFACE_DW0_FORMAT__SHIFT; |
if (ilo_dev_gen(dev) >= ILO_GEN(8)) |
dw[0] |= GEN6_TILING_X << GEN8_SURFACE_DW0_TILING__SHIFT; |
else |
dw[0] |= GEN6_TILING_X << GEN7_SURFACE_DW0_TILING__SHIFT; |
dw[1] = 0; |
dw[2] = GEN_SHIFT32(height - 1, GEN7_SURFACE_DW2_HEIGHT) | |
GEN_SHIFT32(width - 1, GEN7_SURFACE_DW2_WIDTH); |
dw[3] = GEN_SHIFT32(depth - 1, GEN7_SURFACE_DW3_DEPTH); |
dw[4] = 0; |
dw[5] = level; |
dw[6] = 0; |
dw[7] = 0; |
if (ilo_dev_gen(dev) >= ILO_GEN(8)) |
memset(&dw[8], 0, sizeof(*dw) * (13 - 8)); |
} |
static void |
view_init_for_buffer_gen7(const struct ilo_dev *dev, |
const struct ilo_buffer *buf, |
unsigned offset, unsigned size, |
unsigned struct_size, |
enum pipe_format elem_format, |
bool is_rt, bool render_cache_rw, |
struct ilo_view_surface *surf) |
{ |
const bool typed = (elem_format != PIPE_FORMAT_NONE); |
const bool structured = (!typed && struct_size > 1); |
const int elem_size = (typed) ? |
util_format_get_blocksize(elem_format) : 1; |
int width, height, depth, pitch; |
int surface_type, surface_format, num_entries; |
uint32_t *dw; |
ILO_DEV_ASSERT(dev, 7, 8); |
surface_type = (structured) ? GEN7_SURFTYPE_STRBUF : GEN6_SURFTYPE_BUFFER; |
surface_format = (typed) ? |
ilo_format_translate_color(dev, elem_format) : GEN6_FORMAT_RAW; |
num_entries = size / struct_size; |
/* see if there is enough space to fit another element */ |
if (size % struct_size >= elem_size && !structured) |
num_entries++; |
/* |
* From the Ivy Bridge PRM, volume 4 part 1, page 67: |
* |
* "For SURFTYPE_BUFFER render targets, this field (Surface Base |
* Address) specifies the base address of first element of the |
* surface. The surface is interpreted as a simple array of that |
* single element type. The address must be naturally-aligned to the |
* element size (e.g., a buffer containing R32G32B32A32_FLOAT elements |
* must be 16-byte aligned) |
* |
* For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies |
* the base address of the first element of the surface, computed in |
* software by adding the surface base address to the byte offset of |
* the element in the buffer." |
*/ |
if (is_rt) |
assert(offset % elem_size == 0); |
/* |
* From the Ivy Bridge PRM, volume 4 part 1, page 68: |
* |
* "For typed buffer and structured buffer surfaces, the number of |
* entries in the buffer ranges from 1 to 2^27. For raw buffer |
* surfaces, the number of entries in the buffer is the number of |
* bytes which can range from 1 to 2^30." |
*/ |
assert(num_entries >= 1 && |
num_entries <= 1 << ((typed || structured) ? 27 : 30)); |
/* |
* From the Ivy Bridge PRM, volume 4 part 1, page 69: |
* |
* "For SURFTYPE_BUFFER: The low two bits of this field (Width) must be |
* 11 if the Surface Format is RAW (the size of the buffer must be a |
* multiple of 4 bytes)." |
* |
* From the Ivy Bridge PRM, volume 4 part 1, page 70: |
* |
* "For surfaces of type SURFTYPE_BUFFER and SURFTYPE_STRBUF, this |
* field (Surface Pitch) indicates the size of the structure." |
* |
* "For linear surfaces with Surface Type of SURFTYPE_STRBUF, the pitch |
* must be a multiple of 4 bytes." |
*/ |
if (structured) |
assert(struct_size % 4 == 0); |
else if (!typed) |
assert(num_entries % 4 == 0); |
pitch = struct_size; |
pitch--; |
num_entries--; |
/* bits [6:0] */ |
width = (num_entries & 0x0000007f); |
/* bits [20:7] */ |
height = (num_entries & 0x001fff80) >> 7; |
/* bits [30:21] */ |
depth = (num_entries & 0x7fe00000) >> 21; |
/* limit to [26:21] */ |
if (typed || structured) |
depth &= 0x3f; |
STATIC_ASSERT(Elements(surf->payload) >= 13); |
dw = surf->payload; |
dw[0] = surface_type << GEN7_SURFACE_DW0_TYPE__SHIFT | |
surface_format << GEN7_SURFACE_DW0_FORMAT__SHIFT; |
if (render_cache_rw) |
dw[0] |= GEN7_SURFACE_DW0_RENDER_CACHE_RW; |
if (ilo_dev_gen(dev) >= ILO_GEN(8)) { |
dw[8] = offset; |
memset(&dw[9], 0, sizeof(*dw) * (13 - 9)); |
} else { |
dw[1] = offset; |
} |
dw[2] = GEN_SHIFT32(height, GEN7_SURFACE_DW2_HEIGHT) | |
GEN_SHIFT32(width, GEN7_SURFACE_DW2_WIDTH); |
dw[3] = GEN_SHIFT32(depth, GEN7_SURFACE_DW3_DEPTH) | |
pitch; |
dw[4] = 0; |
dw[5] = 0; |
dw[6] = 0; |
dw[7] = 0; |
if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) { |
dw[7] |= GEN_SHIFT32(GEN75_SCS_RED, GEN75_SURFACE_DW7_SCS_R) | |
GEN_SHIFT32(GEN75_SCS_GREEN, GEN75_SURFACE_DW7_SCS_G) | |
GEN_SHIFT32(GEN75_SCS_BLUE, GEN75_SURFACE_DW7_SCS_B) | |
GEN_SHIFT32(GEN75_SCS_ALPHA, GEN75_SURFACE_DW7_SCS_A); |
} |
} |
static void |
view_init_for_image_gen7(const struct ilo_dev *dev, |
const struct ilo_image *img, |
enum pipe_texture_target target, |
enum pipe_format format, |
unsigned first_level, |
unsigned num_levels, |
unsigned first_layer, |
unsigned num_layers, |
bool is_rt, |
struct ilo_view_surface *surf) |
{ |
int surface_type, surface_format; |
int width, height, depth, pitch, lod; |
uint32_t *dw; |
ILO_DEV_ASSERT(dev, 7, 8); |
surface_type = ilo_gpe_gen6_translate_texture(target); |
assert(surface_type != GEN6_SURFTYPE_BUFFER); |
if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && img->separate_stencil) |
format = PIPE_FORMAT_Z32_FLOAT; |
if (is_rt) |
surface_format = ilo_format_translate_render(dev, format); |
else |
surface_format = ilo_format_translate_texture(dev, format); |
assert(surface_format >= 0); |
width = img->width0; |
height = img->height0; |
depth = (target == PIPE_TEXTURE_3D) ? img->depth0 : num_layers; |
pitch = img->bo_stride; |
if (surface_type == GEN6_SURFTYPE_CUBE) { |
/* |
* From the Ivy Bridge PRM, volume 4 part 1, page 70: |
* |
* "For SURFTYPE_CUBE:For Sampling Engine Surfaces, the range of |
* this field is [0,340], indicating the number of cube array |
* elements (equal to the number of underlying 2D array elements |
* divided by 6). For other surfaces, this field must be zero." |
* |
* When is_rt is true, we treat the texture as a 2D one to avoid the |
* restriction. |
*/ |
if (is_rt) { |
surface_type = GEN6_SURFTYPE_2D; |
} |
else { |
assert(num_layers % 6 == 0); |
depth = num_layers / 6; |
} |
} |
/* sanity check the size */ |
assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1); |
assert(first_layer < 2048 && num_layers <= 2048); |
switch (surface_type) { |
case GEN6_SURFTYPE_1D: |
assert(width <= 16384 && height == 1 && depth <= 2048); |
break; |
case GEN6_SURFTYPE_2D: |
assert(width <= 16384 && height <= 16384 && depth <= 2048); |
break; |
case GEN6_SURFTYPE_3D: |
assert(width <= 2048 && height <= 2048 && depth <= 2048); |
if (!is_rt) |
assert(first_layer == 0); |
break; |
case GEN6_SURFTYPE_CUBE: |
assert(width <= 16384 && height <= 16384 && depth <= 86); |
assert(width == height); |
if (is_rt) |
assert(first_layer == 0); |
break; |
default: |
assert(!"unexpected surface type"); |
break; |
} |
if (is_rt) { |
assert(num_levels == 1); |
lod = first_level; |
} |
else { |
lod = num_levels - 1; |
} |
/* |
* From the Ivy Bridge PRM, volume 4 part 1, page 68: |
* |
* "The Base Address for linear render target surfaces and surfaces |
* accessed with the typed surface read/write data port messages must |
* be element-size aligned, for non-YUV surface formats, or a multiple |
* of 2 element-sizes for YUV surface formats. Other linear surfaces |
* have no alignment requirements (byte alignment is sufficient)." |
* |
* From the Ivy Bridge PRM, volume 4 part 1, page 70: |
* |
* "For linear render target surfaces and surfaces accessed with the |
* typed data port messages, the pitch must be a multiple of the |
* element size for non-YUV surface formats. Pitch must be a multiple |
* of 2 * element size for YUV surface formats. For linear surfaces |
* with Surface Type of SURFTYPE_STRBUF, the pitch must be a multiple |
* of 4 bytes.For other linear surfaces, the pitch can be any multiple |
* of bytes." |
* |
* From the Ivy Bridge PRM, volume 4 part 1, page 74: |
* |
* "For linear surfaces, this field (X Offset) must be zero." |
*/ |
if (img->tiling == GEN6_TILING_NONE) { |
if (is_rt) { |
const int elem_size = util_format_get_blocksize(format); |
assert(pitch % elem_size == 0); |
} |
} |
STATIC_ASSERT(Elements(surf->payload) >= 13); |
dw = surf->payload; |
dw[0] = surface_type << GEN7_SURFACE_DW0_TYPE__SHIFT | |
surface_format << GEN7_SURFACE_DW0_FORMAT__SHIFT; |
/* |
* From the Ivy Bridge PRM, volume 4 part 1, page 63: |
* |
* "If this field (Surface Array) is enabled, the Surface Type must be |
* SURFTYPE_1D, SURFTYPE_2D, or SURFTYPE_CUBE. If this field is |
* disabled and Surface Type is SURFTYPE_1D, SURFTYPE_2D, or |
* SURFTYPE_CUBE, the Depth field must be set to zero." |
* |
* For non-3D sampler surfaces, resinfo (the sampler message) always |
* returns zero for the number of layers when this field is not set. |
*/ |
if (surface_type != GEN6_SURFTYPE_3D) { |
switch (target) { |
case PIPE_TEXTURE_1D_ARRAY: |
case PIPE_TEXTURE_2D_ARRAY: |
case PIPE_TEXTURE_CUBE_ARRAY: |
dw[0] |= GEN7_SURFACE_DW0_IS_ARRAY; |
break; |
default: |
assert(depth == 1); |
break; |
} |
} |
if (ilo_dev_gen(dev) >= ILO_GEN(8)) { |
switch (img->align_j) { |
case 4: |
dw[0] |= GEN7_SURFACE_DW0_VALIGN_4; |
break; |
case 8: |
dw[0] |= GEN8_SURFACE_DW0_VALIGN_8; |
break; |
case 16: |
dw[0] |= GEN8_SURFACE_DW0_VALIGN_16; |
break; |
default: |
assert(!"unsupported valign"); |
break; |
} |
switch (img->align_i) { |
case 4: |
dw[0] |= GEN8_SURFACE_DW0_HALIGN_4; |
break; |
case 8: |
dw[0] |= GEN8_SURFACE_DW0_HALIGN_8; |
break; |
case 16: |
dw[0] |= GEN8_SURFACE_DW0_HALIGN_16; |
break; |
default: |
assert(!"unsupported halign"); |
break; |
} |
dw[0] |= img->tiling << GEN8_SURFACE_DW0_TILING__SHIFT; |
} else { |
assert(img->align_i == 4 || img->align_i == 8); |
assert(img->align_j == 2 || img->align_j == 4); |
if (img->align_j == 4) |
dw[0] |= GEN7_SURFACE_DW0_VALIGN_4; |
if (img->align_i == 8) |
dw[0] |= GEN7_SURFACE_DW0_HALIGN_8; |
assert(img->tiling != GEN8_TILING_W); |
dw[0] |= img->tiling << GEN7_SURFACE_DW0_TILING__SHIFT; |
if (img->walk == ILO_IMAGE_WALK_LOD) |
dw[0] |= GEN7_SURFACE_DW0_ARYSPC_LOD0; |
else |
dw[0] |= GEN7_SURFACE_DW0_ARYSPC_FULL; |
} |
if (is_rt) |
dw[0] |= GEN7_SURFACE_DW0_RENDER_CACHE_RW; |
if (surface_type == GEN6_SURFTYPE_CUBE && !is_rt) |
dw[0] |= GEN7_SURFACE_DW0_CUBE_FACE_ENABLES__MASK; |
if (ilo_dev_gen(dev) >= ILO_GEN(8)) { |
assert(img->walk_layer_height % 4 == 0); |
dw[1] = img->walk_layer_height / 4; |
} else { |
dw[1] = 0; |
} |
dw[2] = GEN_SHIFT32(height - 1, GEN7_SURFACE_DW2_HEIGHT) | |
GEN_SHIFT32(width - 1, GEN7_SURFACE_DW2_WIDTH); |
dw[3] = GEN_SHIFT32(depth - 1, GEN7_SURFACE_DW3_DEPTH) | |
(pitch - 1); |
dw[4] = first_layer << 18 | |
(num_layers - 1) << 7; |
/* |
* MSFMT_MSS means the samples are not interleaved and MSFMT_DEPTH_STENCIL |
* means the samples are interleaved. The layouts are the same when the |
* number of samples is 1. |
*/ |
if (img->interleaved_samples && img->sample_count > 1) { |
assert(!is_rt); |
dw[4] |= GEN7_SURFACE_DW4_MSFMT_DEPTH_STENCIL; |
} |
else { |
dw[4] |= GEN7_SURFACE_DW4_MSFMT_MSS; |
} |
switch (img->sample_count) { |
case 0: |
case 1: |
default: |
dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_1; |
break; |
case 2: |
dw[4] |= GEN8_SURFACE_DW4_MULTISAMPLECOUNT_2; |
break; |
case 4: |
dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_4; |
break; |
case 8: |
dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_8; |
break; |
case 16: |
dw[4] |= GEN8_SURFACE_DW4_MULTISAMPLECOUNT_16; |
break; |
} |
dw[5] = GEN_SHIFT32(first_level, GEN7_SURFACE_DW5_MIN_LOD) | |
lod; |
dw[6] = 0; |
dw[7] = 0; |
if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) { |
dw[7] |= GEN_SHIFT32(GEN75_SCS_RED, GEN75_SURFACE_DW7_SCS_R) | |
GEN_SHIFT32(GEN75_SCS_GREEN, GEN75_SURFACE_DW7_SCS_G) | |
GEN_SHIFT32(GEN75_SCS_BLUE, GEN75_SURFACE_DW7_SCS_B) | |
GEN_SHIFT32(GEN75_SCS_ALPHA, GEN75_SURFACE_DW7_SCS_A); |
} |
if (ilo_dev_gen(dev) >= ILO_GEN(8)) |
memset(&dw[8], 0, sizeof(*dw) * (13 - 8)); |
} |
void |
ilo_gpe_init_view_surface_null(const struct ilo_dev *dev, |
unsigned width, unsigned height, |
unsigned depth, unsigned level, |
struct ilo_view_surface *surf) |
{ |
if (ilo_dev_gen(dev) >= ILO_GEN(7)) { |
view_init_null_gen7(dev, |
width, height, depth, level, surf); |
} else { |
view_init_null_gen6(dev, |
width, height, depth, level, surf); |
} |
surf->bo = NULL; |
surf->scanout = false; |
} |
void |
ilo_gpe_init_view_surface_for_buffer(const struct ilo_dev *dev, |
const struct ilo_buffer *buf, |
unsigned offset, unsigned size, |
unsigned struct_size, |
enum pipe_format elem_format, |
bool is_rt, bool render_cache_rw, |
struct ilo_view_surface *surf) |
{ |
if (ilo_dev_gen(dev) >= ILO_GEN(7)) { |
view_init_for_buffer_gen7(dev, buf, offset, size, |
struct_size, elem_format, is_rt, render_cache_rw, surf); |
} else { |
view_init_for_buffer_gen6(dev, buf, offset, size, |
struct_size, elem_format, is_rt, render_cache_rw, surf); |
} |
/* do not increment reference count */ |
surf->bo = buf->bo; |
surf->scanout = false; |
} |
void |
ilo_gpe_init_view_surface_for_image(const struct ilo_dev *dev, |
const struct ilo_image *img, |
enum pipe_texture_target target, |
enum pipe_format format, |
unsigned first_level, |
unsigned num_levels, |
unsigned first_layer, |
unsigned num_layers, |
bool is_rt, |
struct ilo_view_surface *surf) |
{ |
if (ilo_dev_gen(dev) >= ILO_GEN(7)) { |
view_init_for_image_gen7(dev, img, target, format, |
first_level, num_levels, first_layer, num_layers, |
is_rt, surf); |
} else { |
view_init_for_image_gen6(dev, img, target, format, |
first_level, num_levels, first_layer, num_layers, |
is_rt, surf); |
} |
surf->scanout = img->scanout; |
/* do not increment reference count */ |
surf->bo = img->bo; |
} |
static void |
sampler_init_border_color_gen6(const struct ilo_dev *dev, |
const union pipe_color_union *color, |
uint32_t *dw, int num_dwords) |
{ |
float rgba[4] = { |
color->f[0], color->f[1], color->f[2], color->f[3], |
}; |
ILO_DEV_ASSERT(dev, 6, 6); |
assert(num_dwords >= 12); |
/* |
* This state is not documented in the Sandy Bridge PRM, but in the |
* Ironlake PRM. SNORM8 seems to be in DW11 instead of DW1. |
*/ |
/* IEEE_FP */ |
dw[1] = fui(rgba[0]); |
dw[2] = fui(rgba[1]); |
dw[3] = fui(rgba[2]); |
dw[4] = fui(rgba[3]); |
/* FLOAT_16 */ |
dw[5] = util_float_to_half(rgba[0]) | |
util_float_to_half(rgba[1]) << 16; |
dw[6] = util_float_to_half(rgba[2]) | |
util_float_to_half(rgba[3]) << 16; |
/* clamp to [-1.0f, 1.0f] */ |
rgba[0] = CLAMP(rgba[0], -1.0f, 1.0f); |
rgba[1] = CLAMP(rgba[1], -1.0f, 1.0f); |
rgba[2] = CLAMP(rgba[2], -1.0f, 1.0f); |
rgba[3] = CLAMP(rgba[3], -1.0f, 1.0f); |
/* SNORM16 */ |
dw[9] = (int16_t) util_iround(rgba[0] * 32767.0f) | |
(int16_t) util_iround(rgba[1] * 32767.0f) << 16; |
dw[10] = (int16_t) util_iround(rgba[2] * 32767.0f) | |
(int16_t) util_iround(rgba[3] * 32767.0f) << 16; |
/* SNORM8 */ |
dw[11] = (int8_t) util_iround(rgba[0] * 127.0f) | |
(int8_t) util_iround(rgba[1] * 127.0f) << 8 | |
(int8_t) util_iround(rgba[2] * 127.0f) << 16 | |
(int8_t) util_iround(rgba[3] * 127.0f) << 24; |
/* clamp to [0.0f, 1.0f] */ |
rgba[0] = CLAMP(rgba[0], 0.0f, 1.0f); |
rgba[1] = CLAMP(rgba[1], 0.0f, 1.0f); |
rgba[2] = CLAMP(rgba[2], 0.0f, 1.0f); |
rgba[3] = CLAMP(rgba[3], 0.0f, 1.0f); |
/* UNORM8 */ |
dw[0] = (uint8_t) util_iround(rgba[0] * 255.0f) | |
(uint8_t) util_iround(rgba[1] * 255.0f) << 8 | |
(uint8_t) util_iround(rgba[2] * 255.0f) << 16 | |
(uint8_t) util_iround(rgba[3] * 255.0f) << 24; |
/* UNORM16 */ |
dw[7] = (uint16_t) util_iround(rgba[0] * 65535.0f) | |
(uint16_t) util_iround(rgba[1] * 65535.0f) << 16; |
dw[8] = (uint16_t) util_iround(rgba[2] * 65535.0f) | |
(uint16_t) util_iround(rgba[3] * 65535.0f) << 16; |
} |
/** |
* Translate a pipe texture mipfilter to the matching hardware mipfilter. |
*/ |
static int |
gen6_translate_tex_mipfilter(unsigned filter) |
{ |
switch (filter) { |
case PIPE_TEX_MIPFILTER_NEAREST: return GEN6_MIPFILTER_NEAREST; |
case PIPE_TEX_MIPFILTER_LINEAR: return GEN6_MIPFILTER_LINEAR; |
case PIPE_TEX_MIPFILTER_NONE: return GEN6_MIPFILTER_NONE; |
default: |
assert(!"unknown mipfilter"); |
return GEN6_MIPFILTER_NONE; |
} |
} |
/** |
* Translate a pipe texture filter to the matching hardware mapfilter. |
*/ |
static int |
gen6_translate_tex_filter(unsigned filter) |
{ |
switch (filter) { |
case PIPE_TEX_FILTER_NEAREST: return GEN6_MAPFILTER_NEAREST; |
case PIPE_TEX_FILTER_LINEAR: return GEN6_MAPFILTER_LINEAR; |
default: |
assert(!"unknown sampler filter"); |
return GEN6_MAPFILTER_NEAREST; |
} |
} |
/** |
* Translate a pipe texture coordinate wrapping mode to the matching hardware |
* wrapping mode. |
*/ |
static int |
gen6_translate_tex_wrap(unsigned wrap) |
{ |
switch (wrap) { |
case PIPE_TEX_WRAP_CLAMP: return GEN8_TEXCOORDMODE_HALF_BORDER; |
case PIPE_TEX_WRAP_REPEAT: return GEN6_TEXCOORDMODE_WRAP; |
case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return GEN6_TEXCOORDMODE_CLAMP; |
case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return GEN6_TEXCOORDMODE_CLAMP_BORDER; |
case PIPE_TEX_WRAP_MIRROR_REPEAT: return GEN6_TEXCOORDMODE_MIRROR; |
case PIPE_TEX_WRAP_MIRROR_CLAMP: |
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: |
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: |
default: |
assert(!"unknown sampler wrap mode"); |
return GEN6_TEXCOORDMODE_WRAP; |
} |
} |
/** |
* Translate a pipe shadow compare function to the matching hardware shadow |
* function. |
*/ |
static int |
gen6_translate_shadow_func(unsigned func) |
{ |
/* |
* For PIPE_FUNC_x, the reference value is on the left-hand side of the |
* comparison, and 1.0 is returned when the comparison is true. |
* |
* For GEN6_COMPAREFUNCTION_x, the reference value is on the right-hand side of |
* the comparison, and 0.0 is returned when the comparison is true. |
*/ |
switch (func) { |
case PIPE_FUNC_NEVER: return GEN6_COMPAREFUNCTION_ALWAYS; |
case PIPE_FUNC_LESS: return GEN6_COMPAREFUNCTION_LEQUAL; |
case PIPE_FUNC_EQUAL: return GEN6_COMPAREFUNCTION_NOTEQUAL; |
case PIPE_FUNC_LEQUAL: return GEN6_COMPAREFUNCTION_LESS; |
case PIPE_FUNC_GREATER: return GEN6_COMPAREFUNCTION_GEQUAL; |
case PIPE_FUNC_NOTEQUAL: return GEN6_COMPAREFUNCTION_EQUAL; |
case PIPE_FUNC_GEQUAL: return GEN6_COMPAREFUNCTION_GREATER; |
case PIPE_FUNC_ALWAYS: return GEN6_COMPAREFUNCTION_NEVER; |
default: |
assert(!"unknown shadow compare function"); |
return GEN6_COMPAREFUNCTION_NEVER; |
} |
} |
void |
ilo_gpe_init_sampler_cso(const struct ilo_dev *dev, |
const struct pipe_sampler_state *state, |
struct ilo_sampler_cso *sampler) |
{ |
int mip_filter, min_filter, mag_filter, max_aniso; |
int lod_bias, max_lod, min_lod; |
int wrap_s, wrap_t, wrap_r, wrap_cube; |
uint32_t dw0, dw1, dw3; |
ILO_DEV_ASSERT(dev, 6, 8); |
memset(sampler, 0, sizeof(*sampler)); |
mip_filter = gen6_translate_tex_mipfilter(state->min_mip_filter); |
min_filter = gen6_translate_tex_filter(state->min_img_filter); |
mag_filter = gen6_translate_tex_filter(state->mag_img_filter); |
sampler->anisotropic = state->max_anisotropy; |
if (state->max_anisotropy >= 2 && state->max_anisotropy <= 16) |
max_aniso = state->max_anisotropy / 2 - 1; |
else if (state->max_anisotropy > 16) |
max_aniso = GEN6_ANISORATIO_16; |
else |
max_aniso = GEN6_ANISORATIO_2; |
/* |
* |
* Here is how the hardware calculate per-pixel LOD, from my reading of the |
* PRMs: |
* |
* 1) LOD is set to log2(ratio of texels to pixels) if not specified in |
* other ways. The number of texels is measured using level |
* SurfMinLod. |
* 2) Bias is added to LOD. |
* 3) LOD is clamped to [MinLod, MaxLod], and the clamped value is |
* compared with Base to determine whether magnification or |
* minification is needed. (if preclamp is disabled, LOD is compared |
* with Base before clamping) |
* 4) If magnification is needed, or no mipmapping is requested, LOD is |
* set to floor(MinLod). |
* 5) LOD is clamped to [0, MIPCnt], and SurfMinLod is added to LOD. |
* |
* With Gallium interface, Base is always zero and |
* pipe_sampler_view::u.tex.first_level specifies SurfMinLod. |
*/ |
if (ilo_dev_gen(dev) >= ILO_GEN(7)) { |
const float scale = 256.0f; |
/* [-16.0, 16.0) in S4.8 */ |
lod_bias = (int) |
(CLAMP(state->lod_bias, -16.0f, 15.9f) * scale); |
lod_bias &= 0x1fff; |
/* [0.0, 14.0] in U4.8 */ |
max_lod = (int) (CLAMP(state->max_lod, 0.0f, 14.0f) * scale); |
min_lod = (int) (CLAMP(state->min_lod, 0.0f, 14.0f) * scale); |
} |
else { |
const float scale = 64.0f; |
/* [-16.0, 16.0) in S4.6 */ |
lod_bias = (int) |
(CLAMP(state->lod_bias, -16.0f, 15.9f) * scale); |
lod_bias &= 0x7ff; |
/* [0.0, 13.0] in U4.6 */ |
max_lod = (int) (CLAMP(state->max_lod, 0.0f, 13.0f) * scale); |
min_lod = (int) (CLAMP(state->min_lod, 0.0f, 13.0f) * scale); |
} |
/* |
* We want LOD to be clamped to determine magnification/minification, and |
* get set to zero when it is magnification or when mipmapping is disabled. |
* The hardware would set LOD to floor(MinLod) and that is a problem when |
* MinLod is greater than or equal to 1.0f. |
* |
* With Base being zero, it is always minification when MinLod is non-zero. |
* To achieve our goal, we just need to set MinLod to zero and set |
* MagFilter to MinFilter when mipmapping is disabled. |
*/ |
if (state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && min_lod) { |
min_lod = 0; |
mag_filter = min_filter; |
} |
/* determine wrap s/t/r */ |
wrap_s = gen6_translate_tex_wrap(state->wrap_s); |
wrap_t = gen6_translate_tex_wrap(state->wrap_t); |
wrap_r = gen6_translate_tex_wrap(state->wrap_r); |
if (ilo_dev_gen(dev) < ILO_GEN(8)) { |
/* |
* For nearest filtering, PIPE_TEX_WRAP_CLAMP means |
* PIPE_TEX_WRAP_CLAMP_TO_EDGE; for linear filtering, |
* PIPE_TEX_WRAP_CLAMP means PIPE_TEX_WRAP_CLAMP_TO_BORDER while |
* additionally clamping the texture coordinates to [0.0, 1.0]. |
* |
* PIPE_TEX_WRAP_CLAMP is not supported natively until Gen8. The |
* clamping has to be taken care of in the shaders. There are two |
* filters here, but let the minification one has a say. |
*/ |
const bool clamp_is_to_edge = |
(state->min_img_filter == PIPE_TEX_FILTER_NEAREST); |
if (clamp_is_to_edge) { |
if (wrap_s == GEN8_TEXCOORDMODE_HALF_BORDER) |
wrap_s = GEN6_TEXCOORDMODE_CLAMP; |
if (wrap_t == GEN8_TEXCOORDMODE_HALF_BORDER) |
wrap_t = GEN6_TEXCOORDMODE_CLAMP; |
if (wrap_r == GEN8_TEXCOORDMODE_HALF_BORDER) |
wrap_r = GEN6_TEXCOORDMODE_CLAMP; |
} else { |
if (wrap_s == GEN8_TEXCOORDMODE_HALF_BORDER) { |
wrap_s = GEN6_TEXCOORDMODE_CLAMP_BORDER; |
sampler->saturate_s = true; |
} |
if (wrap_t == GEN8_TEXCOORDMODE_HALF_BORDER) { |
wrap_t = GEN6_TEXCOORDMODE_CLAMP_BORDER; |
sampler->saturate_t = true; |
} |
if (wrap_r == GEN8_TEXCOORDMODE_HALF_BORDER) { |
wrap_r = GEN6_TEXCOORDMODE_CLAMP_BORDER; |
sampler->saturate_r = true; |
} |
} |
} |
/* |
* From the Sandy Bridge PRM, volume 4 part 1, page 107: |
* |
* "When using cube map texture coordinates, only TEXCOORDMODE_CLAMP |
* and TEXCOORDMODE_CUBE settings are valid, and each TC component |
* must have the same Address Control mode." |
* |
* From the Ivy Bridge PRM, volume 4 part 1, page 96: |
* |
* "This field (Cube Surface Control Mode) must be set to |
* CUBECTRLMODE_PROGRAMMED" |
* |
* Therefore, we cannot use "Cube Surface Control Mode" for semless cube |
* map filtering. |
*/ |
if (state->seamless_cube_map && |
(state->min_img_filter != PIPE_TEX_FILTER_NEAREST || |
state->mag_img_filter != PIPE_TEX_FILTER_NEAREST)) { |
wrap_cube = GEN6_TEXCOORDMODE_CUBE; |
} |
else { |
wrap_cube = GEN6_TEXCOORDMODE_CLAMP; |
} |
if (!state->normalized_coords) { |
/* |
* From the Ivy Bridge PRM, volume 4 part 1, page 98: |
* |
* "The following state must be set as indicated if this field |
* (Non-normalized Coordinate Enable) is enabled: |
* |
* - TCX/Y/Z Address Control Mode must be TEXCOORDMODE_CLAMP, |
* TEXCOORDMODE_HALF_BORDER, or TEXCOORDMODE_CLAMP_BORDER. |
* - Surface Type must be SURFTYPE_2D or SURFTYPE_3D. |
* - Mag Mode Filter must be MAPFILTER_NEAREST or |
* MAPFILTER_LINEAR. |
* - Min Mode Filter must be MAPFILTER_NEAREST or |
* MAPFILTER_LINEAR. |
* - Mip Mode Filter must be MIPFILTER_NONE. |
* - Min LOD must be 0. |
* - Max LOD must be 0. |
* - MIP Count must be 0. |
* - Surface Min LOD must be 0. |
* - Texture LOD Bias must be 0." |
*/ |
assert(wrap_s == GEN6_TEXCOORDMODE_CLAMP || |
wrap_s == GEN6_TEXCOORDMODE_CLAMP_BORDER); |
assert(wrap_t == GEN6_TEXCOORDMODE_CLAMP || |
wrap_t == GEN6_TEXCOORDMODE_CLAMP_BORDER); |
assert(wrap_r == GEN6_TEXCOORDMODE_CLAMP || |
wrap_r == GEN6_TEXCOORDMODE_CLAMP_BORDER); |
assert(mag_filter == GEN6_MAPFILTER_NEAREST || |
mag_filter == GEN6_MAPFILTER_LINEAR); |
assert(min_filter == GEN6_MAPFILTER_NEAREST || |
min_filter == GEN6_MAPFILTER_LINEAR); |
/* work around a bug in util_blitter */ |
mip_filter = GEN6_MIPFILTER_NONE; |
assert(mip_filter == GEN6_MIPFILTER_NONE); |
} |
if (ilo_dev_gen(dev) >= ILO_GEN(7)) { |
dw0 = 1 << 28 | |
mip_filter << 20 | |
lod_bias << 1; |
sampler->dw_filter = mag_filter << 17 | |
min_filter << 14; |
sampler->dw_filter_aniso = GEN6_MAPFILTER_ANISOTROPIC << 17 | |
GEN6_MAPFILTER_ANISOTROPIC << 14 | |
1; |
dw1 = min_lod << 20 | |
max_lod << 8; |
if (state->compare_mode != PIPE_TEX_COMPARE_NONE) |
dw1 |= gen6_translate_shadow_func(state->compare_func) << 1; |
dw3 = max_aniso << 19; |
/* round the coordinates for linear filtering */ |
if (min_filter != GEN6_MAPFILTER_NEAREST) { |
dw3 |= (GEN6_SAMPLER_DW3_U_MIN_ROUND | |
GEN6_SAMPLER_DW3_V_MIN_ROUND | |
GEN6_SAMPLER_DW3_R_MIN_ROUND); |
} |
if (mag_filter != GEN6_MAPFILTER_NEAREST) { |
dw3 |= (GEN6_SAMPLER_DW3_U_MAG_ROUND | |
GEN6_SAMPLER_DW3_V_MAG_ROUND | |
GEN6_SAMPLER_DW3_R_MAG_ROUND); |
} |
if (!state->normalized_coords) |
dw3 |= 1 << 10; |
sampler->dw_wrap = wrap_s << 6 | |
wrap_t << 3 | |
wrap_r; |
/* |
* As noted in the classic i965 driver, the HW may still reference |
* wrap_t and wrap_r for 1D textures. We need to set them to a safe |
* mode |
*/ |
sampler->dw_wrap_1d = wrap_s << 6 | |
GEN6_TEXCOORDMODE_WRAP << 3 | |
GEN6_TEXCOORDMODE_WRAP; |
sampler->dw_wrap_cube = wrap_cube << 6 | |
wrap_cube << 3 | |
wrap_cube; |
STATIC_ASSERT(Elements(sampler->payload) >= 7); |
sampler->payload[0] = dw0; |
sampler->payload[1] = dw1; |
sampler->payload[2] = dw3; |
memcpy(&sampler->payload[3], |
state->border_color.ui, sizeof(state->border_color.ui)); |
} |
else { |
dw0 = 1 << 28 | |
mip_filter << 20 | |
lod_bias << 3; |
if (state->compare_mode != PIPE_TEX_COMPARE_NONE) |
dw0 |= gen6_translate_shadow_func(state->compare_func); |
sampler->dw_filter = (min_filter != mag_filter) << 27 | |
mag_filter << 17 | |
min_filter << 14; |
sampler->dw_filter_aniso = GEN6_MAPFILTER_ANISOTROPIC << 17 | |
GEN6_MAPFILTER_ANISOTROPIC << 14; |
dw1 = min_lod << 22 | |
max_lod << 12; |
sampler->dw_wrap = wrap_s << 6 | |
wrap_t << 3 | |
wrap_r; |
sampler->dw_wrap_1d = wrap_s << 6 | |
GEN6_TEXCOORDMODE_WRAP << 3 | |
GEN6_TEXCOORDMODE_WRAP; |
sampler->dw_wrap_cube = wrap_cube << 6 | |
wrap_cube << 3 | |
wrap_cube; |
dw3 = max_aniso << 19; |
/* round the coordinates for linear filtering */ |
if (min_filter != GEN6_MAPFILTER_NEAREST) { |
dw3 |= (GEN6_SAMPLER_DW3_U_MIN_ROUND | |
GEN6_SAMPLER_DW3_V_MIN_ROUND | |
GEN6_SAMPLER_DW3_R_MIN_ROUND); |
} |
if (mag_filter != GEN6_MAPFILTER_NEAREST) { |
dw3 |= (GEN6_SAMPLER_DW3_U_MAG_ROUND | |
GEN6_SAMPLER_DW3_V_MAG_ROUND | |
GEN6_SAMPLER_DW3_R_MAG_ROUND); |
} |
if (!state->normalized_coords) |
dw3 |= 1; |
STATIC_ASSERT(Elements(sampler->payload) >= 15); |
sampler->payload[0] = dw0; |
sampler->payload[1] = dw1; |
sampler->payload[2] = dw3; |
sampler_init_border_color_gen6(dev, |
&state->border_color, &sampler->payload[3], 12); |
} |
} |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/intel_winsys.h |
---|
0,0 → 1,329 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2014 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef INTEL_WINSYS_H |
#define INTEL_WINSYS_H |
#include "pipe/p_compiler.h" |
/*
 * Ring selectors.  The values are compatible with i915_drm.h's definitions
 * and must not be renumbered.
 */
enum intel_ring_type {
   INTEL_RING_RENDER = 0x1,
   INTEL_RING_BSD    = 0x2,
   INTEL_RING_BLT    = 0x3,
   INTEL_RING_VEBOX  = 0x4,
};
/*
 * Execution flags.  The values are compatible with i915_drm.h's definitions
 * and must not be renumbered.
 */
enum intel_exec_flag {
   INTEL_EXEC_GEN7_SOL_RESET = 0x100,   /* bit 8 */
};
/* |
* Relocation flag bits; each enumerator is a distinct single bit. |
* |
* This is compatible with i915_drm.h's definitions. |
* |
* NOTE(review): presumably OR-ed together for the \p flags argument of |
* intel_bo_add_reloc() -- confirm against callers. |
*/ |
enum intel_reloc_flag { |
INTEL_RELOC_FENCE = 1 << 0, |
INTEL_RELOC_GGTT = 1 << 1, |
INTEL_RELOC_WRITE = 1 << 2, |
}; |
/* |
* Tiling mode of a bo. It is taken by value by intel_bo_set_tiling() and |
* intel_winsys_export_handle(), and by pointer by |
* intel_winsys_import_handle(). |
* |
* This is compatible with i915_drm.h's definitions, so the numeric values |
* below must stay in sync with that header. |
*/ |
enum intel_tiling_mode { |
INTEL_TILING_NONE = 0, |
INTEL_TILING_X = 1, |
INTEL_TILING_Y = 2, |
}; |
/* |
* Forward declarations. This header only refers to these types through |
* pointers; none of them is defined in this header. |
*/
struct winsys_handle; |
struct intel_winsys; |
struct intel_context; |
struct intel_bo; |
/* |
* Information reported by a winsys; a read-only pointer to it is returned |
* by intel_winsys_get_info(). |
* |
* NOTE(review): the meaning of the individual fields is inferred from their |
* names -- devid is presumably the PCI device id, and has_llc presumably |
* reports the shared last-level cache that the intel_bo_map() notes refer |
* to. Confirm against the code that fills this struct in. |
*/
struct intel_winsys_info { |
int devid; |
/* the sizes of the aperture in bytes */ |
size_t aperture_total; |
size_t aperture_mappable; |
bool has_llc; |
bool has_address_swizzling; |
bool has_logical_context; |
bool has_ppgtt; |
/* valid registers for intel_winsys_read_reg() */ |
bool has_timestamp; |
/* valid flags for intel_winsys_submit_bo() */ |
bool has_gen7_sol_reset; |
}; |
/** |
* NOTE(review): presumably releases \p winsys. Neither the implementation |
* nor a matching creation function is declared in this header, so the |
* ownership rules should be confirmed against the winsys backend. |
*/
void |
intel_winsys_destroy(struct intel_winsys *winsys); |
/** |
* Return a read-only pointer to the intel_winsys_info of \p winsys. |
* |
* NOTE(review): the lifetime of the returned struct is not stated here; |
* presumably it is owned by \p winsys and must not be freed -- confirm. |
*/
const struct intel_winsys_info * |
intel_winsys_get_info(const struct intel_winsys *winsys); |
/** |
* Create a logical context for use with the render ring. |
* |
* The returned context can be passed to intel_winsys_submit_bo() and |
* intel_winsys_get_reset_stats(), and is released with |
* intel_winsys_destroy_context(). |
* |
* NOTE(review): the failure value (presumably NULL) and the relation to |
* intel_winsys_info::has_logical_context are not documented here -- confirm. |
*/ |
struct intel_context * |
intel_winsys_create_context(struct intel_winsys *winsys); |
/** |
* Destroy a logical context. |
* |
* NOTE(review): \p ctx is presumably a context obtained from |
* intel_winsys_create_context() on the same \p winsys; whether NULL is |
* accepted is not stated here -- confirm. |
*/ |
void |
intel_winsys_destroy_context(struct intel_winsys *winsys, |
struct intel_context *ctx); |
/** |
* Read a register. Only registers that are considered safe, such as |
* |
* TIMESTAMP (0x2358) |
* |
* can be read. intel_winsys_info has a flag for each valid register |
* (has_timestamp). |
* |
* \param reg Register to read, e.g. 0x2358 for TIMESTAMP. |
* \param val NOTE(review): presumably receives the 64-bit register value. |
* |
* NOTE(review): the meaning of the int return value (presumably 0 on |
* success) is not documented here -- confirm. |
*/ |
int |
intel_winsys_read_reg(struct intel_winsys *winsys, |
uint32_t reg, uint64_t *val); |
/** |
* Return the numbers of submissions lost due to GPU reset. The two counts |
* are passed back through the \p active_lost and \p pending_lost pointers. |
* |
* \param active_lost Number of lost active/guilty submissions |
* \param pending_lost Number of lost pending/innocent submissions |
* |
* NOTE(review): the meaning of the int return value (presumably 0 on |
* success) and whether the counts are per \p ctx are not documented here -- |
* confirm. |
*/ |
int |
intel_winsys_get_reset_stats(struct intel_winsys *winsys, |
struct intel_context *ctx, |
uint32_t *active_lost, |
uint32_t *pending_lost); |
/** |
* Allocate a buffer object. |
* |
* \param name Informative description of the bo. |
* \param size Size of the bo. |
* \param cpu_init Will be initialized by CPU. |
* |
* NOTE(review): the unit of \p size (presumably bytes) and the failure |
* value (presumably NULL) are not documented here -- confirm. The result is |
* presumably released with intel_bo_unref(). |
*/ |
struct intel_bo * |
intel_winsys_alloc_bo(struct intel_winsys *winsys, |
const char *name, |
unsigned long size, |
bool cpu_init); |
/** |
* Create a bo from a user memory pointer. Both \p userptr and \p size must |
* be page aligned. |
* |
* NOTE(review): the accepted values of \p flags and the failure value |
* (presumably NULL) are not documented here -- confirm against the |
* implementation. |
*/ |
struct intel_bo * |
intel_winsys_import_userptr(struct intel_winsys *winsys, |
const char *name, |
void *userptr, |
unsigned long size, |
unsigned long flags); |
/** |
* Create a bo from a winsys handle. |
* |
* \p handle is only read (it is a pointer to const). |
* |
* NOTE(review): \p tiling and \p pitch are non-const pointers and are |
* presumably outputs describing the imported bo; the failure value |
* (presumably NULL) is not documented here -- confirm. |
*/ |
struct intel_bo * |
intel_winsys_import_handle(struct intel_winsys *winsys, |
const char *name, |
const struct winsys_handle *handle, |
unsigned long height, |
enum intel_tiling_mode *tiling, |
unsigned long *pitch); |
/** |
* Export \p bo as a winsys handle for inter-process sharing. \p tiling and |
* \p pitch must match those set by intel_bo_set_tiling(). |
* |
* NOTE(review): \p handle is a non-const pointer and is presumably filled |
* in on success; the meaning of the int return value (presumably 0 on |
* success) is not documented here -- confirm. |
*/ |
int |
intel_winsys_export_handle(struct intel_winsys *winsys, |
struct intel_bo *bo, |
enum intel_tiling_mode tiling, |
unsigned long pitch, |
unsigned long height, |
struct winsys_handle *handle); |
/** |
* Return true when buffer objects directly specified in \p bo_array, and |
* those indirectly referenced by them, can fit in the aperture space. |
* |
* NOTE(review): \p count is presumably the number of entries in |
* \p bo_array -- confirm. |
*/ |
bool |
intel_winsys_can_submit_bo(struct intel_winsys *winsys, |
struct intel_bo **bo_array, |
int count); |
/** |
* Submit \p bo for execution. |
* |
* \p bo and all bos referenced by \p bo will be considered busy until all |
* commands are parsed and executed. \p ctx is ignored when the bo is not |
* submitted to the render ring. |
* |
* intel_winsys_info has a flag for each valid bit of \p flags |
* (has_gen7_sol_reset). |
* |
* NOTE(review): \p used is presumably the size of the commands in bytes, as |
* documented for intel_winsys_decode_bo(); the meaning of the int return |
* value (presumably 0 on success) is not documented here -- confirm. |
*/ |
int |
intel_winsys_submit_bo(struct intel_winsys *winsys, |
enum intel_ring_type ring, |
struct intel_bo *bo, int used, |
struct intel_context *ctx, |
unsigned long flags); |
/** |
* Decode the commands contained in \p bo. For debugging. |
* |
* \param bo Batch buffer to decode. |
* \param used Size of the commands in bytes. |
* |
* Nothing is returned. NOTE(review): where the decoded output goes is not |
* stated in this header -- see the implementation. |
*/ |
void |
intel_winsys_decode_bo(struct intel_winsys *winsys, |
struct intel_bo *bo, int used); |
/** |
* Increase the reference count of \p bo. No-op when \p bo is NULL. |
* |
* Each reference taken here is dropped with intel_bo_unref(). |
* |
* NOTE(review): the returned pointer is presumably \p bo itself, so that |
* the call can be used in an assignment -- confirm. |
*/ |
struct intel_bo * |
intel_bo_ref(struct intel_bo *bo); |
/** |
* Decrease the reference count of \p bo. When the reference count reaches |
* zero, \p bo is destroyed. No-op when \p bo is NULL. |
* |
* Because the bo may be destroyed by this call, the caller must not use |
* \p bo afterwards unless it holds another reference. |
*/ |
void |
intel_bo_unref(struct intel_bo *bo); |
/** |
* Set the tiling of \p bo. The info is used by GTT mapping and bo export. |
* |
* The same \p tiling and \p pitch must later be given to |
* intel_winsys_export_handle() when the bo is exported. |
* |
* NOTE(review): the unit of \p pitch (presumably bytes) and the meaning of |
* the int return value (presumably 0 on success) are not documented here -- |
* confirm. |
*/ |
int |
intel_bo_set_tiling(struct intel_bo *bo, |
enum intel_tiling_mode tiling, |
unsigned long pitch); |
/** |
* Map \p bo for CPU access. Recursive mapping is allowed. |
* |
* map() maps the backing store into CPU address space, cached. It will block |
* if the bo is busy. This variant allows fastest random reads and writes, |
* but the caller needs to handle tiling or swizzling manually if the bo is |
* tiled or swizzled. If write is enabled and there is no shared last-level |
* cache (LLC), the CPU cache will be flushed, which is expensive. |
* |
* map_gtt() maps the bo for MMIO access, uncached but write-combined. It |
* will block if the bo is busy. This variant promises a reasonable speed for |
* sequential writes, but reads would be very slow. Callers always have a |
* linear view of the bo. |
* |
* map_async() and map_gtt_async() work similar to map() and map_gtt() |
* respectively, except that they do not block. |
* |
* Only map() takes a \p write_enable argument; the other three variants |
* take just the bo. |
* |
* NOTE(review): the failure value (presumably NULL) is not documented here, |
* and each successful map presumably needs a matching intel_bo_unmap() -- |
* confirm. |
*/ |
void * |
intel_bo_map(struct intel_bo *bo, bool write_enable); |
void * |
intel_bo_map_async(struct intel_bo *bo); |
void * |
intel_bo_map_gtt(struct intel_bo *bo); |
void * |
intel_bo_map_gtt_async(struct intel_bo *bo); |
/** |
* Unmap \p bo. |
* |
* NOTE(review): presumably undoes one call to any of the intel_bo_map*() |
* variants; since recursive mapping is allowed, the calls are presumably |
* counted -- confirm. |
*/ |
void |
intel_bo_unmap(struct intel_bo *bo); |
/** |
* Write data to \p bo. |
* |
* \p data is only read (it is a pointer to const). |
* |
* NOTE(review): \p offset and \p size are presumably a byte offset into the |
* bo and a byte count; the meaning of the int return value (presumably 0 on |
* success) is not documented here -- confirm. |
*/ |
int |
intel_bo_pwrite(struct intel_bo *bo, unsigned long offset, |
unsigned long size, const void *data); |
/** |
* Read data from the bo. |
* |
* NOTE(review): \p offset and \p size are presumably a byte offset into the |
* bo and a byte count, with \p data receiving the bytes; the meaning of the |
* int return value (presumably 0 on success) is not documented here -- |
* confirm. |
*/ |
int |
intel_bo_pread(struct intel_bo *bo, unsigned long offset, |
unsigned long size, void *data); |
/** |
* Add \p target_bo to the relocation list. |
* |
* When \p bo is submitted for execution, and if \p target_bo has moved, |
* the kernel will patch \p bo at \p offset to \p target_bo->offset plus |
* \p target_offset. |
* |
* \p presumed_offset should be written to \p bo at \p offset. |
* |
* NOTE(review): \p flags presumably takes intel_reloc_flag bits, and |
* \p presumed_offset is a non-const pointer that is presumably filled in by |
* this call for the caller to write into \p bo. The meaning of the int |
* return value (presumably 0 on success) is not documented here -- confirm. |
*/ |
int |
intel_bo_add_reloc(struct intel_bo *bo, uint32_t offset, |
struct intel_bo *target_bo, uint32_t target_offset, |
uint32_t flags, uint64_t *presumed_offset); |
/** |
* Return the current number of relocations. |
* |
* The value can be saved and later passed as \p start to |
* intel_bo_truncate_relocs() to undo relocations added in between. |
*/ |
int |
intel_bo_get_reloc_count(struct intel_bo *bo); |
/** |
* Truncate all relocations except the first \p start ones. |
* |
* Combined with intel_bo_get_reloc_count(), they can be used to undo the |
* intel_bo_add_reloc() calls that were just made: record the count before |
* adding, then truncate back to it. |
*/ |
void |
intel_bo_truncate_relocs(struct intel_bo *bo, int start); |
/** |
* Return true if \p target_bo is on the relocation list of \p bo, or on |
* the relocation list of some bo that is referenced by \p bo. |
* |
* In other words, the check covers indirect references as well as direct |
* ones. |
*/ |
bool |
intel_bo_has_reloc(struct intel_bo *bo, struct intel_bo *target_bo); |
/** |
* Wait until \p bo is idle, or \p timeout nanoseconds have passed. A |
* negative timeout means to wait indefinitely. |
* |
* \return 0 only when \p bo is idle; any other value means the bo could |
* not be confirmed idle (intel_bo_is_busy() treats every non-zero |
* result as busy) |
*/ |
int |
intel_bo_wait(struct intel_bo *bo, int64_t timeout); |
/** |
* Tell whether \p bo is still busy. |
* |
* This is a poll: intel_bo_wait() is called with a timeout of zero, and any |
* non-zero result it returns is reported as busy. |
*/ |
static inline bool |
intel_bo_is_busy(struct intel_bo *bo) |
{ |
   const int wait_status = intel_bo_wait(bo, 0); |
   return wait_status != 0; |
} |
#endif /* INTEL_WINSYS_H */ |