Subversion Repositories Kolibri OS

Compare Revisions

Regard whitespace Rev 5563 → Rev 5564

/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_buffer.h
0,0 → 1,90
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_BUFFER_H
#define ILO_BUFFER_H
 
#include "intel_winsys.h"
 
#include "ilo_core.h"
#include "ilo_dev.h"
 
/* A linear buffer resource backed by a single buffer object. */
struct ilo_buffer {
   unsigned bo_size;      /* size of the bo in bytes, including any padding */

   struct intel_bo *bo;   /* backing bo; this struct owns one reference */
};
 
/**
 * Initialize \p buf for a buffer of \p size bytes, padding the bo size as
 * the hardware requires for the given bind flags.
 */
static inline void
ilo_buffer_init(struct ilo_buffer *buf, const struct ilo_dev *dev,
                unsigned size, uint32_t bind, uint32_t flags)
{
   unsigned padded_size = size;

   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 118:
    *
    *     "For buffers, which have no inherent "height," padding
    *      requirements are different.  A buffer must be padded to the next
    *      multiple of 256 array elements, with an additional 16 bytes added
    *      beyond that to account for the L1 cache line."
    */
   if (bind & PIPE_BIND_SAMPLER_VIEW)
      padded_size = align(padded_size, 256) + 16;

   if ((bind & PIPE_BIND_VERTEX_BUFFER) && ilo_dev_gen(dev) < ILO_GEN(7.5)) {
      /*
       * As noted in ilo_format_translate(), some 3-component formats are
       * treated as 4-component formats to work around hardware limitations.
       * A vertex buffer holding a single PIPE_FORMAT_R16G16B16_FLOAT vertex
       * would then be fetched as PIPE_FORMAT_R16G16B16A16_FLOAT, which needs
       * at least 8 bytes, so 6 bytes would fail the boundary check.
       *
       * Adding 2 bytes would suffice, but that could waste a page when the
       * size is already page aligned.  Round to page size for now and
       * revisit when needed.
       */
      padded_size = align(padded_size, 4096);
   }

   buf->bo_size = padded_size;
}
 
/** Drop the reference on the backing bo, if any. */
static inline void
ilo_buffer_cleanup(struct ilo_buffer *buf)
{
   intel_bo_unref(buf->bo);
}
 
/**
 * Replace the backing bo with \p bo, taking a new reference on it and
 * releasing the reference on the old one.
 *
 * Fix: reference the new bo before unreferencing the old one.  The original
 * order could free the bo first if a caller passed the currently-set bo
 * whose only reference was held by \p buf, leaving intel_bo_ref() operating
 * on freed memory.
 */
static inline void
ilo_buffer_set_bo(struct ilo_buffer *buf, struct intel_bo *bo)
{
   struct intel_bo *old = buf->bo;

   buf->bo = intel_bo_ref(bo);
   intel_bo_unref(old);
}
 
#endif /* ILO_BUFFER_H */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_builder.c
0,0 → 1,495
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2014 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "ilo_builder.h"
#include "ilo_builder_render.h" /* for ilo_builder_batch_patch_sba() */
 
/* Internal behavior flags kept in ilo_builder_writer::flags. */
enum ilo_builder_writer_flags {
   /*
    * When this bit is set, ilo_builder_begin() will not reallocate.  New
    * data will be appended instead.
    */
   WRITER_FLAG_APPEND = 1 << 0,

   /*
    * When this bit is set, the writer grows when full.  When not, callers
    * must make sure the writer never needs to grow.
    */
   WRITER_FLAG_GROW = 1 << 1,

   /*
    * The writer will be mapped directly (set when the device has LLC; see
    * ilo_builder_writer_init()).
    */
   WRITER_FLAG_MAP = 1 << 2,
};
 
/**
 * Set the initial size and flags of a writer.
 *
 * Fixes: removed the unreachable "break" after "return" in the default
 * case, and corrected the "pretch" typo in the comment.
 */
static void
ilo_builder_writer_init(struct ilo_builder *builder,
                        enum ilo_builder_writer_type which)
{
   struct ilo_builder_writer *writer = &builder->writers[which];

   switch (which) {
   case ILO_BUILDER_WRITER_BATCH:
      writer->size = sizeof(uint32_t) * 8192;
      break;
   case ILO_BUILDER_WRITER_INSTRUCTION:
      /*
       * The EUs prefetch some instructions.  But since the kernel
       * invalidates the instruction cache between batch buffers, we can
       * set WRITER_FLAG_APPEND without worrying the EUs would see invalid
       * instructions prefetched.
       */
      writer->flags = WRITER_FLAG_APPEND | WRITER_FLAG_GROW;
      writer->size = 8192;
      break;
   default:
      assert(!"unknown builder writer");
      return;
   }

   /* with LLC, the bo can be mapped and written coherently */
   if (builder->dev->has_llc)
      writer->flags |= WRITER_FLAG_MAP;
}
 
/**
 * Free all resources used by a writer.  Note that the initial size and
 * flags are not reset.
 *
 * Fix: clear writer->items after freeing it.  The original left the
 * pointer dangling while resetting item_alloc to 0, so a subsequent
 * ilo_builder_writer_record() would pass the freed pointer to REALLOC()
 * (use-after-free).
 */
static void
ilo_builder_writer_reset(struct ilo_builder *builder,
                         enum ilo_builder_writer_type which)
{
   struct ilo_builder_writer *writer = &builder->writers[which];

   if (writer->ptr) {
      /* ptr is either a bo mapping or a staging allocation */
      if (writer->flags & WRITER_FLAG_MAP)
         intel_bo_unmap(writer->bo);
      else
         FREE(writer->ptr);

      writer->ptr = NULL;
   }

   intel_bo_unref(writer->bo);
   writer->bo = NULL;

   writer->used = 0;
   writer->stolen = 0;

   if (writer->items) {
      FREE(writer->items);
      writer->items = NULL;
      writer->item_alloc = 0;
      writer->item_used = 0;
   }
}
 
/**
 * Discard everything written so far: drop all relocation entries and
 * rewind both the bottom (used) and top (stolen) write pointers.
 */
void
ilo_builder_writer_discard(struct ilo_builder *builder,
                           enum ilo_builder_writer_type which)
{
   struct ilo_builder_writer *writer = &builder->writers[which];

   intel_bo_truncate_relocs(writer->bo, 0);

   writer->used = 0;
   writer->stolen = 0;
   writer->item_used = 0;
}
 
/* Allocate a bo of \p size bytes for the given writer, with a debug name. */
static struct intel_bo *
alloc_writer_bo(struct intel_winsys *winsys,
                enum ilo_builder_writer_type which,
                unsigned size)
{
   static const char *const names[ILO_BUILDER_WRITER_COUNT] = {
      [ILO_BUILDER_WRITER_BATCH] = "batch",
      [ILO_BUILDER_WRITER_INSTRUCTION] = "instruction",
   };
   const char *name = names[which];

   return intel_winsys_alloc_bo(winsys, name, size, true);
}
 
/* Map \p bo for CPU writes according to the writer flags. */
static void *
map_writer_bo(struct intel_bo *bo, unsigned flags)
{
   assert(flags & WRITER_FLAG_MAP);

   /* an appending writer must not stall on a bo the GPU may still use */
   return (flags & WRITER_FLAG_APPEND) ?
      intel_bo_map_gtt_async(bo) : intel_bo_map(bo, true);
}
 
/**
 * Allocate and map the buffer for writing.
 *
 * Unless the writer is in append mode with a live bo, a fresh bo is
 * allocated first; on allocation failure the old bo is reused (with its
 * contents discarded) when one exists, otherwise false is returned.
 */
static bool
ilo_builder_writer_alloc_and_map(struct ilo_builder *builder,
                                 enum ilo_builder_writer_type which)
{
   struct ilo_builder_writer *writer = &builder->writers[which];

   /* allocate a new bo when not appending */
   if (!(writer->flags & WRITER_FLAG_APPEND) || !writer->bo) {
      struct intel_bo *bo;

      bo = alloc_writer_bo(builder->winsys, which, writer->size);
      if (bo) {
         /* replace the old bo, if any */
         intel_bo_unref(writer->bo);
         writer->bo = bo;
      } else if (writer->bo) {
         /* reuse the old bo */
         ilo_builder_writer_discard(builder, which);
      } else {
         return false;
      }

      /* start from scratch in the (new or reused) bo */
      writer->used = 0;
      writer->stolen = 0;
      writer->item_used = 0;
   }

   /* map the bo or allocate the staging system memory */
   if (writer->flags & WRITER_FLAG_MAP)
      writer->ptr = map_writer_bo(writer->bo, writer->flags);
   else if (!writer->ptr)
      writer->ptr = MALLOC(writer->size);

   return (writer->ptr != NULL);
}
 
/**
 * Unmap the buffer for submission.  For unmapped (staging) writers, upload
 * the data written since the last ilo_builder_begin() with pwrite.
 *
 * Returns false when a pwrite failed.
 *
 * Fix: renamed the inner "offset" local, which shadowed the outer one
 * (-Wshadow), to stolen_offset.
 */
static bool
ilo_builder_writer_unmap(struct ilo_builder *builder,
                         enum ilo_builder_writer_type which)
{
   struct ilo_builder_writer *writer = &builder->writers[which];
   unsigned offset;
   int err = 0;

   if (writer->flags & WRITER_FLAG_MAP) {
      intel_bo_unmap(writer->bo);
      writer->ptr = NULL;
      return true;
   }

   /* upload only what was appended after the last ilo_builder_begin() */
   offset = builder->begin_used[which];
   if (writer->used > offset) {
      err = intel_bo_pwrite(writer->bo, offset, writer->used - offset,
                            (char *) writer->ptr + offset);
   }

   /* the stolen region sits at the top of the buffer */
   if (writer->stolen && !err) {
      const unsigned stolen_offset = writer->size - writer->stolen;
      err = intel_bo_pwrite(writer->bo, stolen_offset, writer->stolen,
                            (const char *) writer->ptr + stolen_offset);
   }

   /* keep writer->ptr for reuse with the next batch */

   return !err;
}
 
/**
 * Grow a mapped writer to at least \p new_size.
 *
 * The writer must have WRITER_FLAG_GROW and must have no stolen data
 * (stolen data may already be referenced by relocations and cannot move).
 * When \p preserve is set, the bytes written so far are carried over to the
 * new storage.  Returns false on failure, leaving the writer untouched.
 */
bool
ilo_builder_writer_grow(struct ilo_builder *builder,
                        enum ilo_builder_writer_type which,
                        unsigned new_size, bool preserve)
{
   struct ilo_builder_writer *writer = &builder->writers[which];
   struct intel_bo *new_bo;
   void *new_ptr;

   if (!(writer->flags & WRITER_FLAG_GROW))
      return false;

   /* stolen data may already be referenced and cannot be moved */
   if (writer->stolen)
      return false;

   /* grow at least by doubling */
   if (new_size < writer->size << 1)
      new_size = writer->size << 1;
   /* STATE_BASE_ADDRESS requires page-aligned buffers */
   new_size = align(new_size, 4096);

   new_bo = alloc_writer_bo(builder->winsys, which, new_size);
   if (!new_bo)
      return false;

   /* map and copy the data over */
   if (writer->flags & WRITER_FLAG_MAP) {
      new_ptr = map_writer_bo(new_bo, writer->flags);

      /*
       * When WRITER_FLAG_APPEND and WRITER_FLAG_GROW are both set, we may end
       * up copying between two GTT-mapped BOs.  That is slow.  The issue
       * could be solved by adding intel_bo_map_async(), or callers may choose
       * to manually grow the writer without preserving the data.
       */
      if (new_ptr && preserve)
         memcpy(new_ptr, writer->ptr, writer->used);
   } else if (preserve) {
      /* REALLOC moves (and frees) the old staging memory on success */
      new_ptr = REALLOC(writer->ptr, writer->size, new_size);
   } else {
      new_ptr = MALLOC(new_size);
   }

   if (!new_ptr) {
      /* nothing has been touched yet; just drop the new bo */
      intel_bo_unref(new_bo);
      return false;
   }

   /* release the old mapping or staging memory (REALLOC already freed it) */
   if (writer->flags & WRITER_FLAG_MAP)
      intel_bo_unmap(writer->bo);
   else if (!preserve)
      FREE(writer->ptr);

   intel_bo_unref(writer->bo);

   writer->size = new_size;
   writer->bo = new_bo;
   writer->ptr = new_ptr;

   return true;
}
 
/**
 * Record an item for later decoding.  Returns false when the item array
 * cannot be grown.
 */
bool
ilo_builder_writer_record(struct ilo_builder *builder,
                          enum ilo_builder_writer_type which,
                          enum ilo_builder_item_type type,
                          unsigned offset, unsigned size)
{
   struct ilo_builder_writer *writer = &builder->writers[which];
   struct ilo_builder_item *item;

   /* double the item array when full, starting at 256 entries */
   if (writer->item_used == writer->item_alloc) {
      unsigned new_alloc;
      struct ilo_builder_item *new_items;

      new_alloc = (writer->item_alloc) ? writer->item_alloc << 1 : 256;
      new_items = REALLOC(writer->items,
                          sizeof(writer->items[0]) * writer->item_alloc,
                          sizeof(writer->items[0]) * new_alloc);
      if (!new_items)
         return false;

      writer->items = new_items;
      writer->item_alloc = new_alloc;
   }

   item = &writer->items[writer->item_used++];
   item->type = type;
   item->offset = offset;
   item->size = size;

   return true;
}
 
/**
 * Initialize the builder: zero all state, pick the default MOCS value for
 * the device generation, and set up each writer.
 */
void
ilo_builder_init(struct ilo_builder *builder,
                 const struct ilo_dev *dev,
                 struct intel_winsys *winsys)
{
   int which;

   memset(builder, 0, sizeof(*builder));

   builder->winsys = winsys;
   builder->dev = dev;

   /* default cacheability; gen6_SURFACE_STATE() may override this */
   switch (ilo_dev_gen(dev)) {
   case ILO_GEN(8):
      builder->mocs = GEN8_MOCS_MT_WB | GEN8_MOCS_CT_L3;
      break;
   case ILO_GEN(7.5):
   case ILO_GEN(7):
      builder->mocs = GEN7_MOCS_L3_WB;
      break;
   default:
      builder->mocs = 0;
      break;
   }

   for (which = 0; which < ILO_BUILDER_WRITER_COUNT; which++)
      ilo_builder_writer_init(builder, which);
}
 
/**
 * Reset the builder and free all resources used.  After resetting, the
 * builder behaves as if it is newly initialized, except that writers keep
 * their (potentially grown) initial bo sizes.
 */
void
ilo_builder_reset(struct ilo_builder *builder)
{
   int which;

   for (which = 0; which < ILO_BUILDER_WRITER_COUNT; which++)
      ilo_builder_writer_reset(builder, which);
}
 
/**
 * Allocate and map the BOs.  It may re-allocate or reuse existing BOs if
 * there is any.
 *
 * Most builder functions can only be called after ilo_builder_begin() and
 * before ilo_builder_end().  Returns false (and resets the builder) on
 * allocation failure.
 */
bool
ilo_builder_begin(struct ilo_builder *builder)
{
   int which;

   for (which = 0; which < ILO_BUILDER_WRITER_COUNT; which++) {
      if (!ilo_builder_writer_alloc_and_map(builder, which)) {
         ilo_builder_reset(builder);
         return false;
      }

      /* remember where this batch's data starts, for appending writers */
      builder->begin_used[which] = builder->writers[which].used;
   }

   builder->unrecoverable_error = false;
   builder->sba_instruction_pos = 0;

   return true;
}
 
/**
 * Unmap BOs and make sure the written data landed in the BOs.  Returns the
 * batch bo ready for submission and stores its used size, in bytes, in
 * \p used; returns NULL when an unrecoverable error occurred.
 */
struct intel_bo *
ilo_builder_end(struct ilo_builder *builder, unsigned *used)
{
   struct ilo_builder_writer *batch;
   int which;

   /* patch STATE_BASE_ADDRESS now that the final sizes are known */
   ilo_builder_batch_patch_sba(builder);

   assert(ilo_builder_validate(builder, 0, NULL));

   for (which = 0; which < ILO_BUILDER_WRITER_COUNT; which++) {
      if (!ilo_builder_writer_unmap(builder, which))
         builder->unrecoverable_error = true;
   }

   if (builder->unrecoverable_error)
      return NULL;

   batch = &builder->writers[ILO_BUILDER_WRITER_BATCH];
   *used = batch->used;

   return batch->bo;
}
 
/**
 * Return true if the builder is in a valid state, after accounting for the
 * additional BOs specified.  The additional BOs can be listed to avoid
 * snapshotting and restoring when they are known ahead of time.
 *
 * The number of additional BOs should not be more than a few.  Like two,
 * for copying between two BOs.
 *
 * Callers must make sure the builder is in a valid state when
 * ilo_builder_end() is called.
 */
bool
ilo_builder_validate(struct ilo_builder *builder,
                     unsigned bo_count, struct intel_bo **bos)
{
   const unsigned max_bo_count = 2;
   struct intel_bo *bos_to_submit[ILO_BUILDER_WRITER_COUNT + max_bo_count];
   int count;

   /* start with the writers' own BOs */
   for (count = 0; count < ILO_BUILDER_WRITER_COUNT; count++)
      bos_to_submit[count] = builder->writers[count].bo;

   if (bo_count) {
      assert(bo_count <= max_bo_count);
      if (bo_count > max_bo_count)
         return false;

      memcpy(&bos_to_submit[ILO_BUILDER_WRITER_COUNT],
             bos, sizeof(*bos) * bo_count);
      count += bo_count;
   }

   return intel_winsys_can_submit_bo(builder->winsys, bos_to_submit, count);
}
 
/**
 * Take a snapshot of the batch writer state, so that it can later be
 * rolled back with ilo_builder_batch_restore().
 */
void
ilo_builder_batch_snapshot(const struct ilo_builder *builder,
                           struct ilo_builder_snapshot *snapshot)
{
   const struct ilo_builder_writer *writer =
      &builder->writers[ILO_BUILDER_WRITER_BATCH];

   snapshot->reloc_count = intel_bo_get_reloc_count(writer->bo);
   snapshot->used = writer->used;
   snapshot->stolen = writer->stolen;
   snapshot->item_used = writer->item_used;
}
 
/**
 * Restore the batch writer state to when the snapshot was taken, except
 * that it does not (unnecessarily) shrink BOs or the item array.
 */
void
ilo_builder_batch_restore(struct ilo_builder *builder,
                          const struct ilo_builder_snapshot *snapshot)
{
   struct ilo_builder_writer *writer =
      &builder->writers[ILO_BUILDER_WRITER_BATCH];

   intel_bo_truncate_relocs(writer->bo, snapshot->reloc_count);

   writer->used = snapshot->used;
   writer->stolen = snapshot->stolen;
   writer->item_used = snapshot->item_used;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_builder.h
0,0 → 1,557
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2014 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_BUILDER_H
#define ILO_BUILDER_H
 
#include "intel_winsys.h"
 
#include "ilo_core.h"
#include "ilo_debug.h"
#include "ilo_dev.h"
 
/* The buffers a builder writes to. */
enum ilo_builder_writer_type {
   ILO_BUILDER_WRITER_BATCH,         /* commands and dynamic/surface states */
   ILO_BUILDER_WRITER_INSTRUCTION,   /* shader kernels */

   ILO_BUILDER_WRITER_COUNT,
};
 
/* Kinds of items recorded for later decoding (debug only). */
enum ilo_builder_item_type {
   /* for dynamic buffer */
   ILO_BUILDER_ITEM_BLOB,
   ILO_BUILDER_ITEM_CLIP_VIEWPORT,
   ILO_BUILDER_ITEM_SF_VIEWPORT,
   ILO_BUILDER_ITEM_SCISSOR_RECT,
   ILO_BUILDER_ITEM_CC_VIEWPORT,
   ILO_BUILDER_ITEM_COLOR_CALC,
   ILO_BUILDER_ITEM_DEPTH_STENCIL,
   ILO_BUILDER_ITEM_BLEND,
   ILO_BUILDER_ITEM_SAMPLER,
   ILO_BUILDER_ITEM_INTERFACE_DESCRIPTOR,

   /* for surface buffer */
   ILO_BUILDER_ITEM_SURFACE,
   ILO_BUILDER_ITEM_BINDING_TABLE,

   /* for instruction buffer */
   ILO_BUILDER_ITEM_KERNEL,

   ILO_BUILDER_ITEM_COUNT,
};
 
/* A recorded item: what was written, and where in the writer's buffer. */
struct ilo_builder_item {
   enum ilo_builder_item_type type;
   unsigned offset;   /* byte offset into the writer's buffer */
   unsigned size;     /* size in bytes */
};
 
/* Per-buffer write state; see enum ilo_builder_writer_flags for flags. */
struct ilo_builder_writer {
   /* internal flags */
   unsigned flags;

   unsigned size;          /* total buffer size in bytes */
   struct intel_bo *bo;    /* backing bo */
   void *ptr;              /* bo mapping or staging memory, per flags */

   /* data written to the bottom */
   unsigned used;
   /* data written to the top */
   unsigned stolen;

   /* for decoding */
   struct ilo_builder_item *items;
   unsigned item_alloc;    /* capacity of items[] */
   unsigned item_used;     /* entries of items[] in use */
};
 
/**
 * A snapshot of the writer state, as captured by
 * ilo_builder_batch_snapshot().
 */
struct ilo_builder_snapshot {
   unsigned reloc_count;   /* relocation entries at snapshot time */

   unsigned used;
   unsigned stolen;
   unsigned item_used;
};
 
/* Builds batch/instruction buffers for submission. */
struct ilo_builder {
   const struct ilo_dev *dev;
   struct intel_winsys *winsys;
   uint32_t mocs;   /* default cacheability; set by ilo_builder_init() */

   struct ilo_builder_writer writers[ILO_BUILDER_WRITER_COUNT];
   bool unrecoverable_error;   /* set on any failure until the next begin */

   /* for writers that have their data appended */
   unsigned begin_used[ILO_BUILDER_WRITER_COUNT];

   /* for STATE_BASE_ADDRESS */
   unsigned sba_instruction_pos;
};
 
void
ilo_builder_init(struct ilo_builder *builder,
const struct ilo_dev *dev,
struct intel_winsys *winsys);
 
void
ilo_builder_reset(struct ilo_builder *builder);
 
void
ilo_builder_decode(struct ilo_builder *builder);
 
bool
ilo_builder_begin(struct ilo_builder *builder);
 
struct intel_bo *
ilo_builder_end(struct ilo_builder *builder, unsigned *used);
 
bool
ilo_builder_validate(struct ilo_builder *builder,
unsigned bo_count, struct intel_bo **bos);
 
/**
 * Return true if the builder has a relocation entry for \p bo.
 */
static inline bool
ilo_builder_has_reloc(const struct ilo_builder *builder,
                      struct intel_bo *bo)
{
   int which;

   /* scan every writer's bo for a relocation pointing at bo */
   for (which = 0; which < ILO_BUILDER_WRITER_COUNT; which++) {
      if (intel_bo_has_reloc(builder->writers[which].bo, bo))
         return true;
   }

   return false;
}
 
void
ilo_builder_writer_discard(struct ilo_builder *builder,
enum ilo_builder_writer_type which);
 
bool
ilo_builder_writer_grow(struct ilo_builder *builder,
enum ilo_builder_writer_type which,
unsigned new_size, bool preserve);
 
bool
ilo_builder_writer_record(struct ilo_builder *builder,
enum ilo_builder_writer_type which,
enum ilo_builder_item_type type,
unsigned offset, unsigned size);
 
/*
 * Record an item when decoding is enabled; on failure, flag the builder and
 * drop all recorded items (they are only used for debugging).
 */
static inline void
ilo_builder_writer_checked_record(struct ilo_builder *builder,
                                  enum ilo_builder_writer_type which,
                                  enum ilo_builder_item_type item,
                                  unsigned offset, unsigned size)
{
   if (unlikely(ilo_debug & (ILO_DEBUG_BATCH | ILO_DEBUG_HANG)) &&
       !ilo_builder_writer_record(builder, which, item, offset, size)) {
      builder->unrecoverable_error = true;
      builder->writers[which].item_used = 0;
   }
}
 
/**
 * Return an offset to a region that is aligned to \p alignment and has at
 * least \p size bytes.  The region is reserved from the bottom.
 *
 * On failure to grow, the builder is flagged unrecoverable, the writer is
 * discarded, and offset 0 is returned.
 */
static inline unsigned
ilo_builder_writer_reserve_bottom(struct ilo_builder *builder,
                                  enum ilo_builder_writer_type which,
                                  unsigned alignment, unsigned size)
{
   struct ilo_builder_writer *writer = &builder->writers[which];
   unsigned offset;

   assert(alignment && util_is_power_of_two(alignment));
   offset = align(writer->used, alignment);

   /* the region must not run into the stolen data at the top */
   if (unlikely(offset + size > writer->size - writer->stolen)) {
      if (!ilo_builder_writer_grow(builder, which,
               offset + size + writer->stolen, true)) {
         builder->unrecoverable_error = true;
         ilo_builder_writer_discard(builder, which);
         /* after discard, used == stolen == 0, so offset 0 is in range */
         offset = 0;
      }

      /* on successful grow, the old offset is still valid (used unchanged) */
      assert(offset + size <= writer->size - writer->stolen);
   }

   return offset;
}
 
/**
 * Similar to ilo_builder_writer_reserve_bottom(), but reserve from the top.
 *
 * On failure to grow, the builder is flagged unrecoverable and the writer
 * is discarded before the offset is recomputed.
 */
static inline unsigned
ilo_builder_writer_reserve_top(struct ilo_builder *builder,
                               enum ilo_builder_writer_type which,
                               unsigned alignment, unsigned size)
{
   struct ilo_builder_writer *writer = &builder->writers[which];
   unsigned offset;

   assert(alignment && util_is_power_of_two(alignment));
   /* start of the new top region, aligned downward */
   offset = (writer->size - writer->stolen - size) & ~(alignment - 1);

   /* the region must not run into the data at the bottom, nor underflow */
   if (unlikely(offset < writer->used ||
            size > writer->size - writer->stolen)) {
      if (!ilo_builder_writer_grow(builder, which,
               align(writer->used, alignment) + size + writer->stolen, true)) {
         builder->unrecoverable_error = true;
         ilo_builder_writer_discard(builder, which);
      }

      /* recompute against the grown (or discarded) writer */
      offset = (writer->size - writer->stolen - size) & ~(alignment - 1);
      assert(offset + size <= writer->size - writer->stolen);
   }

   return offset;
}
 
/**
 * Add a relocation entry to the writer at byte \p offset, and write the
 * presumed address of \p bo into the mapped/staging memory there (64-bit
 * when \p write_presumed_offset_hi is set, for Gen8+ addressing; 32-bit
 * otherwise).
 */
static inline void
ilo_builder_writer_reloc(struct ilo_builder *builder,
                         enum ilo_builder_writer_type which,
                         unsigned offset, struct intel_bo *bo,
                         unsigned bo_offset, unsigned reloc_flags,
                         bool write_presumed_offset_hi)
{
   struct ilo_builder_writer *writer = &builder->writers[which];
   uint64_t presumed_offset;
   int err;

   /* 64-bit presumed offsets are a Gen8 thing; 32-bit for Gen6-7.5 */
   if (write_presumed_offset_hi)
      ILO_DEV_ASSERT(builder->dev, 8, 8);
   else
      ILO_DEV_ASSERT(builder->dev, 6, 7.5);

   /* offset must be inside the bottom (used) or top (stolen) region */
   assert(offset + sizeof(uint32_t) <= writer->used ||
          (offset >= writer->size - writer->stolen &&
           offset + sizeof(uint32_t) <= writer->size));

   err = intel_bo_add_reloc(writer->bo, offset, bo, bo_offset,
         reloc_flags, &presumed_offset);
   if (unlikely(err))
      builder->unrecoverable_error = true;

   if (write_presumed_offset_hi) {
      *((uint64_t *) ((char *) writer->ptr + offset)) = presumed_offset;
   } else {
      /* 32-bit addressing */
      assert(presumed_offset == (uint64_t) ((uint32_t) presumed_offset));
      *((uint32_t *) ((char *) writer->ptr + offset)) = presumed_offset;
   }
}
 
/**
 * Reserve a region from the dynamic buffer.  Both the offset, in bytes, and
 * the pointer to the reserved region are returned.  The pointer is only
 * valid until the next reserve call.
 *
 * Note that \p alignment is in bytes and \p len is in DWords.
 */
static inline uint32_t
ilo_builder_dynamic_pointer(struct ilo_builder *builder,
                            enum ilo_builder_item_type item,
                            unsigned alignment, unsigned len,
                            uint32_t **dw)
{
   const enum ilo_builder_writer_type which = ILO_BUILDER_WRITER_BATCH;
   const unsigned size = len << 2;
   const unsigned offset = ilo_builder_writer_reserve_top(builder,
         which, alignment, size);
   struct ilo_builder_writer *writer = &builder->writers[which];

   /* all states are at least aligned to 32-bytes */
   if (item != ILO_BUILDER_ITEM_BLOB)
      assert(alignment % 32 == 0);

   *dw = (uint32_t *) ((char *) writer->ptr + offset);

   /* everything from offset to the end, padding included, is now stolen */
   writer->stolen = writer->size - offset;

   ilo_builder_writer_checked_record(builder, which, item, offset, size);

   return offset;
}
 
/**
 * Write a dynamic state to the dynamic buffer and return its byte offset.
 * \p alignment is in bytes; \p len is in DWords.
 */
static inline uint32_t
ilo_builder_dynamic_write(struct ilo_builder *builder,
                          enum ilo_builder_item_type item,
                          unsigned alignment, unsigned len,
                          const uint32_t *dw)
{
   uint32_t *dst;
   const uint32_t offset =
      ilo_builder_dynamic_pointer(builder, item, alignment, len, &dst);

   memcpy(dst, dw, len << 2);

   return offset;
}
 
/**
 * Reserve some space from the top (for prefetches).  \p len is in DWords.
 */
static inline void
ilo_builder_dynamic_pad_top(struct ilo_builder *builder, unsigned len)
{
   const enum ilo_builder_writer_type which = ILO_BUILDER_WRITER_BATCH;
   const unsigned want = len << 2;
   struct ilo_builder_writer *writer = &builder->writers[which];

   /* steal just enough extra bytes to bring the top region up to want */
   if (writer->stolen < want) {
      ilo_builder_writer_reserve_top(builder, which,
                                     1, want - writer->stolen);
      writer->stolen = want;
   }
}
 
/* Return the dynamic buffer usage, in DWords. */
static inline unsigned
ilo_builder_dynamic_used(const struct ilo_builder *builder)
{
   const struct ilo_builder_writer *writer =
      &builder->writers[ILO_BUILDER_WRITER_BATCH];

   return writer->stolen >> 2;
}
 
/**
 * Reserve a region from the surface buffer.  Both the offset, in bytes, and
 * the pointer to the reserved region are returned.  The pointer is only
 * valid until the next reserve call.
 *
 * Note that \p alignment is in bytes and \p len is in DWords.
 */
static inline uint32_t
ilo_builder_surface_pointer(struct ilo_builder *builder,
                            enum ilo_builder_item_type item,
                            unsigned alignment, unsigned len,
                            uint32_t **dw)
{
   /* surface states and binding tables share the dynamic buffer */
   assert(item == ILO_BUILDER_ITEM_SURFACE ||
          item == ILO_BUILDER_ITEM_BINDING_TABLE);

   return ilo_builder_dynamic_pointer(builder, item, alignment, len, dw);
}
 
/**
 * Add a 32-bit relocation entry for the \p dw_index-th DWord of the surface
 * state at byte offset \p offset.
 */
static inline void
ilo_builder_surface_reloc(struct ilo_builder *builder,
                          uint32_t offset, unsigned dw_index,
                          struct intel_bo *bo, unsigned bo_offset,
                          unsigned reloc_flags)
{
   ilo_builder_writer_reloc(builder, ILO_BUILDER_WRITER_BATCH,
                            offset + (dw_index << 2),
                            bo, bo_offset, reloc_flags, false);
}
 
/* 64-bit variant of ilo_builder_surface_reloc(), for Gen8+ addressing. */
static inline void
ilo_builder_surface_reloc64(struct ilo_builder *builder,
                            uint32_t offset, unsigned dw_index,
                            struct intel_bo *bo, unsigned bo_offset,
                            unsigned reloc_flags)
{
   ilo_builder_writer_reloc(builder, ILO_BUILDER_WRITER_BATCH,
                            offset + (dw_index << 2),
                            bo, bo_offset, reloc_flags, true);
}
 
/* Return the surface buffer usage, in DWords (shared with dynamic). */
static inline unsigned
ilo_builder_surface_used(const struct ilo_builder *builder)
{
   return ilo_builder_dynamic_used(builder);
}
 
/**
 * Write a kernel to the instruction buffer.  The offset, in bytes, of the
 * kernel is returned.
 */
static inline uint32_t
ilo_builder_instruction_write(struct ilo_builder *builder,
                              unsigned size, const void *kernel)
{
   const enum ilo_builder_writer_type which = ILO_BUILDER_WRITER_INSTRUCTION;
   /*
    * From the Sandy Bridge PRM, volume 4 part 2, page 112:
    *
    *     "Due to prefetch of the instruction stream, the EUs may attempt to
    *      access up to 8 instructions (128 bytes) beyond the end of the
    *      kernel program - possibly into the next memory page.  Although
    *      these instructions will not be executed, software must account for
    *      the prefetch in order to avoid invalid page access faults."
    */
   const unsigned reserved_size = size + 128;
   /* kernels are aligned to 64 bytes */
   const unsigned alignment = 64;
   const unsigned offset = ilo_builder_writer_reserve_bottom(builder,
         which, alignment, reserved_size);
   struct ilo_builder_writer *writer = &builder->writers[which];

   memcpy((char *) writer->ptr + offset, kernel, size);

   /*
    * Only the kernel itself is counted as used; the 128-byte prefetch pad
    * may be overwritten by the next kernel.  Presumably only the pad past
    * the last kernel matters -- NOTE(review): confirm against callers.
    */
   writer->used = offset + size;

   ilo_builder_writer_checked_record(builder, which,
         ILO_BUILDER_ITEM_KERNEL, offset, size);

   return offset;
}
 
/**
 * Reserve a region from the batch buffer.  Both the offset, in DWords, and
 * the pointer to the reserved region are returned.  The pointer is only
 * valid until the next reserve call.
 *
 * Note that \p len is in DWords.
 */
static inline unsigned
ilo_builder_batch_pointer(struct ilo_builder *builder,
                          unsigned len, uint32_t **dw)
{
   const enum ilo_builder_writer_type which = ILO_BUILDER_WRITER_BATCH;
   const unsigned size = len << 2;
   struct ilo_builder_writer *writer = &builder->writers[which];
   unsigned offset;

   /*
    * The batch bo is always aligned.  Reserving with alignment 1 should
    * allow the compiler to optimize away the align computation.
    */
   offset = ilo_builder_writer_reserve_bottom(builder, which, 1, size);
   assert(offset % 4 == 0);

   *dw = (uint32_t *) ((char *) writer->ptr + offset);
   writer->used = offset + size;

   return offset >> 2;
}
 
/**
 * Write a command to the batch buffer and return its position in DWords.
 */
static inline unsigned
ilo_builder_batch_write(struct ilo_builder *builder,
                        unsigned len, const uint32_t *dw)
{
   uint32_t *dst;
   const unsigned pos = ilo_builder_batch_pointer(builder, len, &dst);

   memcpy(dst, dw, len << 2);

   return pos;
}
 
/**
 * Add a 32-bit relocation entry for the command DWord at position \p pos.
 */
static inline void
ilo_builder_batch_reloc(struct ilo_builder *builder, unsigned pos,
                        struct intel_bo *bo, unsigned bo_offset,
                        unsigned reloc_flags)
{
   ilo_builder_writer_reloc(builder, ILO_BUILDER_WRITER_BATCH, pos << 2,
                            bo, bo_offset, reloc_flags, false);
}
 
/* 64-bit variant of ilo_builder_batch_reloc(), for Gen8+ addressing. */
static inline void
ilo_builder_batch_reloc64(struct ilo_builder *builder, unsigned pos,
                          struct intel_bo *bo, unsigned bo_offset,
                          unsigned reloc_flags)
{
   ilo_builder_writer_reloc(builder, ILO_BUILDER_WRITER_BATCH, pos << 2,
                            bo, bo_offset, reloc_flags, true);
}
 
/* Return the batch buffer usage from the bottom, in DWords. */
static inline unsigned
ilo_builder_batch_used(const struct ilo_builder *builder)
{
   const struct ilo_builder_writer *writer =
      &builder->writers[ILO_BUILDER_WRITER_BATCH];

   return writer->used >> 2;
}
 
/* Return the free space between the bottom and top regions, in DWords. */
static inline unsigned
ilo_builder_batch_space(const struct ilo_builder *builder)
{
   const struct ilo_builder_writer *writer =
      &builder->writers[ILO_BUILDER_WRITER_BATCH];

   return (writer->size - writer->stolen - writer->used) >> 2;
}
 
/* Discard everything written to the batch buffer so far. */
static inline void
ilo_builder_batch_discard(struct ilo_builder *builder)
{
   ilo_builder_writer_discard(builder, ILO_BUILDER_WRITER_BATCH);
}
 
/* Print batch buffer usage: bottom + top bytes and percent full. */
static inline void
ilo_builder_batch_print_stats(const struct ilo_builder *builder)
{
   const struct ilo_builder_writer *writer =
      &builder->writers[ILO_BUILDER_WRITER_BATCH];
   const unsigned total = writer->used + writer->stolen;

   ilo_printf("%d+%d bytes (%d%% full)\n",
              writer->used, writer->stolen,
              total * 100 / writer->size);
}
 
void
ilo_builder_batch_snapshot(const struct ilo_builder *builder,
struct ilo_builder_snapshot *snapshot);
 
void
ilo_builder_batch_restore(struct ilo_builder *builder,
const struct ilo_builder_snapshot *snapshot);
 
#endif /* ILO_BUILDER_H */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_builder_3d.h
0,0 → 1,92
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2014 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_BUILDER_3D_H
#define ILO_BUILDER_3D_H
 
#include "genhw/genhw.h"
 
#include "ilo_core.h"
#include "ilo_dev.h"
#include "ilo_builder_3d_top.h"
#include "ilo_builder_3d_bottom.h"
 
/* Emit the Gen6 3DPRIMITIVE command for a draw. */
static inline void
gen6_3DPRIMITIVE(struct ilo_builder *builder,
                 const struct pipe_draw_info *info,
                 const struct ilo_ib_state *ib)
{
   const uint8_t cmd_len = 6;
   const int prim = gen6_3d_translate_pipe_prim(info->mode);
   uint32_t vb_access, vb_start;
   uint32_t *dw;

   ILO_DEV_ASSERT(builder->dev, 6, 6);

   /* indexed draws fetch vertices at random; others sequentially */
   if (info->indexed) {
      vb_access = GEN6_3DPRIM_DW0_ACCESS_RANDOM;
      vb_start = info->start + ib->draw_start_offset;
   } else {
      vb_access = GEN6_3DPRIM_DW0_ACCESS_SEQUENTIAL;
      vb_start = info->start;
   }

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN6_RENDER_CMD(3D, 3DPRIMITIVE) |
           vb_access |
           prim << GEN6_3DPRIM_DW0_TYPE__SHIFT |
           (cmd_len - 2);
   dw[1] = info->count;
   dw[2] = vb_start;
   dw[3] = info->instance_count;
   dw[4] = info->start_instance;
   dw[5] = info->index_bias;
}
 
/*
 * Emit the Gen7+ 3DPRIMITIVE command.  Unlike Gen6, the access mode and
 * primitive type moved from DW0 to DW1, growing the command to 7 dwords.
 */
static inline void
gen7_3DPRIMITIVE(struct ilo_builder *builder,
                 const struct pipe_draw_info *info,
                 const struct ilo_ib_state *ib)
{
   const uint8_t cmd_len = 7;
   const int prim = gen6_3d_translate_pipe_prim(info->mode);
   /* RANDOM = fetch through the index buffer; SEQUENTIAL = linear fetch */
   const int vb_access = (info->indexed) ?
      GEN7_3DPRIM_DW1_ACCESS_RANDOM : GEN7_3DPRIM_DW1_ACCESS_SEQUENTIAL;
   /* indexed draws are biased by the index-buffer draw offset */
   const uint32_t vb_start = info->start +
      ((info->indexed) ? ib->draw_start_offset : 0);
   uint32_t *dw;

   ILO_DEV_ASSERT(builder->dev, 7, 8);

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN6_RENDER_CMD(3D, 3DPRIMITIVE) | (cmd_len - 2);
   dw[1] = vb_access | prim;
   dw[2] = info->count;            /* vertex (or index) count per instance */
   dw[3] = vb_start;               /* start vertex / start index */
   dw[4] = info->instance_count;
   dw[5] = info->start_instance;
   dw[6] = info->index_bias;       /* base vertex for indexed draws */
}
 
#endif /* ILO_BUILDER_3D_H */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h
0,0 → 1,1815
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2014 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_BUILDER_3D_BOTTOM_H
#define ILO_BUILDER_3D_BOTTOM_H
 
#include "genhw/genhw.h"
#include "../ilo_shader.h"
#include "intel_winsys.h"
 
#include "ilo_core.h"
#include "ilo_dev.h"
#include "ilo_format.h"
#include "ilo_builder.h"
#include "ilo_builder_3d_top.h"
 
/*
 * Emit 3DSTATE_CLIP from the precomputed rasterizer clip payload,
 * optionally enabling the guardband test and noperspective barycentrics
 * when the fragment shader needs them.
 */
static inline void
gen6_3DSTATE_CLIP(struct ilo_builder *builder,
                  const struct ilo_rasterizer_state *rasterizer,
                  const struct ilo_shader_state *fs,
                  bool enable_guardband,
                  int num_viewports)
{
   const uint8_t cmd_len = 4;
   uint32_t dw1, dw2, dw3, *dw;
   int interps;

   ILO_DEV_ASSERT(builder->dev, 6, 8);

   /* start from the state baked at rasterizer-CSO creation time */
   dw1 = rasterizer->clip.payload[0];
   dw2 = rasterizer->clip.payload[1];
   dw3 = rasterizer->clip.payload[2];

   if (enable_guardband && rasterizer->clip.can_enable_guardband)
      dw2 |= GEN6_CLIP_DW2_GB_TEST_ENABLE;

   /* fs == NULL means no barycentric interpolation is required */
   interps = (fs) ?  ilo_shader_get_kernel_param(fs,
         ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) : 0;

   /* the clipper must produce noperspective barycentrics if the FS reads any */
   if (interps & (GEN6_INTERP_NONPERSPECTIVE_PIXEL |
                  GEN6_INTERP_NONPERSPECTIVE_CENTROID |
                  GEN6_INTERP_NONPERSPECTIVE_SAMPLE))
      dw2 |= GEN6_CLIP_DW2_NONPERSPECTIVE_BARYCENTRIC_ENABLE;

   /* force RT array index to zero; DW3 low bits hold max viewport index */
   dw3 |= GEN6_CLIP_DW3_RTAINDEX_FORCED_ZERO |
          (num_viewports - 1);

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CLIP) | (cmd_len - 2);
   dw[1] = dw1;
   dw[2] = dw2;
   dw[3] = dw3;
}
 
/* Emit a 3DSTATE_CLIP with an all-zero body, disabling the clip stage. */
static inline void
gen6_disable_3DSTATE_CLIP(struct ilo_builder *builder)
{
   const uint8_t cmd_len = 4;
   uint32_t *payload;
   int i;

   ILO_DEV_ASSERT(builder->dev, 6, 7.5);

   ilo_builder_batch_pointer(builder, cmd_len, &payload);

   payload[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CLIP) | (cmd_len - 2);
   for (i = 1; i < cmd_len; i++)
      payload[i] = 0;
}
 
/*
 * Fill in the 7-dword Gen7-layout 3DSTATE_SF body.  Used directly on Gen7,
 * and as a building block of the combined Gen6 3DSTATE_SF.
 *
 * sf may be NULL, in which case a mostly-zero body is written (only MSAA
 * rasterization mode is set when multisampling).
 */
static inline void
gen7_internal_3dstate_sf(struct ilo_builder *builder,
                         uint8_t cmd_len, uint32_t *dw,
                         const struct ilo_rasterizer_sf *sf,
                         int num_samples)
{
   ILO_DEV_ASSERT(builder->dev, 6, 7.5);

   assert(cmd_len == 7);

   dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SF) | (cmd_len - 2);

   if (!sf) {
      dw[1] = 0;
      dw[2] = (num_samples > 1) ? GEN7_SF_DW2_MSRASTMODE_ON_PATTERN : 0;
      dw[3] = 0;
      dw[4] = 0;
      dw[5] = 0;
      dw[6] = 0;

      return;
   }

   /* see rasterizer_init_sf_gen6() */
   STATIC_ASSERT(Elements(sf->payload) >= 3);
   dw[1] = sf->payload[0];
   dw[2] = sf->payload[1];
   dw[3] = sf->payload[2];

   /* MSAA rasterization bits are only valid with more than one sample */
   if (num_samples > 1)
      dw[2] |= sf->dw_msaa;

   dw[4] = sf->dw_depth_offset_const;
   dw[5] = sf->dw_depth_offset_scale;
   dw[6] = sf->dw_depth_offset_clamp;
}
 
/*
 * Fill in the 4-dword Gen8-layout 3DSTATE_SBE body, describing how FS
 * attributes are sourced from the VUE.  Shared by the Gen6/Gen7 SF/SBE
 * emitters, which splice these dwords into their own layouts.
 *
 * fs may be NULL; a minimal body (URB read length 1) is written then.
 */
static inline void
gen8_internal_3dstate_sbe(struct ilo_builder *builder,
                          uint8_t cmd_len, uint32_t *dw,
                          const struct ilo_shader_state *fs,
                          int sprite_coord_mode)
{
   const struct ilo_kernel_routing *routing;
   int vue_offset, vue_len, out_count;

   ILO_DEV_ASSERT(builder->dev, 6, 8);

   assert(cmd_len == 4);

   dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SBE) | (cmd_len - 2);

   if (!fs) {
      dw[1] = 1 << GEN7_SBE_DW1_URB_READ_LEN__SHIFT;
      dw[2] = 0;
      dw[3] = 0;
      return;
   }

   routing = ilo_shader_get_kernel_routing(fs);

   /* URB read offset is in units of 256-bit pairs, hence the halving */
   vue_offset = routing->source_skip;
   assert(vue_offset % 2 == 0);
   vue_offset /= 2;

   /* read length is also in pairs; hardware requires at least 1 */
   vue_len = (routing->source_len + 1) / 2;
   if (!vue_len)
      vue_len = 1;

   out_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
   assert(out_count <= 32);

   dw[1] = out_count << GEN7_SBE_DW1_ATTR_COUNT__SHIFT |
           vue_len << GEN7_SBE_DW1_URB_READ_LEN__SHIFT;

   if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
      /* Gen8 must explicitly opt in to the programmed length/offset */
      dw[1] |= GEN8_SBE_DW1_USE_URB_READ_LEN |
               GEN8_SBE_DW1_USE_URB_READ_OFFSET |
               vue_offset << GEN8_SBE_DW1_URB_READ_OFFSET__SHIFT;
   } else {
      dw[1] |= vue_offset << GEN7_SBE_DW1_URB_READ_OFFSET__SHIFT;
   }

   if (routing->swizzle_enable)
      dw[1] |= GEN7_SBE_DW1_ATTR_SWIZZLE_ENABLE;

   switch (sprite_coord_mode) {
   case PIPE_SPRITE_COORD_UPPER_LEFT:
      dw[1] |= GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_UPPERLEFT;
      break;
   case PIPE_SPRITE_COORD_LOWER_LEFT:
      dw[1] |= GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_LOWERLEFT;
      break;
   }

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 268:
    *
    *     "This field (Point Sprite Texture Coordinate Enable) must be
    *      programmed to 0 when non-point primitives are rendered."
    *
    * TODO We do not check that yet.
    */
   dw[2] = routing->point_sprite_enable;

   dw[3] = routing->const_interp_enable;
}
 
/*
 * Fill in the 11-dword Gen8-layout 3DSTATE_SBE_SWIZ body with the
 * attribute swizzle table from the FS kernel routing.  Also used to
 * build the swizzle portion of the Gen6/Gen7 SF/SBE commands.
 */
static inline void
gen8_internal_3dstate_sbe_swiz(struct ilo_builder *builder,
                               uint8_t cmd_len, uint32_t *dw,
                               const struct ilo_shader_state *fs)
{
   const struct ilo_kernel_routing *routing;

   ILO_DEV_ASSERT(builder->dev, 6, 8);

   assert(cmd_len == 11);

   dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_SBE_SWIZ) | (cmd_len - 2);

   if (!fs) {
      /* no FS: zero the whole body */
      memset(&dw[1], 0, sizeof(*dw) * (cmd_len - 1));
      return;
   }

   routing = ilo_shader_get_kernel_routing(fs);

   /* 16 swizzle entries, two per dword */
   STATIC_ASSERT(sizeof(routing->swizzles) >= sizeof(*dw) * 8);
   memcpy(&dw[1], routing->swizzles, sizeof(*dw) * 8);

   /* WrapShortest enables */
   dw[9] = 0;
   dw[10] = 0;
}
 
/*
 * Emit the monolithic Gen6 3DSTATE_SF (20 dwords).  On Gen6 the SF stage
 * also performs setup-backend work, so the command is assembled by
 * interleaving dwords from the Gen7-style SF body and the Gen8-style
 * SBE/SBE_SWIZ bodies, keeping a single source of truth for each part.
 */
static inline void
gen6_3DSTATE_SF(struct ilo_builder *builder,
                const struct ilo_rasterizer_state *rasterizer,
                const struct ilo_shader_state *fs,
                int sample_count)
{
   const uint8_t cmd_len = 20;
   uint32_t gen8_3dstate_sbe[4], gen8_3dstate_sbe_swiz[11];
   uint32_t gen7_3dstate_sf[7];
   const struct ilo_rasterizer_sf *sf;
   int sprite_coord_mode;
   uint32_t *dw;

   ILO_DEV_ASSERT(builder->dev, 6, 6);

   /* both helpers tolerate NULL inputs */
   sf = (rasterizer) ? &rasterizer->sf : NULL;
   sprite_coord_mode = (rasterizer) ? rasterizer->state.sprite_coord_mode : 0;

   gen8_internal_3dstate_sbe(builder, Elements(gen8_3dstate_sbe),
         gen8_3dstate_sbe, fs, sprite_coord_mode);
   gen8_internal_3dstate_sbe_swiz(builder, Elements(gen8_3dstate_sbe_swiz),
         gen8_3dstate_sbe_swiz, fs);
   gen7_internal_3dstate_sf(builder, Elements(gen7_3dstate_sf),
         gen7_3dstate_sf, sf, sample_count);

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   /* splice: [1] SBE, [2..7] SF, [8..15] swizzles, [16..19] SBE/swiz tail */
   dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SF) | (cmd_len - 2);
   dw[1] = gen8_3dstate_sbe[1];
   memcpy(&dw[2], &gen7_3dstate_sf[1], sizeof(*dw) * 6);
   memcpy(&dw[8], &gen8_3dstate_sbe_swiz[1], sizeof(*dw) * 8);
   dw[16] = gen8_3dstate_sbe[2];
   dw[17] = gen8_3dstate_sbe[3];
   dw[18] = gen8_3dstate_sbe_swiz[9];
   dw[19] = gen8_3dstate_sbe_swiz[10];
}
 
/*
 * Emit the Gen7/7.5 3DSTATE_SF, adding the depth-buffer format to DW1
 * (needed by the SF stage for depth-offset computations).
 */
static inline void
gen7_3DSTATE_SF(struct ilo_builder *builder,
                const struct ilo_rasterizer_sf *sf,
                enum pipe_format zs_format,
                int sample_count)
{
   const uint8_t cmd_len = 7;
   uint32_t *dw;

   ILO_DEV_ASSERT(builder->dev, 7, 7.5);

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   gen7_internal_3dstate_sf(builder, cmd_len, dw, sf, sample_count);

   /* NOTE(review): always true here given the assert above; kept as-is */
   if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) {
      int hw_format;

      /* separate stencil */
      switch (zs_format) {
      case PIPE_FORMAT_Z16_UNORM:
         hw_format = GEN6_ZFORMAT_D16_UNORM;
         break;
      case PIPE_FORMAT_Z32_FLOAT:
      case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
         hw_format = GEN6_ZFORMAT_D32_FLOAT;
         break;
      case PIPE_FORMAT_Z24X8_UNORM:
      case PIPE_FORMAT_Z24_UNORM_S8_UINT:
         hw_format = GEN6_ZFORMAT_D24_UNORM_X8_UINT;
         break;
      default:
         /* FLOAT surface is assumed when there is no depth buffer */
         hw_format = GEN6_ZFORMAT_D32_FLOAT;
         break;
      }

      dw[1] |= hw_format << GEN7_SF_DW1_DEPTH_FORMAT__SHIFT;
   }
}
 
/*
 * Emit the Gen8 3DSTATE_SF straight from the precomputed rasterizer
 * payload; see rasterizer_init_sf_gen8().
 */
static inline void
gen8_3DSTATE_SF(struct ilo_builder *builder,
                const struct ilo_rasterizer_sf *sf)
{
   const uint8_t cmd_len = 4;
   uint32_t *body;
   int i;

   ILO_DEV_ASSERT(builder->dev, 8, 8);

   ilo_builder_batch_pointer(builder, cmd_len, &body);

   body[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SF) | (cmd_len - 2);

   STATIC_ASSERT(Elements(sf->payload) >= 3);
   for (i = 0; i < 3; i++)
      body[i + 1] = sf->payload[i];
}
 
/*
 * Emit the Gen7/7.5 3DSTATE_SBE (14 dwords) by splicing together the
 * Gen8-style SBE and SBE_SWIZ bodies, which on Gen7 still form a single
 * command.
 */
static inline void
gen7_3DSTATE_SBE(struct ilo_builder *builder,
                 const struct ilo_shader_state *fs,
                 int sprite_coord_mode)
{
   const uint8_t cmd_len = 14;
   uint32_t gen8_3dstate_sbe[4], gen8_3dstate_sbe_swiz[11];
   uint32_t *dw;

   ILO_DEV_ASSERT(builder->dev, 7, 7.5);

   gen8_internal_3dstate_sbe(builder, Elements(gen8_3dstate_sbe),
         gen8_3dstate_sbe, fs, sprite_coord_mode);
   gen8_internal_3dstate_sbe_swiz(builder, Elements(gen8_3dstate_sbe_swiz),
         gen8_3dstate_sbe_swiz, fs);

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   /* splice: [1] SBE, [2..9] swizzles, [10..13] SBE/swiz tail */
   dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SBE) | (cmd_len - 2);
   dw[1] = gen8_3dstate_sbe[1];
   memcpy(&dw[2], &gen8_3dstate_sbe_swiz[1], sizeof(*dw) * 8);
   dw[10] = gen8_3dstate_sbe[2];
   dw[11] = gen8_3dstate_sbe[3];
   dw[12] = gen8_3dstate_sbe_swiz[9];
   dw[13] = gen8_3dstate_sbe_swiz[10];
}
 
/* Emit the Gen8 3DSTATE_SBE; the body is built by the shared helper. */
static inline void
gen8_3DSTATE_SBE(struct ilo_builder *builder,
                 const struct ilo_shader_state *fs,
                 int sprite_coord_mode)
{
   const uint8_t cmd_len = 4;
   uint32_t *body;

   ILO_DEV_ASSERT(builder->dev, 8, 8);

   ilo_builder_batch_pointer(builder, cmd_len, &body);
   gen8_internal_3dstate_sbe(builder, cmd_len, body, fs, sprite_coord_mode);
}
 
/* Emit the Gen8 3DSTATE_SBE_SWIZ; the body is built by the shared helper. */
static inline void
gen8_3DSTATE_SBE_SWIZ(struct ilo_builder *builder,
                      const struct ilo_shader_state *fs)
{
   const uint8_t cmd_len = 11;
   uint32_t *body;

   ILO_DEV_ASSERT(builder->dev, 8, 8);

   ilo_builder_batch_pointer(builder, cmd_len, &body);
   gen8_internal_3dstate_sbe_swiz(builder, cmd_len, body, fs);
}
 
/*
 * Emit the Gen8 3DSTATE_RASTER from precomputed rasterizer state,
 * including the depth-offset (polygon offset) parameters.
 */
static inline void
gen8_3DSTATE_RASTER(struct ilo_builder *builder,
                    const struct ilo_rasterizer_sf *sf)
{
   const uint8_t cmd_len = 5;
   uint32_t *body;

   ILO_DEV_ASSERT(builder->dev, 8, 8);

   ilo_builder_batch_pointer(builder, cmd_len, &body);

   body[0] = GEN8_RENDER_CMD(3D, 3DSTATE_RASTER) | (cmd_len - 2);
   body[1] = sf->dw_raster;
   body[2] = sf->dw_depth_offset_const;
   body[3] = sf->dw_depth_offset_scale;
   body[4] = sf->dw_depth_offset_clamp;
}
 
/*
 * Emit the Gen6 3DSTATE_WM, combining the FS kernel CSO payload with the
 * rasterizer's WM payload and per-draw flags (dual-source blend, kill).
 */
static inline void
gen6_3DSTATE_WM(struct ilo_builder *builder,
                const struct ilo_shader_state *fs,
                const struct ilo_rasterizer_state *rasterizer,
                bool dual_blend, bool cc_may_kill)
{
   const uint8_t cmd_len = 9;
   const int num_samples = 1;
   const struct ilo_shader_cso *cso;
   uint32_t dw2, dw4, dw5, dw6, *dw;

   ILO_DEV_ASSERT(builder->dev, 6, 6);

   /* dwords baked at shader-compile time */
   cso = ilo_shader_get_kernel_cso(fs);
   dw2 = cso->payload[0];
   dw4 = cso->payload[1];
   dw5 = cso->payload[2];
   dw6 = cso->payload[3];

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 248:
    *
    *     "This bit (Statistics Enable) must be disabled if either of these
    *      bits is set: Depth Buffer Clear , Hierarchical Depth Buffer Resolve
    *      Enable or Depth Buffer Resolve Enable."
    */
   dw4 |= GEN6_WM_DW4_STATISTICS;

   /* the FS must be dispatched if alpha test / discard can kill pixels */
   if (cc_may_kill)
      dw5 |= GEN6_WM_DW5_PS_KILL_PIXEL | GEN6_WM_DW5_PS_DISPATCH_ENABLE;

   if (dual_blend)
      dw5 |= GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND;

   dw5 |= rasterizer->wm.payload[0];

   dw6 |= rasterizer->wm.payload[1];

   if (num_samples > 1) {
      dw6 |= rasterizer->wm.dw_msaa_rast |
             rasterizer->wm.dw_msaa_disp;
   }

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2);
   dw[1] = ilo_shader_get_kernel_offset(fs);
   dw[2] = dw2;
   dw[3] = 0; /* scratch */
   dw[4] = dw4;
   dw[5] = dw5;
   dw[6] = dw6;
   dw[7] = 0; /* kernel 1 */
   dw[8] = 0; /* kernel 2 */
}
 
/*
 * Emit a minimal Gen6 3DSTATE_WM for a HiZ operation (clear/resolve):
 * no FS kernel, only the HiZ op bits and a valid max-thread count.
 */
static inline void
gen6_hiz_3DSTATE_WM(struct ilo_builder *builder, uint32_t hiz_op)
{
   const uint8_t cmd_len = 9;
   const int max_threads = (builder->dev->gt == 2) ? 80 : 40;
   uint32_t *body;
   int i;

   ILO_DEV_ASSERT(builder->dev, 6, 6);

   ilo_builder_batch_pointer(builder, cmd_len, &body);

   for (i = 1; i < cmd_len; i++)
      body[i] = 0;

   body[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2);
   body[4] = hiz_op;
   /* honor the valid range even if dispatching is disabled */
   body[5] = (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT;
}
 
/*
 * Emit the Gen7/7.5 3DSTATE_WM.  On Gen7 the kernel pointers moved to
 * 3DSTATE_PS, so this command only carries rasterizer/FS fixed-function
 * state.
 */
static inline void
gen7_3DSTATE_WM(struct ilo_builder *builder,
                const struct ilo_shader_state *fs,
                const struct ilo_rasterizer_state *rasterizer,
                bool cc_may_kill)
{
   const uint8_t cmd_len = 3;
   const int num_samples = 1;
   const struct ilo_shader_cso *cso;
   uint32_t dw1, dw2, *dw;

   ILO_DEV_ASSERT(builder->dev, 7, 7.5);

   /* see rasterizer_init_wm_gen7() */
   dw1 = rasterizer->wm.payload[0];
   dw2 = rasterizer->wm.payload[1];

   /* see fs_init_cso_gen7() */
   cso = ilo_shader_get_kernel_cso(fs);
   dw1 |= cso->payload[3];

   dw1 |= GEN7_WM_DW1_STATISTICS;

   /* the FS must be dispatched if alpha test / discard can kill pixels */
   if (cc_may_kill)
      dw1 |= GEN7_WM_DW1_PS_DISPATCH_ENABLE | GEN7_WM_DW1_PS_KILL_PIXEL;

   if (num_samples > 1) {
      dw1 |= rasterizer->wm.dw_msaa_rast;
      dw2 |= rasterizer->wm.dw_msaa_disp;
   }

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2);
   dw[1] = dw1;
   dw[2] = dw2;
}
 
/*
 * Emit the Gen8 3DSTATE_WM, merging the rasterizer payload with the FS
 * barycentric interpolation bits.  The two sources must not overlap.
 */
static inline void
gen8_3DSTATE_WM(struct ilo_builder *builder,
                const struct ilo_shader_state *fs,
                const struct ilo_rasterizer_state *rasterizer)
{
   const uint8_t cmd_len = 2;
   const struct ilo_shader_cso *cso;
   uint32_t dw1, interps, *dw;

   ILO_DEV_ASSERT(builder->dev, 8, 8);

   /* see rasterizer_get_wm_gen8() */
   dw1 = rasterizer->wm.payload[0];
   dw1 |= GEN7_WM_DW1_STATISTICS;

   /* see fs_init_cso_gen8() */
   cso = ilo_shader_get_kernel_cso(fs);
   interps = cso->payload[4];

   /* the rasterizer payload must not already set interpolation bits */
   assert(!(dw1 & interps));

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2);
   dw[1] = dw1 | interps;
}
 
/* Emit a minimal Gen7 3DSTATE_WM carrying only the HiZ operation bits. */
static inline void
gen7_hiz_3DSTATE_WM(struct ilo_builder *builder, uint32_t hiz_op)
{
   const uint8_t cmd_len = 3;
   uint32_t *body;

   ILO_DEV_ASSERT(builder->dev, 7, 7.5);

   ilo_builder_batch_pointer(builder, cmd_len, &body);

   body[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2);
   body[1] = hiz_op;
   body[2] = 0;
}
 
/*
 * Emit the Gen8 3DSTATE_WM_DEPTH_STENCIL from the precomputed
 * depth/stencil/alpha CSO payload.
 */
static inline void
gen8_3DSTATE_WM_DEPTH_STENCIL(struct ilo_builder *builder,
                              const struct ilo_dsa_state *dsa)
{
   const uint8_t cmd_len = 3;
   uint32_t *body;

   ILO_DEV_ASSERT(builder->dev, 8, 8);

   ilo_builder_batch_pointer(builder, cmd_len, &body);

   body[0] = GEN8_RENDER_CMD(3D, 3DSTATE_WM_DEPTH_STENCIL) | (cmd_len - 2);
   body[1] = dsa->payload[0];
   body[2] = dsa->payload[1];
}
 
/*
 * Emit the Gen8 3DSTATE_WM_HZ_OP to trigger a HiZ operation (clear or
 * resolve) over a width x height rectangle with the given sample count.
 */
static inline void
gen8_3DSTATE_WM_HZ_OP(struct ilo_builder *builder, uint32_t op,
                      uint16_t width, uint16_t height, int sample_count)
{
   const uint8_t cmd_len = 5;
   /* mask covers all samples; OR with 0x1 keeps it non-zero for 0/1 samples */
   const uint32_t sample_mask = ((1 << sample_count) - 1) | 0x1;
   uint32_t dw1, *dw;

   ILO_DEV_ASSERT(builder->dev, 8, 8);

   dw1 = op;

   switch (sample_count) {
   case 0:
   case 1:
      dw1 |= GEN8_WM_HZ_DW1_NUMSAMPLES_1;
      break;
   case 2:
      dw1 |= GEN8_WM_HZ_DW1_NUMSAMPLES_2;
      break;
   case 4:
      dw1 |= GEN8_WM_HZ_DW1_NUMSAMPLES_4;
      break;
   case 8:
      dw1 |= GEN8_WM_HZ_DW1_NUMSAMPLES_8;
      break;
   case 16:
      dw1 |= GEN8_WM_HZ_DW1_NUMSAMPLES_16;
      break;
   default:
      assert(!"unsupported sample count");
      dw1 |= GEN8_WM_HZ_DW1_NUMSAMPLES_1;
      break;
   }

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_WM_HZ_OP) | (cmd_len - 2);
   dw[1] = dw1;
   dw[2] = 0;
   /* exclusive? */
   dw[3] = height << 16 | width;
   dw[4] = sample_mask;
}
 
/* Emit a 3DSTATE_WM_HZ_OP with an all-zero body to end HiZ operations. */
static inline void
gen8_disable_3DSTATE_WM_HZ_OP(struct ilo_builder *builder)
{
   const uint8_t cmd_len = 5;
   uint32_t *payload;
   int i;

   ILO_DEV_ASSERT(builder->dev, 8, 8);

   ilo_builder_batch_pointer(builder, cmd_len, &payload);

   payload[0] = GEN8_RENDER_CMD(3D, 3DSTATE_WM_HZ_OP) | (cmd_len - 2);
   for (i = 1; i < cmd_len; i++)
      payload[i] = 0;
}
 
/* Emit a Gen8 3DSTATE_WM_CHROMAKEY with chroma keying disabled. */
static inline void
gen8_3DSTATE_WM_CHROMAKEY(struct ilo_builder *builder)
{
   const uint8_t cmd_len = 2;
   uint32_t *body;

   ILO_DEV_ASSERT(builder->dev, 8, 8);

   ilo_builder_batch_pointer(builder, cmd_len, &body);

   body[0] = GEN8_RENDER_CMD(3D, 3DSTATE_WM_CHROMAKEY) | (cmd_len - 2);
   body[1] = 0;
}
 
/*
 * Emit the Gen7/7.5 3DSTATE_PS, pointing the hardware at the FS kernel
 * and applying the compile-time CSO dwords plus the dual-blend flag.
 */
static inline void
gen7_3DSTATE_PS(struct ilo_builder *builder,
                const struct ilo_shader_state *fs,
                bool dual_blend)
{
   const uint8_t cmd_len = 8;
   const struct ilo_shader_cso *cso;
   uint32_t dw2, dw4, dw5, *dw;

   ILO_DEV_ASSERT(builder->dev, 7, 7.5);

   /* see fs_init_cso_gen7() */
   cso = ilo_shader_get_kernel_cso(fs);
   dw2 = cso->payload[0];
   dw4 = cso->payload[1];
   dw5 = cso->payload[2];

   if (dual_blend)
      dw4 |= GEN7_PS_DW4_DUAL_SOURCE_BLEND;

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2);
   dw[1] = ilo_shader_get_kernel_offset(fs);
   dw[2] = dw2;
   dw[3] = 0; /* scratch */
   dw[4] = dw4;
   dw[5] = dw5;
   dw[6] = 0; /* kernel 1 */
   dw[7] = 0; /* kernel 2 */
}
 
/*
 * Emit a Gen7/7.5 3DSTATE_PS with no kernel.  A dispatch-mode bit and a
 * valid max-thread count must still be programmed to keep the GPU happy.
 */
static inline void
gen7_disable_3DSTATE_PS(struct ilo_builder *builder)
{
   const uint8_t cmd_len = 8;
   int max_threads;
   uint32_t dw4, *dw;

   ILO_DEV_ASSERT(builder->dev, 7, 7.5);

   /* GPU hangs if none of the dispatch enable bits is set */
   dw4 = GEN6_PS_DISPATCH_8 << GEN7_PS_DW4_DISPATCH_MODE__SHIFT;

   /* per-GT thread limits; see brwCreateContext() */
   switch (ilo_dev_gen(builder->dev)) {
   case ILO_GEN(7.5):
      max_threads = (builder->dev->gt == 3) ? 408 :
                    (builder->dev->gt == 2) ? 204 : 102;
      dw4 |= (max_threads - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT;
      break;
   case ILO_GEN(7):
   default:
      max_threads = (builder->dev->gt == 2) ? 172 : 48;
      dw4 |= (max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT;
      break;
   }

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2);
   dw[1] = 0;
   dw[2] = 0;
   dw[3] = 0;
   dw[4] = dw4;
   dw[5] = 0;
   dw[6] = 0;
   dw[7] = 0;
}
 
/*
 * Emit the Gen8 3DSTATE_PS (12 dwords; kernel/scratch pointers are
 * 64-bit, hence the extra zero dwords compared to Gen7).
 */
static inline void
gen8_3DSTATE_PS(struct ilo_builder *builder,
                const struct ilo_shader_state *fs)
{
   const uint8_t cmd_len = 12;
   const struct ilo_shader_cso *cso;
   uint32_t dw3, dw6, dw7, *dw;

   ILO_DEV_ASSERT(builder->dev, 8, 8);

   /* see fs_init_cso_gen8() */
   cso = ilo_shader_get_kernel_cso(fs);
   dw3 = cso->payload[0];
   dw6 = cso->payload[1];
   dw7 = cso->payload[2];

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2);
   dw[1] = ilo_shader_get_kernel_offset(fs);
   dw[2] = 0;          /* upper half of the 64-bit kernel pointer */
   dw[3] = dw3;
   dw[4] = 0; /* scratch */
   dw[5] = 0;
   dw[6] = dw6;
   dw[7] = dw7;
   dw[8] = 0; /* kernel 1 */
   dw[9] = 0;
   dw[10] = 0; /* kernel 2 */
   dw[11] = 0;
}
 
/*
 * Emit the Gen8 3DSTATE_PS_EXTRA, combining the FS CSO bits with per-draw
 * pixel-kill and per-sample dispatch flags.
 */
static inline void
gen8_3DSTATE_PS_EXTRA(struct ilo_builder *builder,
                      const struct ilo_shader_state *fs,
                      bool cc_may_kill, bool per_sample)
{
   const uint8_t cmd_len = 2;
   const struct ilo_shader_cso *cso;
   uint32_t dw1, *dw;

   ILO_DEV_ASSERT(builder->dev, 8, 8);

   /* see fs_init_cso_gen8() */
   cso = ilo_shader_get_kernel_cso(fs);
   dw1 = cso->payload[3];

   /* the FS must be dispatched if alpha test / discard can kill pixels */
   if (cc_may_kill)
      dw1 |= GEN8_PSX_DW1_DISPATCH_ENABLE | GEN8_PSX_DW1_KILL_PIXEL;
   if (per_sample)
      dw1 |= GEN8_PSX_DW1_PER_SAMPLE;

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_PS_EXTRA) | (cmd_len - 2);
   dw[1] = dw1;
}
 
/*
 * Emit the Gen8 3DSTATE_PS_BLEND, deriving the blend/alpha-test dwords
 * from the blend CSO, the framebuffer's per-RT capabilities, and the DSA
 * state.
 */
static inline void
gen8_3DSTATE_PS_BLEND(struct ilo_builder *builder,
                      const struct ilo_blend_state *blend,
                      const struct ilo_fb_state *fb,
                      const struct ilo_dsa_state *dsa)
{
   const uint8_t cmd_len = 2;
   uint32_t dw1, *dw;

   ILO_DEV_ASSERT(builder->dev, 8, 8);

   dw1 = 0;
   /* alpha-to-coverage only makes sense with multisampling */
   if (blend->alpha_to_coverage && fb->num_samples > 1)
      dw1 |= GEN8_PS_BLEND_DW1_ALPHA_TO_COVERAGE;

   if (fb->state.nr_cbufs && fb->state.cbufs[0]) {
      /* blending capability is determined by the format of RT 0 */
      const struct ilo_fb_blend_caps *caps = &fb->blend_caps[0];

      dw1 |= GEN8_PS_BLEND_DW1_WRITABLE_RT;
      if (caps->can_blend) {
         /* pick the variant with dst alpha forced to one when required */
         if (caps->dst_alpha_forced_one)
            dw1 |= blend->dw_ps_blend_dst_alpha_forced_one;
         else
            dw1 |= blend->dw_ps_blend;
      }

      if (caps->can_alpha_test)
         dw1 |= dsa->dw_ps_blend_alpha;
   } else {
      /* no color buffer: alpha test bits still apply */
      dw1 |= dsa->dw_ps_blend_alpha;
   }

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_PS_BLEND) | (cmd_len - 2);
   dw[1] = dw1;
}
 
/* Thin wrapper: emit 3DSTATE_CONSTANT_PS via the shared Gen6 helper. */
static inline void
gen6_3DSTATE_CONSTANT_PS(struct ilo_builder *builder,
                         const uint32_t *bufs, const int *sizes,
                         int num_bufs)
{
   const uint32_t subop = GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_PS;

   gen6_3dstate_constant(builder, subop, bufs, sizes, num_bufs);
}
 
/* Thin wrapper: emit 3DSTATE_CONSTANT_PS via the shared Gen7 helper. */
static inline void
gen7_3DSTATE_CONSTANT_PS(struct ilo_builder *builder,
                         const uint32_t *bufs, const int *sizes,
                         int num_bufs)
{
   const uint32_t subop = GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_PS;

   gen7_3dstate_constant(builder, subop, bufs, sizes, num_bufs);
}
 
/* Point the PS stage at its binding table via the shared pointer helper. */
static inline void
gen7_3DSTATE_BINDING_TABLE_POINTERS_PS(struct ilo_builder *builder,
                                       uint32_t binding_table)
{
   const uint32_t subop =
      GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_PS;

   ILO_DEV_ASSERT(builder->dev, 7, 8);

   gen7_3dstate_pointer(builder, subop, binding_table);
}
 
/* Point the PS stage at its sampler state via the shared pointer helper. */
static inline void
gen7_3DSTATE_SAMPLER_STATE_POINTERS_PS(struct ilo_builder *builder,
                                       uint32_t sampler_state)
{
   const uint32_t subop =
      GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_PS;

   ILO_DEV_ASSERT(builder->dev, 7, 8);

   gen7_3dstate_pointer(builder, subop, sampler_state);
}
 
/*
 * Emit 3DSTATE_MULTISAMPLE for Gen6/Gen7, setting the sample count,
 * pixel location, and the sample position pattern.  The command grows
 * from 3 to 4 dwords on Gen7 (8x patterns need a second pattern dword).
 */
static inline void
gen6_3DSTATE_MULTISAMPLE(struct ilo_builder *builder,
                         int num_samples, const uint32_t *pattern,
                         bool pixel_location_center)
{
   const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? 4 : 3;
   uint32_t dw1, dw2, dw3, *dw;

   ILO_DEV_ASSERT(builder->dev, 6, 7.5);

   dw1 = (pixel_location_center) ? GEN6_MULTISAMPLE_DW1_PIXLOC_CENTER :
                                   GEN6_MULTISAMPLE_DW1_PIXLOC_UL_CORNER;

   switch (num_samples) {
   case 0:
   case 1:
      /* single-sampled: no position pattern needed */
      dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1;
      dw2 = 0;
      dw3 = 0;
      break;
   case 4:
      dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_4;
      dw2 = pattern[0];
      dw3 = 0;
      break;
   case 8:
      /* 8x MSAA requires Gen7+ and two pattern dwords */
      assert(ilo_dev_gen(builder->dev) >= ILO_GEN(7));
      dw1 |= GEN7_MULTISAMPLE_DW1_NUMSAMPLES_8;
      dw2 = pattern[0];
      dw3 = pattern[1];
      break;
   default:
      assert(!"unsupported sample count");
      dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1;
      dw2 = 0;
      dw3 = 0;
      break;
   }

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_MULTISAMPLE) | (cmd_len - 2);
   dw[1] = dw1;
   dw[2] = dw2;
   if (ilo_dev_gen(builder->dev) >= ILO_GEN(7))
      dw[3] = dw3;
}
 
/*
 * Emit the Gen8 3DSTATE_MULTISAMPLE.  Sample positions moved to the
 * separate 3DSTATE_SAMPLE_PATTERN command, so only count and pixel
 * location remain here.
 */
static inline void
gen8_3DSTATE_MULTISAMPLE(struct ilo_builder *builder,
                         int num_samples,
                         bool pixel_location_center)
{
   const uint8_t cmd_len = 2;
   uint32_t dw1, *dw;

   ILO_DEV_ASSERT(builder->dev, 8, 8);

   dw1 = (pixel_location_center) ? GEN6_MULTISAMPLE_DW1_PIXLOC_CENTER :
                                   GEN6_MULTISAMPLE_DW1_PIXLOC_UL_CORNER;

   switch (num_samples) {
   case 0:
   case 1:
      dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1;
      break;
   case 2:
      dw1 |= GEN8_MULTISAMPLE_DW1_NUMSAMPLES_2;
      break;
   case 4:
      dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_4;
      break;
   case 8:
      dw1 |= GEN7_MULTISAMPLE_DW1_NUMSAMPLES_8;
      break;
   case 16:
      dw1 |= GEN8_MULTISAMPLE_DW1_NUMSAMPLES_16;
      break;
   default:
      assert(!"unsupported sample count");
      dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1;
      break;
   }

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_MULTISAMPLE) | (cmd_len - 2);
   dw[1] = dw1;
}
 
/*
 * Emit the Gen8 3DSTATE_SAMPLE_PATTERN, programming sample positions for
 * all sample counts at once.  Note the multi-dword patterns are written
 * high-dword first (dw[1] holds pattern_16x[3]).
 */
static inline void
gen8_3DSTATE_SAMPLE_PATTERN(struct ilo_builder *builder,
                            const uint32_t *pattern_1x,
                            const uint32_t *pattern_2x,
                            const uint32_t *pattern_4x,
                            const uint32_t *pattern_8x,
                            const uint32_t *pattern_16x)
{
   const uint8_t cmd_len = 9;
   uint32_t *dw;

   ILO_DEV_ASSERT(builder->dev, 8, 8);

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_SAMPLE_PATTERN) | (cmd_len - 2);
   dw[1] = pattern_16x[3];
   dw[2] = pattern_16x[2];
   dw[3] = pattern_16x[1];
   dw[4] = pattern_16x[0];
   dw[5] = pattern_8x[1];
   dw[6] = pattern_8x[0];
   dw[7] = pattern_4x[0];
   /* 1x and 2x patterns share the last dword */
   dw[8] = pattern_1x[0] << 16 |
           pattern_2x[0];
}
 
/*
 * Emit the Gen6 3DSTATE_SAMPLE_MASK.  Gen6 supports at most 4x MSAA, so
 * only the low four mask bits are valid.
 */
static inline void
gen6_3DSTATE_SAMPLE_MASK(struct ilo_builder *builder,
                         unsigned sample_mask)
{
   const uint8_t cmd_len = 2;
   uint32_t *body;

   ILO_DEV_ASSERT(builder->dev, 6, 6);

   ilo_builder_batch_pointer(builder, cmd_len, &body);

   body[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLE_MASK) | (cmd_len - 2);
   body[1] = sample_mask & 0xf;
}
 
/*
 * Emit the Gen7+ 3DSTATE_SAMPLE_MASK, clamping the mask to the bits
 * valid for the current sample count (bit 0 always stays usable).
 */
static inline void
gen7_3DSTATE_SAMPLE_MASK(struct ilo_builder *builder,
                         unsigned sample_mask,
                         int num_samples)
{
   const uint8_t cmd_len = 2;
   const unsigned valid_mask = ((1 << num_samples) - 1) | 0x1;
   uint32_t *dw;

   ILO_DEV_ASSERT(builder->dev, 7, 8);

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 294:
    *
    *     "If Number of Multisamples is NUMSAMPLES_1, bits 7:1 of this field
    *      (Sample Mask) must be zero.
    *
    *      If Number of Multisamples is NUMSAMPLES_4, bits 7:4 of this field
    *      must be zero."
    */
   sample_mask &= valid_mask;

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLE_MASK) | (cmd_len - 2);
   dw[1] = sample_mask;
}
 
/*
 * Emit 3DSTATE_DRAWING_RECTANGLE for the rectangle (x, y) to
 * (x+width-1, y+height-1), clamped to the per-generation coordinate
 * limit (13 bits on Gen6, 14 bits on Gen7+).
 */
static inline void
gen6_3DSTATE_DRAWING_RECTANGLE(struct ilo_builder *builder,
                               unsigned x, unsigned y,
                               unsigned width, unsigned height)
{
   const uint8_t cmd_len = 4;
   unsigned xmax = x + width - 1;
   unsigned ymax = y + height - 1;
   unsigned rect_limit;
   uint32_t *dw;

   ILO_DEV_ASSERT(builder->dev, 6, 8);

   if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) {
      rect_limit = 16383;
   }
   else {
      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 230:
       *
       *     "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min)
       *      must be an even number"
       */
      assert(y % 2 == 0);

      rect_limit = 8191;
   }

   /* clamp all four coordinates into the valid hardware range */
   if (x > rect_limit) x = rect_limit;
   if (y > rect_limit) y = rect_limit;
   if (xmax > rect_limit) xmax = rect_limit;
   if (ymax > rect_limit) ymax = rect_limit;

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_DRAWING_RECTANGLE) | (cmd_len - 2);
   dw[1] = y << 16 | x;
   dw[2] = ymax << 16 | xmax;
   /*
    * There is no need to set the origin.  It is intended to support front
    * buffer rendering.
    */
   dw[3] = 0;
}
 
/*
 * Emit 3DSTATE_POLY_STIPPLE_OFFSET.  Both offsets are 5-bit fields:
 * x in bits 13:8, y in bits 4:0.
 */
static inline void
gen6_3DSTATE_POLY_STIPPLE_OFFSET(struct ilo_builder *builder,
                                 int x_offset, int y_offset)
{
   const uint8_t cmd_len = 2;
   uint32_t *body;

   ILO_DEV_ASSERT(builder->dev, 6, 8);

   assert(x_offset >= 0 && x_offset <= 31);
   assert(y_offset >= 0 && y_offset <= 31);

   ilo_builder_batch_pointer(builder, cmd_len, &body);

   body[0] = GEN6_RENDER_CMD(3D, 3DSTATE_POLY_STIPPLE_OFFSET) |
             (cmd_len - 2);
   body[1] = (x_offset << 8) | y_offset;
}
 
/*
 * Emit 3DSTATE_POLY_STIPPLE_PATTERN: one dword per row of the 32x32
 * polygon stipple pattern.
 */
static inline void
gen6_3DSTATE_POLY_STIPPLE_PATTERN(struct ilo_builder *builder,
                                  const struct pipe_poly_stipple *pattern)
{
   const uint8_t cmd_len = 33;
   uint32_t *body;
   int row;

   ILO_DEV_ASSERT(builder->dev, 6, 8);

   ilo_builder_batch_pointer(builder, cmd_len, &body);

   body[0] = GEN6_RENDER_CMD(3D, 3DSTATE_POLY_STIPPLE_PATTERN) |
             (cmd_len - 2);

   STATIC_ASSERT(Elements(pattern->stipple) == 32);
   for (row = 0; row < 32; row++)
      body[row + 1] = pattern->stipple[row];
}
 
/*
 * Emit 3DSTATE_LINE_STIPPLE with the 16-bit pattern and repeat factor.
 * The inverse repeat count is a fixed-point value whose format differs
 * by generation: U1.16 on Gen7+, U1.13 on Gen6.
 */
static inline void
gen6_3DSTATE_LINE_STIPPLE(struct ilo_builder *builder,
                          unsigned pattern, unsigned factor)
{
   const uint8_t cmd_len = 3;
   unsigned inverse;
   uint32_t *dw;

   ILO_DEV_ASSERT(builder->dev, 6, 8);

   assert((pattern & 0xffff) == pattern);
   assert(factor >= 1 && factor <= 256);

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_LINE_STIPPLE) | (cmd_len - 2);
   dw[1] = pattern;

   if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) {
      /* in U1.16 */
      inverse = 65536 / factor;

      dw[2] = inverse << GEN7_LINE_STIPPLE_DW2_INVERSE_REPEAT_COUNT__SHIFT |
              factor;
   }
   else {
      /* in U1.13 */
      inverse = 8192 / factor;

      dw[2] = inverse << GEN6_LINE_STIPPLE_DW2_INVERSE_REPEAT_COUNT__SHIFT |
              factor;
   }
}
 
/* Emit 3DSTATE_AA_LINE_PARAMETERS with zero bias and cap-bias values. */
static inline void
gen6_3DSTATE_AA_LINE_PARAMETERS(struct ilo_builder *builder)
{
   const uint8_t cmd_len = 3;
   const uint32_t body[3] = {
      GEN6_RENDER_CMD(3D, 3DSTATE_AA_LINE_PARAMETERS) | (cmd_len - 2),
      0 << GEN6_AA_LINE_DW1_BIAS__SHIFT | 0,
      0 << GEN6_AA_LINE_DW2_CAP_BIAS__SHIFT | 0,
   };

   ILO_DEV_ASSERT(builder->dev, 6, 8);

   ilo_builder_batch_write(builder, cmd_len, body);
}
 
/*
 * Emit 3DSTATE_DEPTH_BUFFER for Gen6 through Gen8.  Gen8 grows the
 * command to 8 dwords (64-bit surface address) and moves MOCS to DW5;
 * the BO address in DW2(+3) is patched in via a relocation.
 */
static inline void
gen6_3DSTATE_DEPTH_BUFFER(struct ilo_builder *builder,
                          const struct ilo_zs_surface *zs,
                          bool aligned_8x4)
{
   const uint32_t cmd = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ?
      GEN7_RENDER_CMD(3D, 3DSTATE_DEPTH_BUFFER) :
      GEN6_RENDER_CMD(3D, 3DSTATE_DEPTH_BUFFER);
   const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 8 : 7;
   uint32_t *dw;
   unsigned pos;

   ILO_DEV_ASSERT(builder->dev, 6, 8);

   /* position is needed below for the address relocation */
   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = cmd | (cmd_len - 2);
   dw[1] = zs->payload[0];
   dw[2] = 0;           /* surface address; filled by the reloc */

   /* see ilo_gpe_init_zs_surface() */
   if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
      dw[3] = 0;        /* upper 32 bits of the surface address */
      /* aligned_8x4 selects the 8x4-aligned dimensions (HiZ ops) */
      dw[4] = (aligned_8x4) ? zs->dw_aligned_8x4 : zs->payload[2];
      dw[5] = zs->payload[3];
      dw[6] = zs->payload[4];
      dw[7] = zs->payload[5];

      dw[5] |= builder->mocs << GEN8_DEPTH_DW5_MOCS__SHIFT;

      if (zs->bo) {
         ilo_builder_batch_reloc64(builder, pos + 2, zs->bo,
               zs->payload[1], INTEL_RELOC_WRITE);
      }
   } else {
      dw[3] = (aligned_8x4) ? zs->dw_aligned_8x4 : zs->payload[2];
      dw[4] = zs->payload[3];
      dw[5] = zs->payload[4];
      dw[6] = zs->payload[5];

      /* MOCS lives in DW4 on Gen7, DW6 on Gen6 */
      if (ilo_dev_gen(builder->dev) >= ILO_GEN(7))
         dw[4] |= builder->mocs << GEN7_DEPTH_DW4_MOCS__SHIFT;
      else
         dw[6] |= builder->mocs << GEN6_DEPTH_DW6_MOCS__SHIFT;

      if (zs->bo) {
         ilo_builder_batch_reloc(builder, pos + 2, zs->bo,
               zs->payload[1], INTEL_RELOC_WRITE);
      }
   }
}
 
/*
 * Emit 3DSTATE_STENCIL_BUFFER for a separate stencil surface.  Gen8 uses
 * a 5-dword form with a 64-bit address and a qpitch dword; the BO address
 * in DW2(+3) is patched in via a relocation.
 */
static inline void
gen6_3DSTATE_STENCIL_BUFFER(struct ilo_builder *builder,
                            const struct ilo_zs_surface *zs)
{
   const uint32_t cmd = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ?
      GEN7_RENDER_CMD(3D, 3DSTATE_STENCIL_BUFFER) :
      GEN6_RENDER_CMD(3D, 3DSTATE_STENCIL_BUFFER);
   const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 5 : 3;
   uint32_t *dw;
   unsigned pos;

   ILO_DEV_ASSERT(builder->dev, 6, 8);

   /* position is needed below for the address relocation */
   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = cmd | (cmd_len - 2);
   /* see ilo_gpe_init_zs_surface() */
   dw[1] = zs->payload[6];
   dw[2] = 0;           /* surface address; filled by the reloc */

   if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
      dw[1] |= builder->mocs << GEN8_STENCIL_DW1_MOCS__SHIFT;

      dw[3] = 0;        /* upper 32 bits of the surface address */
      dw[4] = zs->payload[8];

      if (zs->separate_s8_bo) {
         ilo_builder_batch_reloc64(builder, pos + 2,
               zs->separate_s8_bo, zs->payload[7], INTEL_RELOC_WRITE);
      }
   } else {
      dw[1] |= builder->mocs << GEN6_STENCIL_DW1_MOCS__SHIFT;

      if (zs->separate_s8_bo) {
         ilo_builder_batch_reloc(builder, pos + 2,
               zs->separate_s8_bo, zs->payload[7], INTEL_RELOC_WRITE);
      }
   }
}
 
/*
 * Emit 3DSTATE_HIER_DEPTH_BUFFER for the HiZ surface.  Mirrors the
 * stencil-buffer command: Gen8 uses a 5-dword form with a 64-bit address
 * and a qpitch dword; the BO address is patched in via a relocation.
 */
static inline void
gen6_3DSTATE_HIER_DEPTH_BUFFER(struct ilo_builder *builder,
                               const struct ilo_zs_surface *zs)
{
   const uint32_t cmd = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ?
      GEN7_RENDER_CMD(3D, 3DSTATE_HIER_DEPTH_BUFFER) :
      GEN6_RENDER_CMD(3D, 3DSTATE_HIER_DEPTH_BUFFER);
   const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 5 : 3;
   uint32_t *dw;
   unsigned pos;

   ILO_DEV_ASSERT(builder->dev, 6, 8);

   /* position is needed below for the address relocation */
   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = cmd | (cmd_len - 2);
   /* see ilo_gpe_init_zs_surface() */
   dw[1] = zs->payload[9];
   dw[2] = 0;           /* surface address; filled by the reloc */

   if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
      dw[1] |= builder->mocs << GEN8_HIZ_DW1_MOCS__SHIFT;

      dw[3] = 0;        /* upper 32 bits of the surface address */
      dw[4] = zs->payload[11];

      if (zs->hiz_bo) {
         ilo_builder_batch_reloc64(builder, pos + 2,
               zs->hiz_bo, zs->payload[10], INTEL_RELOC_WRITE);
      }
   } else {
      dw[1] |= builder->mocs << GEN6_HIZ_DW1_MOCS__SHIFT;

      if (zs->hiz_bo) {
         ilo_builder_batch_reloc(builder, pos + 2,
               zs->hiz_bo, zs->payload[10], INTEL_RELOC_WRITE);
      }
   }
}
 
static inline void
gen6_3DSTATE_CLEAR_PARAMS(struct ilo_builder *builder,
                          uint32_t clear_val)
{
   /* Program the depth clear value; Gen6 keeps the valid bit in DW0. */
   const uint8_t len = 2;
   uint32_t *body;

   ILO_DEV_ASSERT(builder->dev, 6, 6);

   ilo_builder_batch_pointer(builder, len, &body);

   body[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CLEAR_PARAMS) |
             GEN6_CLEAR_PARAMS_DW0_VALID |
             (len - 2);
   body[1] = clear_val;
}
 
static inline void
gen7_3DSTATE_CLEAR_PARAMS(struct ilo_builder *builder,
                          uint32_t clear_val)
{
   /* Program the depth clear value; Gen7+ moved the valid bit to DW2. */
   const uint8_t len = 3;
   uint32_t *body;

   ILO_DEV_ASSERT(builder->dev, 7, 8);

   ilo_builder_batch_pointer(builder, len, &body);

   body[0] = GEN7_RENDER_CMD(3D, 3DSTATE_CLEAR_PARAMS) | (len - 2);
   body[1] = clear_val;
   body[2] = GEN7_CLEAR_PARAMS_DW2_VALID;
}
 
static inline void
gen6_3DSTATE_VIEWPORT_STATE_POINTERS(struct ilo_builder *builder,
                                     uint32_t clip_viewport,
                                     uint32_t sf_viewport,
                                     uint32_t cc_viewport)
{
   /*
    * Point the CLIP, SF, and CC units at their viewport state, flagging all
    * three as changed.
    */
   const uint8_t len = 4;
   const uint32_t head = GEN6_RENDER_CMD(3D, 3DSTATE_VIEWPORT_STATE_POINTERS) |
                         GEN6_VP_PTR_DW0_CLIP_CHANGED |
                         GEN6_VP_PTR_DW0_SF_CHANGED |
                         GEN6_VP_PTR_DW0_CC_CHANGED |
                         (len - 2);
   uint32_t *body;

   ILO_DEV_ASSERT(builder->dev, 6, 6);

   ilo_builder_batch_pointer(builder, len, &body);

   body[0] = head;
   body[1] = clip_viewport;
   body[2] = sf_viewport;
   body[3] = cc_viewport;
}
 
static inline void
gen6_3DSTATE_SCISSOR_STATE_POINTERS(struct ilo_builder *builder,
                                    uint32_t scissor_rect)
{
   /* Point the hardware at the SCISSOR_RECT array in the dynamic buffer. */
   const uint8_t len = 2;
   uint32_t *body;

   ILO_DEV_ASSERT(builder->dev, 6, 8);

   ilo_builder_batch_pointer(builder, len, &body);

   body[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SCISSOR_STATE_POINTERS) |
             (len - 2);
   body[1] = scissor_rect;
}
 
static inline void
gen6_3DSTATE_CC_STATE_POINTERS(struct ilo_builder *builder,
                               uint32_t blend_state,
                               uint32_t depth_stencil_state,
                               uint32_t color_calc_state)
{
   /*
    * Bind BLEND_STATE, DEPTH_STENCIL_STATE, and COLOR_CALC_STATE, flagging
    * each pointer as changed.
    */
   const uint8_t len = 4;
   uint32_t *body;

   ILO_DEV_ASSERT(builder->dev, 6, 6);

   ilo_builder_batch_pointer(builder, len, &body);

   body[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CC_STATE_POINTERS) | (len - 2);
   body[1] = GEN6_CC_PTR_DW1_BLEND_CHANGED | blend_state;
   body[2] = GEN6_CC_PTR_DW2_ZS_CHANGED | depth_stencil_state;
   body[3] = GEN6_CC_PTR_DW3_CC_CHANGED | color_calc_state;
}
 
/* Bind the combined SF_CLIP_VIEWPORT array (Gen7+ replaces the separate
 * Gen6 SF and CLIP viewport pointers). */
static inline void
gen7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(struct ilo_builder *builder,
                                             uint32_t sf_clip_viewport)
{
   ILO_DEV_ASSERT(builder->dev, 7, 8);

   gen7_3dstate_pointer(builder,
         GEN7_RENDER_OPCODE_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP,
         sf_clip_viewport);
}
 
/* Bind the CC_VIEWPORT array (min/max depth per viewport). */
static inline void
gen7_3DSTATE_VIEWPORT_STATE_POINTERS_CC(struct ilo_builder *builder,
                                        uint32_t cc_viewport)
{
   ILO_DEV_ASSERT(builder->dev, 7, 8);

   gen7_3dstate_pointer(builder,
         GEN7_RENDER_OPCODE_3DSTATE_VIEWPORT_STATE_POINTERS_CC,
         cc_viewport);
}
 
/* Bind COLOR_CALC_STATE on Gen7+. */
static inline void
gen7_3DSTATE_CC_STATE_POINTERS(struct ilo_builder *builder,
                               uint32_t color_calc_state)
{
   ILO_DEV_ASSERT(builder->dev, 7, 8);

   /* Gen8 requires the pointer-valid bit in bit 0 of the offset */
   if (ilo_dev_gen(builder->dev) >= ILO_GEN(8))
      color_calc_state |= 1;

   gen7_3dstate_pointer(builder,
         GEN6_RENDER_OPCODE_3DSTATE_CC_STATE_POINTERS, color_calc_state);
}
 
/* Bind DEPTH_STENCIL_STATE on Gen7+. */
static inline void
gen7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(struct ilo_builder *builder,
                                          uint32_t depth_stencil_state)
{
   ILO_DEV_ASSERT(builder->dev, 7, 8);

   gen7_3dstate_pointer(builder,
         GEN7_RENDER_OPCODE_3DSTATE_DEPTH_STENCIL_STATE_POINTERS,
         depth_stencil_state);
}
 
/* Bind BLEND_STATE on Gen7+. */
static inline void
gen7_3DSTATE_BLEND_STATE_POINTERS(struct ilo_builder *builder,
                                  uint32_t blend_state)
{
   ILO_DEV_ASSERT(builder->dev, 7, 8);

   /* Gen8 requires the pointer-valid bit in bit 0 of the offset */
   if (ilo_dev_gen(builder->dev) >= ILO_GEN(8))
      blend_state |= 1;

   gen7_3dstate_pointer(builder,
         GEN7_RENDER_OPCODE_3DSTATE_BLEND_STATE_POINTERS,
         blend_state);
}
 
static inline uint32_t
gen6_CLIP_VIEWPORT(struct ilo_builder *builder,
                   const struct ilo_viewport_cso *viewports,
                   unsigned num_viewports)
{
   /*
    * Upload the CLIP_VIEWPORT array (guard-band extents) to the dynamic
    * state buffer and return its offset.
    *
    * From the Sandy Bridge PRM, volume 2 part 1, page 193:
    *
    *     "The viewport-related state is stored as an array of up to 16
    *      elements..."
    */
   const int align = 32;
   const int len = 4 * num_viewports;
   uint32_t offset, *body;
   unsigned vp;

   ILO_DEV_ASSERT(builder->dev, 6, 6);

   assert(num_viewports && num_viewports <= 16);

   offset = ilo_builder_dynamic_pointer(builder,
         ILO_BUILDER_ITEM_CLIP_VIEWPORT, align, len, &body);

   for (vp = 0; vp < num_viewports; vp++, body += 4) {
      body[0] = fui(viewports[vp].min_gbx);
      body[1] = fui(viewports[vp].max_gbx);
      body[2] = fui(viewports[vp].min_gby);
      body[3] = fui(viewports[vp].max_gby);
   }

   return offset;
}
 
static inline uint32_t
gen6_SF_VIEWPORT(struct ilo_builder *builder,
                 const struct ilo_viewport_cso *viewports,
                 unsigned num_viewports)
{
   /*
    * Upload the SF_VIEWPORT array (viewport transform matrix elements) to
    * the dynamic state buffer and return its offset.
    *
    * From the Sandy Bridge PRM, volume 2 part 1, page 262:
    *
    *     "The viewport-specific state used by the SF unit (SF_VIEWPORT) is
    *      stored as an array of up to 16 elements..."
    */
   const int align = 32;
   const int len = 8 * num_viewports;
   uint32_t offset, *body;
   unsigned vp;

   ILO_DEV_ASSERT(builder->dev, 6, 6);

   assert(num_viewports && num_viewports <= 16);

   offset = ilo_builder_dynamic_pointer(builder,
         ILO_BUILDER_ITEM_SF_VIEWPORT, align, len, &body);

   for (vp = 0; vp < num_viewports; vp++, body += 8) {
      body[0] = fui(viewports[vp].m00);
      body[1] = fui(viewports[vp].m11);
      body[2] = fui(viewports[vp].m22);
      body[3] = fui(viewports[vp].m30);
      body[4] = fui(viewports[vp].m31);
      body[5] = fui(viewports[vp].m32);
      /* the last two dwords of each element are reserved */
      body[6] = 0;
      body[7] = 0;
   }

   return offset;
}
 
static inline uint32_t
gen7_SF_CLIP_VIEWPORT(struct ilo_builder *builder,
                      const struct ilo_viewport_cso *viewports,
                      unsigned num_viewports)
{
   /*
    * Upload the combined SF_CLIP_VIEWPORT array and return its offset.
    *
    * From the Ivy Bridge PRM, volume 2 part 1, page 270:
    *
    *     "The viewport-specific state used by both the SF and CL units
    *      (SF_CLIP_VIEWPORT) is stored as an array of up to 16 elements,
    *      each of which contains the DWords described below.  The start of
    *      each element is spaced 16 DWords apart.  The location of first
    *      element of the array, as specified by both Pointer to SF_VIEWPORT
    *      and Pointer to CLIP_VIEWPORT, is aligned to a 64-byte boundary."
    */
   const int align = 64;
   const int len = 16 * num_viewports;
   const bool has_extents = (ilo_dev_gen(builder->dev) >= ILO_GEN(8));
   uint32_t offset, *body;
   unsigned vp;

   ILO_DEV_ASSERT(builder->dev, 7, 8);

   assert(num_viewports && num_viewports <= 16);

   offset = ilo_builder_dynamic_pointer(builder,
         ILO_BUILDER_ITEM_SF_VIEWPORT, align, len, &body);

   for (vp = 0; vp < num_viewports; vp++, body += 16) {
      const struct ilo_viewport_cso *cso = &viewports[vp];

      /* viewport transform matrix */
      body[0] = fui(cso->m00);
      body[1] = fui(cso->m11);
      body[2] = fui(cso->m22);
      body[3] = fui(cso->m30);
      body[4] = fui(cso->m31);
      body[5] = fui(cso->m32);
      body[6] = 0;
      body[7] = 0;

      /* guard band extents */
      body[8] = fui(cso->min_gbx);
      body[9] = fui(cso->max_gbx);
      body[10] = fui(cso->min_gby);
      body[11] = fui(cso->max_gby);

      if (has_extents) {
         /* Gen8 adds the viewport x/y extents (max values are inclusive) */
         body[12] = fui(cso->min_x);
         body[13] = fui(cso->max_x - 1.0f);
         body[14] = fui(cso->min_y);
         body[15] = fui(cso->max_y - 1.0f);
      } else {
         body[12] = 0;
         body[13] = 0;
         body[14] = 0;
         body[15] = 0;
      }
   }

   return offset;
}
 
static inline uint32_t
gen6_CC_VIEWPORT(struct ilo_builder *builder,
                 const struct ilo_viewport_cso *viewports,
                 unsigned num_viewports)
{
   /*
    * Upload the CC_VIEWPORT array (min/max depth) and return its offset.
    *
    * From the Sandy Bridge PRM, volume 2 part 1, page 385:
    *
    *     "The viewport state is stored as an array of up to 16 elements..."
    */
   const int align = 32;
   const int len = 2 * num_viewports;
   uint32_t offset, *body;
   unsigned vp;

   ILO_DEV_ASSERT(builder->dev, 6, 8);

   assert(num_viewports && num_viewports <= 16);

   offset = ilo_builder_dynamic_pointer(builder,
         ILO_BUILDER_ITEM_CC_VIEWPORT, align, len, &body);

   for (vp = 0; vp < num_viewports; vp++, body += 2) {
      body[0] = fui(viewports[vp].min_z);
      body[1] = fui(viewports[vp].max_z);
   }

   return offset;
}
 
static inline uint32_t
gen6_SCISSOR_RECT(struct ilo_builder *builder,
                  const struct ilo_scissor_state *scissor,
                  unsigned num_viewports)
{
   /*
    * Upload the pre-packed SCISSOR_RECT array and return its offset.
    *
    * From the Sandy Bridge PRM, volume 2 part 1, page 263:
    *
    *     "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
    *      stored as an array of up to 16 elements..."
    */
   const int align = 32;
   const int len = 2 * num_viewports;

   ILO_DEV_ASSERT(builder->dev, 6, 8);

   assert(num_viewports && num_viewports <= 16);
   assert(Elements(scissor->payload) >= len);

   return ilo_builder_dynamic_write(builder, ILO_BUILDER_ITEM_SCISSOR_RECT,
         align, len, scissor->payload);
}
 
static inline uint32_t
gen6_COLOR_CALC_STATE(struct ilo_builder *builder,
                      const struct pipe_stencil_ref *stencil_ref,
                      ubyte alpha_ref,
                      const struct pipe_blend_color *blend_color)
{
   /*
    * Upload COLOR_CALC_STATE (stencil reference values, UNORM8 alpha
    * reference, and blend constant color) and return its offset.
    */
   const int align = 64;
   const int len = 6;
   uint32_t offset, *body;

   ILO_DEV_ASSERT(builder->dev, 6, 8);

   offset = ilo_builder_dynamic_pointer(builder,
         ILO_BUILDER_ITEM_COLOR_CALC, align, len, &body);

   body[0] = GEN6_CC_DW0_ALPHATEST_UNORM8 |
             stencil_ref->ref_value[0] << 24 |
             stencil_ref->ref_value[1] << 16;
   body[1] = alpha_ref;
   body[2] = fui(blend_color->color[0]);
   body[3] = fui(blend_color->color[1]);
   body[4] = fui(blend_color->color[2]);
   body[5] = fui(blend_color->color[3]);

   return offset;
}
 
static inline uint32_t
gen6_DEPTH_STENCIL_STATE(struct ilo_builder *builder,
                         const struct ilo_dsa_state *dsa)
{
   /* Upload the pre-packed DEPTH_STENCIL_STATE and return its offset. */
   const int align = 64;
   const int len = 3;

   ILO_DEV_ASSERT(builder->dev, 6, 7.5);

   STATIC_ASSERT(Elements(dsa->payload) >= len);

   return ilo_builder_dynamic_write(builder, ILO_BUILDER_ITEM_DEPTH_STENCIL,
         align, len, dsa->payload);
}
 
/*
 * Upload the BLEND_STATE array (two dwords per render target) and return
 * its offset, or 0 when no state needs to be uploaded.  Per-RT bits from
 * the blend CSO are combined with framebuffer capabilities and the alpha
 * test state from the DSA.
 */
static inline uint32_t
gen6_BLEND_STATE(struct ilo_builder *builder,
                 const struct ilo_blend_state *blend,
                 const struct ilo_fb_state *fb,
                 const struct ilo_dsa_state *dsa)
{
   const int state_align = 64;
   int state_len;
   uint32_t state_offset, *dw;
   unsigned num_targets, i;

   ILO_DEV_ASSERT(builder->dev, 6, 7.5);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 376:
    *
    *     "The blend state is stored as an array of up to 8 elements..."
    */
   num_targets = fb->state.nr_cbufs;
   assert(num_targets <= 8);

   if (!num_targets) {
      if (!dsa->dw_blend_alpha)
         return 0;
      /* to be able to reference alpha func */
      num_targets = 1;
   }

   state_len = 2 * num_targets;

   state_offset = ilo_builder_dynamic_pointer(builder,
         ILO_BUILDER_ITEM_BLEND, state_align, state_len, &dw);

   for (i = 0; i < num_targets; i++) {
      const struct ilo_blend_cso *cso = &blend->cso[i];

      dw[0] = cso->payload[0];
      dw[1] = cso->payload[1] | blend->dw_shared;

      if (i < fb->state.nr_cbufs && fb->state.cbufs[i]) {
         const struct ilo_fb_blend_caps *caps = &fb->blend_caps[i];

         /* enable blending only when the surface format allows it */
         if (caps->can_blend) {
            if (caps->dst_alpha_forced_one)
               dw[0] |= cso->dw_blend_dst_alpha_forced_one;
            else
               dw[0] |= cso->dw_blend;
         }

         if (caps->can_logicop)
            dw[1] |= blend->dw_logicop;

         if (caps->can_alpha_test)
            dw[1] |= dsa->dw_blend_alpha;
      } else {
         /* no bound surface: disable all channel writes */
         dw[1] |= GEN6_RT_DW1_WRITE_DISABLE_A |
                  GEN6_RT_DW1_WRITE_DISABLE_R |
                  GEN6_RT_DW1_WRITE_DISABLE_G |
                  GEN6_RT_DW1_WRITE_DISABLE_B |
                  dsa->dw_blend_alpha;
      }

      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 356:
       *
       *     "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage
       *      Dither both must be disabled."
       *
       * There is no such limitation on GEN7, or for AlphaToOne.  But GL
       * requires that anyway.
       */
      if (fb->num_samples > 1)
         dw[1] |= blend->dw_alpha_mod;

      dw += 2;
   }

   return state_offset;
}
 
/*
 * Upload the Gen8 BLEND_STATE (one shared dword followed by two dwords per
 * render target) and return its offset.
 */
static inline uint32_t
gen8_BLEND_STATE(struct ilo_builder *builder,
                 const struct ilo_blend_state *blend,
                 const struct ilo_fb_state *fb,
                 const struct ilo_dsa_state *dsa)
{
   const int state_align = 64;
   const int state_len = 1 + 2 * fb->state.nr_cbufs;
   uint32_t state_offset, *dw;
   unsigned i;

   ILO_DEV_ASSERT(builder->dev, 8, 8);

   assert(fb->state.nr_cbufs <= 8);

   state_offset = ilo_builder_dynamic_pointer(builder,
         ILO_BUILDER_ITEM_BLEND, state_align, state_len, &dw);

   /* dword 0 holds state shared by all render targets */
   dw[0] = blend->dw_shared;
   if (fb->num_samples > 1)
      dw[0] |= blend->dw_alpha_mod;
   if (!fb->state.nr_cbufs || fb->blend_caps[0].can_alpha_test)
      dw[0] |= dsa->dw_blend_alpha;
   dw++;

   for (i = 0; i < fb->state.nr_cbufs; i++) {
      const struct ilo_fb_blend_caps *caps = &fb->blend_caps[i];
      const struct ilo_blend_cso *cso = &blend->cso[i];

      dw[0] = cso->payload[0];
      dw[1] = cso->payload[1];

      if (fb->state.cbufs[i]) {
         /* enable blending only when the surface format allows it */
         if (caps->can_blend) {
            if (caps->dst_alpha_forced_one)
               dw[0] |= cso->dw_blend_dst_alpha_forced_one;
            else
               dw[0] |= cso->dw_blend;
         }

         if (caps->can_logicop)
            dw[1] |= blend->dw_logicop;
      } else {
         /* no bound surface: disable all channel writes */
         dw[0] |= GEN8_RT_DW0_WRITE_DISABLE_A |
                  GEN8_RT_DW0_WRITE_DISABLE_R |
                  GEN8_RT_DW0_WRITE_DISABLE_G |
                  GEN8_RT_DW0_WRITE_DISABLE_B;
      }

      dw += 2;
   }

   return state_offset;
}
 
#endif /* ILO_BUILDER_3D_BOTTOM_H */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h
0,0 → 1,1899
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2014 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_BUILDER_3D_TOP_H
#define ILO_BUILDER_3D_TOP_H
 
#include "genhw/genhw.h"
#include "../ilo_resource.h"
#include "../ilo_shader.h"
#include "intel_winsys.h"
 
#include "ilo_core.h"
#include "ilo_dev.h"
#include "ilo_state_3d.h"
#include "ilo_builder.h"
 
/*
 * Emit 3DSTATE_URB, partitioning the URB between the VS and GS stages.
 * Total sizes and per-entry sizes are in bytes; they are converted to
 * 1024-bit rows and clamped to the hardware limits.
 */
static inline void
gen6_3DSTATE_URB(struct ilo_builder *builder,
                 int vs_total_size, int gs_total_size,
                 int vs_entry_size, int gs_entry_size)
{
   const uint8_t cmd_len = 3;
   const int row_size = 128; /* 1024 bits */
   int vs_alloc_size, gs_alloc_size;
   int vs_num_entries, gs_num_entries;
   uint32_t *dw;

   ILO_DEV_ASSERT(builder->dev, 6, 6);

   /* in 1024-bit URB rows */
   vs_alloc_size = (vs_entry_size + row_size - 1) / row_size;
   gs_alloc_size = (gs_entry_size + row_size - 1) / row_size;

   /* the valid range is [1, 5] */
   if (!vs_alloc_size)
      vs_alloc_size = 1;
   if (!gs_alloc_size)
      gs_alloc_size = 1;
   assert(vs_alloc_size <= 5 && gs_alloc_size <= 5);

   /* the valid range is [24, 256] in multiples of 4 */
   vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3;
   if (vs_num_entries > 256)
      vs_num_entries = 256;
   assert(vs_num_entries >= 24);

   /* the valid range is [0, 256] in multiples of 4 */
   gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3;
   if (gs_num_entries > 256)
      gs_num_entries = 256;

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_URB) | (cmd_len - 2);
   /* entry size fields are encoded as (rows - 1) */
   dw[1] = (vs_alloc_size - 1) << GEN6_URB_DW1_VS_ENTRY_SIZE__SHIFT |
           vs_num_entries << GEN6_URB_DW1_VS_ENTRY_COUNT__SHIFT;
   dw[2] = gs_num_entries << GEN6_URB_DW2_GS_ENTRY_COUNT__SHIFT |
           (gs_alloc_size - 1) << GEN6_URB_DW2_GS_ENTRY_SIZE__SHIFT;
}
 
/*
 * Emit one of 3DSTATE_PUSH_CONSTANT_ALLOC_{VS,HS,DS,GS,PS}, carving out a
 * region of the push constant space for a shader stage.  offset and size
 * are in bytes; they are converted to 1KB units and clamped to the
 * hardware limits.
 */
static inline void
gen7_3dstate_push_constant_alloc(struct ilo_builder *builder,
                                 int subop, int offset, int size)
{
   const uint32_t cmd = GEN6_RENDER_TYPE_RENDER |
                        GEN6_RENDER_SUBTYPE_3D |
                        subop;
   const uint8_t cmd_len = 2;
   /* HSW GT3 and Gen8 double the available push constant space */
   const int slice_count = ((ilo_dev_gen(builder->dev) == ILO_GEN(7.5) &&
                             builder->dev->gt == 3) ||
                            ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 2 : 1;
   uint32_t *dw;
   int end;

   ILO_DEV_ASSERT(builder->dev, 7, 8);

   /* VS, HS, DS, GS, and PS variants */
   assert(subop >= GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_VS &&
          subop <= GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_PS);

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 68:
    *
    *     "(A table that says the maximum size of each constant buffer is
    *      16KB")
    *
    * From the Ivy Bridge PRM, volume 2 part 1, page 115:
    *
    *     "The sum of the Constant Buffer Offset and the Constant Buffer Size
    *      may not exceed the maximum value of the Constant Buffer Size."
    *
    * Thus, the valid range of buffer end is [0KB, 16KB].
    */
   end = (offset + size) / 1024;
   if (end > 16 * slice_count) {
      assert(!"invalid constant buffer end");
      end = 16 * slice_count;
   }

   /* the valid range of buffer offset is [0KB, 15KB] */
   offset = (offset + 1023) / 1024;
   if (offset > 15 * slice_count) {
      assert(!"invalid constant buffer offset");
      offset = 15 * slice_count;
   }

   if (offset > end) {
      assert(!size);
      offset = end;
   }

   /* the valid range of buffer size is [0KB, 15KB] */
   size = end - offset;
   if (size > 15 * slice_count) {
      assert(!"invalid constant buffer size");
      size = 15 * slice_count;
   }

   assert(offset % slice_count == 0 && size % slice_count == 0);

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = cmd | (cmd_len - 2);
   dw[1] = offset << GEN7_PCB_ALLOC_DW1_OFFSET__SHIFT |
           size;
}
 
/* Allocate push constant space for the VS stage; offset/size in bytes. */
static inline void
gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(struct ilo_builder *builder,
                                    int offset, int size)
{
   gen7_3dstate_push_constant_alloc(builder,
         GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_VS, offset, size);
}
 
/* Allocate push constant space for the HS stage; offset/size in bytes. */
static inline void
gen7_3DSTATE_PUSH_CONSTANT_ALLOC_HS(struct ilo_builder *builder,
                                    int offset, int size)
{
   gen7_3dstate_push_constant_alloc(builder,
         GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_HS, offset, size);
}
 
/* Allocate push constant space for the DS stage; offset/size in bytes. */
static inline void
gen7_3DSTATE_PUSH_CONSTANT_ALLOC_DS(struct ilo_builder *builder,
                                    int offset, int size)
{
   gen7_3dstate_push_constant_alloc(builder,
         GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_DS, offset, size);
}
 
/* Allocate push constant space for the GS stage; offset/size in bytes. */
static inline void
gen7_3DSTATE_PUSH_CONSTANT_ALLOC_GS(struct ilo_builder *builder,
                                    int offset, int size)
{
   gen7_3dstate_push_constant_alloc(builder,
         GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_GS, offset, size);
}
 
/* Allocate push constant space for the PS stage; offset/size in bytes. */
static inline void
gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(struct ilo_builder *builder,
                                    int offset, int size)
{
   gen7_3dstate_push_constant_alloc(builder,
         GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_PS, offset, size);
}
 
/*
 * Emit one of 3DSTATE_URB_{VS,HS,DS,GS}, assigning a region of the URB to
 * a shader stage.  offset/size are in bytes, entry_size is in bytes; the
 * entry count is derived from the region size and clamped to per-gen,
 * per-GT limits.
 */
static inline void
gen7_3dstate_urb(struct ilo_builder *builder,
                 int subop, int offset, int size,
                 int entry_size)
{
   const uint32_t cmd = GEN6_RENDER_TYPE_RENDER |
                        GEN6_RENDER_SUBTYPE_3D |
                        subop;
   const uint8_t cmd_len = 2;
   const int row_size = 64; /* 512 bits */
   int alloc_size, num_entries, min_entries, max_entries;
   uint32_t *dw;

   ILO_DEV_ASSERT(builder->dev, 7, 8);

   /* VS, HS, DS, and GS variants */
   assert(subop >= GEN7_RENDER_OPCODE_3DSTATE_URB_VS &&
          subop <= GEN7_RENDER_OPCODE_3DSTATE_URB_GS);

   /* in multiples of 8KB */
   assert(offset % 8192 == 0);
   offset /= 8192;

   /* in multiple of 512-bit rows */
   alloc_size = (entry_size + row_size - 1) / row_size;
   if (!alloc_size)
      alloc_size = 1;

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 34:
    *
    *     "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may
    *      cause performance to decrease due to banking in the URB. Element
    *      sizes of 16 to 20 should be programmed with six 512-bit URB rows."
    */
   if (subop == GEN7_RENDER_OPCODE_3DSTATE_URB_VS && alloc_size == 5)
      alloc_size = 6;

   /* in multiples of 8 */
   num_entries = (size / row_size / alloc_size) & ~7;

   /* clamp the entry count to the per-gen/per-GT limits for this stage */
   switch (subop) {
   case GEN7_RENDER_OPCODE_3DSTATE_URB_VS:
      switch (ilo_dev_gen(builder->dev)) {
      case ILO_GEN(8):
         max_entries = 2560;
         min_entries = 64;
         break;
      case ILO_GEN(7.5):
         max_entries = (builder->dev->gt >= 2) ? 1664 : 640;
         min_entries = (builder->dev->gt >= 2) ? 64 : 32;
         break;
      case ILO_GEN(7):
      default:
         max_entries = (builder->dev->gt == 2) ? 704 : 512;
         min_entries = 32;
         break;
      }

      assert(num_entries >= min_entries);
      if (num_entries > max_entries)
         num_entries = max_entries;
      break;
   case GEN7_RENDER_OPCODE_3DSTATE_URB_HS:
      max_entries = (builder->dev->gt == 2) ? 64 : 32;
      if (num_entries > max_entries)
         num_entries = max_entries;
      break;
   case GEN7_RENDER_OPCODE_3DSTATE_URB_DS:
      if (num_entries)
         assert(num_entries >= 138);
      break;
   case GEN7_RENDER_OPCODE_3DSTATE_URB_GS:
      switch (ilo_dev_gen(builder->dev)) {
      case ILO_GEN(8):
         max_entries = 960;
         break;
      case ILO_GEN(7.5):
         max_entries = (builder->dev->gt >= 2) ? 640 : 256;
         break;
      case ILO_GEN(7):
      default:
         max_entries = (builder->dev->gt == 2) ? 320 : 192;
         break;
      }

      if (num_entries > max_entries)
         num_entries = max_entries;
      break;
   default:
      break;
   }

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = cmd | (cmd_len - 2);
   /* entry size field is encoded as (rows - 1) */
   dw[1] = offset << GEN7_URB_DW1_OFFSET__SHIFT |
           (alloc_size - 1) << GEN7_URB_DW1_ENTRY_SIZE__SHIFT |
           num_entries;
}
 
/* Assign a URB region to the VS stage; offset/size/entry_size in bytes. */
static inline void
gen7_3DSTATE_URB_VS(struct ilo_builder *builder,
                    int offset, int size, int entry_size)
{
   gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_VS,
         offset, size, entry_size);
}
 
/* Assign a URB region to the HS stage; offset/size/entry_size in bytes. */
static inline void
gen7_3DSTATE_URB_HS(struct ilo_builder *builder,
                    int offset, int size, int entry_size)
{
   gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_HS,
         offset, size, entry_size);
}
 
/* Assign a URB region to the DS stage; offset/size/entry_size in bytes. */
static inline void
gen7_3DSTATE_URB_DS(struct ilo_builder *builder,
                    int offset, int size, int entry_size)
{
   gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_DS,
         offset, size, entry_size);
}
 
/* Assign a URB region to the GS stage; offset/size/entry_size in bytes. */
static inline void
gen7_3DSTATE_URB_GS(struct ilo_builder *builder,
                    int offset, int size, int entry_size)
{
   gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_GS,
         offset, size, entry_size);
}
 
static inline void
gen75_3DSTATE_VF(struct ilo_builder *builder,
                 bool enable_cut_index,
                 uint32_t cut_index)
{
   /* Program the primitive-restart (cut) index; Gen7.5+ only. */
   const uint8_t len = 2;
   uint32_t head = GEN75_RENDER_CMD(3D, 3DSTATE_VF) | (len - 2);
   uint32_t *body;

   ILO_DEV_ASSERT(builder->dev, 7.5, 8);

   if (enable_cut_index)
      head |= GEN75_VF_DW0_CUT_INDEX_ENABLE;

   ilo_builder_batch_pointer(builder, len, &body);

   body[0] = head;
   body[1] = cut_index;
}
 
static inline void
gen6_3DSTATE_VF_STATISTICS(struct ilo_builder *builder,
                           bool enable)
{
   /* Single-dword command toggling vertex fetch statistics gathering. */
   const uint8_t len = 1;
   const uint32_t body = GEN6_RENDER_CMD(SINGLE_DW, 3DSTATE_VF_STATISTICS) |
                         (enable ? 1 : 0);

   ILO_DEV_ASSERT(builder->dev, 6, 8);

   ilo_builder_batch_write(builder, len, &body);
}
 
/**
 * Translate a pipe primitive type to the matching hardware primitive type.
 */
static inline int
gen6_3d_translate_pipe_prim(unsigned prim)
{
   static const int mapping[ILO_PRIM_MAX] = {
      [PIPE_PRIM_POINTS]                   = GEN6_3DPRIM_POINTLIST,
      [PIPE_PRIM_LINES]                    = GEN6_3DPRIM_LINELIST,
      [PIPE_PRIM_LINE_LOOP]                = GEN6_3DPRIM_LINELOOP,
      [PIPE_PRIM_LINE_STRIP]               = GEN6_3DPRIM_LINESTRIP,
      [PIPE_PRIM_TRIANGLES]                = GEN6_3DPRIM_TRILIST,
      [PIPE_PRIM_TRIANGLE_STRIP]           = GEN6_3DPRIM_TRISTRIP,
      [PIPE_PRIM_TRIANGLE_FAN]             = GEN6_3DPRIM_TRIFAN,
      [PIPE_PRIM_QUADS]                    = GEN6_3DPRIM_QUADLIST,
      [PIPE_PRIM_QUAD_STRIP]               = GEN6_3DPRIM_QUADSTRIP,
      [PIPE_PRIM_POLYGON]                  = GEN6_3DPRIM_POLYGON,
      [PIPE_PRIM_LINES_ADJACENCY]          = GEN6_3DPRIM_LINELIST_ADJ,
      [PIPE_PRIM_LINE_STRIP_ADJACENCY]     = GEN6_3DPRIM_LINESTRIP_ADJ,
      [PIPE_PRIM_TRIANGLES_ADJACENCY]      = GEN6_3DPRIM_TRILIST_ADJ,
      [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = GEN6_3DPRIM_TRISTRIP_ADJ,
      [ILO_PRIM_RECTANGLES]                = GEN6_3DPRIM_RECTLIST,
   };

   /* a zero entry means the pipe primitive has no hardware equivalent */
   assert(mapping[prim]);

   return mapping[prim];
}
 
static inline void
gen8_3DSTATE_VF_TOPOLOGY(struct ilo_builder *builder, unsigned pipe_prim)
{
   /* Gen8 programs the primitive topology separately from 3DPRIMITIVE. */
   const uint8_t len = 2;
   uint32_t *body;

   ILO_DEV_ASSERT(builder->dev, 8, 8);

   ilo_builder_batch_pointer(builder, len, &body);

   body[0] = GEN8_RENDER_CMD(3D, 3DSTATE_VF_TOPOLOGY) | (len - 2);
   body[1] = gen6_3d_translate_pipe_prim(pipe_prim);
}
 
static inline void
gen8_3DSTATE_VF_INSTANCING(struct ilo_builder *builder,
                           int vb_index, uint32_t step_rate)
{
   /*
    * Per-vertex-buffer instancing control; Gen8 moved the instance divisor
    * out of VERTEX_BUFFER_STATE into this command.
    */
   const uint8_t len = 3;
   uint32_t second = vb_index;
   uint32_t *body;

   ILO_DEV_ASSERT(builder->dev, 8, 8);

   /* a non-zero step rate implies instanced data */
   if (step_rate)
      second |= GEN8_INSTANCING_DW1_ENABLE;

   ilo_builder_batch_pointer(builder, len, &body);

   body[0] = GEN8_RENDER_CMD(3D, 3DSTATE_VF_INSTANCING) | (len - 2);
   body[1] = second;
   body[2] = step_rate;
}
 
/*
 * Emit 3DSTATE_VF_SGVS, enabling system-generated vertex values.  When
 * enabled, VertexID (vid) and/or InstanceID (iid) are injected into the
 * vertex element (vid_ve/iid_ve) and component (vid_comp/iid_comp) given.
 */
static inline void
gen8_3DSTATE_VF_SGVS(struct ilo_builder *builder,
                     bool vid_enable, int vid_ve, int vid_comp,
                     bool iid_enable, int iid_ve, int iid_comp)
{
   const uint8_t cmd_len = 2;
   uint32_t *dw;

   ILO_DEV_ASSERT(builder->dev, 8, 8);

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_VF_SGVS) | (cmd_len - 2);
   dw[1] = 0;

   if (iid_enable) {
      /*
       * Bug fix: the InstanceID fields must be programmed from iid_comp
       * and iid_ve; the original code used vid_comp/vid_ve here, leaving
       * the iid_ve/iid_comp parameters unused.
       */
      dw[1] |= GEN8_SGVS_DW1_IID_ENABLE |
               iid_comp << GEN8_SGVS_DW1_IID_VE_COMP__SHIFT |
               iid_ve << GEN8_SGVS_DW1_IID_VE_INDEX__SHIFT;
   }

   if (vid_enable) {
      dw[1] |= GEN8_SGVS_DW1_VID_ENABLE |
               vid_comp << GEN8_SGVS_DW1_VID_VE_COMP__SHIFT |
               vid_ve << GEN8_SGVS_DW1_VID_VE_INDEX__SHIFT;
   }
}
 
/*
 * Emit 3DSTATE_VERTEX_BUFFERS with one VERTEX_BUFFER_STATE (4 dwords) per
 * hardware vertex buffer.  No-op when no vertex buffers are bound.
 */
static inline void
gen6_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder,
                            const struct ilo_ve_state *ve,
                            const struct ilo_vb_state *vb)
{
   uint8_t cmd_len;
   uint32_t *dw;
   unsigned pos, hw_idx;

   ILO_DEV_ASSERT(builder->dev, 6, 8);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 82:
    *
    *     "From 1 to 33 VBs can be specified..."
    */
   assert(ve->vb_count <= 33);

   if (!ve->vb_count)
      return;

   cmd_len = 1 + 4 * ve->vb_count;
   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_BUFFERS) | (cmd_len - 2);
   dw++;
   pos++;

   /*
    * Bug fix: dw/pos must advance by 4 for EVERY element, including null
    * vertex buffers.  The original code advanced them at the bottom of the
    * loop body and `continue`d past the advance in the null-vb path, so a
    * later real vertex buffer overwrote the null entry and the tail of the
    * command was left unwritten.  Advancing in the for-clause covers every
    * path.
    */
   for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++, dw += 4, pos += 4) {
      const unsigned instance_divisor = ve->instance_divisors[hw_idx];
      const unsigned pipe_idx = ve->vb_mapping[hw_idx];
      const struct pipe_vertex_buffer *cso = &vb->states[pipe_idx];

      dw[0] = hw_idx << GEN6_VB_DW0_INDEX__SHIFT;

      if (ilo_dev_gen(builder->dev) >= ILO_GEN(8))
         dw[0] |= builder->mocs << GEN8_VB_DW0_MOCS__SHIFT;
      else
         dw[0] |= builder->mocs << GEN6_VB_DW0_MOCS__SHIFT;

      if (ilo_dev_gen(builder->dev) >= ILO_GEN(7))
         dw[0] |= GEN7_VB_DW0_ADDR_MODIFIED;

      if (instance_divisor)
         dw[0] |= GEN6_VB_DW0_ACCESS_INSTANCEDATA;
      else
         dw[0] |= GEN6_VB_DW0_ACCESS_VERTEXDATA;

      /* use null vb if there is no buffer or the stride is out of range */
      if (!cso->buffer || cso->stride > 2048) {
         dw[0] |= GEN6_VB_DW0_IS_NULL;
         dw[1] = 0;
         dw[2] = 0;
         dw[3] = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ?
            0 : instance_divisor;

         continue;
      }

      dw[0] |= cso->stride << GEN6_VB_DW0_PITCH__SHIFT;

      if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
         const struct ilo_buffer *buf = ilo_buffer(cso->buffer);
         const uint32_t start_offset = cso->buffer_offset;

         /* dw[1..2]: 64-bit buffer address; dw[3]: buffer size */
         ilo_builder_batch_reloc64(builder, pos + 1,
               buf->bo, start_offset, 0);
         dw[3] = buf->bo_size;
      } else {
         const struct ilo_buffer *buf = ilo_buffer(cso->buffer);
         const uint32_t start_offset = cso->buffer_offset;
         const uint32_t end_offset = buf->bo_size - 1;

         dw[3] = instance_divisor;

         /* dw[1]: start address; dw[2]: inclusive end address */
         ilo_builder_batch_reloc(builder, pos + 1, buf->bo, start_offset, 0);
         ilo_builder_batch_reloc(builder, pos + 2, buf->bo, end_offset, 0);
      }
   }
}
 
/*
 * Emit 3DSTATE_VERTEX_BUFFERS binding hardware VB slot 0 to a range of the
 * batch buffer itself, where user vertex data was previously written.
 * The user vertex buffer must be uploaded with gen6_user_vertex_buffer().
 */
static inline void
gen6_user_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder,
                                 uint32_t vb_begin, uint32_t vb_end,
                                 uint32_t stride)
{
   const struct ilo_builder_writer *bat =
      &builder->writers[ILO_BUILDER_WRITER_BATCH];
   const uint8_t cmd_len = 1 + 4;
   uint32_t *dw;
   unsigned pos;

   ILO_DEV_ASSERT(builder->dev, 6, 7.5);

   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_BUFFERS) | (cmd_len - 2);
   dw++;
   pos++;

   /* VERTEX_BUFFER_STATE */
   dw[0] = 0 << GEN6_VB_DW0_INDEX__SHIFT |
           GEN6_VB_DW0_ACCESS_VERTEXDATA |
           stride << GEN6_VB_DW0_PITCH__SHIFT;
   if (ilo_dev_gen(builder->dev) >= ILO_GEN(7))
      dw[0] |= GEN7_VB_DW0_ADDR_MODIFIED;

   /* dw[1..2] (start/end addresses) are filled in by the relocations */
   dw[3] = 0;

   ilo_builder_batch_reloc(builder, pos + 1, bat->bo, vb_begin, 0);
   ilo_builder_batch_reloc(builder, pos + 2, bat->bo, vb_end, 0);
}
 
/*
 * Emit 3DSTATE_VERTEX_ELEMENTS from the pre-packed VE CSOs, optionally
 * prepending a source-less element and/or replacing the last element with
 * the edge-flag variant.
 */
static inline void
gen6_3DSTATE_VERTEX_ELEMENTS(struct ilo_builder *builder,
                             const struct ilo_ve_state *ve)
{
   uint8_t cmd_len;
   uint32_t *dw;
   unsigned i;

   ILO_DEV_ASSERT(builder->dev, 6, 8);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 92:
    *
    *     "At least one VERTEX_ELEMENT_STATE structure must be included."
    *
    * From the Sandy Bridge PRM, volume 2 part 1, page 93:
    *
    *     "Up to 34 (DevSNB+) vertex elements are supported."
    */
   assert(ve->count + ve->prepend_nosrc_cso >= 1);
   assert(ve->count + ve->prepend_nosrc_cso <= 34);

   /* each VERTEX_ELEMENT_STATE is exactly two dwords */
   STATIC_ASSERT(Elements(ve->cso[0].payload) == 2);

   cmd_len = 1 + 2 * (ve->count + ve->prepend_nosrc_cso);
   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_ELEMENTS) | (cmd_len - 2);
   dw++;

   if (ve->prepend_nosrc_cso) {
      memcpy(dw, ve->nosrc_cso.payload, sizeof(ve->nosrc_cso.payload));
      dw += 2;
   }

   for (i = 0; i < ve->count - ve->last_cso_edgeflag; i++) {
      memcpy(dw, ve->cso[i].payload, sizeof(ve->cso[i].payload));
      dw += 2;
   }

   /* the last element uses the edge-flag variant of its CSO when set */
   if (ve->last_cso_edgeflag)
      memcpy(dw, ve->edgeflag_cso.payload, sizeof(ve->edgeflag_cso.payload));
}
 
/*
 * Emit 3DSTATE_INDEX_BUFFER (Gen6-7.5).  No-op when there is no index
 * buffer.  DW1/DW2 (start and inclusive end addresses) are filled in by
 * the relocations.
 */
static inline void
gen6_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder,
                          const struct ilo_ib_state *ib,
                          bool enable_cut_index)
{
   const uint8_t cmd_len = 3;
   struct ilo_buffer *buf = ilo_buffer(ib->hw_resource);
   uint32_t start_offset, end_offset;
   int format;
   uint32_t *dw;
   unsigned pos;

   ILO_DEV_ASSERT(builder->dev, 6, 7.5);

   if (!buf)
      return;

   /* this is moved to the new 3DSTATE_VF */
   if (ilo_dev_gen(builder->dev) >= ILO_GEN(7.5))
      assert(!enable_cut_index);

   switch (ib->hw_index_size) {
   case 4:
      format = GEN6_IB_DW0_FORMAT_DWORD;
      break;
   case 2:
      format = GEN6_IB_DW0_FORMAT_WORD;
      break;
   case 1:
      format = GEN6_IB_DW0_FORMAT_BYTE;
      break;
   default:
      assert(!"unknown index size");
      format = GEN6_IB_DW0_FORMAT_BYTE;
      break;
   }

   /*
    * set start_offset to 0 here and adjust pipe_draw_info::start with
    * ib->draw_start_offset in 3DPRIMITIVE
    */
   start_offset = 0;
   end_offset = buf->bo_size;

   /* end_offset must also be aligned and is inclusive */
   end_offset -= (end_offset % ib->hw_index_size);
   end_offset--;

   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_INDEX_BUFFER) | (cmd_len - 2) |
           builder->mocs << GEN6_IB_DW0_MOCS__SHIFT |
           format;
   if (enable_cut_index)
      dw[0] |= GEN6_IB_DW0_CUT_INDEX_ENABLE;

   ilo_builder_batch_reloc(builder, pos + 1, buf->bo, start_offset, 0);
   ilo_builder_batch_reloc(builder, pos + 2, buf->bo, end_offset, 0);
}
 
/*
 * Emit 3DSTATE_INDEX_BUFFER (Gen8 layout with 64-bit address).  No-op when
 * there is no index buffer.  DW2/DW3 are filled in by the relocation.
 */
static inline void
gen8_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder,
                          const struct ilo_ib_state *ib)
{
   const uint8_t cmd_len = 5;
   struct ilo_buffer *buf = ilo_buffer(ib->hw_resource);
   int format;
   uint32_t *dw;
   unsigned pos;

   ILO_DEV_ASSERT(builder->dev, 8, 8);

   if (!buf)
      return;

   switch (ib->hw_index_size) {
   case 4:
      format = GEN8_IB_DW1_FORMAT_DWORD;
      break;
   case 2:
      format = GEN8_IB_DW1_FORMAT_WORD;
      break;
   case 1:
      format = GEN8_IB_DW1_FORMAT_BYTE;
      break;
   default:
      assert(!"unknown index size");
      format = GEN8_IB_DW1_FORMAT_BYTE;
      break;
   }

   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_INDEX_BUFFER) | (cmd_len - 2);
   dw[1] = format |
           builder->mocs << GEN8_IB_DW1_MOCS__SHIFT;
   dw[4] = buf->bo_size;

   /* ignore ib->offset here in favor of adjusting 3DPRIMITIVE */
   ilo_builder_batch_reloc64(builder, pos + 2, buf->bo, 0, 0);
}
 
static inline void
gen6_3DSTATE_VS(struct ilo_builder *builder,
                const struct ilo_shader_state *vs)
{
   /* Emit 3DSTATE_VS from the pre-packed kernel CSO payload. */
   const uint8_t len = 6;
   const struct ilo_shader_cso *cso;
   uint32_t *body;

   ILO_DEV_ASSERT(builder->dev, 6, 7.5);

   cso = ilo_shader_get_kernel_cso(vs);

   ilo_builder_batch_pointer(builder, len, &body);

   body[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (len - 2);
   body[1] = ilo_shader_get_kernel_offset(vs);
   body[2] = cso->payload[0];
   body[3] = 0; /* no scratch space */
   body[4] = cso->payload[1];
   body[5] = cso->payload[2];
}
 
static inline void
gen8_3DSTATE_VS(struct ilo_builder *builder,
                const struct ilo_shader_state *vs,
                uint32_t clip_plane_enable)
{
   /*
    * Emit the Gen8 (9-dword) 3DSTATE_VS from the pre-packed kernel CSO
    * payload, with the user clip plane enables in DW8.
    */
   const uint8_t len = 9;
   const struct ilo_shader_cso *cso;
   uint32_t *body;

   ILO_DEV_ASSERT(builder->dev, 8, 8);

   cso = ilo_shader_get_kernel_cso(vs);

   ilo_builder_batch_pointer(builder, len, &body);

   body[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (len - 2);
   body[1] = ilo_shader_get_kernel_offset(vs);
   body[2] = 0;
   body[3] = cso->payload[0];
   body[4] = 0; /* no scratch space */
   body[5] = 0;
   body[6] = cso->payload[1];
   body[7] = cso->payload[2];
   body[8] = clip_plane_enable << GEN8_VS_DW8_UCP_CLIP_ENABLES__SHIFT;
}
 
static inline void
gen6_disable_3DSTATE_VS(struct ilo_builder *builder)
{
   const uint8_t cmd_len = 6;
   uint32_t *dw;
   int i;

   ILO_DEV_ASSERT(builder->dev, 6, 7.5);

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   /* emit a null VS: every dword after the header is zero */
   dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2);
   for (i = 1; i < cmd_len; i++)
      dw[i] = 0;
}
 
static inline void
gen7_disable_3DSTATE_HS(struct ilo_builder *builder)
{
   /* the command grew from 7 to 9 dwords on Gen8 */
   const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 9 : 7;
   uint32_t *dw;
   int i;

   ILO_DEV_ASSERT(builder->dev, 7, 8);

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   /* emit a null HS: every dword after the header is zero */
   dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_HS) | (cmd_len - 2);
   for (i = 1; i < cmd_len; i++)
      dw[i] = 0;
}
 
static inline void
gen7_3DSTATE_TE(struct ilo_builder *builder)
{
   const uint8_t cmd_len = 4;
   uint32_t *dw;
   int i;

   ILO_DEV_ASSERT(builder->dev, 7, 8);

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   /* tessellation engine disabled: all body dwords are zero */
   dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_TE) | (cmd_len - 2);
   for (i = 1; i < cmd_len; i++)
      dw[i] = 0;
}
 
static inline void
gen7_disable_3DSTATE_DS(struct ilo_builder *builder)
{
   /* the command grew from 6 to 9 dwords on Gen8 */
   const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 9 : 6;
   uint32_t *dw;
   int i;

   ILO_DEV_ASSERT(builder->dev, 7, 8);

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   /* emit a null DS: every dword after the header is zero */
   dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_DS) | (cmd_len - 2);
   for (i = 1; i < cmd_len; i++)
      dw[i] = 0;
}
 
static inline void
gen6_3DSTATE_GS(struct ilo_builder *builder,
                const struct ilo_shader_state *gs)
{
   const uint8_t cmd_len = 7;
   const struct ilo_shader_cso *cso;
   uint32_t *dw;

   ILO_DEV_ASSERT(builder->dev, 6, 6);

   cso = ilo_shader_get_kernel_cso(gs);

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   /* 3DSTATE_GS: header, kernel offset, then the precomputed CSO payload */
   dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
   dw[1] = ilo_shader_get_kernel_offset(gs);
   dw[2] = cso->payload[0];
   dw[3] = 0; /* no scratch space */
   dw[4] = cso->payload[1];
   dw[5] = cso->payload[2];
   dw[6] = cso->payload[3];
}
 
static inline void
gen6_so_3DSTATE_GS(struct ilo_builder *builder,
                   const struct ilo_shader_state *vs,
                   int verts_per_prim)
{
   const uint8_t cmd_len = 7;
   struct ilo_shader_cso cso;
   enum ilo_kernel_param param;
   uint32_t *dw;

   ILO_DEV_ASSERT(builder->dev, 6, 6);

   /* the VS must have been compiled with a Gen6 SO variant */
   assert(ilo_shader_get_kernel_param(vs, ILO_KERNEL_VS_GEN6_SO));

   /* pick the SO kernel entry point matching the primitive type */
   param = (verts_per_prim == 1) ? ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET :
           (verts_per_prim == 2) ? ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET :
                                   ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET;

   /* cannot use VS's CSO; build a GS-flavored one from the VS state */
   ilo_gpe_init_gs_cso(builder->dev, vs, &cso);

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
   /* the SO variant lives at an offset within the VS kernel */
   dw[1] = ilo_shader_get_kernel_offset(vs) +
           ilo_shader_get_kernel_param(vs, param);
   dw[2] = cso.payload[0];
   dw[3] = 0;
   dw[4] = cso.payload[1];
   dw[5] = cso.payload[2];
   dw[6] = cso.payload[3];
}
 
static inline void
gen6_disable_3DSTATE_GS(struct ilo_builder *builder)
{
   const uint8_t cmd_len = 7;
   uint32_t *dw;
   int i;

   ILO_DEV_ASSERT(builder->dev, 6, 6);

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   /* emit a null GS */
   dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
   for (i = 1; i <= 3; i++)
      dw[i] = 0;
   /* the URB read length field must still be within its valid range */
   dw[4] = 1 << GEN6_GS_DW4_URB_READ_LEN__SHIFT;
   dw[5] = GEN6_GS_DW5_STATISTICS;
   dw[6] = 0;
}
 
static inline void
gen6_3DSTATE_GS_SVB_INDEX(struct ilo_builder *builder,
                          int index, unsigned svbi,
                          unsigned max_svbi,
                          bool load_vertex_count)
{
   const uint8_t cmd_len = 4;
   uint32_t *dw;

   ILO_DEV_ASSERT(builder->dev, 6, 6);
   /* only four streamed vertex buffer indices exist */
   assert(index >= 0 && index < 4);

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS_SVB_INDEX) | (cmd_len - 2);
   dw[1] = index << GEN6_SVBI_DW1_INDEX__SHIFT |
           (load_vertex_count ?
            GEN6_SVBI_DW1_LOAD_INTERNAL_VERTEX_COUNT : 0);
   dw[2] = svbi;     /* current index */
   dw[3] = max_svbi; /* upper bound */
}
 
static inline void
gen7_3DSTATE_GS(struct ilo_builder *builder,
                const struct ilo_shader_state *gs)
{
   const uint8_t cmd_len = 7;
   const struct ilo_shader_cso *cso;
   uint32_t *dw;

   ILO_DEV_ASSERT(builder->dev, 7, 7.5);

   cso = ilo_shader_get_kernel_cso(gs);

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   /* 3DSTATE_GS, Gen7/7.5 layout */
   dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
   dw[1] = ilo_shader_get_kernel_offset(gs);
   dw[2] = cso->payload[0];
   dw[3] = 0; /* no scratch space */
   dw[4] = cso->payload[1];
   dw[5] = cso->payload[2];
   dw[6] = 0;
}
 
/*
 * Emit a 3DSTATE_GS that disables the GS stage.
 */
static inline void
gen7_disable_3DSTATE_GS(struct ilo_builder *builder)
{
   const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 10 : 7;
   uint32_t *dw;

   ILO_DEV_ASSERT(builder->dev, 7, 8);

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
   dw[1] = 0;
   dw[2] = 0;
   dw[3] = 0;
   dw[4] = 0;

   if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
      /*
       * Zero DW5 and DW6 too: previously they were left unwritten on Gen8,
       * emitting stale batch-buffer contents into the command.
       */
      dw[5] = 0;
      dw[6] = 0;
      dw[7] = GEN8_GS_DW7_STATISTICS;
      dw[8] = 0;
      dw[9] = 0;
   } else {
      dw[5] = GEN7_GS_DW5_STATISTICS;
      dw[6] = 0;
   }
}
 
/*
 * Emit 3DSTATE_STREAMOUT.
 *
 * \param render_stream       stream forwarded to the rest of the pipeline
 * \param render_disable      true to discard primitives after SO
 * \param vertex_attrib_count number of attributes read per vertex
 * \param buf_strides         strides, in bytes, of the four SO buffers, or
 *                            NULL when SO is disabled
 */
static inline void
gen7_3DSTATE_STREAMOUT(struct ilo_builder *builder,
                       int render_stream,
                       bool render_disable,
                       int vertex_attrib_count,
                       const int *buf_strides)
{
   const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 5 : 3;
   uint32_t *dw;
   int buf_mask;

   ILO_DEV_ASSERT(builder->dev, 7, 8);

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_STREAMOUT) | (cmd_len - 2);

   dw[1] = render_stream << GEN7_SO_DW1_RENDER_STREAM_SELECT__SHIFT;
   if (render_disable)
      dw[1] |= GEN7_SO_DW1_RENDER_DISABLE;

   if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
      /*
       * Zero the Gen8-only pitch dwords up front; previously they were
       * left unwritten (stale batch memory) when buf_strides was NULL.
       */
      dw[3] = 0;
      dw[4] = 0;
   }

   if (buf_strides) {
      buf_mask = ((bool) buf_strides[3]) << 3 |
                 ((bool) buf_strides[2]) << 2 |
                 ((bool) buf_strides[1]) << 1 |
                 ((bool) buf_strides[0]);
      if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
         /*
          * Buffer pitches are packed two per dword: DW3 holds buffers 1/0,
          * DW4 holds buffers 3/2.  The old code wrote buf_strides[1] into
          * the low half of DW4, dropping buffer 2's stride.
          */
         dw[3] = buf_strides[1] << 16 | buf_strides[0];
         dw[4] = buf_strides[3] << 16 | buf_strides[2];
      }
   } else {
      buf_mask = 0;
   }

   if (buf_mask) {
      int read_len;

      dw[1] |= GEN7_SO_DW1_SO_ENABLE |
               GEN7_SO_DW1_STATISTICS;
      /* API_OPENGL */
      if (true)
         dw[1] |= GEN7_SO_DW1_REORDER_TRAILING;
      /* per-buffer enables moved out of this command on Gen8 */
      if (ilo_dev_gen(builder->dev) < ILO_GEN(8))
         dw[1] |= buf_mask << GEN7_SO_DW1_BUFFER_ENABLES__SHIFT;

      /* read lengths are in pairs of attributes; at least one */
      read_len = (vertex_attrib_count + 1) / 2;
      if (!read_len)
         read_len = 1;

      dw[2] = 0 << GEN7_SO_DW2_STREAM3_READ_OFFSET__SHIFT |
              (read_len - 1) << GEN7_SO_DW2_STREAM3_READ_LEN__SHIFT |
              0 << GEN7_SO_DW2_STREAM2_READ_OFFSET__SHIFT |
              (read_len - 1) << GEN7_SO_DW2_STREAM2_READ_LEN__SHIFT |
              0 << GEN7_SO_DW2_STREAM1_READ_OFFSET__SHIFT |
              (read_len - 1) << GEN7_SO_DW2_STREAM1_READ_LEN__SHIFT |
              0 << GEN7_SO_DW2_STREAM0_READ_OFFSET__SHIFT |
              (read_len - 1) << GEN7_SO_DW2_STREAM0_READ_LEN__SHIFT;
   } else {
      dw[2] = 0;
   }
}
 
/*
 * Emit 3DSTATE_SO_DECL_LIST, describing which vertex outputs are written to
 * which stream-output buffers for each of the four streams.
 */
static inline void
gen7_3DSTATE_SO_DECL_LIST(struct ilo_builder *builder,
                          const struct pipe_stream_output_info *so_info)
{
   /*
    * Note that "DWord Length" has 9 bits for this command and the type of
    * cmd_len cannot be uint8_t.
    */
   uint16_t cmd_len;
   struct {
      int buf_selects;       /* bitmask of buffers used by this stream */
      int decl_count;        /* number of valid entries in decls[] */
      uint16_t decls[128];   /* packed SO_DECL entries */
   } streams[4];
   unsigned buf_offsets[PIPE_MAX_SO_BUFFERS];
   int hw_decl_count, i;
   uint32_t *dw;

   ILO_DEV_ASSERT(builder->dev, 7, 8);

   memset(streams, 0, sizeof(streams));
   memset(buf_offsets, 0, sizeof(buf_offsets));

   for (i = 0; i < so_info->num_outputs; i++) {
      unsigned decl, st, buf, reg, mask;

      st = so_info->output[i].stream;
      buf = so_info->output[i].output_buffer;

      /* pad with holes: emit hole decls (up to 4 dwords each) until this
       * buffer's write position reaches the output's dst_offset */
      while (buf_offsets[buf] < so_info->output[i].dst_offset) {
         int num_dwords;

         num_dwords = so_info->output[i].dst_offset - buf_offsets[buf];
         if (num_dwords > 4)
            num_dwords = 4;

         decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT |
                GEN7_SO_DECL_HOLE_FLAG |
                ((1 << num_dwords) - 1) << GEN7_SO_DECL_COMPONENT_MASK__SHIFT;

         assert(streams[st].decl_count < Elements(streams[st].decls));
         streams[st].decls[streams[st].decl_count++] = decl;
         buf_offsets[buf] += num_dwords;
      }
      assert(buf_offsets[buf] == so_info->output[i].dst_offset);

      reg = so_info->output[i].register_index;
      /* component mask starts at start_component, num_components wide */
      mask = ((1 << so_info->output[i].num_components) - 1) <<
         so_info->output[i].start_component;

      decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT |
             reg << GEN7_SO_DECL_REG_INDEX__SHIFT |
             mask << GEN7_SO_DECL_COMPONENT_MASK__SHIFT;

      assert(streams[st].decl_count < Elements(streams[st].decls));

      streams[st].buf_selects |= 1 << buf;
      streams[st].decls[streams[st].decl_count++] = decl;
      buf_offsets[buf] += so_info->output[i].num_components;
   }

   if (ilo_dev_gen(builder->dev) >= ILO_GEN(7.5)) {
      /* only as many decl pairs as the longest stream needs */
      hw_decl_count = MAX4(streams[0].decl_count, streams[1].decl_count,
                           streams[2].decl_count, streams[3].decl_count);
   } else {
      /*
       * From the Ivy Bridge PRM, volume 2 part 1, page 201:
       *
       *     "Errata: All 128 decls for all four streams must be included
       *      whenever this command is issued. The "Num Entries [n]" fields
       *      still contain the actual numbers of valid decls."
       */
      hw_decl_count = 128;
   }

   /* 3 header dwords plus one dword pair per decl slot */
   cmd_len = 3 + 2 * hw_decl_count;

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SO_DECL_LIST) | (cmd_len - 2);
   dw[1] = streams[3].buf_selects << GEN7_SO_DECL_DW1_STREAM3_BUFFER_SELECTS__SHIFT |
           streams[2].buf_selects << GEN7_SO_DECL_DW1_STREAM2_BUFFER_SELECTS__SHIFT |
           streams[1].buf_selects << GEN7_SO_DECL_DW1_STREAM1_BUFFER_SELECTS__SHIFT |
           streams[0].buf_selects << GEN7_SO_DECL_DW1_STREAM0_BUFFER_SELECTS__SHIFT;
   dw[2] = streams[3].decl_count << GEN7_SO_DECL_DW2_STREAM3_ENTRY_COUNT__SHIFT |
           streams[2].decl_count << GEN7_SO_DECL_DW2_STREAM2_ENTRY_COUNT__SHIFT |
           streams[1].decl_count << GEN7_SO_DECL_DW2_STREAM1_ENTRY_COUNT__SHIFT |
           streams[0].decl_count << GEN7_SO_DECL_DW2_STREAM0_ENTRY_COUNT__SHIFT;
   dw += 3;

   /* each slot packs the four streams' decls into two dwords */
   for (i = 0; i < hw_decl_count; i++) {
      dw[0] = streams[1].decls[i] << 16 | streams[0].decls[i];
      dw[1] = streams[3].decls[i] << 16 | streams[2].decls[i];
      dw += 2;
   }
}
 
/*
 * Emit 3DSTATE_SO_BUFFER binding a stream-output target to buffer slot
 * \p index with the given \p stride in bytes.
 */
static inline void
gen7_3DSTATE_SO_BUFFER(struct ilo_builder *builder, int index, int stride,
                       const struct pipe_stream_output_target *so_target)
{
   /* Gen8 uses a 64-bit address plus extra dwords */
   const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 8 : 4;
   struct ilo_buffer *buf;
   int start, end;
   uint32_t *dw;
   unsigned pos;

   ILO_DEV_ASSERT(builder->dev, 7, 8);

   buf = ilo_buffer(so_target->buffer);

   /* DWord-aligned */
   assert(stride % 4 == 0);
   assert(so_target->buffer_offset % 4 == 0);

   stride &= ~3;
   start = so_target->buffer_offset & ~3;
   end = (start + so_target->buffer_size) & ~3;

   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SO_BUFFER) | (cmd_len - 2);
   dw[1] = index << GEN7_SO_BUF_DW1_INDEX__SHIFT |
           stride;

   if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
      dw[1] |= builder->mocs << GEN8_SO_BUF_DW1_MOCS__SHIFT;

      /* Gen8 takes a size rather than an end address */
      dw[4] = end - start;
      dw[5] = 0;
      dw[6] = 0;
      dw[7] = 0;

      /* 64-bit start address in DW2-DW3 */
      ilo_builder_batch_reloc64(builder, pos + 2,
            buf->bo, start, INTEL_RELOC_WRITE);
   } else {
      dw[1] |= builder->mocs << GEN7_SO_BUF_DW1_MOCS__SHIFT;

      /* 32-bit start address in DW2 and end address in DW3 */
      ilo_builder_batch_reloc(builder, pos + 2,
            buf->bo, start, INTEL_RELOC_WRITE);
      ilo_builder_batch_reloc(builder, pos + 3,
            buf->bo, end, INTEL_RELOC_WRITE);
   }
}
 
static inline void
gen7_disable_3DSTATE_SO_BUFFER(struct ilo_builder *builder, int index)
{
   const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 8 : 4;
   uint32_t *dw;
   int i;

   ILO_DEV_ASSERT(builder->dev, 7, 8);

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   /* unbind SO buffer slot "index": only the index field is non-zero */
   dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SO_BUFFER) | (cmd_len - 2);
   dw[1] = index << GEN7_SO_BUF_DW1_INDEX__SHIFT;
   for (i = 2; i < cmd_len; i++)
      dw[i] = 0;
}
 
static inline void
gen6_3DSTATE_BINDING_TABLE_POINTERS(struct ilo_builder *builder,
                                    uint32_t vs_binding_table,
                                    uint32_t gs_binding_table,
                                    uint32_t ps_binding_table)
{
   const uint8_t cmd_len = 4;
   uint32_t dw0, *dw;

   ILO_DEV_ASSERT(builder->dev, 6, 6);

   /* mark all three stages' tables as modified */
   dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_BINDING_TABLE_POINTERS) |
         GEN6_BINDING_TABLE_PTR_DW0_VS_CHANGED |
         GEN6_BINDING_TABLE_PTR_DW0_GS_CHANGED |
         GEN6_BINDING_TABLE_PTR_DW0_PS_CHANGED |
         (cmd_len - 2);

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = dw0;
   dw[1] = vs_binding_table;
   dw[2] = gs_binding_table;
   dw[3] = ps_binding_table;
}
 
static inline void
gen6_3DSTATE_SAMPLER_STATE_POINTERS(struct ilo_builder *builder,
                                    uint32_t vs_sampler_state,
                                    uint32_t gs_sampler_state,
                                    uint32_t ps_sampler_state)
{
   const uint8_t cmd_len = 4;
   uint32_t dw0, *dw;

   ILO_DEV_ASSERT(builder->dev, 6, 6);

   /* mark all three stages' sampler states as modified */
   dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLER_STATE_POINTERS) |
         GEN6_SAMPLER_PTR_DW0_VS_CHANGED |
         GEN6_SAMPLER_PTR_DW0_GS_CHANGED |
         GEN6_SAMPLER_PTR_DW0_PS_CHANGED |
         (cmd_len - 2);

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = dw0;
   dw[1] = vs_sampler_state;
   dw[2] = gs_sampler_state;
   dw[3] = ps_sampler_state;
}
 
/* Emit a generic two-dword Gen7 state-pointer command for subop. */
static inline void
gen7_3dstate_pointer(struct ilo_builder *builder,
                     int subop, uint32_t pointer)
{
   const uint8_t cmd_len = 2;
   uint32_t *dw;

   ILO_DEV_ASSERT(builder->dev, 7, 8);

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN6_RENDER_TYPE_RENDER |
           GEN6_RENDER_SUBTYPE_3D |
           subop | (cmd_len - 2);
   dw[1] = pointer;
}
 
/* Point the VS stage at its binding table (Gen7+). */
static inline void
gen7_3DSTATE_BINDING_TABLE_POINTERS_VS(struct ilo_builder *builder,
                                       uint32_t binding_table)
{
   gen7_3dstate_pointer(builder,
         GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_VS,
         binding_table);
}
 
/* Point the HS stage at its binding table (Gen7+). */
static inline void
gen7_3DSTATE_BINDING_TABLE_POINTERS_HS(struct ilo_builder *builder,
                                       uint32_t binding_table)
{
   gen7_3dstate_pointer(builder,
         GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_HS,
         binding_table);
}
 
/* Point the DS stage at its binding table (Gen7+). */
static inline void
gen7_3DSTATE_BINDING_TABLE_POINTERS_DS(struct ilo_builder *builder,
                                       uint32_t binding_table)
{
   gen7_3dstate_pointer(builder,
         GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_DS,
         binding_table);
}
 
/* Point the GS stage at its binding table (Gen7+). */
static inline void
gen7_3DSTATE_BINDING_TABLE_POINTERS_GS(struct ilo_builder *builder,
                                       uint32_t binding_table)
{
   gen7_3dstate_pointer(builder,
         GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_GS,
         binding_table);
}
 
/* Point the VS stage at its sampler state (Gen7+). */
static inline void
gen7_3DSTATE_SAMPLER_STATE_POINTERS_VS(struct ilo_builder *builder,
                                       uint32_t sampler_state)
{
   gen7_3dstate_pointer(builder,
         GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_VS,
         sampler_state);
}
 
/* Point the HS stage at its sampler state (Gen7+). */
static inline void
gen7_3DSTATE_SAMPLER_STATE_POINTERS_HS(struct ilo_builder *builder,
                                       uint32_t sampler_state)
{
   gen7_3dstate_pointer(builder,
         GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_HS,
         sampler_state);
}
 
/* Point the DS stage at its sampler state (Gen7+). */
static inline void
gen7_3DSTATE_SAMPLER_STATE_POINTERS_DS(struct ilo_builder *builder,
                                       uint32_t sampler_state)
{
   gen7_3dstate_pointer(builder,
         GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_DS,
         sampler_state);
}
 
/* Point the GS stage at its sampler state (Gen7+). */
static inline void
gen7_3DSTATE_SAMPLER_STATE_POINTERS_GS(struct ilo_builder *builder,
                                       uint32_t sampler_state)
{
   gen7_3dstate_pointer(builder,
         GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_GS,
         sampler_state);
}
 
/*
 * Emit a Gen6 3DSTATE_CONSTANT_* command (selected by \p subop) describing
 * up to four push constant buffers.
 *
 * \param bufs   dynamic-state offsets of the buffers (32-byte aligned)
 * \param sizes  sizes of the buffers in bytes
 */
static inline void
gen6_3dstate_constant(struct ilo_builder *builder, int subop,
                      const uint32_t *bufs, const int *sizes,
                      int num_bufs)
{
   const uint32_t cmd = GEN6_RENDER_TYPE_RENDER |
                        GEN6_RENDER_SUBTYPE_3D |
                        subop;
   const uint8_t cmd_len = 5;
   unsigned buf_enabled = 0x0;
   uint32_t buf_dw[4], *dw;
   int max_read_length, total_read_length;
   int i;

   ILO_DEV_ASSERT(builder->dev, 6, 6);

   assert(num_bufs <= 4);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 138:
    *
    *     "(3DSTATE_CONSTANT_VS) The sum of all four read length fields (each
    *      incremented to represent the actual read length) must be less than
    *      or equal to 32"
    *
    * From the Sandy Bridge PRM, volume 2 part 1, page 161:
    *
    *     "(3DSTATE_CONSTANT_GS) The sum of all four read length fields (each
    *      incremented to represent the actual read length) must be less than
    *      or equal to 64"
    *
    * From the Sandy Bridge PRM, volume 2 part 1, page 287:
    *
    *     "(3DSTATE_CONSTANT_PS) The sum of all four read length fields (each
    *      incremented to represent the actual read length) must be less than
    *      or equal to 64"
    */
   switch (subop) {
   case GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_VS:
      max_read_length = 32;
      break;
   case GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_GS:
   case GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_PS:
      max_read_length = 64;
      break;
   default:
      assert(!"unknown pcb subop");
      max_read_length = 0;
      break;
   }

   total_read_length = 0;
   for (i = 0; i < 4; i++) {
      if (i < num_bufs && sizes[i]) {
         /* in 256-bit units */
         const int read_len = (sizes[i] + 31) / 32;

         /* the low 5 bits of the offset hold the read length */
         assert(bufs[i] % 32 == 0);
         assert(read_len <= 32);

         buf_enabled |= 1 << i;
         buf_dw[i] = bufs[i] | (read_len - 1);

         total_read_length += read_len;
      } else {
         buf_dw[i] = 0;
      }
   }

   assert(total_read_length <= max_read_length);

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = cmd | (cmd_len - 2) |
           buf_enabled << GEN6_CONSTANT_DW0_BUFFER_ENABLES__SHIFT |
           builder->mocs << GEN6_CONSTANT_DW0_MOCS__SHIFT;

   /* one dword per buffer */
   memcpy(&dw[1], buf_dw, sizeof(buf_dw));
}
 
/* Emit 3DSTATE_CONSTANT_VS (Gen6). */
static inline void
gen6_3DSTATE_CONSTANT_VS(struct ilo_builder *builder,
                         const uint32_t *bufs, const int *sizes,
                         int num_bufs)
{
   gen6_3dstate_constant(builder, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_VS,
         bufs, sizes, num_bufs);
}
 
/* Emit 3DSTATE_CONSTANT_GS (Gen6). */
static inline void
gen6_3DSTATE_CONSTANT_GS(struct ilo_builder *builder,
                         const uint32_t *bufs, const int *sizes,
                         int num_bufs)
{
   gen6_3dstate_constant(builder, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_GS,
         bufs, sizes, num_bufs);
}
 
/*
 * Emit a Gen7/Gen8 3DSTATE_CONSTANT_* command (selected by \p subop)
 * describing up to four push constant buffers.
 *
 * \param bufs   dynamic-state offsets of the buffers (32-byte aligned)
 * \param sizes  sizes of the buffers in bytes
 */
static inline void
gen7_3dstate_constant(struct ilo_builder *builder,
                      int subop,
                      const uint32_t *bufs, const int *sizes,
                      int num_bufs)
{
   const uint32_t cmd = GEN6_RENDER_TYPE_RENDER |
                        GEN6_RENDER_SUBTYPE_3D |
                        subop;
   /* Gen8 uses 64-bit buffer addresses, doubling the address dwords */
   const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 11 : 7;
   uint32_t payload[6], *dw;
   int total_read_length, i;

   ILO_DEV_ASSERT(builder->dev, 7, 8);

   /* VS, HS, DS, GS, and PS variants */
   assert(subop >= GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_VS &&
          subop <= GEN7_RENDER_OPCODE_3DSTATE_CONSTANT_DS &&
          subop != GEN6_RENDER_OPCODE_3DSTATE_SAMPLE_MASK);

   assert(num_bufs <= 4);

   /* payload[0..1] hold the four 16-bit read lengths */
   payload[0] = 0;
   payload[1] = 0;

   total_read_length = 0;
   for (i = 0; i < 4; i++) {
      int read_len;

      /*
       * From the Ivy Bridge PRM, volume 2 part 1, page 112:
       *
       *     "Constant buffers must be enabled in order from Constant Buffer 0
       *      to Constant Buffer 3 within this command.  For example, it is
       *      not allowed to enable Constant Buffer 1 by programming a
       *      non-zero value in the VS Constant Buffer 1 Read Length without a
       *      non-zero value in VS Constant Buffer 0 Read Length."
       */
      if (i >= num_bufs || !sizes[i]) {
         /* zero out this and all remaining buffer slots */
         for (; i < 4; i++) {
            assert(i >= num_bufs || !sizes[i]);
            payload[2 + i] = 0;
         }
         break;
      }

      /* read lengths are in 256-bit units */
      read_len = (sizes[i] + 31) / 32;
      /* the lower 5 bits are used for memory object control state */
      assert(bufs[i] % 32 == 0);

      payload[i / 2] |= read_len << ((i % 2) ? 16 : 0);
      payload[2 + i] = bufs[i];

      total_read_length += read_len;
   }

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 113:
    *
    *     "The sum of all four read length fields must be less than or equal
    *      to the size of 64"
    */
   assert(total_read_length <= 64);

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = cmd | (cmd_len - 2);
   if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
      /* interleave each 32-bit offset with a zero high dword */
      dw[1] = payload[0];
      dw[2] = payload[1];
      dw[3] = payload[2];
      dw[4] = 0;
      dw[5] = payload[3];
      dw[6] = 0;
      dw[7] = payload[4];
      dw[8] = 0;
      dw[9] = payload[5];
      dw[10] = 0;
   } else {
      /* on Gen7 the MOCS rides in buffer 0's address dword */
      payload[2] |= builder->mocs << GEN7_CONSTANT_DW_ADDR_MOCS__SHIFT;

      memcpy(&dw[1], payload, sizeof(payload));
   }
}
 
/* Emit 3DSTATE_CONSTANT_VS (Gen7+). */
static inline void
gen7_3DSTATE_CONSTANT_VS(struct ilo_builder *builder,
                         const uint32_t *bufs, const int *sizes,
                         int num_bufs)
{
   gen7_3dstate_constant(builder, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_VS,
         bufs, sizes, num_bufs);
}
 
/* Emit 3DSTATE_CONSTANT_HS (Gen7+). */
static inline void
gen7_3DSTATE_CONSTANT_HS(struct ilo_builder *builder,
                         const uint32_t *bufs, const int *sizes,
                         int num_bufs)
{
   gen7_3dstate_constant(builder, GEN7_RENDER_OPCODE_3DSTATE_CONSTANT_HS,
         bufs, sizes, num_bufs);
}
 
/* Emit 3DSTATE_CONSTANT_DS (Gen7+). */
static inline void
gen7_3DSTATE_CONSTANT_DS(struct ilo_builder *builder,
                         const uint32_t *bufs, const int *sizes,
                         int num_bufs)
{
   gen7_3dstate_constant(builder, GEN7_RENDER_OPCODE_3DSTATE_CONSTANT_DS,
         bufs, sizes, num_bufs);
}
 
/* Emit 3DSTATE_CONSTANT_GS (Gen7+). */
static inline void
gen7_3DSTATE_CONSTANT_GS(struct ilo_builder *builder,
                         const uint32_t *bufs, const int *sizes,
                         int num_bufs)
{
   gen7_3dstate_constant(builder, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_GS,
         bufs, sizes, num_bufs);
}
 
/*
 * Write a BINDING_TABLE_STATE array of surface-state offsets into the
 * surface state buffer and return its offset, or 0 when empty.
 */
static inline uint32_t
gen6_BINDING_TABLE_STATE(struct ilo_builder *builder,
                         const uint32_t *surface_states,
                         int num_surface_states)
{
   const int state_align = 32;
   uint32_t state_offset, *dw;

   ILO_DEV_ASSERT(builder->dev, 6, 8);

   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 69:
    *
    *     "It is stored as an array of up to 256 elements..."
    */
   assert(num_surface_states <= 256);

   if (num_surface_states == 0)
      return 0;

   state_offset = ilo_builder_surface_pointer(builder,
         ILO_BUILDER_ITEM_BINDING_TABLE, state_align,
         num_surface_states, &dw);
   memcpy(dw, surface_states,
          num_surface_states * sizeof(surface_states[0]));

   return state_offset;
}
 
/*
 * Write a SURFACE_STATE into the surface state buffer and return its offset.
 * When the surface is backed by a bo, a relocation is emitted for the
 * address dword, with write access when \p for_render is set.
 */
static inline uint32_t
gen6_SURFACE_STATE(struct ilo_builder *builder,
                   const struct ilo_view_surface *surf,
                   bool for_render)
{
   int state_align, state_len;
   uint32_t state_offset, *dw;

   ILO_DEV_ASSERT(builder->dev, 6, 8);

   if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
      /* Gen8: 13-dword state, 64-byte aligned, 64-bit address in DW8 */
      state_align = 64;
      state_len = 13;

      state_offset = ilo_builder_surface_pointer(builder,
            ILO_BUILDER_ITEM_SURFACE, state_align, state_len, &dw);
      memcpy(dw, surf->payload, state_len << 2);

      if (surf->bo) {
         /* scanouts must not be cached in LLC; use PTE-based MOCS */
         const uint32_t mocs = (surf->scanout) ?
            (GEN8_MOCS_MT_PTE | GEN8_MOCS_CT_L3) : builder->mocs;

         dw[1] |= mocs << GEN8_SURFACE_DW1_MOCS__SHIFT;

         ilo_builder_surface_reloc64(builder, state_offset, 8, surf->bo,
               surf->payload[8], (for_render) ? INTEL_RELOC_WRITE : 0);
      }
   } else {
      /* Gen6/7: 6 or 8 dwords, 32-byte aligned, 32-bit address in DW1 */
      state_align = 32;
      state_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? 8 : 6;

      state_offset = ilo_builder_surface_pointer(builder,
            ILO_BUILDER_ITEM_SURFACE, state_align, state_len, &dw);
      memcpy(dw, surf->payload, state_len << 2);

      if (surf->bo) {
         /*
          * For scanouts, we should not enable caching in LLC.  Since we only
          * enable that on Gen8+, we are fine here.
          */
         dw[5] |= builder->mocs << GEN6_SURFACE_DW5_MOCS__SHIFT;

         ilo_builder_surface_reloc(builder, state_offset, 1, surf->bo,
               surf->payload[1], (for_render) ? INTEL_RELOC_WRITE : 0);
      }
   }

   return state_offset;
}
 
static inline uint32_t
gen6_so_SURFACE_STATE(struct ilo_builder *builder,
const struct pipe_stream_output_target *so,
const struct pipe_stream_output_info *so_info,
int so_index)
{
struct ilo_buffer *buf = ilo_buffer(so->buffer);
unsigned bo_offset, struct_size;
enum pipe_format elem_format;
struct ilo_view_surface surf;
 
ILO_DEV_ASSERT(builder->dev, 6, 6);
 
bo_offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4;
struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4;
 
switch (so_info->output[so_index].num_components) {
case 1:
elem_format = PIPE_FORMAT_R32_FLOAT;
break;
case 2:
elem_format = PIPE_FORMAT_R32G32_FLOAT;
break;
case 3:
elem_format = PIPE_FORMAT_R32G32B32_FLOAT;
break;
case 4:
elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
break;
default:
assert(!"unexpected SO components length");
elem_format = PIPE_FORMAT_R32_FLOAT;
break;
}
 
ilo_gpe_init_view_surface_for_buffer(builder->dev, buf, bo_offset,
so->buffer_size, struct_size, elem_format, false, true, &surf);
 
return gen6_SURFACE_STATE(builder, &surf, false);
}
 
/*
 * Write an array of SAMPLER_STATEs into the dynamic state buffer and return
 * its offset.  Holes (NULL sampler or view) become disabled samplers.
 */
static inline uint32_t
gen6_SAMPLER_STATE(struct ilo_builder *builder,
                   const struct ilo_sampler_cso * const *samplers,
                   const struct pipe_sampler_view * const *views,
                   const uint32_t *sampler_border_colors,
                   int num_samplers)
{
   const int state_align = 32;
   const int state_len = 4 * num_samplers;   /* 4 dwords per sampler */
   uint32_t state_offset, *dw;
   int i;

   ILO_DEV_ASSERT(builder->dev, 6, 8);

   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 101:
    *
    *     "The sampler state is stored as an array of up to 16 elements..."
    */
   assert(num_samplers <= 16);

   if (!num_samplers)
      return 0;

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 132:
    *
    *     "(Sampler Count of 3DSTATE_VS) Specifies how many samplers (in
    *      multiples of 4) the vertex shader 0 kernel uses.  Used only for
    *      prefetching the associated sampler state entries.
    *
    * It also applies to other shader stages.
    */
   ilo_builder_dynamic_pad_top(builder, 4 * (4 - (num_samplers % 4)));

   state_offset = ilo_builder_dynamic_pointer(builder,
         ILO_BUILDER_ITEM_SAMPLER, state_align, state_len, &dw);

   for (i = 0; i < num_samplers; i++) {
      const struct ilo_sampler_cso *sampler = samplers[i];
      const struct pipe_sampler_view *view = views[i];
      const uint32_t border_color = sampler_border_colors[i];
      uint32_t dw_filter, dw_wrap;

      /* there may be holes */
      if (!sampler || !view) {
         /* disabled sampler */
         dw[0] = 1 << 31;
         dw[1] = 0;
         dw[2] = 0;
         dw[3] = 0;
         dw += 4;

         continue;
      }

      /* determine filter and wrap modes */
      switch (view->texture->target) {
      case PIPE_TEXTURE_1D:
         dw_filter = (sampler->anisotropic) ?
            sampler->dw_filter_aniso : sampler->dw_filter;
         dw_wrap = sampler->dw_wrap_1d;
         break;
      case PIPE_TEXTURE_3D:
         /*
          * From the Sandy Bridge PRM, volume 4 part 1, page 103:
          *
          *     "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for
          *      surfaces of type SURFTYPE_3D."
          */
         dw_filter = sampler->dw_filter;
         dw_wrap = sampler->dw_wrap;
         break;
      case PIPE_TEXTURE_CUBE:
         dw_filter = (sampler->anisotropic) ?
            sampler->dw_filter_aniso : sampler->dw_filter;
         dw_wrap = sampler->dw_wrap_cube;
         break;
      default:
         dw_filter = (sampler->anisotropic) ?
            sampler->dw_filter_aniso : sampler->dw_filter;
         dw_wrap = sampler->dw_wrap;
         break;
      }

      dw[0] = sampler->payload[0];
      dw[1] = sampler->payload[1];
      /* border color pointers are 32-byte aligned */
      assert(!(border_color & 0x1f));
      dw[2] = border_color;
      dw[3] = sampler->payload[2];

      dw[0] |= dw_filter;

      if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) {
         dw[3] |= dw_wrap;
      }
      else {
         /*
          * From the Sandy Bridge PRM, volume 4 part 1, page 21:
          *
          *     "[DevSNB] Errata: Incorrect behavior is observed in cases
          *      where the min and mag mode filters are different and
          *      SurfMinLOD is nonzero.  The determination of MagMode uses the
          *      following equation instead of the one in the above
          *      pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)"
          *
          * As a way to work around that, we set Base to
          * view->u.tex.first_level.
          */
         dw[0] |= view->u.tex.first_level << 22;

         dw[1] |= dw_wrap;
      }

      dw += 4;
   }

   return state_offset;
}
 
/*
 * Write a SAMPLER_BORDER_COLOR_STATE from the pre-baked tail of the sampler
 * CSO payload and return its offset in the dynamic state buffer.
 */
static inline uint32_t
gen6_SAMPLER_BORDER_COLOR_STATE(struct ilo_builder *builder,
                                const struct ilo_sampler_cso *sampler)
{
   /* Gen8 requires 64-byte alignment; earlier gens use 32 */
   const int state_align =
      (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 64 : 32;
   /* Gen7+ packs the border color into 4 dwords; Gen6 uses 12 */
   const int state_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? 4 : 12;

   ILO_DEV_ASSERT(builder->dev, 6, 8);

   assert(Elements(sampler->payload) >= 3 + state_len);

   /* see ilo_gpe_init_sampler_cso() */
   return ilo_builder_dynamic_write(builder, ILO_BUILDER_ITEM_BLOB,
         state_align, state_len, &sampler->payload[3]);
}
 
/*
 * Reserve a push constant buffer of at least \p size bytes in the dynamic
 * state buffer.  Returns its offset; when \p pcb is non-NULL, stores a CPU
 * pointer to the reserved space for the caller to fill in.
 */
static inline uint32_t
gen6_push_constant_buffer(struct ilo_builder *builder,
                          int size, void **pcb)
{
   /*
    * For all VS, GS, FS, and CS push constant buffers, they must be aligned
    * to 32 bytes, and their sizes are specified in 256-bit units.
    */
   const int state_align = 32;
   const int padded_size = align(size, 32);
   uint32_t state_offset;
   char *buf;

   ILO_DEV_ASSERT(builder->dev, 6, 8);

   state_offset = ilo_builder_dynamic_pointer(builder,
         ILO_BUILDER_ITEM_BLOB, state_align, padded_size / 4,
         (uint32_t **) &buf);

   /* zero the padding so stale bytes are never pushed */
   memset(buf + size, 0, padded_size - size);

   if (pcb)
      *pcb = buf;

   return state_offset;
}
 
/*
 * Copy user-supplied vertex data into the dynamic state buffer and return
 * its offset.  \p size must be a whole number of dwords.
 */
static inline uint32_t
gen6_user_vertex_buffer(struct ilo_builder *builder,
                        int size, const void *vertices)
{
   ILO_DEV_ASSERT(builder->dev, 6, 7.5);

   assert(size % 4 == 0);

   /* 8-byte aligned, length in dwords */
   return ilo_builder_dynamic_write(builder, ILO_BUILDER_ITEM_BLOB,
         8, size / 4, vertices);
}
 
#endif /* ILO_BUILDER_3D_TOP_H */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_builder_blt.h
0,0 → 1,322
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2014 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_BUILDER_BLT_H
#define ILO_BUILDER_BLT_H
 
#include "genhw/genhw.h"
#include "intel_winsys.h"
 
#include "ilo_core.h"
#include "ilo_dev.h"
#include "ilo_builder.h"
 
/* Pixel masks understood by the BLT helpers below. */
enum gen6_blt_mask {
   GEN6_BLT_MASK_8,       /* 8 bpp */
   GEN6_BLT_MASK_16,      /* 16 bpp (565) */
   GEN6_BLT_MASK_32,      /* 32 bpp, write all channels */
   GEN6_BLT_MASK_32_LO,   /* 32 bpp, write RGB only */
   GEN6_BLT_MASK_32_HI,   /* 32 bpp, write alpha only */
};
 
/* A linearly-addressed BLT operand: bo plus byte offset and pitch. */
struct gen6_blt_bo {
   struct intel_bo *bo;
   uint32_t offset;
   int16_t pitch;
};
 
/* An XY-addressed BLT operand: adds tiling and a 2D origin. */
struct gen6_blt_xy_bo {
   struct intel_bo *bo;
   uint32_t offset;
   int16_t pitch;

   enum gen_surface_tiling tiling;
   int16_t x, y;   /* origin of the blit rectangle */
};
 
/*
* From the Sandy Bridge PRM, volume 1 part 5, page 7:
*
* "The BLT engine is capable of transferring very large quantities of
* graphics data. Any graphics data read from and written to the
* destination is permitted to represent a number of pixels that occupies
* up to 65,536 scan lines and up to 32,768 bytes per scan line at the
* destination. The maximum number of pixels that may be represented per
* scan line's worth of graphics data depends on the color depth."
*/
static const int gen6_blt_max_bytes_per_scanline = 32768;  /* bytes */
static const int gen6_blt_max_scanlines = 65536;           /* scan lines */
 
/* Map a value mask to the BR13 color-depth format field. */
static inline uint32_t
gen6_blt_translate_value_mask(enum gen6_blt_mask value_mask)
{
   if (value_mask == GEN6_BLT_MASK_8)
      return GEN6_BLITTER_BR13_FORMAT_8;
   else if (value_mask == GEN6_BLT_MASK_16)
      return GEN6_BLITTER_BR13_FORMAT_565;
   else
      return GEN6_BLITTER_BR13_FORMAT_8888;
}
 
/* Map a value mask to its size in bytes per pixel. */
static inline uint32_t
gen6_blt_translate_value_cpp(enum gen6_blt_mask value_mask)
{
   if (value_mask == GEN6_BLT_MASK_8)
      return 1;
   else if (value_mask == GEN6_BLT_MASK_16)
      return 2;
   else
      return 4;
}
 
/* Map a write mask to the BR00 channel-write-enable bits. */
static inline uint32_t
gen6_blt_translate_write_mask(enum gen6_blt_mask write_mask)
{
   if (write_mask == GEN6_BLT_MASK_32)
      return GEN6_BLITTER_BR00_WRITE_RGB | GEN6_BLITTER_BR00_WRITE_A;
   else if (write_mask == GEN6_BLT_MASK_32_LO)
      return GEN6_BLITTER_BR00_WRITE_RGB;
   else if (write_mask == GEN6_BLT_MASK_32_HI)
      return GEN6_BLITTER_BR00_WRITE_A;
   else
      return 0;
}
 
/*
 * Emit COLOR_BLT: fill a linear region of \p dst with \p pattern using
 * raster op \p rop.
 */
static inline void
gen6_COLOR_BLT(struct ilo_builder *builder,
               const struct gen6_blt_bo *dst, uint32_t pattern,
               uint16_t width, uint16_t height, uint8_t rop,
               enum gen6_blt_mask value_mask,
               enum gen6_blt_mask write_mask)
{
   /* Gen8 uses a 64-bit destination address */
   const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 6 : 5;
   const int cpp = gen6_blt_translate_value_cpp(value_mask);
   uint32_t *dw;
   unsigned pos;

   ILO_DEV_ASSERT(builder->dev, 6, 8);

   /* NOTE(review): width here appears to be in bytes, not pixels — the
    * limit is bytes-per-scanline and no cpp scaling is applied; confirm
    * against callers */
   assert(width < gen6_blt_max_bytes_per_scanline);
   assert(height < gen6_blt_max_scanlines);
   /* offsets are naturally aligned and pitches are dword-aligned */
   assert(dst->offset % cpp == 0 && dst->pitch % 4 == 0);

   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN6_BLITTER_CMD(COLOR_BLT) |
           gen6_blt_translate_write_mask(write_mask) |
           (cmd_len - 2);
   dw[1] = rop << GEN6_BLITTER_BR13_ROP__SHIFT |
           gen6_blt_translate_value_mask(value_mask) |
           dst->pitch;
   dw[2] = height << 16 | width;

   if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
      dw[5] = pattern;

      /* 64-bit destination address in DW3-DW4 */
      ilo_builder_batch_reloc64(builder, pos + 3,
            dst->bo, dst->offset, INTEL_RELOC_WRITE);
   } else {
      dw[4] = pattern;

      /* 32-bit destination address in DW3 */
      ilo_builder_batch_reloc(builder, pos + 3,
            dst->bo, dst->offset, INTEL_RELOC_WRITE);
   }
}
 
/*
 * Emit XY_COLOR_BLT to fill a rectangle, given in pixel coordinates, of a
 * possibly tiled destination BO with a solid pattern.
 *
 * Unlike COLOR_BLT, \p width here is in pixels, not bytes.
 */
static inline void
gen6_XY_COLOR_BLT(struct ilo_builder *builder,
                  const struct gen6_blt_xy_bo *dst, uint32_t pattern,
                  uint16_t width, uint16_t height, uint8_t rop,
                  enum gen6_blt_mask value_mask,
                  enum gen6_blt_mask write_mask)
{
   /* Gen8 takes a 64-bit destination address, hence one extra dword */
   const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 7 : 6;
   const int cpp = gen6_blt_translate_value_cpp(value_mask);
   /* defaults for linear destinations; tightened below when tiled */
   int dst_align = 4, dst_pitch_shift = 0;
   uint32_t *dw;
   unsigned pos;

   ILO_DEV_ASSERT(builder->dev, 6, 8);

   /* width is in pixels; convert to bytes for the scanline limit */
   assert(width * cpp < gen6_blt_max_bytes_per_scanline);
   assert(height < gen6_blt_max_scanlines);
   /* INT16_MAX */
   assert(dst->x + width <= 32767 && dst->y + height <= 32767);

   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN6_BLITTER_CMD(XY_COLOR_BLT) |
           gen6_blt_translate_write_mask(write_mask) |
           (cmd_len - 2);

   if (dst->tiling != GEN6_TILING_NONE) {
      dw[0] |= GEN6_BLITTER_BR00_DST_TILED;

      assert(dst->tiling == GEN6_TILING_X || dst->tiling == GEN6_TILING_Y);
      /* stricter alignment when tiled: 128 for Y tiling, 512 for X */
      dst_align = (dst->tiling == GEN6_TILING_Y) ? 128 : 512;
      /* in dwords when tiled */
      dst_pitch_shift = 2;
   }

   assert(dst->offset % dst_align == 0 && dst->pitch % dst_align == 0);

   dw[1] = rop << GEN6_BLITTER_BR13_ROP__SHIFT |
           gen6_blt_translate_value_mask(value_mask) |
           dst->pitch >> dst_pitch_shift;
   dw[2] = dst->y << 16 | dst->x;
   dw[3] = (dst->y + height) << 16 | (dst->x + width);

   if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
      /* dw[4..5] receive the 64-bit destination address via relocation */
      dw[6] = pattern;

      ilo_builder_batch_reloc64(builder, pos + 4,
            dst->bo, dst->offset, INTEL_RELOC_WRITE);
   } else {
      /* dw[4] receives the 32-bit destination address via relocation */
      dw[5] = pattern;

      ilo_builder_batch_reloc(builder, pos + 4,
            dst->bo, dst->offset, INTEL_RELOC_WRITE);
   }
}
 
/*
 * Emit SRC_COPY_BLT to copy a rectangle between two linearly-addressed BOs.
 *
 * \param dst        destination BO; pitch is in bytes
 * \param src        source BO; pitch is in bytes
 * \param width      scanline length in bytes (per the assert below)
 * \param height     number of scanlines
 * \param rop        raster operation code for BR13
 * \param value_mask selects the color depth
 * \param write_mask per-channel write enables (32bpp destinations)
 */
static inline void
gen6_SRC_COPY_BLT(struct ilo_builder *builder,
                  const struct gen6_blt_bo *dst,
                  const struct gen6_blt_bo *src,
                  uint16_t width, uint16_t height, uint8_t rop,
                  enum gen6_blt_mask value_mask,
                  enum gen6_blt_mask write_mask)
{
   /* Gen8 takes 64-bit addresses for both BOs, hence two extra dwords */
   const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 8 : 6;
   const int cpp = gen6_blt_translate_value_cpp(value_mask);
   uint32_t *dw;
   unsigned pos;

   ILO_DEV_ASSERT(builder->dev, 6, 8);

   assert(width < gen6_blt_max_bytes_per_scanline);
   assert(height < gen6_blt_max_scanlines);
   /* offsets are naturally aligned and pitches are dword-aligned */
   assert(dst->offset % cpp == 0 && dst->pitch % 4 == 0);
   assert(src->offset % cpp == 0 && src->pitch % 4 == 0);

   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN6_BLITTER_CMD(SRC_COPY_BLT) |
           gen6_blt_translate_write_mask(write_mask) |
           (cmd_len - 2);
   dw[1] = rop << GEN6_BLITTER_BR13_ROP__SHIFT |
           gen6_blt_translate_value_mask(value_mask) |
           dst->pitch;
   dw[2] = height << 16 | width;

   if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
      /* dw[3..4] dst address, dw[6..7] src address, both relocated */
      dw[5] = src->pitch;

      ilo_builder_batch_reloc64(builder, pos + 3,
            dst->bo, dst->offset, INTEL_RELOC_WRITE);
      ilo_builder_batch_reloc64(builder, pos + 6, src->bo, src->offset, 0);
   } else {
      /* dw[3] dst address, dw[5] src address, both relocated */
      dw[4] = src->pitch;

      ilo_builder_batch_reloc(builder, pos + 3,
            dst->bo, dst->offset, INTEL_RELOC_WRITE);
      ilo_builder_batch_reloc(builder, pos + 5, src->bo, src->offset, 0);
   }
}
 
/*
 * Emit XY_SRC_COPY_BLT to copy a rectangle, given in pixel coordinates,
 * between two possibly tiled BOs.
 *
 * NOTE(review): only the destination coordinates are checked against
 * INT16_MAX; src->x/src->y are presumably validated by callers -- confirm.
 */
static inline void
gen6_XY_SRC_COPY_BLT(struct ilo_builder *builder,
                     const struct gen6_blt_xy_bo *dst,
                     const struct gen6_blt_xy_bo *src,
                     uint16_t width, uint16_t height, uint8_t rop,
                     enum gen6_blt_mask value_mask,
                     enum gen6_blt_mask write_mask)
{
   /* Gen8 takes 64-bit addresses for both BOs, hence two extra dwords */
   const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 10 : 8;
   const int cpp = gen6_blt_translate_value_cpp(value_mask);
   /* defaults for linear surfaces; tightened below when tiled */
   int dst_align = 4, dst_pitch_shift = 0;
   int src_align = 4, src_pitch_shift = 0;
   uint32_t *dw;
   unsigned pos;

   ILO_DEV_ASSERT(builder->dev, 6, 8);

   /* width is in pixels; convert to bytes for the scanline limit */
   assert(width * cpp < gen6_blt_max_bytes_per_scanline);
   assert(height < gen6_blt_max_scanlines);
   /* INT16_MAX */
   assert(dst->x + width <= 32767 && dst->y + height <= 32767);

   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN6_BLITTER_CMD(XY_SRC_COPY_BLT) |
           gen6_blt_translate_write_mask(write_mask) |
           (cmd_len - 2);

   if (dst->tiling != GEN6_TILING_NONE) {
      dw[0] |= GEN6_BLITTER_BR00_DST_TILED;

      assert(dst->tiling == GEN6_TILING_X || dst->tiling == GEN6_TILING_Y);
      /* stricter alignment when tiled: 128 for Y tiling, 512 for X */
      dst_align = (dst->tiling == GEN6_TILING_Y) ? 128 : 512;
      /* in dwords when tiled */
      dst_pitch_shift = 2;
   }

   if (src->tiling != GEN6_TILING_NONE) {
      dw[0] |= GEN6_BLITTER_BR00_SRC_TILED;

      assert(src->tiling == GEN6_TILING_X || src->tiling == GEN6_TILING_Y);
      src_align = (src->tiling == GEN6_TILING_Y) ? 128 : 512;
      /* in dwords when tiled */
      src_pitch_shift = 2;
   }

   assert(dst->offset % dst_align == 0 && dst->pitch % dst_align == 0);
   assert(src->offset % src_align == 0 && src->pitch % src_align == 0);

   dw[1] = rop << GEN6_BLITTER_BR13_ROP__SHIFT |
           gen6_blt_translate_value_mask(value_mask) |
           dst->pitch >> dst_pitch_shift;
   dw[2] = dst->y << 16 | dst->x;
   dw[3] = (dst->y + height) << 16 | (dst->x + width);

   if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
      /* dw[4..5] dst address, dw[8..9] src address, both relocated */
      dw[6] = src->y << 16 | src->x;
      dw[7] = src->pitch >> src_pitch_shift;

      ilo_builder_batch_reloc64(builder, pos + 4,
            dst->bo, dst->offset, INTEL_RELOC_WRITE);
      ilo_builder_batch_reloc64(builder, pos + 8, src->bo, src->offset, 0);
   } else {
      /* dw[4] dst address, dw[7] src address, both relocated */
      dw[5] = src->y << 16 | src->x;
      dw[6] = src->pitch >> src_pitch_shift;

      ilo_builder_batch_reloc(builder, pos + 4,
            dst->bo, dst->offset, INTEL_RELOC_WRITE);
      ilo_builder_batch_reloc(builder, pos + 7, src->bo, src->offset, 0);
   }
}
 
#endif /* ILO_BUILDER_BLT_H */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_builder_decode.c
0,0 → 1,685
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2014 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include <stdio.h>
#include <stdarg.h>
#include "genhw/genhw.h"
#include "../shader/toy_compiler.h"
 
#include "intel_winsys.h"
#include "ilo_builder.h"
 
/* return a dword pointer into the mapped buffer of a writer at \p offset */
static const uint32_t *
writer_pointer(const struct ilo_builder *builder,
               enum ilo_builder_writer_type which,
               unsigned offset)
{
   const char *base = (const char *) builder->writers[which].ptr;

   return (const uint32_t *) (base + offset);
}
 
/*
 * Print one dword of a writer buffer, as "<offset>: <value>: <desc>: ",
 * and return the dword.  The description is truncated to fit a small
 * buffer; a trailing newline in the description is moved after the colon.
 */
static uint32_t _util_printf_format(5, 6)
writer_dw(const struct ilo_builder *builder,
          enum ilo_builder_writer_type which,
          unsigned offset, unsigned dw_index,
          const char *format, ...)
{
   const uint32_t *dw = writer_pointer(builder, which, offset);
   va_list ap;
   char desc[16];
   int len;

   ilo_printf("0x%08x: 0x%08x: ",
         offset + (dw_index << 2), dw[dw_index]);

   va_start(ap, format);
   len = vsnprintf(desc, sizeof(desc), format, ap);
   va_end(ap);

   /*
    * vsnprintf() returns a negative value on error and a value >= the
    * buffer size on truncation; clamp both so the indexing below stays in
    * bounds (the original code indexed desc[len - 1] without checking)
    */
   if (len < 0)
      len = 0;
   else if (len >= (int) sizeof(desc))
      len = sizeof(desc) - 1;
   desc[len] = '\0';

   if (len && desc[len - 1] == '\n') {
      /* keep the newline after the trailing ": " separator */
      desc[len - 1] = '\0';
      ilo_printf("%8s: \n", desc);
   } else {
      ilo_printf("%8s: ", desc);
   }

   return dw[dw_index];
}
 
/*
 * Decode an opaque blob four dwords at a time, printing each dword both as
 * a float and as hex; lanes past the end of the blob are shown as 'X'.
 */
static void
writer_decode_blob(const struct ilo_builder *builder,
                   enum ilo_builder_writer_type which,
                   const struct ilo_builder_item *item)
{
   const unsigned state_size = sizeof(uint32_t);
   const unsigned count = item->size / state_size;
   unsigned offset = item->offset;
   unsigned i;

   for (i = 0; i < count; i += 4) {
      const uint32_t *dw = writer_pointer(builder, which, offset);

      writer_dw(builder, which, offset, 0, "BLOB%d", i / 4);

      /* print up to four dwords; pad a partial final row with 'X' */
      switch (count - i) {
      case 1:
         ilo_printf("(%10.4f, %10c, %10c, %10c) "
                    "(0x%08x, %10c, %10c, %10c)\n",
                    uif(dw[0]), 'X', 'X', 'X',
                    dw[0], 'X', 'X', 'X');
         break;
      case 2:
         ilo_printf("(%10.4f, %10.4f, %10c, %10c) "
                    "(0x%08x, 0x%08x, %10c, %10c)\n",
                    uif(dw[0]), uif(dw[1]), 'X', 'X',
                    dw[0], dw[1], 'X', 'X');
         break;
      case 3:
         ilo_printf("(%10.4f, %10.4f, %10.4f, %10c) "
                    "(0x%08x, 0x%08x, 0x%08x, %10c)\n",
                    uif(dw[0]), uif(dw[1]), uif(dw[2]), 'X',
                    dw[0], dw[1], dw[2], 'X');
         break;
      default:
         ilo_printf("(%10.4f, %10.4f, %10.4f, %10.4f) "
                    "(0x%08x, 0x%08x, 0x%08x, 0x%08x)\n",
                    uif(dw[0]), uif(dw[1]), uif(dw[2]), uif(dw[3]),
                    dw[0], dw[1], dw[2], dw[3]);
         break;
      }

      offset += state_size * 4;
   }
}
 
/* decode CLIP_VIEWPORT states: four floats (xmin, xmax, ymin, ymax) each */
static void
writer_decode_clip_viewport(const struct ilo_builder *builder,
                            enum ilo_builder_writer_type which,
                            const struct ilo_builder_item *item)
{
   static const char *const field_names[4] = {
      "xmin", "xmax", "ymin", "ymax",
   };
   const unsigned vp_size = sizeof(uint32_t) * 4;
   const unsigned vp_count = item->size / vp_size;
   unsigned vp_offset = item->offset;
   unsigned vp, f;

   for (vp = 0; vp < vp_count; vp++) {
      for (f = 0; f < 4; f++) {
         const uint32_t dw =
            writer_dw(builder, which, vp_offset, f, "CLIP VP%d", vp);

         ilo_printf("%s = %f\n", field_names[f], uif(dw));
      }

      vp_offset += vp_size;
   }
}
 
/*
 * Decode SF_CLIP_VIEWPORT states (16 dwords each).  Dwords 6-7 are not
 * decoded; dwords 12-15 are decoded only on Gen8+.
 */
static void
writer_decode_sf_clip_viewport_gen7(const struct ilo_builder *builder,
                                    enum ilo_builder_writer_type which,
                                    const struct ilo_builder_item *item)
{
   static const struct {
      unsigned dw_index;
      const char *name;
   } common_fields[] = {
      {  0, "m00" },
      {  1, "m11" },
      {  2, "m22" },
      {  3, "m30" },
      {  4, "m31" },
      {  5, "m32" },
      {  8, "guardband xmin" },
      {  9, "guardband xmax" },
      { 10, "guardband ymin" },
      { 11, "guardband ymax" },
   }, gen8_fields[] = {
      { 12, "extent xmin" },
      { 13, "extent xmax" },
      { 14, "extent ymin" },
      { 15, "extent ymax" },
   };
   const unsigned vp_size = sizeof(uint32_t) * 16;
   const unsigned vp_count = item->size / vp_size;
   unsigned vp_offset = item->offset;
   unsigned vp, f;

   for (vp = 0; vp < vp_count; vp++) {
      uint32_t dw;

      for (f = 0; f < sizeof(common_fields) / sizeof(common_fields[0]); f++) {
         dw = writer_dw(builder, which, vp_offset,
               common_fields[f].dw_index, "SF_CLIP VP%d", vp);
         ilo_printf("%s = %f\n", common_fields[f].name, uif(dw));
      }

      if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
         for (f = 0; f < sizeof(gen8_fields) / sizeof(gen8_fields[0]); f++) {
            dw = writer_dw(builder, which, vp_offset,
                  gen8_fields[f].dw_index, "SF_CLIP VP%d", vp);
            ilo_printf("%s = %f\n", gen8_fields[f].name, uif(dw));
         }
      }

      vp_offset += vp_size;
   }
}
 
/*
 * Decode Gen6 SF_VIEWPORT states (8 dwords each); only the six
 * viewport-transform elements in dwords 0-5 are printed.
 */
static void
writer_decode_sf_viewport_gen6(const struct ilo_builder *builder,
                               enum ilo_builder_writer_type which,
                               const struct ilo_builder_item *item)
{
   static const char *const matrix_names[6] = {
      "m00", "m11", "m22", "m30", "m31", "m32",
   };
   const unsigned vp_size = sizeof(uint32_t) * 8;
   const unsigned vp_count = item->size / vp_size;
   unsigned vp_offset = item->offset;
   unsigned vp, f;

   for (vp = 0; vp < vp_count; vp++) {
      for (f = 0; f < 6; f++) {
         const uint32_t dw =
            writer_dw(builder, which, vp_offset, f, "SF VP%d", vp);

         ilo_printf("%s = %f\n", matrix_names[f], uif(dw));
      }

      vp_offset += vp_size;
   }
}
 
/* Gen7 merged the SF and CLIP viewports into SF_CLIP_VIEWPORT */
static void
writer_decode_sf_viewport(const struct ilo_builder *builder,
                          enum ilo_builder_writer_type which,
                          const struct ilo_builder_item *item)
{
   if (ilo_dev_gen(builder->dev) < ILO_GEN(7))
      writer_decode_sf_viewport_gen6(builder, which, item);
   else
      writer_decode_sf_clip_viewport_gen7(builder, which, item);
}
 
/* decode SCISSOR_RECT states: two dwords holding min x/y and max x/y */
static void
writer_decode_scissor_rect(const struct ilo_builder *builder,
                           enum ilo_builder_writer_type which,
                           const struct ilo_builder_item *item)
{
   const unsigned rect_size = sizeof(uint32_t) * 2;
   const unsigned rect_count = item->size / rect_size;
   unsigned cur = item->offset;
   unsigned rect;

   for (rect = 0; rect < rect_count; rect++) {
      uint32_t dw;

      dw = writer_dw(builder, which, cur, 0, "SCISSOR%d", rect);
      ilo_printf("xmin %d, ymin %d\n",
            GEN_EXTRACT(dw, GEN6_SCISSOR_DW0_MIN_X),
            GEN_EXTRACT(dw, GEN6_SCISSOR_DW0_MIN_Y));

      dw = writer_dw(builder, which, cur, 1, "SCISSOR%d", rect);
      ilo_printf("xmax %d, ymax %d\n",
            GEN_EXTRACT(dw, GEN6_SCISSOR_DW1_MAX_X),
            GEN_EXTRACT(dw, GEN6_SCISSOR_DW1_MAX_Y));

      cur += rect_size;
   }
}
 
/* decode CC_VIEWPORT states: two floats, the min and max depth bounds */
static void
writer_decode_cc_viewport(const struct ilo_builder *builder,
                          enum ilo_builder_writer_type which,
                          const struct ilo_builder_item *item)
{
   static const char *const bound_names[2] = {
      "min_depth", "max_depth",
   };
   const unsigned vp_size = sizeof(uint32_t) * 2;
   const unsigned vp_count = item->size / vp_size;
   unsigned cur = item->offset;
   unsigned vp, f;

   for (vp = 0; vp < vp_count; vp++) {
      for (f = 0; f < 2; f++) {
         const uint32_t dw = writer_dw(builder, which, cur, f, "CC VP%d", vp);

         ilo_printf("%s = %f\n", bound_names[f], uif(dw));
      }

      cur += vp_size;
   }
}
 
/*
 * Decode COLOR_CALC_STATE: alpha test format, stencil references, and the
 * four blend-constant channels.
 */
static void
writer_decode_color_calc(const struct ilo_builder *builder,
                         enum ilo_builder_writer_type which,
                         const struct ilo_builder_item *item)
{
   static const char *const channels[4] = {
      "red", "green", "blue", "alpha",
   };
   uint32_t dw;
   unsigned i;

   dw = writer_dw(builder, which, item->offset, 0, "CC");
   ilo_printf("alpha test format %s, round disable %d, "
              "stencil ref %d, bf stencil ref %d\n",
              GEN_EXTRACT(dw, GEN6_CC_DW0_ALPHATEST) ? "FLOAT32" : "UNORM8",
              (bool) (dw & GEN6_CC_DW0_ROUND_DISABLE_DISABLE),
              GEN_EXTRACT(dw, GEN6_CC_DW0_STENCIL0_REF),
              GEN_EXTRACT(dw, GEN6_CC_DW0_STENCIL1_REF));

   writer_dw(builder, which, item->offset, 1, "CC\n");

   /* dwords 2-5 hold the blend constant color, one float per channel */
   for (i = 0; i < 4; i++) {
      dw = writer_dw(builder, which, item->offset, 2 + i, "CC");
      ilo_printf("constant %s %f\n", channels[i], uif(dw));
   }
}
 
/*
 * Decode DEPTH_STENCIL_STATE: stencil test enable/func/write enable,
 * stencil value/write masks, and depth test enable/func/write enable.
 */
static void
writer_decode_depth_stencil(const struct ilo_builder *builder,
                            enum ilo_builder_writer_type which,
                            const struct ilo_builder_item *item)
{
   uint32_t dw;

   dw = writer_dw(builder, which, item->offset, 0, "D_S");
   ilo_printf("stencil %sable, func %d, write %sable\n",
         (dw & GEN6_ZS_DW0_STENCIL_TEST_ENABLE) ? "en" : "dis",
         GEN_EXTRACT(dw, GEN6_ZS_DW0_STENCIL0_FUNC),
         (dw & GEN6_ZS_DW0_STENCIL_WRITE_ENABLE) ? "en" : "dis");

   dw = writer_dw(builder, which, item->offset, 1, "D_S");
   ilo_printf("stencil test mask 0x%x, write mask 0x%x\n",
         GEN_EXTRACT(dw, GEN6_ZS_DW1_STENCIL0_VALUEMASK),
         GEN_EXTRACT(dw, GEN6_ZS_DW1_STENCIL0_WRITEMASK));

   dw = writer_dw(builder, which, item->offset, 2, "D_S");
   ilo_printf("depth test %sable, func %d, write %sable\n",
         (dw & GEN6_ZS_DW2_DEPTH_TEST_ENABLE) ? "en" : "dis",
         GEN_EXTRACT(dw, GEN6_ZS_DW2_DEPTH_FUNC),
         (dw & GEN6_ZS_DW2_DEPTH_WRITE_ENABLE) ? "en" : "dis");
}
 
/*
 * Decode BLEND_STATE: two dwords per render target, preceded on Gen8+ by a
 * single dword of global blend state.
 */
static void
writer_decode_blend(const struct ilo_builder *builder,
                    enum ilo_builder_writer_type which,
                    const struct ilo_builder_item *item)
{
   const unsigned rt_size = sizeof(uint32_t) * 2;
   const unsigned rt_count = item->size / rt_size;
   unsigned cur = item->offset;
   unsigned rt, f;

   if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
      writer_dw(builder, which, cur, 0, "BLEND\n");
      cur += 4;
   }

   for (rt = 0; rt < rt_count; rt++) {
      for (f = 0; f < 2; f++)
         writer_dw(builder, which, cur, f, "BLEND%d\n", rt);

      cur += rt_size;
   }
}
 
/* decode SAMPLER_STATE entries (four dwords each) with brief labels */
static void
writer_decode_sampler(const struct ilo_builder *builder,
                      enum ilo_builder_writer_type which,
                      const struct ilo_builder_item *item)
{
   static const char *const dw_labels[4] = {
      "filtering",
      "wrapping, lod",
      "default color pointer",
      "chroma key, aniso",
   };
   const unsigned samp_size = sizeof(uint32_t) * 4;
   const unsigned samp_count = item->size / samp_size;
   unsigned cur = item->offset;
   unsigned samp, f;

   for (samp = 0; samp < samp_count; samp++) {
      for (f = 0; f < 4; f++) {
         writer_dw(builder, which, cur, f, "WM SAMP%d", samp);
         ilo_printf("%s\n", dw_labels[f]);
      }

      cur += samp_size;
   }
}
 
/* decode INTERFACE_DESCRIPTOR_DATA entries (eight dwords each) */
static void
writer_decode_interface_descriptor(const struct ilo_builder *builder,
                                   enum ilo_builder_writer_type which,
                                   const struct ilo_builder_item *item)
{
   static const char *const dw_labels[8] = {
      "kernel",
      "spf, fp mode",
      "sampler",
      "binding table",
      "curbe read len",
      "rounding mode, slm size",
      "cross-thread curbe read len",
      "mbz",
   };
   const unsigned desc_size = sizeof(uint32_t) * 8;
   const unsigned desc_count = item->size / desc_size;
   unsigned cur = item->offset;
   unsigned desc, f;

   for (desc = 0; desc < desc_count; desc++) {
      for (f = 0; f < 8; f++) {
         writer_dw(builder, which, cur, f, "IDRT[%d]", desc);
         ilo_printf("%s\n", dw_labels[f]);
      }

      cur += desc_size;
   }
}
 
/*
 * Decode a Gen7+ SURFACE_STATE.  Gen8 moved the tiling bits and replaced
 * the dword-1 surface base with the qpitch, and grew the state from 8 to
 * 13 dwords.
 */
static void
writer_decode_surface_gen7(const struct ilo_builder *builder,
                           enum ilo_builder_writer_type which,
                           const struct ilo_builder_item *item)
{
   uint32_t dw;

   if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
      dw = writer_dw(builder, which, item->offset, 0, "SURF");
      ilo_printf("type 0x%x, format 0x%x, tiling %d, %s array\n",
            GEN_EXTRACT(dw, GEN7_SURFACE_DW0_TYPE),
            GEN_EXTRACT(dw, GEN7_SURFACE_DW0_FORMAT),
            GEN_EXTRACT(dw, GEN8_SURFACE_DW0_TILING),
            (dw & GEN7_SURFACE_DW0_IS_ARRAY) ? "is" : "not");

      writer_dw(builder, which, item->offset, 1, "SURF");
      ilo_printf("qpitch\n");
   } else {
      dw = writer_dw(builder, which, item->offset, 0, "SURF");
      ilo_printf("type 0x%x, format 0x%x, tiling %d, %s array\n",
            GEN_EXTRACT(dw, GEN7_SURFACE_DW0_TYPE),
            GEN_EXTRACT(dw, GEN7_SURFACE_DW0_FORMAT),
            GEN_EXTRACT(dw, GEN7_SURFACE_DW0_TILING),
            (dw & GEN7_SURFACE_DW0_IS_ARRAY) ? "is" : "not");

      writer_dw(builder, which, item->offset, 1, "SURF");
      ilo_printf("offset\n");
   }

   dw = writer_dw(builder, which, item->offset, 2, "SURF");
   ilo_printf("%dx%d size\n",
         GEN_EXTRACT(dw, GEN7_SURFACE_DW2_WIDTH),
         GEN_EXTRACT(dw, GEN7_SURFACE_DW2_HEIGHT));

   dw = writer_dw(builder, which, item->offset, 3, "SURF");
   ilo_printf("depth %d, pitch %d\n",
         GEN_EXTRACT(dw, GEN7_SURFACE_DW3_DEPTH),
         GEN_EXTRACT(dw, GEN7_SURFACE_DW3_PITCH));

   dw = writer_dw(builder, which, item->offset, 4, "SURF");
   ilo_printf("min array element %d, array extent %d\n",
         GEN_EXTRACT(dw, GEN7_SURFACE_DW4_MIN_ARRAY_ELEMENT),
         GEN_EXTRACT(dw, GEN7_SURFACE_DW4_RT_VIEW_EXTENT));

   dw = writer_dw(builder, which, item->offset, 5, "SURF");
   ilo_printf("mip base %d, mips %d, x,y offset: %d,%d\n",
         GEN_EXTRACT(dw, GEN7_SURFACE_DW5_MIN_LOD),
         GEN_EXTRACT(dw, GEN7_SURFACE_DW5_MIP_COUNT_LOD),
         GEN_EXTRACT(dw, GEN7_SURFACE_DW5_X_OFFSET),
         GEN_EXTRACT(dw, GEN7_SURFACE_DW5_Y_OFFSET));

   /* dwords 6-7 (and 8-12 on Gen8+) are dumped without field decoding */
   writer_dw(builder, which, item->offset, 6, "SURF\n");
   writer_dw(builder, which, item->offset, 7, "SURF\n");

   if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
      writer_dw(builder, which, item->offset, 8, "SURF\n");
      writer_dw(builder, which, item->offset, 9, "SURF\n");
      writer_dw(builder, which, item->offset, 10, "SURF\n");
      writer_dw(builder, which, item->offset, 11, "SURF\n");
      writer_dw(builder, which, item->offset, 12, "SURF\n");
   }
}
 
/* decode a Gen6 SURFACE_STATE (six dwords) */
static void
writer_decode_surface_gen6(const struct ilo_builder *builder,
                           enum ilo_builder_writer_type which,
                           const struct ilo_builder_item *item)
{
   uint32_t dw;

   dw = writer_dw(builder, which, item->offset, 0, "SURF");
   ilo_printf("type 0x%x, format 0x%x\n",
         GEN_EXTRACT(dw, GEN6_SURFACE_DW0_TYPE),
         GEN_EXTRACT(dw, GEN6_SURFACE_DW0_FORMAT));

   /* dword 1 holds the surface base address (relocated; not decoded) */
   writer_dw(builder, which, item->offset, 1, "SURF");
   ilo_printf("offset\n");

   dw = writer_dw(builder, which, item->offset, 2, "SURF");
   ilo_printf("%dx%d size, %d mips\n",
         GEN_EXTRACT(dw, GEN6_SURFACE_DW2_WIDTH),
         GEN_EXTRACT(dw, GEN6_SURFACE_DW2_HEIGHT),
         GEN_EXTRACT(dw, GEN6_SURFACE_DW2_MIP_COUNT_LOD));

   dw = writer_dw(builder, which, item->offset, 3, "SURF");
   ilo_printf("pitch %d, tiling %d\n",
         GEN_EXTRACT(dw, GEN6_SURFACE_DW3_PITCH),
         GEN_EXTRACT(dw, GEN6_SURFACE_DW3_TILING));

   dw = writer_dw(builder, which, item->offset, 4, "SURF");
   ilo_printf("mip base %d\n",
         GEN_EXTRACT(dw, GEN6_SURFACE_DW4_MIN_LOD));

   dw = writer_dw(builder, which, item->offset, 5, "SURF");
   ilo_printf("x,y offset: %d,%d\n",
         GEN_EXTRACT(dw, GEN6_SURFACE_DW5_X_OFFSET),
         GEN_EXTRACT(dw, GEN6_SURFACE_DW5_Y_OFFSET));
}
 
/* dispatch SURFACE_STATE decoding to the generation-specific decoder */
static void
writer_decode_surface(const struct ilo_builder *builder,
                      enum ilo_builder_writer_type which,
                      const struct ilo_builder_item *item)
{
   if (ilo_dev_gen(builder->dev) < ILO_GEN(7))
      writer_decode_surface_gen6(builder, which, item);
   else
      writer_decode_surface_gen7(builder, which, item);
}
 
/* decode a binding table: each entry is one dword */
static void
writer_decode_binding_table(const struct ilo_builder *builder,
                            enum ilo_builder_writer_type which,
                            const struct ilo_builder_item *item)
{
   const unsigned entry_size = sizeof(uint32_t);
   const unsigned entry_count = item->size / entry_size;
   unsigned cur = item->offset;
   unsigned entry;

   for (entry = 0; entry < entry_count; entry++) {
      writer_dw(builder, which, cur, 0, "BIND");
      ilo_printf("BINDING_TABLE_STATE[%d]\n", entry);

      cur += entry_size;
   }
}
 
/* disassemble a kernel stored in the writer buffer */
static void
writer_decode_kernel(const struct ilo_builder *builder,
                     enum ilo_builder_writer_type which,
                     const struct ilo_builder_item *item)
{
   const void *code = (const void *)
      writer_pointer(builder, which, item->offset);

   ilo_printf("0x%08x:\n", item->offset);
   toy_compiler_disassemble(builder->dev, code, item->size, true);
}
 
/*
 * Dispatch table mapping item types to their decoders.  Indexed directly
 * by item->type in ilo_builder_writer_decode_items(), so every recordable
 * ILO_BUILDER_ITEM_* must have an entry.
 */
static const struct {
   void (*func)(const struct ilo_builder *builder,
                enum ilo_builder_writer_type which,
                const struct ilo_builder_item *item);
} writer_decode_table[ILO_BUILDER_ITEM_COUNT] = {
   [ILO_BUILDER_ITEM_BLOB]                = { writer_decode_blob },
   [ILO_BUILDER_ITEM_CLIP_VIEWPORT]       = { writer_decode_clip_viewport },
   [ILO_BUILDER_ITEM_SF_VIEWPORT]         = { writer_decode_sf_viewport },
   [ILO_BUILDER_ITEM_SCISSOR_RECT]        = { writer_decode_scissor_rect },
   [ILO_BUILDER_ITEM_CC_VIEWPORT]         = { writer_decode_cc_viewport },
   [ILO_BUILDER_ITEM_COLOR_CALC]          = { writer_decode_color_calc },
   [ILO_BUILDER_ITEM_DEPTH_STENCIL]       = { writer_decode_depth_stencil },
   [ILO_BUILDER_ITEM_BLEND]               = { writer_decode_blend },
   [ILO_BUILDER_ITEM_SAMPLER]             = { writer_decode_sampler },
   [ILO_BUILDER_ITEM_INTERFACE_DESCRIPTOR] = { writer_decode_interface_descriptor },
   [ILO_BUILDER_ITEM_SURFACE]             = { writer_decode_surface },
   [ILO_BUILDER_ITEM_BINDING_TABLE]       = { writer_decode_binding_table },
   [ILO_BUILDER_ITEM_KERNEL]              = { writer_decode_kernel },
};
 
/*
 * Map the writer's BO and decode each recorded item with its type-specific
 * decoder.  Returns silently when there is nothing to decode or when the
 * mapping fails.
 */
static void
ilo_builder_writer_decode_items(struct ilo_builder *builder,
                                enum ilo_builder_writer_type which)
{
   struct ilo_builder_writer *writer = &builder->writers[which];
   int i;

   if (!writer->item_used)
      return;

   /* NOTE(review): the second argument presumably requests a non-writable
    * mapping -- confirm against the intel_bo_map() contract */
   writer->ptr = intel_bo_map(writer->bo, false);
   if (!writer->ptr)
      return;

   for (i = 0; i < writer->item_used; i++) {
      const struct ilo_builder_item *item = &writer->items[i];

      /* item->type indexes writer_decode_table directly */
      writer_decode_table[item->type].func(builder, which, item);
   }

   intel_bo_unmap(writer->bo);
   writer->ptr = NULL;
}
 
/*
 * Decode one writer of the builder.  The batch writer is decoded fully
 * (commands via the winsys decoder, then the recorded state items); the
 * instruction writer is currently always skipped (see the constant `true`
 * below, which deliberately disables kernel disassembly).
 */
static void
ilo_builder_writer_decode(struct ilo_builder *builder,
                          enum ilo_builder_writer_type which)
{
   struct ilo_builder_writer *writer = &builder->writers[which];

   /* the BO must exist and must not be mapped at this point */
   assert(writer->bo && !writer->ptr);

   switch (which) {
   case ILO_BUILDER_WRITER_BATCH:
      ilo_printf("decoding batch buffer: %d bytes\n", writer->used);
      if (writer->used)
         intel_winsys_decode_bo(builder->winsys, writer->bo, writer->used);

      ilo_printf("decoding dynamic/surface buffer: %d states\n",
            writer->item_used);
      ilo_builder_writer_decode_items(builder, which);
      break;
   case ILO_BUILDER_WRITER_INSTRUCTION:
      /* intentionally disabled; flip to decode kernels */
      if (true) {
         ilo_printf("skipping instruction buffer: %d kernels\n",
               writer->item_used);
      } else {
         ilo_printf("decoding instruction buffer: %d kernels\n",
               writer->item_used);

         ilo_builder_writer_decode_items(builder, which);
      }
      break;
   default:
      break;
   }
}
 
/**
 * Decode the builder according to the recorded items.  This can be called
 * only after a successful ilo_builder_end().
 */
void
ilo_builder_decode(struct ilo_builder *builder)
{
   int which;

   assert(!builder->unrecoverable_error);

   /* decode every writer in turn */
   for (which = 0; which < ILO_BUILDER_WRITER_COUNT; which++)
      ilo_builder_writer_decode(builder, which);
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_builder_media.h
0,0 → 1,277
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2014 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_BUILDER_MEDIA_H
#define ILO_BUILDER_MEDIA_H
 
#include "genhw/genhw.h"
#include "../ilo_shader.h"
#include "intel_winsys.h"
 
#include "ilo_core.h"
#include "ilo_dev.h"
#include "ilo_builder.h"
 
/* per-descriptor input for gen6_INTERFACE_DESCRIPTOR_DATA() */
struct gen6_idrt_data {
   /* the compute shader this descriptor dispatches */
   const struct ilo_shader_state *cs;

   /* offsets of the SAMPLER_STATE and binding table in the dynamic buffer */
   uint32_t sampler_offset;
   uint32_t binding_table_offset;

   /* CURBE size in bytes (divided by 32 for the read length field) */
   unsigned curbe_size;
   /* work items per thread group (used with SLM/barriers) */
   unsigned thread_group_size;
};
 
/*
 * Emit MEDIA_VFE_STATE to configure the media/GPGPU fixed function.
 *
 * \param curbe_alloc CURBE allocation size in bytes; aligned up to 32
 * \param use_slm     whether shared local memory is used, which halves the
 *                    URB space available for IDRT + CURBE (see the assert)
 */
static inline void
gen6_MEDIA_VFE_STATE(struct ilo_builder *builder,
                     unsigned curbe_alloc, bool use_slm)
{
   const uint8_t cmd_len = 8;
   /* Gen7.5 allows 64 interface descriptors, Gen7 only 32; 32 bytes each */
   const unsigned idrt_alloc =
      ((ilo_dev_gen(builder->dev) >= ILO_GEN(7.5)) ? 64 : 32) * 32;
   int max_threads;
   uint32_t *dw;

   /* despite the gen6_ prefix, only Gen7 and Gen7.5 are supported here */
   ILO_DEV_ASSERT(builder->dev, 7, 7.5);

   max_threads = builder->dev->thread_count;

   curbe_alloc = align(curbe_alloc, 32);
   assert(idrt_alloc + curbe_alloc <= builder->dev->urb_size / (use_slm + 1));

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN6_RENDER_CMD(MEDIA, MEDIA_VFE_STATE) | (cmd_len - 2);
   dw[1] = 0; /* scratch */

   dw[2] = (max_threads - 1) << GEN6_VFE_DW2_MAX_THREADS__SHIFT |
           0 << GEN6_VFE_DW2_URB_ENTRY_COUNT__SHIFT |
           GEN6_VFE_DW2_RESET_GATEWAY_TIMER |
           GEN6_VFE_DW2_BYPASS_GATEWAY_CONTROL;
   /* always true given the assert above, kept for explicitness */
   if (ilo_dev_gen(builder->dev) >= ILO_GEN(7))
      dw[2] |= GEN7_VFE_DW2_GPGPU_MODE;

   dw[3] = 0;

   dw[4] = 0 << GEN6_VFE_DW4_URB_ENTRY_SIZE__SHIFT |
           (curbe_alloc / 32);

   dw[5] = 0;
   dw[6] = 0;
   dw[7] = 0;
}
 
/*
 * Emit MEDIA_CURBE_LOAD to load constant data into the CURBE.
 *
 * \param offset dynamic-buffer offset of the data; 32-byte aligned
 * \param size   data size in bytes; a non-zero multiple of 32
 */
static inline void
gen6_MEDIA_CURBE_LOAD(struct ilo_builder *builder,
                      uint32_t offset, unsigned size)
{
   const uint8_t cmd_len = 4;
   uint32_t *dw;

   ILO_DEV_ASSERT(builder->dev, 7, 7.5);

   assert(offset % 32 == 0 && size % 32 == 0);
   /* GPU hangs if size is zero */
   assert(size);

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN6_RENDER_CMD(MEDIA, MEDIA_CURBE_LOAD) | (cmd_len - 2);
   dw[1] = 0;
   dw[2] = size;
   dw[3] = offset;
}
 
/*
 * Emit MEDIA_INTERFACE_DESCRIPTOR_LOAD to point the hardware at the
 * interface descriptors written by gen6_INTERFACE_DESCRIPTOR_DATA().
 *
 * \param offset dynamic-buffer offset of the descriptors; 32-byte aligned
 * \param size   total descriptor size in bytes; a non-zero multiple of 32
 *               that must fit the per-generation IDRT allocation
 */
static inline void
gen6_MEDIA_INTERFACE_DESCRIPTOR_LOAD(struct ilo_builder *builder,
                                     uint32_t offset, unsigned size)
{
   const uint8_t cmd_len = 4;
   /* Gen7.5 allows 64 interface descriptors, Gen7 only 32; 32 bytes each */
   const unsigned idrt_alloc =
      ((ilo_dev_gen(builder->dev) >= ILO_GEN(7.5)) ? 64 : 32) * 32;
   uint32_t *dw;

   ILO_DEV_ASSERT(builder->dev, 7, 7.5);

   assert(offset % 32 == 0 && size % 32 == 0);
   assert(size && size <= idrt_alloc);

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN6_RENDER_CMD(MEDIA, MEDIA_INTERFACE_DESCRIPTOR_LOAD) |
           (cmd_len - 2);
   dw[1] = 0;
   dw[2] = size;
   dw[3] = offset;
}
 
/* emit MEDIA_STATE_FLUSH (a fixed two-dword command with no payload) */
static inline void
gen6_MEDIA_STATE_FLUSH(struct ilo_builder *builder)
{
   const uint8_t cmd_len = 2;
   uint32_t *dw;

   ILO_DEV_ASSERT(builder->dev, 7, 7.5);

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN6_RENDER_CMD(MEDIA, MEDIA_STATE_FLUSH) | (cmd_len - 2);
   dw[1] = 0;
}
 
/*
 * Emit GPGPU_WALKER to dispatch a 3D grid of thread groups using the first
 * interface descriptor.
 *
 * \param thread_group_offset starting thread group ID per dimension
 * \param thread_group_dim    number of thread groups per dimension
 * \param thread_group_size   work items per thread group
 * \param simd_size           kernel dispatch width; must be 8 or 16
 */
static inline void
gen7_GPGPU_WALKER(struct ilo_builder *builder,
                  const unsigned thread_group_offset[3],
                  const unsigned thread_group_dim[3],
                  unsigned thread_group_size,
                  unsigned simd_size)
{
   const uint8_t cmd_len = 11;
   uint32_t right_execmask, bottom_execmask;
   unsigned thread_count;
   uint32_t *dw;

   ILO_DEV_ASSERT(builder->dev, 7, 7.5);

   assert(simd_size == 16 || simd_size == 8);

   /* EU threads needed to cover one thread group, rounded up */
   thread_count = (thread_group_size + simd_size - 1) / simd_size;
   assert(thread_count <= 64);

   /*
    * enable only the channels carrying work items in the last, possibly
    * partial, thread of a group; all channels when it divides evenly
    */
   right_execmask = thread_group_size % simd_size;
   if (right_execmask)
      right_execmask = (1 << right_execmask) - 1;
   else
      right_execmask = (1 << simd_size) - 1;

   bottom_execmask = 0xffffffff;

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN7_RENDER_CMD(MEDIA, GPGPU_WALKER) | (cmd_len - 2);
   dw[1] = 0; /* always first IDRT */

   dw[2] = (thread_count - 1) << GEN7_GPGPU_DW2_THREAD_MAX_X__SHIFT;
   if (simd_size == 16)
      dw[2] |= GEN7_GPGPU_DW2_SIMD_SIZE_SIMD16;
   else
      dw[2] |= GEN7_GPGPU_DW2_SIMD_SIZE_SIMD8;

   /* interleaved (offset, dim) pairs for X, Y, and Z */
   dw[3] = thread_group_offset[0];
   dw[4] = thread_group_dim[0];
   dw[5] = thread_group_offset[1];
   dw[6] = thread_group_dim[1];
   dw[7] = thread_group_offset[2];
   dw[8] = thread_group_dim[2];

   dw[9] = right_execmask;
   dw[10] = bottom_execmask;
}
 
/*
 * Write \p idrt_count INTERFACE_DESCRIPTOR_DATA entries (eight dwords, 32
 * bytes each) into the dynamic buffer and return their offset, for use
 * with gen6_MEDIA_INTERFACE_DESCRIPTOR_LOAD().
 */
static inline uint32_t
gen6_INTERFACE_DESCRIPTOR_DATA(struct ilo_builder *builder,
                               const struct gen6_idrt_data *data,
                               int idrt_count)
{
   /*
    * From the Sandy Bridge PRM, volume 2 part 2, page 34:
    *
    *     "(Interface Descriptor Total Length) This field must have the same
    *      alignment as the Interface Descriptor Data Start Address.
    *
    *      It must be DQWord (32-byte) aligned..."
    *
    * From the Sandy Bridge PRM, volume 2 part 2, page 35:
    *
    *     "(Interface Descriptor Data Start Address) Specifies the 32-byte
    *      aligned address of the Interface Descriptor data."
    */
   const int state_align = 32;
   const int state_len = (32 / 4) * idrt_count;
   uint32_t state_offset, *dw;
   int i;

   ILO_DEV_ASSERT(builder->dev, 7, 7.5);

   state_offset = ilo_builder_dynamic_pointer(builder,
         ILO_BUILDER_ITEM_INTERFACE_DESCRIPTOR, state_align, state_len, &dw);

   for (i = 0; i < idrt_count; i++) {
      const struct gen6_idrt_data *idrt = &data[i];
      const struct ilo_shader_state *cs = idrt->cs;
      unsigned sampler_count, bt_size, slm_size;

      /* the DW2 field counts samplers in multiples of four */
      sampler_count =
         ilo_shader_get_kernel_param(cs, ILO_KERNEL_SAMPLER_COUNT);
      assert(sampler_count <= 16);
      sampler_count = (sampler_count + 3) / 4;

      /* the binding table size field saturates at 31 */
      bt_size =
         ilo_shader_get_kernel_param(cs, ILO_KERNEL_SURFACE_TOTAL_COUNT);
      if (bt_size > 31)
         bt_size = 31;

      slm_size = ilo_shader_get_kernel_param(cs, ILO_KERNEL_CS_LOCAL_SIZE);

      /* CURBE read length is in 32-byte units and must fit in 6 bits */
      assert(idrt->curbe_size / 32 <= 63);

      dw[0] = ilo_shader_get_kernel_offset(idrt->cs);
      dw[1] = 0;
      dw[2] = idrt->sampler_offset |
              sampler_count << GEN6_IDRT_DW2_SAMPLER_COUNT__SHIFT;
      dw[3] = idrt->binding_table_offset |
              bt_size << GEN6_IDRT_DW3_BINDING_TABLE_SIZE__SHIFT;

      dw[4] = (idrt->curbe_size / 32) << GEN6_IDRT_DW4_CURBE_READ_LEN__SHIFT |
              0 << GEN6_IDRT_DW4_CURBE_READ_OFFSET__SHIFT;

      if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) {
         dw[5] = GEN7_IDRT_DW5_ROUNDING_MODE_RTNE;

         if (slm_size) {
            /* SLM is programmed as a power-of-two number of 4KB blocks */
            assert(slm_size <= 64 * 1024);
            slm_size = util_next_power_of_two((slm_size + 4095) / 4096);

            dw[5] |= GEN7_IDRT_DW5_BARRIER_ENABLE |
                     slm_size << GEN7_IDRT_DW5_SLM_SIZE__SHIFT |
                     idrt->thread_group_size <<
                     GEN7_IDRT_DW5_THREAD_GROUP_SIZE__SHIFT;
         }
      } else {
         dw[5] = 0;
      }

      dw[6] = 0;
      dw[7] = 0;

      /* advance to the next eight-dword descriptor */
      dw += 8;
   }

   return state_offset;
}
 
#endif /* ILO_BUILDER_MEDIA_H */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_builder_mi.h
0,0 → 1,220
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2014 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_BUILDER_MI_H
#define ILO_BUILDER_MI_H
 
#include "genhw/genhw.h"
#include "intel_winsys.h"
 
#include "ilo_core.h"
#include "ilo_dev.h"
#include "ilo_builder.h"
 
/**
 * Emit MI_STORE_DATA_IMM to write the 64-bit immediate \p val to
 * \p bo at \p bo_offset.
 *
 * The address DWords (dw[2], and dw[3] on Gen8) are filled in by the
 * relocation call, not written here.
 */
static inline void
gen6_MI_STORE_DATA_IMM(struct ilo_builder *builder,
                       struct intel_bo *bo, uint32_t bo_offset,
                       uint64_t val)
{
   /* Gen8 uses a 64-bit address and is one DWord longer */
   const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 6 : 5;
   uint32_t reloc_flags = INTEL_RELOC_WRITE;
   uint32_t *dw;
   unsigned pos;

   ILO_DEV_ASSERT(builder->dev, 6, 8);

   /* QWord-aligned destination */
   assert(bo_offset % 8 == 0);

   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN6_MI_CMD(MI_STORE_DATA_IMM) | (cmd_len - 2);
   /* must use GGTT on GEN6 as in PIPE_CONTROL */
   if (ilo_dev_gen(builder->dev) == ILO_GEN(6)) {
      dw[0] |= GEN6_MI_STORE_DATA_IMM_DW0_USE_GGTT;
      reloc_flags |= INTEL_RELOC_GGTT;
   }

   dw[1] = 0; /* MBZ */

   if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
      dw[4] = (uint32_t) val;
      dw[5] = (uint32_t) (val >> 32);

      /* 64-bit address at dw[2..3] */
      ilo_builder_batch_reloc64(builder, pos + 2, bo, bo_offset, reloc_flags);
   } else {
      dw[3] = (uint32_t) val;
      dw[4] = (uint32_t) (val >> 32);

      /* 32-bit address at dw[2] */
      ilo_builder_batch_reloc(builder, pos + 2, bo, bo_offset, reloc_flags);
   }
}
 
/**
 * Emit MI_LOAD_REGISTER_IMM to load the immediate \p val into the MMIO
 * register at offset \p reg.
 */
static inline void
gen6_MI_LOAD_REGISTER_IMM(struct ilo_builder *builder,
                          uint32_t reg, uint32_t val)
{
   const uint8_t cmd_len = 3;
   uint32_t *cmd;

   ILO_DEV_ASSERT(builder->dev, 6, 8);

   /* register offsets are DWord-aligned */
   assert(reg % 4 == 0);

   ilo_builder_batch_pointer(builder, cmd_len, &cmd);

   cmd[0] = GEN6_MI_CMD(MI_LOAD_REGISTER_IMM) | (cmd_len - 2);
   cmd[1] = reg;
   cmd[2] = val;
}
 
/**
 * Emit MI_STORE_REGISTER_MEM to store the MMIO register at \p reg into
 * \p bo at \p bo_offset.  The address DWord(s) are filled in by the
 * relocation call.
 */
static inline void
gen6_MI_STORE_REGISTER_MEM(struct ilo_builder *builder, uint32_t reg,
                           struct intel_bo *bo, uint32_t bo_offset)
{
   /* Gen8 uses a 64-bit address and is one DWord longer */
   const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 4 : 3;
   uint32_t reloc_flags = INTEL_RELOC_WRITE;
   uint32_t *dw;
   unsigned pos;

   ILO_DEV_ASSERT(builder->dev, 6, 8);

   assert(reg % 4 == 0 && bo_offset % 4 == 0);

   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN6_MI_CMD(MI_STORE_REGISTER_MEM) | (cmd_len - 2);
   dw[1] = reg;

   if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
      ilo_builder_batch_reloc64(builder, pos + 2, bo, bo_offset, reloc_flags);
   } else {
      /* must use GGTT on Gen6 as in PIPE_CONTROL */
      if (ilo_dev_gen(builder->dev) == ILO_GEN(6)) {
         dw[0] |= GEN6_MI_STORE_REGISTER_MEM_DW0_USE_GGTT;
         reloc_flags |= INTEL_RELOC_GGTT;
      }

      ilo_builder_batch_reloc(builder, pos + 2, bo, bo_offset, reloc_flags);
   }
}
 
/**
 * Emit a minimal MI_FLUSH_DW with no post-sync operation: all payload
 * DWords are zero.
 */
static inline void
gen6_MI_FLUSH_DW(struct ilo_builder *builder)
{
   const uint8_t cmd_len = 4;
   uint32_t *cmd;
   int i;

   ILO_DEV_ASSERT(builder->dev, 6, 8);

   ilo_builder_batch_pointer(builder, cmd_len, &cmd);

   cmd[0] = GEN6_MI_CMD(MI_FLUSH_DW) | (cmd_len - 2);
   for (i = 1; i < cmd_len; i++)
      cmd[i] = 0;
}
 
/**
 * Emit MI_REPORT_PERF_COUNT to write a performance-counter report tagged
 * with \p report_id into \p bo at \p bo_offset.  Gen6 through Gen7.5 only.
 *
 * Note dw[1] (the address) is filled in entirely by the relocation call;
 * the USE_GGTT bit is folded into bo_offset beforehand on Gen6.
 */
static inline void
gen6_MI_REPORT_PERF_COUNT(struct ilo_builder *builder,
                          struct intel_bo *bo, uint32_t bo_offset,
                          uint32_t report_id)
{
   const uint8_t cmd_len = 3;
   uint32_t reloc_flags = INTEL_RELOC_WRITE;
   uint32_t *dw;
   unsigned pos;

   ILO_DEV_ASSERT(builder->dev, 6, 7.5);

   /* reports are written at 64-byte granularity */
   assert(bo_offset % 64 == 0);

   /* must use GGTT on GEN6 as in PIPE_CONTROL */
   if (ilo_dev_gen(builder->dev) == ILO_GEN(6)) {
      bo_offset |= GEN6_MI_REPORT_PERF_COUNT_DW1_USE_GGTT;
      reloc_flags |= INTEL_RELOC_GGTT;
   }

   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN6_MI_CMD(MI_REPORT_PERF_COUNT) | (cmd_len - 2);
   dw[2] = report_id;

   ilo_builder_batch_reloc(builder, pos + 1, bo, bo_offset, reloc_flags);
}
 
/**
 * Emit MI_LOAD_REGISTER_MEM to load the MMIO register at \p reg from
 * \p bo at \p bo_offset.  Gen7+ only; the address DWord(s) are filled in
 * by the relocation call (read-only, so no reloc flags).
 */
static inline void
gen7_MI_LOAD_REGISTER_MEM(struct ilo_builder *builder, uint32_t reg,
                          struct intel_bo *bo, uint32_t bo_offset)
{
   /* Gen8 uses a 64-bit address and is one DWord longer */
   const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 4 : 3;
   uint32_t *dw;
   unsigned pos;

   ILO_DEV_ASSERT(builder->dev, 7, 8);

   assert(reg % 4 == 0 && bo_offset % 4 == 0);

   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN7_MI_CMD(MI_LOAD_REGISTER_MEM) | (cmd_len - 2);
   dw[1] = reg;

   if (ilo_dev_gen(builder->dev) >= ILO_GEN(8))
      ilo_builder_batch_reloc64(builder, pos + 2, bo, bo_offset, 0);
   else
      ilo_builder_batch_reloc(builder, pos + 2, bo, bo_offset, 0);
}
 
/**
 * Add a MI_BATCH_BUFFER_END to the batch buffer.  Pad with MI_NOOP if
 * necessary.
 */
static inline void
gen6_mi_batch_buffer_end(struct ilo_builder *builder)
{
   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 107:
    *
    *     "The batch buffer must be QWord aligned and a multiple of QWords in
    *      length."
    */
   uint32_t *dw;

   ILO_DEV_ASSERT(builder->dev, 6, 8);

   if (builder->writers[ILO_BUILDER_WRITER_BATCH].used & 0x7) {
      /* one DWord past a QWord boundary: a single DWord gets us back */
      ilo_builder_batch_pointer(builder, 1, &dw);
      dw[0] = GEN6_MI_CMD(MI_BATCH_BUFFER_END);
   } else {
      /* QWord-aligned: emit two DWords to stay QWord-aligned */
      ilo_builder_batch_pointer(builder, 2, &dw);
      dw[0] = GEN6_MI_CMD(MI_BATCH_BUFFER_END);
      dw[1] = GEN6_MI_CMD(MI_NOOP);
   }
}
 
#endif /* ILO_BUILDER_MI_H */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_builder_render.h
0,0 → 1,303
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2014 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_BUILDER_RENDER_H
#define ILO_BUILDER_RENDER_H
 
#include "genhw/genhw.h"
#include "intel_winsys.h"
 
#include "ilo_core.h"
#include "ilo_dev.h"
#include "ilo_builder.h"
 
/**
 * Emit STATE_SIP to set the System Instruction Pointer to \p sip.
 */
static inline void
gen6_STATE_SIP(struct ilo_builder *builder, uint32_t sip)
{
   const bool is_gen8_plus = (ilo_dev_gen(builder->dev) >= ILO_GEN(8));
   /* one extra DWord on Gen8+ */
   const uint8_t cmd_len = is_gen8_plus ? 3 : 2;
   uint32_t *dw;

   ILO_DEV_ASSERT(builder->dev, 6, 8);

   ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN6_RENDER_CMD(COMMON, STATE_SIP) | (cmd_len - 2);
   dw[1] = sip;
   if (is_gen8_plus)
      dw[2] = 0;
}
 
/**
 * Emit PIPELINE_SELECT to switch to \p pipeline (3D, media, or, on Gen7+,
 * GPGPU).
 */
static inline void
gen6_PIPELINE_SELECT(struct ilo_builder *builder, int pipeline)
{
   const uint8_t cmd_len = 1;
   const uint32_t dw0 = GEN6_RENDER_CMD(SINGLE_DW, PIPELINE_SELECT) |
                        pipeline;

   ILO_DEV_ASSERT(builder->dev, 6, 8);

   /* sanity-check the requested pipeline */
   if (pipeline == GEN7_PIPELINE_SELECT_DW0_SELECT_GPGPU) {
      /* the GPGPU pipeline requires Gen7+ */
      assert(ilo_dev_gen(builder->dev) >= ILO_GEN(7));
   } else if (pipeline != GEN6_PIPELINE_SELECT_DW0_SELECT_3D &&
              pipeline != GEN6_PIPELINE_SELECT_DW0_SELECT_MEDIA) {
      assert(!"unknown pipeline");
   }

   ilo_builder_batch_write(builder, cmd_len, &dw0);
}
 
/**
 * Emit PIPE_CONTROL with control bits \p dw1.  When \p bo is non-NULL, the
 * post-sync operation writes to \p bo at \p bo_offset; \p imm is the
 * immediate payload for GEN6_PIPE_CONTROL_WRITE_IMM.
 *
 * The address DWord(s) are filled in by the relocation call when \p bo is
 * set, and zeroed otherwise.
 */
static inline void
gen6_PIPE_CONTROL(struct ilo_builder *builder, uint32_t dw1,
                  struct intel_bo *bo, uint32_t bo_offset,
                  uint64_t imm)
{
   /* Gen8 uses a 64-bit address and is one DWord longer */
   const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 6 : 5;
   uint32_t reloc_flags = INTEL_RELOC_WRITE;
   uint32_t *dw;
   unsigned pos;

   ILO_DEV_ASSERT(builder->dev, 6, 8);

   if (dw1 & GEN6_PIPE_CONTROL_CS_STALL) {
      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 73:
       *
       *     "1 of the following must also be set (when CS stall is set):
       *
       *       * Depth Cache Flush Enable ([0] of DW1)
       *       * Stall at Pixel Scoreboard ([1] of DW1)
       *       * Depth Stall ([13] of DW1)
       *       * Post-Sync Operation ([13] of DW1)
       *       * Render Target Cache Flush Enable ([12] of DW1)
       *       * Notify Enable ([8] of DW1)"
       *
       * From the Ivy Bridge PRM, volume 2 part 1, page 61:
       *
       *     "One of the following must also be set (when CS stall is set):
       *
       *       * Render Target Cache Flush Enable ([12] of DW1)
       *       * Depth Cache Flush Enable ([0] of DW1)
       *       * Stall at Pixel Scoreboard ([1] of DW1)
       *       * Depth Stall ([13] of DW1)
       *       * Post-Sync Operation ([13] of DW1)"
       */
      uint32_t bit_test = GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH |
                          GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                          GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL |
                          GEN6_PIPE_CONTROL_DEPTH_STALL;

      /* post-sync op */
      bit_test |= GEN6_PIPE_CONTROL_WRITE_IMM |
                  GEN6_PIPE_CONTROL_WRITE_PS_DEPTH_COUNT |
                  GEN6_PIPE_CONTROL_WRITE_TIMESTAMP;

      /* Notify Enable only counts on Gen6 (see the SNB quote above) */
      if (ilo_dev_gen(builder->dev) == ILO_GEN(6))
         bit_test |= GEN6_PIPE_CONTROL_NOTIFY_ENABLE;

      assert(dw1 & bit_test);
   }

   if (dw1 & GEN6_PIPE_CONTROL_DEPTH_STALL) {
      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 73:
       *
       *     "Following bits must be clear (when Depth Stall is set):
       *
       *       * Render Target Cache Flush Enable ([12] of DW1)
       *       * Depth Cache Flush Enable ([0] of DW1)"
       */
      assert(!(dw1 & (GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH |
                      GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH)));
   }

   /* these post-sync ops write HW-generated values; imm must be unused */
   switch (dw1 & GEN6_PIPE_CONTROL_WRITE__MASK) {
   case GEN6_PIPE_CONTROL_WRITE_PS_DEPTH_COUNT:
   case GEN6_PIPE_CONTROL_WRITE_TIMESTAMP:
      assert(!imm);
      break;
   default:
      break;
   }

   /* QWord-aligned write target */
   assert(bo_offset % 8 == 0);

   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);

   dw[0] = GEN6_RENDER_CMD(3D, PIPE_CONTROL) | (cmd_len - 2);
   dw[1] = dw1;

   if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
      dw[4] = (uint32_t) imm;
      dw[5] = (uint32_t) (imm >> 32);

      if (bo) {
         /* 64-bit address at dw[2..3] */
         ilo_builder_batch_reloc64(builder, pos + 2,
               bo, bo_offset, reloc_flags);
      } else {
         dw[2] = 0;
         dw[3] = 0;
      }

   } else {
      dw[3] = (uint32_t) imm;
      dw[4] = (uint32_t) (imm >> 32);

      if (bo) {
         /*
          * From the Sandy Bridge PRM, volume 1 part 3, page 19:
          *
          *     "[DevSNB] PPGTT memory writes by MI_* (such as
          *      MI_STORE_DATA_IMM) and PIPE_CONTROL are not supported."
          */
         if (ilo_dev_gen(builder->dev) == ILO_GEN(6)) {
            bo_offset |= GEN6_PIPE_CONTROL_DW2_USE_GGTT;
            reloc_flags |= INTEL_RELOC_GGTT;
         }

         ilo_builder_batch_reloc(builder, pos + 2,
               bo, bo_offset, reloc_flags);
      } else {
         dw[2] = 0;
      }
   }
}
 
/**
 * Patch the pending STATE_BASE_ADDRESS with a relocation for the
 * Instruction Base Address, now that the instruction bo is known.
 * No-op when no STATE_BASE_ADDRESS has been emitted yet.
 *
 * The "offset" passed to the reloc is really the low DWord's flag bits
 * (MOCS and the Modify Enable bit); the bo address is added by the kernel.
 */
static inline void
ilo_builder_batch_patch_sba(struct ilo_builder *builder)
{
   const struct ilo_builder_writer *inst =
      &builder->writers[ILO_BUILDER_WRITER_INSTRUCTION];

   if (!builder->sba_instruction_pos)
      return;

   if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
      ilo_builder_batch_reloc64(builder, builder->sba_instruction_pos,
            inst->bo,
            builder->mocs << GEN8_SBA_MOCS__SHIFT | GEN6_SBA_ADDR_MODIFIED,
            0);
   } else {
      ilo_builder_batch_reloc(builder, builder->sba_instruction_pos, inst->bo,
            builder->mocs << GEN6_SBA_MOCS__SHIFT | GEN6_SBA_ADDR_MODIFIED,
            0);
   }
}
 
/**
 * Add a STATE_BASE_ADDRESS to the batch buffer.  The relocation entry for the
 * instruction buffer is not added until ilo_builder_end() or next
 * gen6_state_base_address().
 *
 * Gen6-7.5 layout (10 DWords): general / surface / dynamic / indirect /
 * instruction bases, followed by the four upper-bound DWords.
 */
static inline void
gen6_state_base_address(struct ilo_builder *builder, bool init_all)
{
   const uint8_t cmd_len = 10;
   const struct ilo_builder_writer *bat =
      &builder->writers[ILO_BUILDER_WRITER_BATCH];
   uint32_t *dw;
   unsigned pos;

   ILO_DEV_ASSERT(builder->dev, 6, 7.5);

   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);

   /* init_all doubles as the per-DWord Modify Enable bit (bit 0) below */
   dw[0] = GEN6_RENDER_CMD(COMMON, STATE_BASE_ADDRESS) | (cmd_len - 2);
   dw[1] = builder->mocs << GEN6_SBA_MOCS__SHIFT |
           builder->mocs << GEN6_SBA_DW1_GENERAL_STATELESS_MOCS__SHIFT |
           init_all;

   /* surface and dynamic state bases point at the batch bo */
   ilo_builder_batch_reloc(builder, pos + 2, bat->bo,
         builder->mocs << GEN6_SBA_MOCS__SHIFT | GEN6_SBA_ADDR_MODIFIED,
         0);
   ilo_builder_batch_reloc(builder, pos + 3, bat->bo,
         builder->mocs << GEN6_SBA_MOCS__SHIFT | GEN6_SBA_ADDR_MODIFIED,
         0);

   dw[4] = builder->mocs << GEN6_SBA_MOCS__SHIFT | init_all;

   /*
    * Since the instruction writer has WRITER_FLAG_APPEND set, it is tempting
    * not to set Instruction Base Address.  The problem is that we do not know
    * if the bo has been or will be moved by the kernel.  We need a relocation
    * entry because of that.
    *
    * And since we also set WRITER_FLAG_GROW, we have to wait until
    * ilo_builder_end(), when the final bo is known, to add the relocation
    * entry.
    */
   ilo_builder_batch_patch_sba(builder);
   builder->sba_instruction_pos = pos + 5;

   /* skip range checks */
   dw[6] = init_all;
   dw[7] = 0xfffff000 + init_all;
   dw[8] = 0xfffff000 + init_all;
   dw[9] = init_all;
}
 
/**
 * Gen8 variant of gen6_state_base_address(): 16 DWords with 64-bit base
 * addresses.  The instruction-base relocation is likewise deferred to
 * ilo_builder_batch_patch_sba() (see gen6_state_base_address()).
 */
static inline void
gen8_state_base_address(struct ilo_builder *builder, bool init_all)
{
   const uint8_t cmd_len = 16;
   const struct ilo_builder_writer *bat =
      &builder->writers[ILO_BUILDER_WRITER_BATCH];
   uint32_t *dw;
   unsigned pos;

   ILO_DEV_ASSERT(builder->dev, 8, 8);

   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);

   /* init_all doubles as the per-field Modify Enable bit (bit 0) below */
   dw[0] = GEN6_RENDER_CMD(COMMON, STATE_BASE_ADDRESS) | (cmd_len - 2);
   dw[1] = builder->mocs << GEN8_SBA_MOCS__SHIFT | init_all;
   dw[2] = 0;
   dw[3] = builder->mocs << GEN8_SBA_DW3_STATELESS_MOCS__SHIFT;
   /* surface (dw[4..5]) and dynamic (dw[6..7]) state bases: the batch bo */
   ilo_builder_batch_reloc64(builder, pos + 4, bat->bo,
         builder->mocs << GEN8_SBA_MOCS__SHIFT | GEN6_SBA_ADDR_MODIFIED,
         0);
   ilo_builder_batch_reloc64(builder, pos + 6, bat->bo,
         builder->mocs << GEN8_SBA_MOCS__SHIFT | GEN6_SBA_ADDR_MODIFIED,
         0);
   dw[8] = builder->mocs << GEN8_SBA_MOCS__SHIFT | init_all;
   dw[9] = 0;

   ilo_builder_batch_patch_sba(builder);
   builder->sba_instruction_pos = pos + 10;

   /* skip range checks */
   dw[12] = 0xfffff000 + init_all;
   dw[13] = 0xfffff000 + init_all;
   dw[14] = 0xfffff000 + init_all;
   dw[15] = 0xfffff000 + init_all;
}
 
#endif /* ILO_BUILDER_RENDER_H */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_core.h
0,0 → 1,46
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_CORE_H
#define ILO_CORE_H
 
#include "pipe/p_compiler.h"
#include "pipe/p_defines.h"
#include "pipe/p_format.h"
 
#include "util/u_debug.h"
#include "util/list.h"
#include "util/u_format.h"
#include "util/u_inlines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_pointer.h"
 
#define ILO_PRIM_RECTANGLES PIPE_PRIM_MAX
#define ILO_PRIM_MAX (PIPE_PRIM_MAX + 1)
 
#endif /* ILO_CORE_H */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_debug.c
0,0 → 1,51
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "ilo_debug.h"
 
/* flag names accepted in the debug environment variable, parsed by
 * ilo_debug_init(); each maps to one enum ilo_debug bit */
static const struct debug_named_value ilo_debug_flags[] = {
   { "batch",   ILO_DEBUG_BATCH,   "Dump batch/dynamic/surface/instruction buffers" },
   { "vs",      ILO_DEBUG_VS,      "Dump vertex shaders" },
   { "gs",      ILO_DEBUG_GS,      "Dump geometry shaders" },
   { "fs",      ILO_DEBUG_FS,      "Dump fragment shaders" },
   { "cs",      ILO_DEBUG_CS,      "Dump compute shaders" },
   { "draw",    ILO_DEBUG_DRAW,    "Show draw information" },
   { "submit",  ILO_DEBUG_SUBMIT,  "Show batch buffer submissions" },
   { "hang",    ILO_DEBUG_HANG,    "Detect GPU hangs" },
   { "nohw",    ILO_DEBUG_NOHW,    "Do not send commands to HW" },
   { "nocache", ILO_DEBUG_NOCACHE, "Always invalidate HW caches" },
   { "nohiz",   ILO_DEBUG_NOHIZ,   "Disable HiZ" },
   DEBUG_NAMED_VALUE_END
};
 
/* bitmask of enum ilo_debug flags, set once by ilo_debug_init() */
int ilo_debug;

/**
 * Initialize ilo_debug from the environment variable named \p name.
 */
void
ilo_debug_init(const char *name)
{
   ilo_debug = debug_get_flags_option(name, ilo_debug_flags, 0);
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_debug.h
0,0 → 1,103
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_DEBUG_H
#define ILO_DEBUG_H
 
#include "ilo_core.h"
 
/* enable debug flags affecting hot pathes only with debug builds */
#ifdef DEBUG
#define ILO_DEBUG_HOT 1
#else
#define ILO_DEBUG_HOT 0
#endif
 
/* debug flag bits stored in the global ilo_debug bitmask */
enum ilo_debug {
   ILO_DEBUG_BATCH   = 1 << 0,
   ILO_DEBUG_VS      = 1 << 1,
   ILO_DEBUG_GS      = 1 << 2,
   ILO_DEBUG_FS      = 1 << 3,
   ILO_DEBUG_CS      = 1 << 4,
   /* ILO_DEBUG_HOT is 0 in release builds, so this flag (checked on a hot
    * path) compiles away entirely there */
   ILO_DEBUG_DRAW    = ILO_DEBUG_HOT << 5,
   ILO_DEBUG_SUBMIT  = 1 << 6,
   ILO_DEBUG_HANG    = 1 << 7,

   /* flags that affect the behaviors of the driver */
   ILO_DEBUG_NOHW    = 1 << 20,
   ILO_DEBUG_NOCACHE = 1 << 21,
   ILO_DEBUG_NOHIZ   = 1 << 22,
};
 
extern int ilo_debug;
 
void
ilo_debug_init(const char *name);
 
/**
 * Print a message, for dumping or debugging.
 *
 * printf-style; forwards the varargs to the gallium debug printer.
 */
static inline void _util_printf_format(1, 2)
ilo_printf(const char *format, ...)
{
   va_list ap;

   va_start(ap, format);
   _debug_vprintf(format, ap);
   va_end(ap);
}
 
/**
 * Print a critical error.
 *
 * printf-style; currently identical to ilo_printf() but kept separate so
 * error output can be redirected independently.
 */
static inline void _util_printf_format(1, 2)
ilo_err(const char *format, ...)
{
   va_list ap;

   va_start(ap, format);
   _debug_vprintf(format, ap);
   va_end(ap);
}
 
/**
 * Print a warning, silenced for release builds.
 *
 * printf-style; in release builds the body compiles to nothing.
 */
static inline void _util_printf_format(1, 2)
ilo_warn(const char *format, ...)
{
#ifdef DEBUG
   va_list ap;

   va_start(ap, format);
   _debug_vprintf(format, ap);
   va_end(ap);
#else
   /* silence the unused-parameter warning in release builds */
   (void) format;
#endif
}
 
#endif /* ILO_DEBUG_H */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_dev.c
0,0 → 1,186
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "genhw/genhw.h"
#include "intel_winsys.h"
 
#include "ilo_debug.h"
#include "ilo_dev.h"
 
/**
 * Initialize the \p dev from \p winsys.  \p winsys is considered owned by \p
 * dev and will be destroyed in \p ilo_dev_cleanup().
 *
 * Returns false when the device lacks logical-context support or the PCI id
 * is not a recognized GPU generation.
 */
bool
ilo_dev_init(struct ilo_dev *dev, struct intel_winsys *winsys)
{
   const struct intel_winsys_info *info;

   info = intel_winsys_get_info(winsys);

   /* mirror the winsys capabilities into the device */
   dev->winsys = winsys;
   dev->devid = info->devid;
   dev->aperture_total = info->aperture_total;
   dev->aperture_mappable = info->aperture_mappable;
   dev->has_llc = info->has_llc;
   dev->has_address_swizzling = info->has_address_swizzling;
   dev->has_logical_context = info->has_logical_context;
   dev->has_ppgtt = info->has_ppgtt;
   dev->has_timestamp = info->has_timestamp;
   dev->has_gen7_sol_reset = info->has_gen7_sol_reset;

   if (!dev->has_logical_context) {
      ilo_err("missing hardware logical context support\n");
      return false;
   }

   /*
    * PIPE_CONTROL and MI_* use PPGTT writes on GEN7+ and privileged GGTT
    * writes on GEN6.
    *
    * From the Sandy Bridge PRM, volume 1 part 3, page 101:
    *
    *     "[DevSNB] When Per-Process GTT Enable is set, it is assumed that all
    *      code is in a secure environment, independent of address space.
    *      Under this condition, this bit only specifies the address space
    *      (GGTT or PPGTT).  All commands are executed "as-is""
    *
    * We need PPGTT to be enabled on GEN6 too.
    */
   if (!dev->has_ppgtt) {
      /* experiments show that it does not really matter... */
      ilo_warn("PPGTT disabled\n");
   }

   /* per-generation GT classification and EU/thread/URB sizing below;
    * thread_count is eu_count times threads-per-EU for each part */
   if (gen_is_bdw(info->devid) || gen_is_chv(info->devid)) {
      dev->gen_opaque = ILO_GEN(8);
      /* CHV has no GT levels; treat it as GT1 */
      dev->gt = (gen_is_bdw(info->devid)) ? gen_get_bdw_gt(info->devid) : 1;
      /* XXX random values */
      if (dev->gt == 3) {
         dev->eu_count = 48;
         dev->thread_count = 336;
         dev->urb_size = 384 * 1024;
      } else if (dev->gt == 2) {
         dev->eu_count = 24;
         dev->thread_count = 168;
         dev->urb_size = 384 * 1024;
      } else {
         dev->eu_count = 12;
         dev->thread_count = 84;
         dev->urb_size = 192 * 1024;
      }
   } else if (gen_is_hsw(info->devid)) {
      /*
       * From the Haswell PRM, volume 4, page 8:
       *
       *     "Description                    GT3      GT2      GT1.5    GT1
       *      (...)
       *      EUs (Total)                    40       20       12       10
       *      Threads (Total)                280      140      84       70
       *      (...)
       *      URB Size (max, within L3$)     512KB    256KB    256KB    128KB
       */
      dev->gen_opaque = ILO_GEN(7.5);
      dev->gt = gen_get_hsw_gt(info->devid);
      if (dev->gt == 3) {
         dev->eu_count = 40;
         dev->thread_count = 280;
         dev->urb_size = 512 * 1024;
      } else if (dev->gt == 2) {
         dev->eu_count = 20;
         dev->thread_count = 140;
         dev->urb_size = 256 * 1024;
      } else {
         dev->eu_count = 10;
         dev->thread_count = 70;
         dev->urb_size = 128 * 1024;
      }
   } else if (gen_is_ivb(info->devid) || gen_is_vlv(info->devid)) {
      /*
       * From the Ivy Bridge PRM, volume 1 part 1, page 18:
       *
       *     "Device             # of EUs     #Threads/EU
       *      Ivy Bridge (GT2)   16           8
       *      Ivy Bridge (GT1)   6            6"
       *
       * From the Ivy Bridge PRM, volume 4 part 2, page 17:
       *
       *     "URB Size    URB Rows    URB Rows when SLM Enabled
       *      128k        4096        2048
       *      256k        8096        4096"
       */
      dev->gen_opaque = ILO_GEN(7);
      /* VLV has no GT levels; treat it as GT1 */
      dev->gt = (gen_is_ivb(info->devid)) ? gen_get_ivb_gt(info->devid) : 1;
      if (dev->gt == 2) {
         dev->eu_count = 16;
         dev->thread_count = 128;
         dev->urb_size = 256 * 1024;
      } else {
         dev->eu_count = 6;
         dev->thread_count = 36;
         dev->urb_size = 128 * 1024;
      }
   } else if (gen_is_snb(info->devid)) {
      /*
       * From the Sandy Bridge PRM, volume 1 part 1, page 22:
       *
       *     "Device             # of EUs     #Threads/EU
       *      SNB GT2            12           5
       *      SNB GT1            6            4"
       *
       * From the Sandy Bridge PRM, volume 4 part 2, page 18:
       *
       *     "[DevSNB]: The GT1 product's URB provides 32KB of storage,
       *      arranged as 1024 256-bit rows.  The GT2 product's URB provides
       *      64KB of storage, arranged as 2048 256-bit rows.  A row
       *      corresponds in size to an EU GRF register.  Read/write access to
       *      the URB is generally supported on a row-granular basis."
       */
      dev->gen_opaque = ILO_GEN(6);
      dev->gt = gen_get_snb_gt(info->devid);
      if (dev->gt == 2) {
         dev->eu_count = 12;
         dev->thread_count = 60;
         dev->urb_size = 64 * 1024;
      } else {
         dev->eu_count = 6;
         dev->thread_count = 24;
         dev->urb_size = 32 * 1024;
      }
   } else {
      ilo_err("unknown GPU generation\n");
      return false;
   }

   return true;
}
 
/**
 * Clean up the device, destroying the winsys it owns (see ilo_dev_init()).
 */
void
ilo_dev_cleanup(struct ilo_dev *dev)
{
   intel_winsys_destroy(dev->winsys);
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_dev.h
0,0 → 1,81
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_DEV_H
#define ILO_DEV_H
 
#include "ilo_core.h"
 
/* encode a GPU generation, possibly fractional (e.g. 7.5 -> 750), as an
 * integer for ordered comparisons; the argument is parenthesized so that
 * expression arguments expand correctly */
#define ILO_GEN(gen) ((int) ((gen) * 100))

#define ILO_DEV_ASSERT(dev, min_gen, max_gen) \
   ilo_dev_assert(dev, ILO_GEN(min_gen), ILO_GEN(max_gen))
 
struct intel_winsys;
 
/* device description filled in by ilo_dev_init() */
struct ilo_dev {
   /* owned by the device; destroyed in ilo_dev_cleanup() */
   struct intel_winsys *winsys;

   /* these mirror intel_winsys_info */
   int devid;
   size_t aperture_total;
   size_t aperture_mappable;
   bool has_llc;
   bool has_address_swizzling;
   bool has_logical_context;
   bool has_ppgtt;
   bool has_timestamp;
   bool has_gen7_sol_reset;

   /* use ilo_dev_gen() to access; ILO_GEN()-encoded generation */
   int gen_opaque;

   /* GT level and derived hardware sizes, set per-generation in
    * ilo_dev_init() */
   int gt;
   int eu_count;
   int thread_count;
   int urb_size;
};
 
bool
ilo_dev_init(struct ilo_dev *dev, struct intel_winsys *winsys);
 
void
ilo_dev_cleanup(struct ilo_dev *dev);
 
/**
 * Return the ILO_GEN()-encoded generation of the device, for comparison
 * against ILO_GEN() values.
 */
static inline int
ilo_dev_gen(const struct ilo_dev *dev)
{
   return dev->gen_opaque;
}
 
/**
 * Assert that the device generation lies within [min_opaque, max_opaque],
 * where both bounds are ILO_GEN()-encoded values (use ILO_DEV_ASSERT()).
 */
static inline void
ilo_dev_assert(const struct ilo_dev *dev, int min_opaque, int max_opaque)
{
   assert(dev->gen_opaque >= min_opaque && dev->gen_opaque <= max_opaque);
}
 
#endif /* ILO_DEV_H */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_fence.h
0,0 → 1,73
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_FENCE_H
#define ILO_FENCE_H
 
#include "intel_winsys.h"
 
#include "ilo_core.h"
#include "ilo_dev.h"
 
/* a fence wrapping a sequence bo; signaled when seq_bo is NULL or idle */
struct ilo_fence {
   /* reference owned by the fence; NULL means signaled */
   struct intel_bo *seq_bo;
};
 
/**
 * Initialize the fence.  Nothing to do; the caller is expected to have
 * zero-initialized \p fence.
 */
static inline void
ilo_fence_init(struct ilo_fence *fence, const struct ilo_dev *dev)
{
   /* no-op */
}
 
/**
 * Clean up the fence, releasing its sequence bo reference if any
 * (intel_bo_unref() accepts NULL).
 */
static inline void
ilo_fence_cleanup(struct ilo_fence *fence)
{
   intel_bo_unref(fence->seq_bo);
}
 
/**
 * Set the sequence bo for waiting.  The fence is considered signaled when
 * there is no sequence bo.
 *
 * The new reference is taken before the old one is released, so passing the
 * currently-set bo cannot transiently drop its last reference.
 */
static inline void
ilo_fence_set_seq_bo(struct ilo_fence *fence, struct intel_bo *seq_bo)
{
   struct intel_bo *old_bo = fence->seq_bo;

   fence->seq_bo = intel_bo_ref(seq_bo);
   intel_bo_unref(old_bo);
}
 
/**
 * Wait for the fence to be signaled or until \p timeout nanoseconds has
 * passed.  It will wait indefinitely when \p timeout is negative.
 *
 * Returns true when the fence is signaled.
 */
static inline bool
ilo_fence_wait(struct ilo_fence *fence, int64_t timeout)
{
   /* no sequence bo means already signaled */
   if (!fence->seq_bo)
      return true;

   return (intel_bo_wait(fence->seq_bo, timeout) == 0);
}
 
#endif /* ILO_FENCE_H */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_format.c
0,0 → 1,755
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "genhw/genhw.h"
#include "ilo_format.h"
 
/*
 * Minimum gen, as encoded by ILO_GEN() (e.g. 7.5 for Gen7.5), at which a
 * surface format can be fetched as a vertex element; 0 means unsupported.
 */
struct ilo_vf_cap {
   int vertex_element;
};

/* minimum gen at which a format can be written by stream output */
struct ilo_sol_cap {
   int buffer;
};

/* minimum gens for the sampler capabilities of a format */
struct ilo_sampler_cap {
   int sampling;
   int filtering;
   int shadow_map;
   int chroma_key;
};

/* minimum gens for the data-port capabilities of a format */
struct ilo_dp_cap {
   int rt_write;
   int rt_write_blending;
   int typed_write;
   int media_color_processing;
};
 
/*
* This table is based on:
*
* - the Sandy Bridge PRM, volume 4 part 1, page 88-97
* - the Ivy Bridge PRM, volume 2 part 1, page 97-99
* - the Haswell PRM, volume 7, page 467-470
*/
static const struct ilo_vf_cap ilo_vf_caps[] = {
#define CAP(vertex_element) { ILO_GEN(vertex_element) }
   /*
    * Indexed by GEN6_FORMAT_x.  The value is the first gen with vertex
    * fetch support; formats without an entry are zero-initialized and
    * thus never supported (see ilo_format_support_vb()).
    */
   [GEN6_FORMAT_R32G32B32A32_FLOAT]       = CAP(  1),
   [GEN6_FORMAT_R32G32B32A32_SINT]        = CAP(  1),
   [GEN6_FORMAT_R32G32B32A32_UINT]        = CAP(  1),
   [GEN6_FORMAT_R32G32B32A32_UNORM]       = CAP(  1),
   [GEN6_FORMAT_R32G32B32A32_SNORM]       = CAP(  1),
   [GEN6_FORMAT_R64G64_FLOAT]             = CAP(  1),
   [GEN6_FORMAT_R32G32B32A32_SSCALED]     = CAP(  1),
   [GEN6_FORMAT_R32G32B32A32_USCALED]     = CAP(  1),
   [GEN6_FORMAT_R32G32B32A32_SFIXED]      = CAP(7.5),
   [GEN6_FORMAT_R32G32B32_FLOAT]          = CAP(  1),
   [GEN6_FORMAT_R32G32B32_SINT]           = CAP(  1),
   [GEN6_FORMAT_R32G32B32_UINT]           = CAP(  1),
   [GEN6_FORMAT_R32G32B32_UNORM]          = CAP(  1),
   [GEN6_FORMAT_R32G32B32_SNORM]          = CAP(  1),
   [GEN6_FORMAT_R32G32B32_SSCALED]        = CAP(  1),
   [GEN6_FORMAT_R32G32B32_USCALED]        = CAP(  1),
   [GEN6_FORMAT_R32G32B32_SFIXED]         = CAP(7.5),
   [GEN6_FORMAT_R16G16B16A16_UNORM]       = CAP(  1),
   [GEN6_FORMAT_R16G16B16A16_SNORM]       = CAP(  1),
   [GEN6_FORMAT_R16G16B16A16_SINT]        = CAP(  1),
   [GEN6_FORMAT_R16G16B16A16_UINT]        = CAP(  1),
   [GEN6_FORMAT_R16G16B16A16_FLOAT]       = CAP(  1),
   [GEN6_FORMAT_R32G32_FLOAT]             = CAP(  1),
   [GEN6_FORMAT_R32G32_SINT]              = CAP(  1),
   [GEN6_FORMAT_R32G32_UINT]              = CAP(  1),
   [GEN6_FORMAT_R32G32_UNORM]             = CAP(  1),
   [GEN6_FORMAT_R32G32_SNORM]             = CAP(  1),
   [GEN6_FORMAT_R64_FLOAT]                = CAP(  1),
   [GEN6_FORMAT_R16G16B16A16_SSCALED]     = CAP(  1),
   [GEN6_FORMAT_R16G16B16A16_USCALED]     = CAP(  1),
   [GEN6_FORMAT_R32G32_SSCALED]           = CAP(  1),
   [GEN6_FORMAT_R32G32_USCALED]           = CAP(  1),
   [GEN6_FORMAT_R32G32_SFIXED]            = CAP(7.5),
   [GEN6_FORMAT_B8G8R8A8_UNORM]           = CAP(  1),
   [GEN6_FORMAT_R10G10B10A2_UNORM]        = CAP(  1),
   [GEN6_FORMAT_R10G10B10A2_UINT]         = CAP(  1),
   [GEN6_FORMAT_R10G10B10_SNORM_A2_UNORM] = CAP(  1),
   [GEN6_FORMAT_R8G8B8A8_UNORM]           = CAP(  1),
   [GEN6_FORMAT_R8G8B8A8_SNORM]           = CAP(  1),
   [GEN6_FORMAT_R8G8B8A8_SINT]            = CAP(  1),
   [GEN6_FORMAT_R8G8B8A8_UINT]            = CAP(  1),
   [GEN6_FORMAT_R16G16_UNORM]             = CAP(  1),
   [GEN6_FORMAT_R16G16_SNORM]             = CAP(  1),
   [GEN6_FORMAT_R16G16_SINT]              = CAP(  1),
   [GEN6_FORMAT_R16G16_UINT]              = CAP(  1),
   [GEN6_FORMAT_R16G16_FLOAT]             = CAP(  1),
   [GEN6_FORMAT_B10G10R10A2_UNORM]        = CAP(7.5),
   [GEN6_FORMAT_R11G11B10_FLOAT]          = CAP(  1),
   [GEN6_FORMAT_R32_SINT]                 = CAP(  1),
   [GEN6_FORMAT_R32_UINT]                 = CAP(  1),
   [GEN6_FORMAT_R32_FLOAT]                = CAP(  1),
   [GEN6_FORMAT_R32_UNORM]                = CAP(  1),
   [GEN6_FORMAT_R32_SNORM]                = CAP(  1),
   [GEN6_FORMAT_R10G10B10X2_USCALED]      = CAP(  1),
   [GEN6_FORMAT_R8G8B8A8_SSCALED]         = CAP(  1),
   [GEN6_FORMAT_R8G8B8A8_USCALED]         = CAP(  1),
   [GEN6_FORMAT_R16G16_SSCALED]           = CAP(  1),
   [GEN6_FORMAT_R16G16_USCALED]           = CAP(  1),
   [GEN6_FORMAT_R32_SSCALED]              = CAP(  1),
   [GEN6_FORMAT_R32_USCALED]              = CAP(  1),
   [GEN6_FORMAT_R8G8_UNORM]               = CAP(  1),
   [GEN6_FORMAT_R8G8_SNORM]               = CAP(  1),
   [GEN6_FORMAT_R8G8_SINT]                = CAP(  1),
   [GEN6_FORMAT_R8G8_UINT]                = CAP(  1),
   [GEN6_FORMAT_R16_UNORM]                = CAP(  1),
   [GEN6_FORMAT_R16_SNORM]                = CAP(  1),
   [GEN6_FORMAT_R16_SINT]                 = CAP(  1),
   [GEN6_FORMAT_R16_UINT]                 = CAP(  1),
   [GEN6_FORMAT_R16_FLOAT]                = CAP(  1),
   [GEN6_FORMAT_R8G8_SSCALED]             = CAP(  1),
   [GEN6_FORMAT_R8G8_USCALED]             = CAP(  1),
   [GEN6_FORMAT_R16_SSCALED]              = CAP(  1),
   [GEN6_FORMAT_R16_USCALED]              = CAP(  1),
   [GEN6_FORMAT_R8_UNORM]                 = CAP(  1),
   [GEN6_FORMAT_R8_SNORM]                 = CAP(  1),
   [GEN6_FORMAT_R8_SINT]                  = CAP(  1),
   [GEN6_FORMAT_R8_UINT]                  = CAP(  1),
   [GEN6_FORMAT_R8_SSCALED]               = CAP(  1),
   [GEN6_FORMAT_R8_USCALED]               = CAP(  1),
   [GEN6_FORMAT_R8G8B8_UNORM]             = CAP(  1),
   [GEN6_FORMAT_R8G8B8_SNORM]             = CAP(  1),
   [GEN6_FORMAT_R8G8B8_SSCALED]           = CAP(  1),
   [GEN6_FORMAT_R8G8B8_USCALED]           = CAP(  1),
   [GEN6_FORMAT_R64G64B64A64_FLOAT]       = CAP(  1),
   [GEN6_FORMAT_R64G64B64_FLOAT]          = CAP(  1),
   [GEN6_FORMAT_R16G16B16_FLOAT]          = CAP(  6),
   [GEN6_FORMAT_R16G16B16_UNORM]          = CAP(  1),
   [GEN6_FORMAT_R16G16B16_SNORM]          = CAP(  1),
   [GEN6_FORMAT_R16G16B16_SSCALED]        = CAP(  1),
   [GEN6_FORMAT_R16G16B16_USCALED]        = CAP(  1),
   [GEN6_FORMAT_R16G16B16_UINT]           = CAP(7.5),
   [GEN6_FORMAT_R16G16B16_SINT]           = CAP(7.5),
   [GEN6_FORMAT_R32_SFIXED]               = CAP(7.5),
   [GEN6_FORMAT_R10G10B10A2_SNORM]        = CAP(7.5),
   [GEN6_FORMAT_R10G10B10A2_USCALED]      = CAP(7.5),
   [GEN6_FORMAT_R10G10B10A2_SSCALED]      = CAP(7.5),
   [GEN6_FORMAT_R10G10B10A2_SINT]         = CAP(7.5),
   [GEN6_FORMAT_B10G10R10A2_SNORM]        = CAP(7.5),
   [GEN6_FORMAT_B10G10R10A2_USCALED]      = CAP(7.5),
   [GEN6_FORMAT_B10G10R10A2_SSCALED]      = CAP(7.5),
   [GEN6_FORMAT_B10G10R10A2_UINT]         = CAP(7.5),
   [GEN6_FORMAT_B10G10R10A2_SINT]         = CAP(7.5),
   [GEN6_FORMAT_R8G8B8_UINT]              = CAP(7.5),
   [GEN6_FORMAT_R8G8B8_SINT]              = CAP(7.5),
#undef CAP
};
 
/*
* This table is based on:
*
* - the Sandy Bridge PRM, volume 4 part 1, page 88-97
* - the Ivy Bridge PRM, volume 2 part 1, page 195
* - the Haswell PRM, volume 7, page 535
*/
static const struct ilo_sol_cap ilo_sol_caps[] = {
#define CAP(buffer) { ILO_GEN(buffer) }
   /*
    * Indexed by GEN6_FORMAT_x.  Stream output supports only the 32-bit
    * per-channel formats; everything else is zero (unsupported).
    */
   [GEN6_FORMAT_R32G32B32A32_FLOAT]       = CAP(  1),
   [GEN6_FORMAT_R32G32B32A32_SINT]        = CAP(  1),
   [GEN6_FORMAT_R32G32B32A32_UINT]        = CAP(  1),
   [GEN6_FORMAT_R32G32B32_FLOAT]          = CAP(  1),
   [GEN6_FORMAT_R32G32B32_SINT]           = CAP(  1),
   [GEN6_FORMAT_R32G32B32_UINT]           = CAP(  1),
   [GEN6_FORMAT_R32G32_FLOAT]             = CAP(  1),
   [GEN6_FORMAT_R32G32_SINT]              = CAP(  1),
   [GEN6_FORMAT_R32G32_UINT]              = CAP(  1),
   [GEN6_FORMAT_R32_SINT]                 = CAP(  1),
   [GEN6_FORMAT_R32_UINT]                 = CAP(  1),
   [GEN6_FORMAT_R32_FLOAT]                = CAP(  1),
#undef CAP
};
 
/*
* This table is based on:
*
* - the Sandy Bridge PRM, volume 4 part 1, page 88-97
* - the Ivy Bridge PRM, volume 4 part 1, page 84-87
*/
static const struct ilo_sampler_cap ilo_sampler_caps[] = {
#define CAP(sampling, filtering, shadow_map, chroma_key) \
   { ILO_GEN(sampling), ILO_GEN(filtering), ILO_GEN(shadow_map), ILO_GEN(chroma_key) }
   /*
    * Indexed by GEN6_FORMAT_x.  Each column is the first gen supporting
    * that sampler feature (sampling, filtering, shadow mapping, chroma
    * keying) for the format; 0 means unsupported.
    */
   [GEN6_FORMAT_R32G32B32A32_FLOAT]       = CAP(  1,   5,   0,   0),
   [GEN6_FORMAT_R32G32B32A32_SINT]        = CAP(  1,   0,   0,   0),
   [GEN6_FORMAT_R32G32B32A32_UINT]        = CAP(  1,   0,   0,   0),
   [GEN6_FORMAT_R32G32B32X32_FLOAT]       = CAP(  1,   5,   0,   0),
   [GEN6_FORMAT_R32G32B32_FLOAT]          = CAP(  1,   5,   0,   0),
   [GEN6_FORMAT_R32G32B32_SINT]           = CAP(  1,   0,   0,   0),
   [GEN6_FORMAT_R32G32B32_UINT]           = CAP(  1,   0,   0,   0),
   [GEN6_FORMAT_R16G16B16A16_UNORM]       = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_R16G16B16A16_SNORM]       = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_R16G16B16A16_SINT]        = CAP(  1,   0,   0,   0),
   [GEN6_FORMAT_R16G16B16A16_UINT]        = CAP(  1,   0,   0,   0),
   [GEN6_FORMAT_R16G16B16A16_FLOAT]       = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_R32G32_FLOAT]             = CAP(  1,   5,   0,   0),
   [GEN6_FORMAT_R32G32_SINT]              = CAP(  1,   0,   0,   0),
   [GEN6_FORMAT_R32G32_UINT]              = CAP(  1,   0,   0,   0),
   [GEN6_FORMAT_R32_FLOAT_X8X24_TYPELESS] = CAP(  1,   5,   1,   0),
   [GEN6_FORMAT_X32_TYPELESS_G8X24_UINT]  = CAP(  1,   0,   0,   0),
   [GEN6_FORMAT_L32A32_FLOAT]             = CAP(  1,   5,   0,   0),
   [GEN6_FORMAT_R16G16B16X16_UNORM]       = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_R16G16B16X16_FLOAT]       = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_A32X32_FLOAT]             = CAP(  1,   5,   0,   0),
   [GEN6_FORMAT_L32X32_FLOAT]             = CAP(  1,   5,   0,   0),
   [GEN6_FORMAT_I32X32_FLOAT]             = CAP(  1,   5,   0,   0),
   [GEN6_FORMAT_B8G8R8A8_UNORM]           = CAP(  1,   1,   0,   1),
   [GEN6_FORMAT_B8G8R8A8_UNORM_SRGB]      = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_R10G10B10A2_UNORM]        = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_R10G10B10A2_UNORM_SRGB]   = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_R10G10B10A2_UINT]         = CAP(  1,   0,   0,   0),
   [GEN6_FORMAT_R10G10B10_SNORM_A2_UNORM] = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_R8G8B8A8_UNORM]           = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_R8G8B8A8_UNORM_SRGB]      = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_R8G8B8A8_SNORM]           = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_R8G8B8A8_SINT]            = CAP(  1,   0,   0,   0),
   [GEN6_FORMAT_R8G8B8A8_UINT]            = CAP(  1,   0,   0,   0),
   [GEN6_FORMAT_R16G16_UNORM]             = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_R16G16_SNORM]             = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_R16G16_SINT]              = CAP(  1,   0,   0,   0),
   [GEN6_FORMAT_R16G16_UINT]              = CAP(  1,   0,   0,   0),
   [GEN6_FORMAT_R16G16_FLOAT]             = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_B10G10R10A2_UNORM]        = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_B10G10R10A2_UNORM_SRGB]   = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_R11G11B10_FLOAT]          = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_R32_SINT]                 = CAP(  1,   0,   0,   0),
   [GEN6_FORMAT_R32_UINT]                 = CAP(  1,   0,   0,   0),
   [GEN6_FORMAT_R32_FLOAT]                = CAP(  1,   5,   1,   0),
   [GEN6_FORMAT_R24_UNORM_X8_TYPELESS]    = CAP(  1,   5,   1,   0),
   [GEN6_FORMAT_X24_TYPELESS_G8_UINT]     = CAP(  1,   0,   0,   0),
   [GEN6_FORMAT_L16A16_UNORM]             = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_I24X8_UNORM]              = CAP(  1,   5,   1,   0),
   [GEN6_FORMAT_L24X8_UNORM]              = CAP(  1,   5,   1,   0),
   [GEN6_FORMAT_A24X8_UNORM]              = CAP(  1,   5,   1,   0),
   [GEN6_FORMAT_I32_FLOAT]                = CAP(  1,   5,   1,   0),
   [GEN6_FORMAT_L32_FLOAT]                = CAP(  1,   5,   1,   0),
   [GEN6_FORMAT_A32_FLOAT]                = CAP(  1,   5,   1,   0),
   [GEN6_FORMAT_B8G8R8X8_UNORM]           = CAP(  1,   1,   0,   1),
   [GEN6_FORMAT_B8G8R8X8_UNORM_SRGB]      = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_R8G8B8X8_UNORM]           = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_R8G8B8X8_UNORM_SRGB]      = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_R9G9B9E5_SHAREDEXP]       = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_B10G10R10X2_UNORM]        = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_L16A16_FLOAT]             = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_B5G6R5_UNORM]             = CAP(  1,   1,   0,   1),
   [GEN6_FORMAT_B5G6R5_UNORM_SRGB]        = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_B5G5R5A1_UNORM]           = CAP(  1,   1,   0,   1),
   [GEN6_FORMAT_B5G5R5A1_UNORM_SRGB]      = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_B4G4R4A4_UNORM]           = CAP(  1,   1,   0,   1),
   [GEN6_FORMAT_B4G4R4A4_UNORM_SRGB]      = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_R8G8_UNORM]               = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_R8G8_SNORM]               = CAP(  1,   1,   0,   1),
   [GEN6_FORMAT_R8G8_SINT]                = CAP(  1,   0,   0,   0),
   [GEN6_FORMAT_R8G8_UINT]                = CAP(  1,   0,   0,   0),
   [GEN6_FORMAT_R16_UNORM]                = CAP(  1,   1,   1,   0),
   [GEN6_FORMAT_R16_SNORM]                = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_R16_SINT]                 = CAP(  1,   0,   0,   0),
   [GEN6_FORMAT_R16_UINT]                 = CAP(  1,   0,   0,   0),
   [GEN6_FORMAT_R16_FLOAT]                = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_A8P8_UNORM_PALETTE0]      = CAP(  5,   5,   0,   0),
   [GEN6_FORMAT_A8P8_UNORM_PALETTE1]      = CAP(  5,   5,   0,   0),
   [GEN6_FORMAT_I16_UNORM]                = CAP(  1,   1,   1,   0),
   [GEN6_FORMAT_L16_UNORM]                = CAP(  1,   1,   1,   0),
   [GEN6_FORMAT_A16_UNORM]                = CAP(  1,   1,   1,   0),
   [GEN6_FORMAT_L8A8_UNORM]               = CAP(  1,   1,   0,   1),
   [GEN6_FORMAT_I16_FLOAT]                = CAP(  1,   1,   1,   0),
   [GEN6_FORMAT_L16_FLOAT]                = CAP(  1,   1,   1,   0),
   [GEN6_FORMAT_A16_FLOAT]                = CAP(  1,   1,   1,   0),
   [GEN6_FORMAT_L8A8_UNORM_SRGB]          = CAP(4.5, 4.5,   0,   0),
   [GEN6_FORMAT_R5G5_SNORM_B6_UNORM]      = CAP(  1,   1,   0,   1),
   [GEN6_FORMAT_P8A8_UNORM_PALETTE0]      = CAP(  5,   5,   0,   0),
   [GEN6_FORMAT_P8A8_UNORM_PALETTE1]      = CAP(  5,   5,   0,   0),
   [GEN6_FORMAT_R8_UNORM]                 = CAP(  1,   1,   0, 4.5),
   [GEN6_FORMAT_R8_SNORM]                 = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_R8_SINT]                  = CAP(  1,   0,   0,   0),
   [GEN6_FORMAT_R8_UINT]                  = CAP(  1,   0,   0,   0),
   [GEN6_FORMAT_A8_UNORM]                 = CAP(  1,   1,   0,   1),
   [GEN6_FORMAT_I8_UNORM]                 = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_L8_UNORM]                 = CAP(  1,   1,   0,   1),
   [GEN6_FORMAT_P4A4_UNORM_PALETTE0]      = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_A4P4_UNORM_PALETTE0]      = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_P8_UNORM_PALETTE0]        = CAP(4.5, 4.5,   0,   0),
   [GEN6_FORMAT_L8_UNORM_SRGB]            = CAP(4.5, 4.5,   0,   0),
   [GEN6_FORMAT_P8_UNORM_PALETTE1]        = CAP(4.5, 4.5,   0,   0),
   [GEN6_FORMAT_P4A4_UNORM_PALETTE1]      = CAP(4.5, 4.5,   0,   0),
   [GEN6_FORMAT_A4P4_UNORM_PALETTE1]      = CAP(4.5, 4.5,   0,   0),
   [GEN6_FORMAT_DXT1_RGB_SRGB]            = CAP(4.5, 4.5,   0,   0),
   [GEN6_FORMAT_R1_UNORM]                 = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_YCRCB_NORMAL]             = CAP(  1,   1,   0,   1),
   [GEN6_FORMAT_YCRCB_SWAPUVY]            = CAP(  1,   1,   0,   1),
   [GEN6_FORMAT_P2_UNORM_PALETTE0]        = CAP(4.5, 4.5,   0,   0),
   [GEN6_FORMAT_P2_UNORM_PALETTE1]        = CAP(4.5, 4.5,   0,   0),
   [GEN6_FORMAT_BC1_UNORM]                = CAP(  1,   1,   0,   1),
   [GEN6_FORMAT_BC2_UNORM]                = CAP(  1,   1,   0,   1),
   [GEN6_FORMAT_BC3_UNORM]                = CAP(  1,   1,   0,   1),
   [GEN6_FORMAT_BC4_UNORM]                = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_BC5_UNORM]                = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_BC1_UNORM_SRGB]           = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_BC2_UNORM_SRGB]           = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_BC3_UNORM_SRGB]           = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_MONO8]                    = CAP(  1,   0,   0,   0),
   [GEN6_FORMAT_YCRCB_SWAPUV]             = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_YCRCB_SWAPY]              = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_DXT1_RGB]                 = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_FXT1]                     = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_BC4_SNORM]                = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_BC5_SNORM]                = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_R16G16B16_FLOAT]          = CAP(  5,   5,   0,   0),
   [GEN6_FORMAT_BC6H_SF16]                = CAP(  7,   7,   0,   0),
   [GEN6_FORMAT_BC7_UNORM]                = CAP(  7,   7,   0,   0),
   [GEN6_FORMAT_BC7_UNORM_SRGB]           = CAP(  7,   7,   0,   0),
   [GEN6_FORMAT_BC6H_UF16]                = CAP(  7,   7,   0,   0),
#undef CAP
};
 
/*
* This table is based on:
*
* - the Sandy Bridge PRM, volume 4 part 1, page 88-97
* - the Ivy Bridge PRM, volume 4 part 1, page 172, 252-253, and 277-278
* - the Haswell PRM, volume 7, page 262-264
*/
static const struct ilo_dp_cap ilo_dp_caps[] = {
#define CAP(rt_write, rt_write_blending, typed_write, media_color_processing) \
   { ILO_GEN(rt_write), ILO_GEN(rt_write_blending), ILO_GEN(typed_write), ILO_GEN(media_color_processing) }
   /*
    * Indexed by GEN6_FORMAT_x.  Each column is the first gen supporting
    * that data-port operation (render-target write, RT write with
    * blending, typed surface write, media color processing); 0 means
    * unsupported.
    */
   [GEN6_FORMAT_R32G32B32A32_FLOAT]       = CAP(  1,   1,   7,   0),
   [GEN6_FORMAT_R32G32B32A32_SINT]        = CAP(  1,   0,   7,   0),
   [GEN6_FORMAT_R32G32B32A32_UINT]        = CAP(  1,   0,   7,   0),
   [GEN6_FORMAT_R16G16B16A16_UNORM]       = CAP(  1, 4.5,   7,   6),
   [GEN6_FORMAT_R16G16B16A16_SNORM]       = CAP(  1,   6,   7,   0),
   [GEN6_FORMAT_R16G16B16A16_SINT]        = CAP(  1,   0,   7,   0),
   [GEN6_FORMAT_R16G16B16A16_UINT]        = CAP(  1,   0,   7,   0),
   [GEN6_FORMAT_R16G16B16A16_FLOAT]       = CAP(  1,   1,   7,   0),
   [GEN6_FORMAT_R32G32_FLOAT]             = CAP(  1,   1,   7,   0),
   [GEN6_FORMAT_R32G32_SINT]              = CAP(  1,   0,   7,   0),
   [GEN6_FORMAT_R32G32_UINT]              = CAP(  1,   0,   7,   0),
   [GEN6_FORMAT_B8G8R8A8_UNORM]           = CAP(  1,   1,   7,   6),
   [GEN6_FORMAT_B8G8R8A8_UNORM_SRGB]      = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_R10G10B10A2_UNORM]        = CAP(  1,   1,   7,   6),
   [GEN6_FORMAT_R10G10B10A2_UNORM_SRGB]   = CAP(  0,   0,   0,   6),
   [GEN6_FORMAT_R10G10B10A2_UINT]         = CAP(  1,   0,   7,   0),
   [GEN6_FORMAT_R8G8B8A8_UNORM]           = CAP(  1,   1,   7,   6),
   [GEN6_FORMAT_R8G8B8A8_UNORM_SRGB]      = CAP(  1,   1,   0,   6),
   [GEN6_FORMAT_R8G8B8A8_SNORM]           = CAP(  1,   6,   7,   0),
   [GEN6_FORMAT_R8G8B8A8_SINT]            = CAP(  1,   0,   7,   0),
   [GEN6_FORMAT_R8G8B8A8_UINT]            = CAP(  1,   0,   7,   0),
   [GEN6_FORMAT_R16G16_UNORM]             = CAP(  1, 4.5,   7,   0),
   [GEN6_FORMAT_R16G16_SNORM]             = CAP(  1,   6,   7,   0),
   [GEN6_FORMAT_R16G16_SINT]              = CAP(  1,   0,   7,   0),
   [GEN6_FORMAT_R16G16_UINT]              = CAP(  1,   0,   7,   0),
   [GEN6_FORMAT_R16G16_FLOAT]             = CAP(  1,   1,   7,   0),
   [GEN6_FORMAT_B10G10R10A2_UNORM]        = CAP(  1,   1,   7,   6),
   [GEN6_FORMAT_B10G10R10A2_UNORM_SRGB]   = CAP(  1,   1,   0,   6),
   [GEN6_FORMAT_R11G11B10_FLOAT]          = CAP(  1,   1,   7,   0),
   [GEN6_FORMAT_R32_SINT]                 = CAP(  1,   0,   7,   0),
   [GEN6_FORMAT_R32_UINT]                 = CAP(  1,   0,   7,   0),
   [GEN6_FORMAT_R32_FLOAT]                = CAP(  1,   1,   7,   0),
   [GEN6_FORMAT_B8G8R8X8_UNORM]           = CAP(  0,   0,   0,   6),
   [GEN6_FORMAT_B5G6R5_UNORM]             = CAP(  1,   1,   7,   0),
   [GEN6_FORMAT_B5G6R5_UNORM_SRGB]        = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_B5G5R5A1_UNORM]           = CAP(  1,   1,   7,   0),
   [GEN6_FORMAT_B5G5R5A1_UNORM_SRGB]      = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_B4G4R4A4_UNORM]           = CAP(  1,   1,   7,   0),
   [GEN6_FORMAT_B4G4R4A4_UNORM_SRGB]      = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_R8G8_UNORM]               = CAP(  1,   1,   7,   0),
   [GEN6_FORMAT_R8G8_SNORM]               = CAP(  1,   6,   7,   0),
   [GEN6_FORMAT_R8G8_SINT]                = CAP(  1,   0,   7,   0),
   [GEN6_FORMAT_R8G8_UINT]                = CAP(  1,   0,   7,   0),
   [GEN6_FORMAT_R16_UNORM]                = CAP(  1, 4.5,   7,   7),
   [GEN6_FORMAT_R16_SNORM]                = CAP(  1,   6,   7,   0),
   [GEN6_FORMAT_R16_SINT]                 = CAP(  1,   0,   7,   0),
   [GEN6_FORMAT_R16_UINT]                 = CAP(  1,   0,   7,   0),
   [GEN6_FORMAT_R16_FLOAT]                = CAP(  1,   1,   7,   0),
   [GEN6_FORMAT_B5G5R5X1_UNORM]           = CAP(  1,   1,   7,   0),
   [GEN6_FORMAT_B5G5R5X1_UNORM_SRGB]      = CAP(  1,   1,   0,   0),
   [GEN6_FORMAT_R8_UNORM]                 = CAP(  1,   1,   7,   0),
   [GEN6_FORMAT_R8_SNORM]                 = CAP(  1,   6,   7,   0),
   [GEN6_FORMAT_R8_SINT]                  = CAP(  1,   0,   7,   0),
   [GEN6_FORMAT_R8_UINT]                  = CAP(  1,   0,   7,   0),
   [GEN6_FORMAT_A8_UNORM]                 = CAP(  1,   1,   7,   0),
   [GEN6_FORMAT_YCRCB_NORMAL]             = CAP(  1,   0,   0,   6),
   [GEN6_FORMAT_YCRCB_SWAPUVY]            = CAP(  1,   0,   0,   6),
   [GEN6_FORMAT_YCRCB_SWAPUV]             = CAP(  1,   0,   0,   6),
   [GEN6_FORMAT_YCRCB_SWAPY]              = CAP(  1,   0,   0,   6),
#undef CAP
};
 
bool
ilo_format_support_vb(const struct ilo_dev *dev,
enum pipe_format format)
{
const int idx = ilo_format_translate(dev, format, PIPE_BIND_VERTEX_BUFFER);
const struct ilo_vf_cap *cap = (idx >= 0 && idx < Elements(ilo_vf_caps)) ?
&ilo_vf_caps[idx] : NULL;
 
return (cap && cap->vertex_element &&
ilo_dev_gen(dev) >= cap->vertex_element);
}
 
bool
ilo_format_support_sol(const struct ilo_dev *dev,
enum pipe_format format)
{
const int idx = ilo_format_translate(dev, format, PIPE_BIND_STREAM_OUTPUT);
const struct ilo_sol_cap *cap = (idx >= 0 && idx < Elements(ilo_sol_caps)) ?
&ilo_sol_caps[idx] : NULL;
 
return (cap && cap->buffer && ilo_dev_gen(dev) >= cap->buffer);
}
 
/**
 * Return true when \p format can be sampled on \p dev.  Pure-integer
 * formats only need sampling support; all other formats additionally
 * require filtering support.
 */
bool
ilo_format_support_sampler(const struct ilo_dev *dev,
                           enum pipe_format format)
{
   const int idx = ilo_format_translate(dev, format, PIPE_BIND_SAMPLER_VIEW);
   const struct ilo_sampler_cap *cap;

   if (idx < 0 || idx >= Elements(ilo_sampler_caps))
      return false;

   cap = &ilo_sampler_caps[idx];
   if (!cap->sampling)
      return false;

   /* the table never lists filtering support without sampling support */
   assert(!cap->filtering || cap->filtering >= cap->sampling);

   if (util_format_is_pure_integer(format))
      return (ilo_dev_gen(dev) >= cap->sampling);

   return cap->filtering && ilo_dev_gen(dev) >= cap->filtering;
}
 
/**
 * Return true when \p format can be used as a render target on \p dev.
 * Pure-integer formats only need RT write support; all other formats
 * additionally require blending support.
 */
bool
ilo_format_support_rt(const struct ilo_dev *dev,
                      enum pipe_format format)
{
   const int idx = ilo_format_translate(dev, format, PIPE_BIND_RENDER_TARGET);
   const struct ilo_dp_cap *cap;

   if (idx < 0 || idx >= Elements(ilo_dp_caps))
      return false;

   cap = &ilo_dp_caps[idx];
   if (!cap->rt_write)
      return false;

   /* the table never lists blending support without RT write support */
   assert(!cap->rt_write_blending || cap->rt_write_blending >= cap->rt_write);

   if (util_format_is_pure_integer(format))
      return (ilo_dev_gen(dev) >= cap->rt_write);

   return cap->rt_write_blending && ilo_dev_gen(dev) >= cap->rt_write_blending;
}
 
/**
 * Return true when \p format can be used as a depth/stencil buffer.
 * \p dev is unused; the supported set is the same on all gens handled here.
 */
bool
ilo_format_support_zs(const struct ilo_dev *dev,
                      enum pipe_format format)
{
   bool supported;

   switch (format) {
   case PIPE_FORMAT_Z16_UNORM:
   case PIPE_FORMAT_Z24X8_UNORM:
   case PIPE_FORMAT_Z32_FLOAT:
   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
      supported = true;
      break;
   case PIPE_FORMAT_S8_UINT:
      /* TODO separate stencil */
      /* fallthrough */
   default:
      supported = false;
      break;
   }

   return supported;
}
 
/**
 * Translate a color (non-depth/stencil) pipe format to the matching hardware
 * format.  Return -1 on errors.
 *
 * Entries mapped to 0 below are pipe formats with no hardware equivalent;
 * they translate to -1 (see the sentinel handling at the end).
 */
int
ilo_format_translate_color(const struct ilo_dev *dev,
                           enum pipe_format format)
{
   static const int format_mapping[PIPE_FORMAT_COUNT] = {
      [PIPE_FORMAT_NONE]                  = 0,
      [PIPE_FORMAT_B8G8R8A8_UNORM]        = GEN6_FORMAT_B8G8R8A8_UNORM,
      [PIPE_FORMAT_B8G8R8X8_UNORM]        = GEN6_FORMAT_B8G8R8X8_UNORM,
      [PIPE_FORMAT_A8R8G8B8_UNORM]        = 0,
      [PIPE_FORMAT_X8R8G8B8_UNORM]        = 0,
      [PIPE_FORMAT_B5G5R5A1_UNORM]        = GEN6_FORMAT_B5G5R5A1_UNORM,
      [PIPE_FORMAT_B4G4R4A4_UNORM]        = GEN6_FORMAT_B4G4R4A4_UNORM,
      [PIPE_FORMAT_B5G6R5_UNORM]          = GEN6_FORMAT_B5G6R5_UNORM,
      [PIPE_FORMAT_R10G10B10A2_UNORM]     = GEN6_FORMAT_R10G10B10A2_UNORM,
      [PIPE_FORMAT_L8_UNORM]              = GEN6_FORMAT_L8_UNORM,
      [PIPE_FORMAT_A8_UNORM]              = GEN6_FORMAT_A8_UNORM,
      [PIPE_FORMAT_I8_UNORM]              = GEN6_FORMAT_I8_UNORM,
      [PIPE_FORMAT_L8A8_UNORM]            = GEN6_FORMAT_L8A8_UNORM,
      [PIPE_FORMAT_L16_UNORM]             = GEN6_FORMAT_L16_UNORM,
      [PIPE_FORMAT_UYVY]                  = GEN6_FORMAT_YCRCB_SWAPUVY,
      [PIPE_FORMAT_YUYV]                  = GEN6_FORMAT_YCRCB_NORMAL,
      /* depth/stencil formats are handled by ilo_format_support_zs() */
      [PIPE_FORMAT_Z16_UNORM]             = 0,
      [PIPE_FORMAT_Z32_UNORM]             = 0,
      [PIPE_FORMAT_Z32_FLOAT]             = 0,
      [PIPE_FORMAT_Z24_UNORM_S8_UINT]     = 0,
      [PIPE_FORMAT_S8_UINT_Z24_UNORM]     = 0,
      [PIPE_FORMAT_Z24X8_UNORM]           = 0,
      [PIPE_FORMAT_X8Z24_UNORM]           = 0,
      [PIPE_FORMAT_S8_UINT]               = 0,
      [PIPE_FORMAT_R64_FLOAT]             = GEN6_FORMAT_R64_FLOAT,
      [PIPE_FORMAT_R64G64_FLOAT]          = GEN6_FORMAT_R64G64_FLOAT,
      [PIPE_FORMAT_R64G64B64_FLOAT]       = GEN6_FORMAT_R64G64B64_FLOAT,
      [PIPE_FORMAT_R64G64B64A64_FLOAT]    = GEN6_FORMAT_R64G64B64A64_FLOAT,
      [PIPE_FORMAT_R32_FLOAT]             = GEN6_FORMAT_R32_FLOAT,
      [PIPE_FORMAT_R32G32_FLOAT]          = GEN6_FORMAT_R32G32_FLOAT,
      [PIPE_FORMAT_R32G32B32_FLOAT]       = GEN6_FORMAT_R32G32B32_FLOAT,
      [PIPE_FORMAT_R32G32B32A32_FLOAT]    = GEN6_FORMAT_R32G32B32A32_FLOAT,
      [PIPE_FORMAT_R32_UNORM]             = GEN6_FORMAT_R32_UNORM,
      [PIPE_FORMAT_R32G32_UNORM]          = GEN6_FORMAT_R32G32_UNORM,
      [PIPE_FORMAT_R32G32B32_UNORM]       = GEN6_FORMAT_R32G32B32_UNORM,
      [PIPE_FORMAT_R32G32B32A32_UNORM]    = GEN6_FORMAT_R32G32B32A32_UNORM,
      [PIPE_FORMAT_R32_USCALED]           = GEN6_FORMAT_R32_USCALED,
      [PIPE_FORMAT_R32G32_USCALED]        = GEN6_FORMAT_R32G32_USCALED,
      [PIPE_FORMAT_R32G32B32_USCALED]     = GEN6_FORMAT_R32G32B32_USCALED,
      [PIPE_FORMAT_R32G32B32A32_USCALED]  = GEN6_FORMAT_R32G32B32A32_USCALED,
      [PIPE_FORMAT_R32_SNORM]             = GEN6_FORMAT_R32_SNORM,
      [PIPE_FORMAT_R32G32_SNORM]          = GEN6_FORMAT_R32G32_SNORM,
      [PIPE_FORMAT_R32G32B32_SNORM]       = GEN6_FORMAT_R32G32B32_SNORM,
      [PIPE_FORMAT_R32G32B32A32_SNORM]    = GEN6_FORMAT_R32G32B32A32_SNORM,
      [PIPE_FORMAT_R32_SSCALED]           = GEN6_FORMAT_R32_SSCALED,
      [PIPE_FORMAT_R32G32_SSCALED]        = GEN6_FORMAT_R32G32_SSCALED,
      [PIPE_FORMAT_R32G32B32_SSCALED]     = GEN6_FORMAT_R32G32B32_SSCALED,
      [PIPE_FORMAT_R32G32B32A32_SSCALED]  = GEN6_FORMAT_R32G32B32A32_SSCALED,
      [PIPE_FORMAT_R16_UNORM]             = GEN6_FORMAT_R16_UNORM,
      [PIPE_FORMAT_R16G16_UNORM]          = GEN6_FORMAT_R16G16_UNORM,
      [PIPE_FORMAT_R16G16B16_UNORM]       = GEN6_FORMAT_R16G16B16_UNORM,
      [PIPE_FORMAT_R16G16B16A16_UNORM]    = GEN6_FORMAT_R16G16B16A16_UNORM,
      [PIPE_FORMAT_R16_USCALED]           = GEN6_FORMAT_R16_USCALED,
      [PIPE_FORMAT_R16G16_USCALED]        = GEN6_FORMAT_R16G16_USCALED,
      [PIPE_FORMAT_R16G16B16_USCALED]     = GEN6_FORMAT_R16G16B16_USCALED,
      [PIPE_FORMAT_R16G16B16A16_USCALED]  = GEN6_FORMAT_R16G16B16A16_USCALED,
      [PIPE_FORMAT_R16_SNORM]             = GEN6_FORMAT_R16_SNORM,
      [PIPE_FORMAT_R16G16_SNORM]          = GEN6_FORMAT_R16G16_SNORM,
      [PIPE_FORMAT_R16G16B16_SNORM]       = GEN6_FORMAT_R16G16B16_SNORM,
      [PIPE_FORMAT_R16G16B16A16_SNORM]    = GEN6_FORMAT_R16G16B16A16_SNORM,
      [PIPE_FORMAT_R16_SSCALED]           = GEN6_FORMAT_R16_SSCALED,
      [PIPE_FORMAT_R16G16_SSCALED]        = GEN6_FORMAT_R16G16_SSCALED,
      [PIPE_FORMAT_R16G16B16_SSCALED]     = GEN6_FORMAT_R16G16B16_SSCALED,
      [PIPE_FORMAT_R16G16B16A16_SSCALED]  = GEN6_FORMAT_R16G16B16A16_SSCALED,
      [PIPE_FORMAT_R8_UNORM]              = GEN6_FORMAT_R8_UNORM,
      [PIPE_FORMAT_R8G8_UNORM]            = GEN6_FORMAT_R8G8_UNORM,
      [PIPE_FORMAT_R8G8B8_UNORM]          = GEN6_FORMAT_R8G8B8_UNORM,
      [PIPE_FORMAT_R8G8B8A8_UNORM]        = GEN6_FORMAT_R8G8B8A8_UNORM,
      [PIPE_FORMAT_X8B8G8R8_UNORM]        = 0,
      [PIPE_FORMAT_R8_USCALED]            = GEN6_FORMAT_R8_USCALED,
      [PIPE_FORMAT_R8G8_USCALED]          = GEN6_FORMAT_R8G8_USCALED,
      [PIPE_FORMAT_R8G8B8_USCALED]        = GEN6_FORMAT_R8G8B8_USCALED,
      [PIPE_FORMAT_R8G8B8A8_USCALED]      = GEN6_FORMAT_R8G8B8A8_USCALED,
      [PIPE_FORMAT_R8_SNORM]              = GEN6_FORMAT_R8_SNORM,
      [PIPE_FORMAT_R8G8_SNORM]            = GEN6_FORMAT_R8G8_SNORM,
      [PIPE_FORMAT_R8G8B8_SNORM]          = GEN6_FORMAT_R8G8B8_SNORM,
      [PIPE_FORMAT_R8G8B8A8_SNORM]        = GEN6_FORMAT_R8G8B8A8_SNORM,
      [PIPE_FORMAT_R8_SSCALED]            = GEN6_FORMAT_R8_SSCALED,
      [PIPE_FORMAT_R8G8_SSCALED]          = GEN6_FORMAT_R8G8_SSCALED,
      [PIPE_FORMAT_R8G8B8_SSCALED]        = GEN6_FORMAT_R8G8B8_SSCALED,
      [PIPE_FORMAT_R8G8B8A8_SSCALED]      = GEN6_FORMAT_R8G8B8A8_SSCALED,
      [PIPE_FORMAT_R32_FIXED]             = GEN6_FORMAT_R32_SFIXED,
      [PIPE_FORMAT_R32G32_FIXED]          = GEN6_FORMAT_R32G32_SFIXED,
      [PIPE_FORMAT_R32G32B32_FIXED]       = GEN6_FORMAT_R32G32B32_SFIXED,
      [PIPE_FORMAT_R32G32B32A32_FIXED]    = GEN6_FORMAT_R32G32B32A32_SFIXED,
      [PIPE_FORMAT_R16_FLOAT]             = GEN6_FORMAT_R16_FLOAT,
      [PIPE_FORMAT_R16G16_FLOAT]          = GEN6_FORMAT_R16G16_FLOAT,
      [PIPE_FORMAT_R16G16B16_FLOAT]       = GEN6_FORMAT_R16G16B16_FLOAT,
      [PIPE_FORMAT_R16G16B16A16_FLOAT]    = GEN6_FORMAT_R16G16B16A16_FLOAT,
      [PIPE_FORMAT_L8_SRGB]               = GEN6_FORMAT_L8_UNORM_SRGB,
      [PIPE_FORMAT_L8A8_SRGB]             = GEN6_FORMAT_L8A8_UNORM_SRGB,
      [PIPE_FORMAT_R8G8B8_SRGB]           = GEN6_FORMAT_R8G8B8_UNORM_SRGB,
      [PIPE_FORMAT_A8B8G8R8_SRGB]         = 0,
      [PIPE_FORMAT_X8B8G8R8_SRGB]         = 0,
      [PIPE_FORMAT_B8G8R8A8_SRGB]         = GEN6_FORMAT_B8G8R8A8_UNORM_SRGB,
      [PIPE_FORMAT_B8G8R8X8_SRGB]         = GEN6_FORMAT_B8G8R8X8_UNORM_SRGB,
      [PIPE_FORMAT_A8R8G8B8_SRGB]         = 0,
      [PIPE_FORMAT_X8R8G8B8_SRGB]         = 0,
      [PIPE_FORMAT_R8G8B8A8_SRGB]         = GEN6_FORMAT_R8G8B8A8_UNORM_SRGB,
      [PIPE_FORMAT_DXT1_RGB]              = GEN6_FORMAT_DXT1_RGB,
      [PIPE_FORMAT_DXT1_RGBA]             = GEN6_FORMAT_BC1_UNORM,
      [PIPE_FORMAT_DXT3_RGBA]             = GEN6_FORMAT_BC2_UNORM,
      [PIPE_FORMAT_DXT5_RGBA]             = GEN6_FORMAT_BC3_UNORM,
      [PIPE_FORMAT_DXT1_SRGB]             = GEN6_FORMAT_DXT1_RGB_SRGB,
      [PIPE_FORMAT_DXT1_SRGBA]            = GEN6_FORMAT_BC1_UNORM_SRGB,
      [PIPE_FORMAT_DXT3_SRGBA]            = GEN6_FORMAT_BC2_UNORM_SRGB,
      [PIPE_FORMAT_DXT5_SRGBA]            = GEN6_FORMAT_BC3_UNORM_SRGB,
      [PIPE_FORMAT_RGTC1_UNORM]           = GEN6_FORMAT_BC4_UNORM,
      [PIPE_FORMAT_RGTC1_SNORM]           = GEN6_FORMAT_BC4_SNORM,
      [PIPE_FORMAT_RGTC2_UNORM]           = GEN6_FORMAT_BC5_UNORM,
      [PIPE_FORMAT_RGTC2_SNORM]           = GEN6_FORMAT_BC5_SNORM,
      [PIPE_FORMAT_R8G8_B8G8_UNORM]       = 0,
      [PIPE_FORMAT_G8R8_G8B8_UNORM]       = 0,
      [PIPE_FORMAT_R8SG8SB8UX8U_NORM]     = 0,
      [PIPE_FORMAT_R5SG5SB6U_NORM]        = 0,
      [PIPE_FORMAT_A8B8G8R8_UNORM]        = 0,
      [PIPE_FORMAT_B5G5R5X1_UNORM]        = GEN6_FORMAT_B5G5R5X1_UNORM,
      [PIPE_FORMAT_R10G10B10A2_USCALED]   = GEN6_FORMAT_R10G10B10A2_USCALED,
      [PIPE_FORMAT_R11G11B10_FLOAT]       = GEN6_FORMAT_R11G11B10_FLOAT,
      [PIPE_FORMAT_R9G9B9E5_FLOAT]        = GEN6_FORMAT_R9G9B9E5_SHAREDEXP,
      [PIPE_FORMAT_Z32_FLOAT_S8X24_UINT]  = 0,
      [PIPE_FORMAT_R1_UNORM]              = GEN6_FORMAT_R1_UNORM,
      [PIPE_FORMAT_R10G10B10X2_USCALED]   = GEN6_FORMAT_R10G10B10X2_USCALED,
      [PIPE_FORMAT_R10G10B10X2_SNORM]     = 0,
      [PIPE_FORMAT_L4A4_UNORM]            = 0,
      [PIPE_FORMAT_B10G10R10A2_UNORM]     = GEN6_FORMAT_B10G10R10A2_UNORM,
      [PIPE_FORMAT_R10SG10SB10SA2U_NORM]  = 0,
      [PIPE_FORMAT_R8G8Bx_SNORM]          = 0,
      [PIPE_FORMAT_R8G8B8X8_UNORM]        = GEN6_FORMAT_R8G8B8X8_UNORM,
      [PIPE_FORMAT_B4G4R4X4_UNORM]        = 0,
      [PIPE_FORMAT_X24S8_UINT]            = 0,
      [PIPE_FORMAT_S8X24_UINT]            = 0,
      [PIPE_FORMAT_X32_S8X24_UINT]        = 0,
      [PIPE_FORMAT_B2G3R3_UNORM]          = 0,
      [PIPE_FORMAT_L16A16_UNORM]          = GEN6_FORMAT_L16A16_UNORM,
      [PIPE_FORMAT_A16_UNORM]             = GEN6_FORMAT_A16_UNORM,
      [PIPE_FORMAT_I16_UNORM]             = GEN6_FORMAT_I16_UNORM,
      [PIPE_FORMAT_LATC1_UNORM]           = 0,
      [PIPE_FORMAT_LATC1_SNORM]           = 0,
      [PIPE_FORMAT_LATC2_UNORM]           = 0,
      [PIPE_FORMAT_LATC2_SNORM]           = 0,
      [PIPE_FORMAT_A8_SNORM]              = 0,
      [PIPE_FORMAT_L8_SNORM]              = 0,
      [PIPE_FORMAT_L8A8_SNORM]            = 0,
      [PIPE_FORMAT_I8_SNORM]              = 0,
      [PIPE_FORMAT_A16_SNORM]             = 0,
      [PIPE_FORMAT_L16_SNORM]             = 0,
      [PIPE_FORMAT_L16A16_SNORM]          = 0,
      [PIPE_FORMAT_I16_SNORM]             = 0,
      [PIPE_FORMAT_A16_FLOAT]             = GEN6_FORMAT_A16_FLOAT,
      [PIPE_FORMAT_L16_FLOAT]             = GEN6_FORMAT_L16_FLOAT,
      [PIPE_FORMAT_L16A16_FLOAT]          = GEN6_FORMAT_L16A16_FLOAT,
      [PIPE_FORMAT_I16_FLOAT]             = GEN6_FORMAT_I16_FLOAT,
      [PIPE_FORMAT_A32_FLOAT]             = GEN6_FORMAT_A32_FLOAT,
      [PIPE_FORMAT_L32_FLOAT]             = GEN6_FORMAT_L32_FLOAT,
      [PIPE_FORMAT_L32A32_FLOAT]          = GEN6_FORMAT_L32A32_FLOAT,
      [PIPE_FORMAT_I32_FLOAT]             = GEN6_FORMAT_I32_FLOAT,
      [PIPE_FORMAT_YV12]                  = 0,
      [PIPE_FORMAT_YV16]                  = 0,
      [PIPE_FORMAT_IYUV]                  = 0,
      [PIPE_FORMAT_NV12]                  = 0,
      [PIPE_FORMAT_NV21]                  = 0,
      [PIPE_FORMAT_A4R4_UNORM]            = 0,
      [PIPE_FORMAT_R4A4_UNORM]            = 0,
      [PIPE_FORMAT_R8A8_UNORM]            = 0,
      [PIPE_FORMAT_A8R8_UNORM]            = 0,
      [PIPE_FORMAT_R10G10B10A2_SSCALED]   = GEN6_FORMAT_R10G10B10A2_SSCALED,
      [PIPE_FORMAT_R10G10B10A2_SNORM]     = GEN6_FORMAT_R10G10B10A2_SNORM,
      [PIPE_FORMAT_B10G10R10A2_USCALED]   = GEN6_FORMAT_B10G10R10A2_USCALED,
      [PIPE_FORMAT_B10G10R10A2_SSCALED]   = GEN6_FORMAT_B10G10R10A2_SSCALED,
      [PIPE_FORMAT_B10G10R10A2_SNORM]     = GEN6_FORMAT_B10G10R10A2_SNORM,
      [PIPE_FORMAT_R8_UINT]               = GEN6_FORMAT_R8_UINT,
      [PIPE_FORMAT_R8G8_UINT]             = GEN6_FORMAT_R8G8_UINT,
      [PIPE_FORMAT_R8G8B8_UINT]           = GEN6_FORMAT_R8G8B8_UINT,
      [PIPE_FORMAT_R8G8B8A8_UINT]         = GEN6_FORMAT_R8G8B8A8_UINT,
      [PIPE_FORMAT_R8_SINT]               = GEN6_FORMAT_R8_SINT,
      [PIPE_FORMAT_R8G8_SINT]             = GEN6_FORMAT_R8G8_SINT,
      [PIPE_FORMAT_R8G8B8_SINT]           = GEN6_FORMAT_R8G8B8_SINT,
      [PIPE_FORMAT_R8G8B8A8_SINT]         = GEN6_FORMAT_R8G8B8A8_SINT,
      [PIPE_FORMAT_R16_UINT]              = GEN6_FORMAT_R16_UINT,
      [PIPE_FORMAT_R16G16_UINT]           = GEN6_FORMAT_R16G16_UINT,
      [PIPE_FORMAT_R16G16B16_UINT]        = GEN6_FORMAT_R16G16B16_UINT,
      [PIPE_FORMAT_R16G16B16A16_UINT]     = GEN6_FORMAT_R16G16B16A16_UINT,
      [PIPE_FORMAT_R16_SINT]              = GEN6_FORMAT_R16_SINT,
      [PIPE_FORMAT_R16G16_SINT]           = GEN6_FORMAT_R16G16_SINT,
      [PIPE_FORMAT_R16G16B16_SINT]        = GEN6_FORMAT_R16G16B16_SINT,
      [PIPE_FORMAT_R16G16B16A16_SINT]     = GEN6_FORMAT_R16G16B16A16_SINT,
      [PIPE_FORMAT_R32_UINT]              = GEN6_FORMAT_R32_UINT,
      [PIPE_FORMAT_R32G32_UINT]           = GEN6_FORMAT_R32G32_UINT,
      [PIPE_FORMAT_R32G32B32_UINT]        = GEN6_FORMAT_R32G32B32_UINT,
      [PIPE_FORMAT_R32G32B32A32_UINT]     = GEN6_FORMAT_R32G32B32A32_UINT,
      [PIPE_FORMAT_R32_SINT]              = GEN6_FORMAT_R32_SINT,
      [PIPE_FORMAT_R32G32_SINT]           = GEN6_FORMAT_R32G32_SINT,
      [PIPE_FORMAT_R32G32B32_SINT]        = GEN6_FORMAT_R32G32B32_SINT,
      [PIPE_FORMAT_R32G32B32A32_SINT]     = GEN6_FORMAT_R32G32B32A32_SINT,
      [PIPE_FORMAT_A8_UINT]               = 0,
      [PIPE_FORMAT_I8_UINT]               = GEN6_FORMAT_I8_UINT,
      [PIPE_FORMAT_L8_UINT]               = GEN6_FORMAT_L8_UINT,
      [PIPE_FORMAT_L8A8_UINT]             = GEN6_FORMAT_L8A8_UINT,
      [PIPE_FORMAT_A8_SINT]               = 0,
      [PIPE_FORMAT_I8_SINT]               = GEN6_FORMAT_I8_SINT,
      [PIPE_FORMAT_L8_SINT]               = GEN6_FORMAT_L8_SINT,
      [PIPE_FORMAT_L8A8_SINT]             = GEN6_FORMAT_L8A8_SINT,
      [PIPE_FORMAT_A16_UINT]              = 0,
      [PIPE_FORMAT_I16_UINT]              = 0,
      [PIPE_FORMAT_L16_UINT]              = 0,
      [PIPE_FORMAT_L16A16_UINT]           = 0,
      [PIPE_FORMAT_A16_SINT]              = 0,
      [PIPE_FORMAT_I16_SINT]              = 0,
      [PIPE_FORMAT_L16_SINT]              = 0,
      [PIPE_FORMAT_L16A16_SINT]           = 0,
      [PIPE_FORMAT_A32_UINT]              = 0,
      [PIPE_FORMAT_I32_UINT]              = 0,
      [PIPE_FORMAT_L32_UINT]              = 0,
      [PIPE_FORMAT_L32A32_UINT]           = 0,
      [PIPE_FORMAT_A32_SINT]              = 0,
      [PIPE_FORMAT_I32_SINT]              = 0,
      [PIPE_FORMAT_L32_SINT]              = 0,
      [PIPE_FORMAT_L32A32_SINT]           = 0,
      [PIPE_FORMAT_B10G10R10A2_UINT]      = GEN6_FORMAT_B10G10R10A2_UINT,
      [PIPE_FORMAT_ETC1_RGB8]             = GEN6_FORMAT_ETC1_RGB8,
      [PIPE_FORMAT_R8G8_R8B8_UNORM]       = 0,
      [PIPE_FORMAT_G8R8_B8R8_UNORM]       = 0,
      [PIPE_FORMAT_R8G8B8X8_SNORM]        = 0,
      [PIPE_FORMAT_R8G8B8X8_SRGB]         = 0,
      [PIPE_FORMAT_R8G8B8X8_UINT]         = 0,
      [PIPE_FORMAT_R8G8B8X8_SINT]         = 0,
      [PIPE_FORMAT_B10G10R10X2_UNORM]     = GEN6_FORMAT_B10G10R10X2_UNORM,
      [PIPE_FORMAT_R16G16B16X16_UNORM]    = GEN6_FORMAT_R16G16B16X16_UNORM,
      [PIPE_FORMAT_R16G16B16X16_SNORM]    = 0,
      [PIPE_FORMAT_R16G16B16X16_FLOAT]    = GEN6_FORMAT_R16G16B16X16_FLOAT,
      [PIPE_FORMAT_R16G16B16X16_UINT]     = 0,
      [PIPE_FORMAT_R16G16B16X16_SINT]     = 0,
      [PIPE_FORMAT_R32G32B32X32_FLOAT]    = GEN6_FORMAT_R32G32B32X32_FLOAT,
      [PIPE_FORMAT_R32G32B32X32_UINT]     = 0,
      [PIPE_FORMAT_R32G32B32X32_SINT]     = 0,
      [PIPE_FORMAT_R8A8_SNORM]            = 0,
      [PIPE_FORMAT_R16A16_UNORM]          = 0,
      [PIPE_FORMAT_R16A16_SNORM]          = 0,
      [PIPE_FORMAT_R16A16_FLOAT]          = 0,
      [PIPE_FORMAT_R32A32_FLOAT]          = 0,
      [PIPE_FORMAT_R8A8_UINT]             = 0,
      [PIPE_FORMAT_R8A8_SINT]             = 0,
      [PIPE_FORMAT_R16A16_UINT]           = 0,
      [PIPE_FORMAT_R16A16_SINT]           = 0,
      [PIPE_FORMAT_R32A32_UINT]           = 0,
      [PIPE_FORMAT_R32A32_SINT]           = 0,
      [PIPE_FORMAT_R10G10B10A2_UINT]      = GEN6_FORMAT_R10G10B10A2_UINT,
      [PIPE_FORMAT_B5G6R5_SRGB]           = GEN6_FORMAT_B5G6R5_UNORM_SRGB,
   };
   int sfmt = format_mapping[format];

   /*
    * GEN6_FORMAT_R32G32B32A32_FLOAT happens to be 0, so a zero entry is
    * ambiguous: it means "unsupported" for every pipe format except
    * PIPE_FORMAT_R32G32B32A32_FLOAT itself.
    */
   if (!sfmt && format != PIPE_FORMAT_R32G32B32A32_FLOAT)
      sfmt = -1;

   return sfmt;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_format.h
0,0 → 1,168
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_FORMAT_H
#define ILO_FORMAT_H
 
#include "genhw/genhw.h"
#include "ilo_core.h"
#include "ilo_dev.h"
 
/* Return true if the device supports @format as a vertex buffer format. */
bool
ilo_format_support_vb(const struct ilo_dev *dev,
                      enum pipe_format format);

/* Return true if the device supports @format for stream output. */
bool
ilo_format_support_sol(const struct ilo_dev *dev,
                       enum pipe_format format);

/* Return true if the device can sample textures of @format. */
bool
ilo_format_support_sampler(const struct ilo_dev *dev,
                           enum pipe_format format);

/* Return true if the device can render to @format. */
bool
ilo_format_support_rt(const struct ilo_dev *dev,
                      enum pipe_format format);

/* Return true if the device supports @format as a depth/stencil format. */
bool
ilo_format_support_zs(const struct ilo_dev *dev,
                      enum pipe_format format);

/*
 * Translate a color @format to a hardware surface format.  Returns -1 on
 * errors (see ilo_format_translate() below for the same convention).
 */
int
ilo_format_translate_color(const struct ilo_dev *dev,
                           enum pipe_format format);
 
/**
 * Translate a pipe format to a hardware surface format suitable for
 * the given purpose.  Return -1 on errors.
 *
 * This is an inline function not only for performance reasons.  There are
 * caveats that the callers should be aware of before calling this function.
 */
static inline int
ilo_format_translate(const struct ilo_dev *dev,
                     enum pipe_format format, unsigned bind)
{
   switch (bind) {
   case PIPE_BIND_RENDER_TARGET:
      /*
       * Some RGBX formats are not supported as render target formats.  But
       * we can use their RGBA counterparts and force the destination alpha
       * to be one when blending is enabled.
       */
      if (format == PIPE_FORMAT_B8G8R8X8_UNORM)
         return GEN6_FORMAT_B8G8R8A8_UNORM;

      return ilo_format_translate_color(dev, format);
   case PIPE_BIND_SAMPLER_VIEW:
      /*
       * For depth formats, we want the depth values to be returned as R
       * values.  But we assume in many places that the depth values are
       * returned as I values (util_make_fragment_tex_shader_writedepth() is
       * one such example).  We have to live with that at least for now.
       *
       * For ETC1 format, the texture data will be decompressed before being
       * written to the bo.  See tex_staging_sys_convert_write().
       */
      switch (format) {
      case PIPE_FORMAT_Z16_UNORM:
         return GEN6_FORMAT_I16_UNORM;
      case PIPE_FORMAT_Z32_FLOAT:
         return GEN6_FORMAT_I32_FLOAT;
      case PIPE_FORMAT_Z24X8_UNORM:
      case PIPE_FORMAT_Z24_UNORM_S8_UINT:
         return GEN6_FORMAT_I24X8_UNORM;
      case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
         return GEN6_FORMAT_I32X32_FLOAT;
      case PIPE_FORMAT_ETC1_RGB8:
         return GEN6_FORMAT_R8G8B8X8_UNORM;
      default:
         return ilo_format_translate_color(dev, format);
      }
   case PIPE_BIND_VERTEX_BUFFER:
      if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
         return ilo_format_translate_color(dev, format);

      /*
       * Pre-Gen7.5, some 3-component formats are not supported as vertex
       * element formats.  But since we move between vertices using
       * vb->stride, we should be good to use their 4-component counterparts
       * if we force the W component to be one.  The only exception is that
       * the vb boundary check for the last vertex may fail.
       */
      switch (format) {
      case PIPE_FORMAT_R16G16B16_FLOAT:
         return GEN6_FORMAT_R16G16B16A16_FLOAT;
      case PIPE_FORMAT_R16G16B16_UINT:
         return GEN6_FORMAT_R16G16B16A16_UINT;
      case PIPE_FORMAT_R16G16B16_SINT:
         return GEN6_FORMAT_R16G16B16A16_SINT;
      case PIPE_FORMAT_R8G8B8_UINT:
         return GEN6_FORMAT_R8G8B8A8_UINT;
      case PIPE_FORMAT_R8G8B8_SINT:
         return GEN6_FORMAT_R8G8B8A8_SINT;
      default:
         return ilo_format_translate_color(dev, format);
      }
   case PIPE_BIND_STREAM_OUTPUT:
      return ilo_format_translate_color(dev, format);
   default:
      assert(!"cannot translate format");
      return -1;
   }
}
 
/* Translate @format for use as a render target format; -1 on errors. */
static inline int
ilo_format_translate_render(const struct ilo_dev *dev,
                            enum pipe_format format)
{
   const unsigned bind = PIPE_BIND_RENDER_TARGET;

   return ilo_format_translate(dev, format, bind);
}
 
/* Translate @format for use as a sampler view format; -1 on errors. */
static inline int
ilo_format_translate_texture(const struct ilo_dev *dev,
                             enum pipe_format format)
{
   const unsigned bind = PIPE_BIND_SAMPLER_VIEW;

   return ilo_format_translate(dev, format, bind);
}
 
/* Translate @format for use as a vertex element format; -1 on errors. */
static inline int
ilo_format_translate_vertex(const struct ilo_dev *dev,
                            enum pipe_format format)
{
   const unsigned bind = PIPE_BIND_VERTEX_BUFFER;

   return ilo_format_translate(dev, format, bind);
}
 
#endif /* ILO_FORMAT_H */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_image.c
0,0 → 1,1437
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2014 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "ilo_debug.h"
#include "ilo_image.h"
 
/* bitmask of tiling modes, with each bit indexed by a hardware tiling enum */
enum {
   IMAGE_TILING_NONE = 1 << GEN6_TILING_NONE,
   IMAGE_TILING_X = 1 << GEN6_TILING_X,
   IMAGE_TILING_Y = 1 << GEN6_TILING_Y,
   IMAGE_TILING_W = 1 << GEN8_TILING_W,

   IMAGE_TILING_ALL = (IMAGE_TILING_NONE |
                       IMAGE_TILING_X |
                       IMAGE_TILING_Y |
                       IMAGE_TILING_W)
};
 
/* transient state shared by the img_* helpers while laying out an image */
struct ilo_image_params {
   const struct ilo_dev *dev;
   const struct pipe_resource *templ;
   unsigned valid_tilings;    /* bitmask of IMAGE_TILING_x still allowed */

   bool compressed;           /* set from util_format_is_compressed() */

   /* aligned slice heights of LOD 0 and LOD 1, used for QPitch */
   unsigned h0, h1;
   /* running extent of the layout, in texels */
   unsigned max_x, max_y;
};
 
/*
 * Return the padded width and height, in texels, of a single slice of the
 * given mip level.
 */
static void
img_get_slice_size(const struct ilo_image *img,
                   const struct ilo_image_params *params,
                   unsigned level, unsigned *width, unsigned *height)
{
   const struct pipe_resource *templ = params->templ;
   unsigned w, h;

   w = u_minify(img->width0, level);
   h = u_minify(img->height0, level);

   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 114:
    *
    *     "The dimensions of the mip maps are first determined by applying the
    *      sizing algorithm presented in Non-Power-of-Two Mipmaps above.  Then,
    *      if necessary, they are padded out to compression block boundaries."
    */
   w = align(w, img->block_width);
   h = align(h, img->block_height);

   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 111:
    *
    *     "If the surface is multisampled (4x), these values must be adjusted
    *      as follows before proceeding:
    *
    *        W_L = ceiling(W_L / 2) * 4
    *        H_L = ceiling(H_L / 2) * 4"
    *
    * From the Ivy Bridge PRM, volume 1 part 1, page 108:
    *
    *     "If the surface is multisampled and it is a depth or stencil surface
    *      or Multisampled Surface StorageFormat in SURFACE_STATE is
    *      MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
    *      proceeding:
    *
    *        #samples  W_L =                    H_L =
    *        2         ceiling(W_L / 2) * 4     HL [no adjustment]
    *        4         ceiling(W_L / 2) * 4     ceiling(H_L / 2) * 4
    *        8         ceiling(W_L / 2) * 8     ceiling(H_L / 2) * 4
    *        16        ceiling(W_L / 2) * 8     ceiling(H_L / 2) * 8"
    *
    * For interleaved samples (4x), where pixels
    *
    *   (x, y  ) (x+1, y  )
    *   (x, y+1) (x+1, y+1)
    *
    * would be occupied by
    *
    *   (x, y  , si0) (x+1, y  , si0) (x, y  , si1) (x+1, y  , si1)
    *   (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
    *   (x, y  , si2) (x+1, y  , si2) (x, y  , si3) (x+1, y  , si3)
    *   (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
    *
    * Thus the need to
    *
    *   w = align(w, 2) * 2;
    *   h = align(h, 2) * 2;
    */
   if (img->interleaved_samples) {
      switch (templ->nr_samples) {
      case 0:
      case 1:
         break;
      case 2:
         w = align(w, 2) * 2;
         break;
      case 4:
         w = align(w, 2) * 2;
         h = align(h, 2) * 2;
         break;
      case 8:
         w = align(w, 2) * 4;
         h = align(h, 2) * 2;
         break;
      case 16:
         w = align(w, 2) * 4;
         h = align(h, 2) * 4;
         break;
      default:
         assert(!"unsupported sample count");
         break;
      }
   }

   /*
    * From the Ivy Bridge PRM, volume 1 part 1, page 108:
    *
    *     "For separate stencil buffer, the width must be mutiplied by 2 and
    *      height divided by 2..."
    *
    * To make things easier (for transfer), we will just double the stencil
    * stride in 3DSTATE_STENCIL_BUFFER.
    */
   w = align(w, img->align_i);
   h = align(h, img->align_j);

   *width = w;
   *height = h;
}
 
/* Return the number of memory layers, counting per-sample layers for MSAA. */
static unsigned
img_get_num_layers(const struct ilo_image *img,
                   const struct ilo_image_params *params)
{
   const struct pipe_resource *templ = params->templ;
   unsigned count = templ->array_size;

   /*
    * With non-interleaved samples, all samples of the same index are stored
    * in one layer, so each array slice expands into nr_samples layers.
    */
   if (!img->interleaved_samples && templ->nr_samples > 1)
      count *= templ->nr_samples;

   return count;
}
 
/*
 * Compute walk_layer_height (QPitch), the distance in texel rows between
 * consecutive array layers, and grow params->max_y to cover all layers.
 * Only meaningful for ILO_IMAGE_WALK_LAYER images.
 */
static void
img_init_layer_height(struct ilo_image *img,
                      struct ilo_image_params *params)
{
   const struct pipe_resource *templ = params->templ;
   unsigned num_layers;

   if (img->walk != ILO_IMAGE_WALK_LAYER)
      return;

   num_layers = img_get_num_layers(img, params);
   if (num_layers <= 1)
      return;

   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 115:
    *
    *     "The following equation is used for surface formats other than
    *      compressed textures:
    *
    *        QPitch = (h0 + h1 + 11j)"
    *
    *     "The equation for compressed textures (BC* and FXT1 surface formats)
    *      follows:
    *
    *        QPitch = (h0 + h1 + 11j) / 4"
    *
    *     "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
    *      value calculated in the equation above, for every other odd Surface
    *      Height starting from 1 i.e. 1,5,9,13"
    *
    * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
    *
    *     "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
    *      buffer and stencil buffer have an implied value of ARYSPC_FULL):
    *
    *        QPitch = (h0 + h1 + 12j)
    *        QPitch = (h0 + h1 + 12j) / 4 (compressed)
    *
    *      (There are many typos or missing words here...)"
    *
    * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
    * the base address.  The PRM divides QPitch by 4 for compressed formats
    * because the block height for those formats are 4, and it wants QPitch to
    * mean the number of memory rows, as opposed to texel rows, between
    * slices.  Since we use texel rows everywhere, we do not need to divide
    * QPitch by 4.
    */
   img->walk_layer_height = params->h0 + params->h1 +
      ((ilo_dev_gen(params->dev) >= ILO_GEN(7)) ? 12 : 11) * img->align_j;

   /* apply the DevSNB sampler MSAA QPitch errata quoted above */
   if (ilo_dev_gen(params->dev) == ILO_GEN(6) && templ->nr_samples > 1 &&
       img->height0 % 4 == 1)
      img->walk_layer_height += 4;

   params->max_y += img->walk_layer_height * (num_layers - 1);
}
 
/*
 * Lay out all mip levels: record the (x, y) texel offset and padded slice
 * size of each LOD, update params->max_x/max_y to the overall extent, and
 * save the h0/h1 heights needed later for the QPitch computation.
 */
static void
img_init_lods(struct ilo_image *img,
              struct ilo_image_params *params)
{
   const struct pipe_resource *templ = params->templ;
   unsigned cur_x, cur_y;
   unsigned lv;

   cur_x = 0;
   cur_y = 0;
   for (lv = 0; lv <= templ->last_level; lv++) {
      unsigned lod_w, lod_h;

      img_get_slice_size(img, params, lv, &lod_w, &lod_h);

      img->lods[lv].x = cur_x;
      img->lods[lv].y = cur_y;
      img->lods[lv].slice_width = lod_w;
      img->lods[lv].slice_height = lod_h;

      switch (img->walk) {
      case ILO_IMAGE_WALK_LAYER:
         /* MIPLAYOUT_BELOW: LOD 1 below LOD 0, LOD 2+ to the right of LOD 1 */
         if (lv == 1)
            cur_x += lod_w;
         else
            cur_y += lod_h;
         break;
      case ILO_IMAGE_WALK_LOD:
         /* all layers of an LOD are stored consecutively */
         lod_h *= img_get_num_layers(img, params);
         if (lv == 1)
            cur_x += lod_w;
         else
            cur_y += lod_h;

         /* every LOD begins at tile boundaries */
         if (templ->last_level > 0) {
            assert(img->format == PIPE_FORMAT_S8_UINT);
            cur_x = align(cur_x, 64);
            cur_y = align(cur_y, 64);
         }
         break;
      case ILO_IMAGE_WALK_3D:
         {
            /* 3D textures pack 2^lv slices per row, rows stacked vertically */
            const unsigned num_slices = u_minify(templ->depth0, lv);
            const unsigned num_slices_per_row = 1 << lv;
            const unsigned num_rows =
               (num_slices + num_slices_per_row - 1) / num_slices_per_row;

            lod_w *= num_slices_per_row;
            lod_h *= num_rows;

            cur_y += lod_h;
         }
         break;
      }

      if (params->max_x < img->lods[lv].x + lod_w)
         params->max_x = img->lods[lv].x + lod_w;
      if (params->max_y < img->lods[lv].y + lod_h)
         params->max_y = img->lods[lv].y + lod_h;
   }

   if (img->walk == ILO_IMAGE_WALK_LAYER) {
      params->h0 = img->lods[0].slice_height;

      if (templ->last_level > 0)
         params->h1 = img->lods[1].slice_height;
      else
         /* no LOD 1: compute the height it would have had */
         img_get_slice_size(img, params, 1, &cur_x, &params->h1);
   }
}
 
/*
 * Choose align_i and align_j, the horizontal and vertical alignment units in
 * texels, based on the format, binding flags, and hardware generation.
 */
static void
img_init_alignments(struct ilo_image *img,
                    const struct ilo_image_params *params)
{
   const struct pipe_resource *templ = params->templ;

   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 113:
    *
    *     "surface format           align_i     align_j
    *      YUV 4:2:2 formats        4           *see below
    *      BC1-5                    4           4
    *      FXT1                     8           4
    *      all other formats        4           *see below"
    *
    *     "- align_j = 4 for any depth buffer
    *      - align_j = 2 for separate stencil buffer
    *      - align_j = 4 for any render target surface is multisampled (4x)
    *      - align_j = 4 for any render target surface with Surface Vertical
    *        Alignment = VALIGN_4
    *      - align_j = 2 for any render target surface with Surface Vertical
    *        Alignment = VALIGN_2
    *      - align_j = 2 for all other render target surface
    *      - align_j = 2 for any sampling engine surface with Surface Vertical
    *        Alignment = VALIGN_2
    *      - align_j = 4 for any sampling engine surface with Surface Vertical
    *        Alignment = VALIGN_4"
    *
    * From the Sandy Bridge PRM, volume 4 part 1, page 86:
    *
    *     "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
    *      the Surface Format is 96 bits per element (BPE)."
    *
    * They can be rephrased as
    *
    *                                  align_i        align_j
    *   compressed formats             block width    block height
    *   PIPE_FORMAT_S8_UINT            4              2
    *   other depth/stencil formats    4              4
    *   4x multisampled                4              4
    *   bpp 96                         4              2
    *   others                         4              2 or 4
    */

   /*
    * From the Ivy Bridge PRM, volume 1 part 1, page 110:
    *
    *     "surface defined by      surface format     align_i     align_j
    *      3DSTATE_DEPTH_BUFFER    D16_UNORM          8           4
    *                              not D16_UNORM      4           4
    *      3DSTATE_STENCIL_BUFFER  N/A                8           8
    *      SURFACE_STATE           BC*, ETC*, EAC*    4           4
    *                              FXT1               8           4
    *                              all others         (set by SURFACE_STATE)"
    *
    * From the Ivy Bridge PRM, volume 4 part 1, page 63:
    *
    *     "- This field (Surface Vertical Aligment) is intended to be set to
    *        VALIGN_4 if the surface was rendered as a depth buffer, for a
    *        multisampled (4x) render target, or for a multisampled (8x)
    *        render target, since these surfaces support only alignment of 4.
    *      - Use of VALIGN_4 for other surfaces is supported, but uses more
    *        memory.
    *      - This field must be set to VALIGN_4 for all tiled Y Render Target
    *        surfaces.
    *      - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
    *        YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
    *      - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
    *        must be set to VALIGN_4."
    *      - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
    *
    *     "- This field (Surface Horizontal Aligment) is intended to be set to
    *        HALIGN_8 only if the surface was rendered as a depth buffer with
    *        Z16 format or a stencil buffer, since these surfaces support only
    *        alignment of 8.
    *      - Use of HALIGN_8 for other surfaces is supported, but uses more
    *        memory.
    *      - This field must be set to HALIGN_4 if the Surface Format is BC*.
    *      - This field must be set to HALIGN_8 if the Surface Format is
    *        FXT1."
    *
    * They can be rephrased as
    *
    *                                  align_i        align_j
    *  compressed formats              block width    block height
    *  PIPE_FORMAT_Z16_UNORM           8              4
    *  PIPE_FORMAT_S8_UINT             8              8
    *  other depth/stencil formats     4              4
    *  2x or 4x multisampled           4 or 8         4
    *  tiled Y                         4 or 8         4 (if rt)
    *  PIPE_FORMAT_R32G32B32_FLOAT     4 or 8         2
    *  others                          4 or 8         2 or 4
    */

   if (params->compressed) {
      /* this happens to be the case */
      img->align_i = img->block_width;
      img->align_j = img->block_height;
   } else if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
      if (ilo_dev_gen(params->dev) >= ILO_GEN(7)) {
         switch (img->format) {
         case PIPE_FORMAT_Z16_UNORM:
            img->align_i = 8;
            img->align_j = 4;
            break;
         case PIPE_FORMAT_S8_UINT:
            img->align_i = 8;
            img->align_j = 8;
            break;
         default:
            img->align_i = 4;
            img->align_j = 4;
            break;
         }
      } else {
         switch (img->format) {
         case PIPE_FORMAT_S8_UINT:
            img->align_i = 4;
            img->align_j = 2;
            break;
         default:
            img->align_i = 4;
            img->align_j = 4;
            break;
         }
      }
   } else {
      /* see the rephrased tables above for when VALIGN_4 is required */
      const bool valign_4 =
         (templ->nr_samples > 1) ||
         (ilo_dev_gen(params->dev) >= ILO_GEN(8)) ||
         (ilo_dev_gen(params->dev) >= ILO_GEN(7) &&
          img->tiling == GEN6_TILING_Y &&
          (templ->bind & PIPE_BIND_RENDER_TARGET));

      if (ilo_dev_gen(params->dev) >= ILO_GEN(7) &&
          ilo_dev_gen(params->dev) <= ILO_GEN(7.5) && valign_4)
         assert(img->format != PIPE_FORMAT_R32G32B32_FLOAT);

      img->align_i = 4;
      img->align_j = (valign_4) ? 4 : 2;
   }

   /*
    * the fact that align i and j are multiples of block width and height
    * respectively is what makes the size of the bo a multiple of the block
    * size, slices start at block boundaries, and many of the computations
    * work.
    */
   assert(img->align_i % img->block_width == 0);
   assert(img->align_j % img->block_height == 0);

   /* make sure align() works */
   assert(util_is_power_of_two(img->align_i) &&
          util_is_power_of_two(img->align_j));
   assert(util_is_power_of_two(img->block_width) &&
          util_is_power_of_two(img->block_height));
}
 
/* Pick the tiling mode from the remaining valid tilings, preferring tiled. */
static void
img_init_tiling(struct ilo_image *img,
                const struct ilo_image_params *params)
{
   const struct pipe_resource *templ = params->templ;
   unsigned tilings = params->valid_tilings;

   /* W-tiling has no fencing nor BLT support; avoid it when we can */
   if (tilings & ~IMAGE_TILING_W)
      tilings &= ~IMAGE_TILING_W;

   if (templ->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) {
      /* heuristic minimum width/height before tiling pays off */
      if (img->width0 < 64 && (tilings & ~IMAGE_TILING_X))
         tilings &= ~IMAGE_TILING_X;

      if ((img->width0 < 32 || img->height0 < 16) &&
          (img->width0 < 16 || img->height0 < 32) &&
          (tilings & ~IMAGE_TILING_Y))
         tilings &= ~IMAGE_TILING_Y;
   } else if (tilings & IMAGE_TILING_NONE) {
      /* force linear if we are not sure where the texture is bound to */
      tilings &= IMAGE_TILING_NONE;
   }

   /* prefer tiled over linear: Y, then X, then W, then none */
   if (tilings & IMAGE_TILING_Y)
      img->tiling = GEN6_TILING_Y;
   else if (tilings & IMAGE_TILING_X)
      img->tiling = GEN6_TILING_X;
   else if (tilings & IMAGE_TILING_W)
      img->tiling = GEN8_TILING_W;
   else
      img->tiling = GEN6_TILING_NONE;
}
 
/*
 * Decide the layer walk type and sample layout for Gen7+.
 */
static void
img_init_walk_gen7(struct ilo_image *img,
                   const struct ilo_image_params *params)
{
   const struct pipe_resource *templ = params->templ;

   /*
    * It is not explicitly stated, but render targets are expected to be
    * UMS/CMS (samples non-interleaved) and depth/stencil buffers are expected
    * to be IMS (samples interleaved).
    *
    * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
    */
   if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
      /*
       * From the Ivy Bridge PRM, volume 1 part 1, page 111:
       *
       *     "note that the depth buffer and stencil buffer have an implied
       *      value of ARYSPC_FULL"
       */
      img->walk = (templ->target == PIPE_TEXTURE_3D) ?
         ILO_IMAGE_WALK_3D : ILO_IMAGE_WALK_LAYER;

      img->interleaved_samples = true;
   } else {
      /*
       * From the Ivy Bridge PRM, volume 4 part 1, page 66:
       *
       *     "If Multisampled Surface Storage Format is MSFMT_MSS and Number
       *      of Multisamples is not MULTISAMPLECOUNT_1, this field (Surface
       *      Array Spacing) must be set to ARYSPC_LOD0."
       *
       * As multisampled resources are not mipmapped, we never use
       * ARYSPC_FULL for them.
       */
      if (templ->nr_samples > 1)
         assert(templ->last_level == 0);

      img->walk =
         (templ->target == PIPE_TEXTURE_3D) ? ILO_IMAGE_WALK_3D :
         (templ->last_level > 0) ? ILO_IMAGE_WALK_LAYER :
         ILO_IMAGE_WALK_LOD;

      img->interleaved_samples = false;
   }
}
 
/* Decide the layer walk type and sample layout for Gen6. */
static void
img_init_walk_gen6(struct ilo_image *img,
                   const struct ilo_image_params *params)
{
   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 115:
    *
    *     "The separate stencil buffer does not support mip mapping, thus the
    *      storage for LODs other than LOD 0 is not needed.  The following
    *      QPitch equation applies only to the separate stencil buffer:
    *
    *        QPitch = h_0"
    *
    * GEN6 does not support compact spacing otherwise.
    */
   if (params->templ->target == PIPE_TEXTURE_3D)
      img->walk = ILO_IMAGE_WALK_3D;
   else if (img->format == PIPE_FORMAT_S8_UINT)
      img->walk = ILO_IMAGE_WALK_LOD;
   else
      img->walk = ILO_IMAGE_WALK_LAYER;

   /* GEN6 supports only interleaved samples */
   img->interleaved_samples = true;
}
 
static void
img_init_walk(struct ilo_image *img,
const struct ilo_image_params *params)
{
if (ilo_dev_gen(params->dev) >= ILO_GEN(7))
img_init_walk_gen7(img, params);
else
img_init_walk_gen6(img, params);
}
 
/*
 * Return the subset of params->valid_tilings that is compatible with the
 * binding flags and the format.  Asserts if the binding flags conflict.
 */
static unsigned
img_get_valid_tilings(const struct ilo_image *img,
                      const struct ilo_image_params *params)
{
   const struct pipe_resource *templ = params->templ;
   const enum pipe_format format = img->format;
   unsigned valid_tilings = params->valid_tilings;

   /*
    * From the Sandy Bridge PRM, volume 1 part 2, page 32:
    *
    *     "Display/Overlay   Y-Major not supported.
    *                        X-Major required for Async Flips"
    */
   if (unlikely(templ->bind & PIPE_BIND_SCANOUT))
      valid_tilings &= IMAGE_TILING_X;

   /*
    * From the Sandy Bridge PRM, volume 3 part 2, page 158:
    *
    *     "The cursor surface address must be 4K byte aligned.  The cursor must
    *      be in linear memory, it cannot be tiled."
    */
   if (unlikely(templ->bind & (PIPE_BIND_CURSOR | PIPE_BIND_LINEAR)))
      valid_tilings &= IMAGE_TILING_NONE;

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 318:
    *
    *     "[DevSNB+]: This field (Tiled Surface) must be set to TRUE.  Linear
    *      Depth Buffer is not supported."
    *
    *     "The Depth Buffer, if tiled, must use Y-Major tiling."
    *
    * From the Sandy Bridge PRM, volume 1 part 2, page 22:
    *
    *     "W-Major Tile Format is used for separate stencil."
    */
   if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
      switch (format) {
      case PIPE_FORMAT_S8_UINT:
         valid_tilings &= IMAGE_TILING_W;
         break;
      default:
         valid_tilings &= IMAGE_TILING_Y;
         break;
      }
   }

   if (templ->bind & PIPE_BIND_RENDER_TARGET) {
      /*
       * From the Sandy Bridge PRM, volume 1 part 2, page 32:
       *
       *     "NOTE: 128BPE Format Color buffer ( render target ) MUST be
       *      either TileX or Linear."
       *
       * From the Haswell PRM, volume 5, page 32:
       *
       *     "NOTE: 128 BPP format color buffer (render target) supports
       *      Linear, TiledX and TiledY."
       */
      if (ilo_dev_gen(params->dev) < ILO_GEN(7.5) && img->block_size == 16)
         valid_tilings &= ~IMAGE_TILING_Y;

      /*
       * From the Ivy Bridge PRM, volume 4 part 1, page 63:
       *
       *     "This field (Surface Vertical Aligment) must be set to VALIGN_4
       *      for all tiled Y Render Target surfaces."
       *
       *     "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
       */
      if (ilo_dev_gen(params->dev) >= ILO_GEN(7) &&
          ilo_dev_gen(params->dev) <= ILO_GEN(7.5) &&
          img->format == PIPE_FORMAT_R32G32B32_FLOAT)
         valid_tilings &= ~IMAGE_TILING_Y;

      valid_tilings &= ~IMAGE_TILING_W;
   }

   if (templ->bind & PIPE_BIND_SAMPLER_VIEW) {
      /* pre-Gen8 samplers cannot read W-tiled surfaces */
      if (ilo_dev_gen(params->dev) < ILO_GEN(8))
         valid_tilings &= ~IMAGE_TILING_W;
   }

   /* no conflicting binding flags */
   assert(valid_tilings);

   return valid_tilings;
}
 
/*
 * Initialize the image size, format, and block info from the template.  The
 * format may be substituted: ETC1 is stored decompressed, and depth/stencil
 * formats lose their stencil bits when a separate stencil buffer is used.
 */
static void
img_init_size_and_format(struct ilo_image *img,
                         struct ilo_image_params *params)
{
   const struct pipe_resource *templ = params->templ;
   enum pipe_format format = templ->format;
   bool require_separate_stencil = false;

   img->width0 = templ->width0;
   img->height0 = templ->height0;
   img->depth0 = templ->depth0;
   img->sample_count = (templ->nr_samples) ? templ->nr_samples : 1;

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 317:
    *
    *     "This field (Separate Stencil Buffer Enable) must be set to the same
    *      value (enabled or disabled) as Hierarchical Depth Buffer Enable."
    *
    * GEN7+ requires separate stencil buffers.
    *
    * NOTE(review): the GEN6 path reads img->aux.type, so aux must have been
    * decided before this runs — verify the call order in the caller.
    */
   if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
      if (ilo_dev_gen(params->dev) >= ILO_GEN(7))
         require_separate_stencil = true;
      else
         require_separate_stencil = (img->aux.type == ILO_IMAGE_AUX_HIZ);
   }

   switch (format) {
   case PIPE_FORMAT_ETC1_RGB8:
      /* ETC1 is stored decompressed as RGBX */
      format = PIPE_FORMAT_R8G8B8X8_UNORM;
      break;
   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
      if (require_separate_stencil) {
         format = PIPE_FORMAT_Z24X8_UNORM;
         img->separate_stencil = true;
      }
      break;
   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
      if (require_separate_stencil) {
         format = PIPE_FORMAT_Z32_FLOAT;
         img->separate_stencil = true;
      }
      break;
   default:
      break;
   }

   img->format = format;
   img->block_width = util_format_get_blockwidth(format);
   img->block_height = util_format_get_blockheight(format);
   img->block_size = util_format_get_blocksize(format);

   params->valid_tilings = img_get_valid_tilings(img, params);
   params->compressed = util_format_is_compressed(img->format);
}
 
/*
 * Return true if an MCS aux buffer is wanted: for MSAA render targets, or
 * for fast color clears of eligible single-sampled render targets.
 */
static bool
img_want_mcs(const struct ilo_image *img,
             const struct ilo_image_params *params)
{
   const struct pipe_resource *templ = params->templ;
   bool want_mcs = false;

   /* MCS is for RT on GEN7+ */
   if (ilo_dev_gen(params->dev) < ILO_GEN(7))
      return false;

   if (templ->target != PIPE_TEXTURE_2D ||
       !(templ->bind & PIPE_BIND_RENDER_TARGET))
      return false;

   /*
    * From the Ivy Bridge PRM, volume 4 part 1, page 77:
    *
    *     "For Render Target and Sampling Engine Surfaces:If the surface is
    *      multisampled (Number of Multisamples any value other than
    *      MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled."
    *
    *     "This field must be set to 0 for all SINT MSRTs when all RT channels
    *      are not written"
    */
   if (templ->nr_samples > 1 && !util_format_is_pure_sint(templ->format)) {
      want_mcs = true;
   } else if (templ->nr_samples <= 1) {
      /*
       * From the Ivy Bridge PRM, volume 2 part 1, page 326:
       *
       *     "When MCS is buffer is used for color clear of non-multisampler
       *      render target, the following restrictions apply.
       *      - Support is limited to tiled render targets.
       *      - Support is for non-mip-mapped and non-array surface types
       *        only.
       *      - Clear is supported only on the full RT; i.e., no partial clear
       *        or overlapping clears.
       *      - MCS buffer for non-MSRT is supported only for RT formats
       *        32bpp, 64bpp and 128bpp.
       *      ..."
       */
      if (img->tiling != GEN6_TILING_NONE &&
          templ->last_level == 0 && templ->array_size == 1) {
         switch (img->block_size) {
         case 4:
         case 8:
         case 16:
            want_mcs = true;
            break;
         default:
            break;
         }
      }
   }

   return want_mcs;
}
 
/* Return true if a HiZ aux buffer is wanted for this image. */
static bool
img_want_hiz(const struct ilo_image *img,
             const struct ilo_image_params *params)
{
   const struct pipe_resource *templ = params->templ;
   const struct util_format_description *desc =
      util_format_description(templ->format);

   /* debug override */
   if (ilo_debug & ILO_DEBUG_NOHIZ)
      return false;

   /* HiZ requires a depth-capable depth/stencil binding */
   if (!(templ->bind & PIPE_BIND_DEPTH_STENCIL) ||
       !util_format_has_depth(desc))
      return false;

   /* no point in having HiZ */
   if (templ->usage == PIPE_USAGE_STAGING)
      return false;

   /*
    * As can be seen in img_calculate_hiz_size(), HiZ may not be enabled
    * for every level.  This is generally fine except on GEN6, where HiZ and
    * separate stencil are enabled and disabled at the same time.  When the
    * format is PIPE_FORMAT_Z32_FLOAT_S8X24_UINT, enabling and disabling HiZ
    * can result in incompatible formats.
    */
   if (ilo_dev_gen(params->dev) == ILO_GEN(6) &&
       templ->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT &&
       templ->last_level)
      return false;

   return true;
}
 
/* Select the aux buffer type; HiZ takes precedence over MCS. */
static void
img_init_aux(struct ilo_image *img,
             const struct ilo_image_params *params)
{
   if (img_want_hiz(img, params)) {
      img->aux.type = ILO_IMAGE_AUX_HIZ;
      return;
   }

   if (img_want_mcs(img, params))
      img->aux.type = ILO_IMAGE_AUX_MCS;
}
 
/*
 * Pad params->max_x/max_y to satisfy the hardware padding requirements for
 * the image's bindings.
 */
static void
img_align(struct ilo_image *img, struct ilo_image_params *params)
{
   const struct pipe_resource *templ = params->templ;
   int align_w = 1, align_h = 1, pad_h = 0;

   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 118:
    *
    *     "To determine the necessary padding on the bottom and right side of
    *      the surface, refer to the table in Section 7.18.3.4 for the i and j
    *      parameters for the surface format in use.  The surface must then be
    *      extended to the next multiple of the alignment unit size in each
    *      dimension, and all texels contained in this extended surface must
    *      have valid GTT entries."
    *
    *     "For cube surfaces, an additional two rows of padding are required
    *      at the bottom of the surface.  This must be ensured regardless of
    *      whether the surface is stored tiled or linear.  This is due to the
    *      potential rotation of cache line orientation from memory to cache."
    *
    *     "For compressed textures (BC* and FXT1 surface formats), padding at
    *      the bottom of the surface is to an even compressed row, which is
    *      equal to a multiple of 8 uncompressed texel rows.  Thus, for padding
    *      purposes, these surfaces behave as if j = 8 only for surface
    *      padding purposes.  The value of 4 for j still applies for mip level
    *      alignment and QPitch calculation."
    */
   if (templ->bind & PIPE_BIND_SAMPLER_VIEW) {
      align_w = MAX2(align_w, img->align_i);
      align_h = MAX2(align_h, img->align_j);

      if (templ->target == PIPE_TEXTURE_CUBE)
         pad_h += 2;

      if (params->compressed)
         align_h = MAX2(align_h, img->align_j * 2);
   }

   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 118:
    *
    *     "If the surface contains an odd number of rows of data, a final row
    *      below the surface must be allocated."
    */
   if (templ->bind & PIPE_BIND_RENDER_TARGET)
      align_h = MAX2(align_h, 2);

   /*
    * Depth Buffer Clear/Resolve works in 8x4 sample blocks.  Pad to allow HiZ
    * for unaligned non-mipmapped and non-array images.
    */
   if (img->aux.type == ILO_IMAGE_AUX_HIZ &&
       templ->last_level == 0 &&
       templ->array_size == 1 &&
       templ->depth0 == 1) {
      align_w = MAX2(align_w, 8);
      align_h = MAX2(align_h, 4);
   }

   params->max_x = align(params->max_x, align_w);
   params->max_y = align(params->max_y + pad_h, align_h);
}
 
/*
 * Convert the layout extent (params->max_x/max_y, in texels) to bo_stride
 * (bytes) and bo_height (memory rows), applying the per-tiling pitch and
 * height alignments.  Note that this may force the texture to be linear to
 * keep the bo mappable.
 */
static void
img_calculate_bo_size(struct ilo_image *img,
                      const struct ilo_image_params *params)
{
   assert(params->max_x % img->block_width == 0);
   assert(params->max_y % img->block_height == 0);
   assert(img->walk_layer_height % img->block_height == 0);

   img->bo_stride =
      (params->max_x / img->block_width) * img->block_size;
   img->bo_height = params->max_y / img->block_height;

   /* loop so that we can retry with linear tiling after a fallback */
   while (true) {
      unsigned w = img->bo_stride, h = img->bo_height;
      unsigned align_w, align_h;

      /*
       * From the Haswell PRM, volume 5, page 163:
       *
       *     "For linear surfaces, additional padding of 64 bytes is required
       *      at the bottom of the surface.  This is in addition to the padding
       *      required above."
       */
      if (ilo_dev_gen(params->dev) >= ILO_GEN(7.5) &&
          (params->templ->bind & PIPE_BIND_SAMPLER_VIEW) &&
          img->tiling == GEN6_TILING_NONE)
         h += (64 + img->bo_stride - 1) / img->bo_stride;

      /*
       * From the Sandy Bridge PRM, volume 4 part 1, page 81:
       *
       *     "- For linear render target surfaces, the pitch must be a
       *        multiple of the element size for non-YUV surface formats.
       *        Pitch must be a multiple of 2 * element size for YUV surface
       *        formats.
       *      - For other linear surfaces, the pitch can be any multiple of
       *        bytes.
       *      - For tiled surfaces, the pitch must be a multiple of the tile
       *        width."
       *
       * Different requirements may exist when the bo is used in different
       * places, but our alignments here should be good enough that we do not
       * need to check params->templ->bind.
       */
      switch (img->tiling) {
      case GEN6_TILING_X:
         align_w = 512;
         align_h = 8;
         break;
      case GEN6_TILING_Y:
         align_w = 128;
         align_h = 32;
         break;
      case GEN8_TILING_W:
         /*
          * From the Sandy Bridge PRM, volume 1 part 2, page 22:
          *
          *     "A 4KB tile is subdivided into 8-high by 8-wide array of
          *      Blocks for W-Major Tiles (W Tiles).  Each Block is 8 rows by 8
          *      bytes."
          */
         align_w = 64;
         align_h = 64;
         break;
      default:
         assert(img->tiling == GEN6_TILING_NONE);
         /* some good enough values */
         align_w = 64;
         align_h = 2;
         break;
      }

      w = align(w, align_w);
      h = align(h, align_h);

      /* make sure the bo is mappable */
      if (img->tiling != GEN6_TILING_NONE) {
         /*
          * Usually only the first 256MB of the GTT is mappable.
          *
          * See also how intel_context::max_gtt_map_object_size is calculated.
          */
         const size_t mappable_gtt_size = 256 * 1024 * 1024;

         /*
          * Be conservative.  We may be able to switch from VALIGN_4 to
          * VALIGN_2 if the image was Y-tiled, but let's keep it simple.
          */
         if (mappable_gtt_size / w / 4 < h) {
            if (params->valid_tilings & IMAGE_TILING_NONE) {
               img->tiling = GEN6_TILING_NONE;
               /* MCS support for non-MSRTs is limited to tiled RTs */
               if (img->aux.type == ILO_IMAGE_AUX_MCS &&
                   params->templ->nr_samples <= 1)
                  img->aux.type = ILO_IMAGE_AUX_NONE;

               continue;
            } else {
               ilo_warn("cannot force texture to be linear\n");
            }
         }
      }

      img->bo_stride = w;
      img->bo_height = h;
      break;
   }
}
 
/*
 * Compute the size of, and the per-LOD or per-layer offsets into, the HiZ
 * (hierarchical depth) auxiliary buffer for \p img, and decide for which
 * LODs fast depth clears may be enabled.  Results go to img->aux.
 */
static void
img_calculate_hiz_size(struct ilo_image *img,
                       const struct ilo_image_params *params)
{
   const struct pipe_resource *templ = params->templ;
   /* HiZ slice heights are aligned to 8 rows; see the PRM notes below */
   const unsigned hz_align_j = 8;
   enum ilo_image_walk_type hz_walk;
   unsigned hz_width, hz_height, lv;
   unsigned hz_clear_w, hz_clear_h;

   assert(img->aux.type == ILO_IMAGE_AUX_HIZ);

   assert(img->walk == ILO_IMAGE_WALK_LAYER ||
          img->walk == ILO_IMAGE_WALK_3D);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 312:
    *
    *   "The hierarchical depth buffer does not support the LOD field, it is
    *    assumed by hardware to be zero.  A separate hierarachical depth
    *    buffer is required for each LOD used, and the corresponding
    *    buffer's state delivered to hardware each time a new depth buffer
    *    state with modified LOD is delivered."
    *
    * We will put all LODs in a single bo with ILO_IMAGE_WALK_LOD.
    */
   if (ilo_dev_gen(params->dev) >= ILO_GEN(7))
      hz_walk = img->walk;
   else
      hz_walk = ILO_IMAGE_WALK_LOD;

   /*
    * See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
    * PRM, volume 2 part 1, page 312-313.
    *
    * It seems HiZ buffer is aligned to 8x8, with every two rows packed into a
    * memory row.
    */
   switch (hz_walk) {
   case ILO_IMAGE_WALK_LAYER:
      {
         const unsigned h0 = align(params->h0, hz_align_j);
         const unsigned h1 = align(params->h1, hz_align_j);
         /* extra rows reserved past the first two LODs; Gen7 reserves one
          * more 8-row block than Gen6 */
         const unsigned htail =
            ((ilo_dev_gen(params->dev) >= ILO_GEN(7)) ? 12 : 11) * hz_align_j;
         const unsigned hz_qpitch = h0 + h1 + htail;

         hz_width = align(img->lods[0].slice_width, 16);

         /* two HiZ rows pack into one memory row, hence the division by 2 */
         hz_height = hz_qpitch * templ->array_size / 2;
         if (ilo_dev_gen(params->dev) >= ILO_GEN(7))
            hz_height = align(hz_height, 8);

         img->aux.walk_layer_height = hz_qpitch;
      }
      break;
   case ILO_IMAGE_WALK_LOD:
      {
         unsigned lod_tx[PIPE_MAX_TEXTURE_LEVELS];
         unsigned lod_ty[PIPE_MAX_TEXTURE_LEVELS];
         unsigned cur_tx, cur_ty;

         /* figure out the tile offsets of LODs */
         hz_width = 0;
         hz_height = 0;
         cur_tx = 0;
         cur_ty = 0;
         for (lv = 0; lv <= templ->last_level; lv++) {
            unsigned tw, th;

            lod_tx[lv] = cur_tx;
            lod_ty[lv] = cur_ty;

            tw = align(img->lods[lv].slice_width, 16);
            th = align(img->lods[lv].slice_height, hz_align_j) *
               templ->array_size / 2;
            /* convert to Y-tiles */
            tw = align(tw, 128) / 128;
            th = align(th, 32) / 32;

            if (hz_width < cur_tx + tw)
               hz_width = cur_tx + tw;
            if (hz_height < cur_ty + th)
               hz_height = cur_ty + th;

            /* LOD 1 goes to the right of LOD 0; LOD 2 and up stack below */
            if (lv == 1)
               cur_tx += tw;
            else
               cur_ty += th;
         }

         /* convert tile offsets to memory offsets (4KB per Y-tile) */
         for (lv = 0; lv <= templ->last_level; lv++) {
            img->aux.walk_lod_offsets[lv] =
               (lod_ty[lv] * hz_width + lod_tx[lv]) * 4096;
         }
         hz_width *= 128;
         hz_height *= 32;
      }
      break;
   case ILO_IMAGE_WALK_3D:
      hz_width = align(img->lods[0].slice_width, 16);

      hz_height = 0;
      for (lv = 0; lv <= templ->last_level; lv++) {
         const unsigned h = align(img->lods[lv].slice_height, hz_align_j);
         /* according to the formula, slices are packed together vertically */
         hz_height += h * u_minify(templ->depth0, lv);
      }
      hz_height /= 2;
      break;
   default:
      assert(!"unknown HiZ walk");
      hz_width = 0;
      hz_height = 0;
      break;
   }

   /*
    * In hiz_align_fb(), we will align the LODs to 8x4 sample blocks.
    * Experiments on Haswell show that aligning the RECTLIST primitive and
    * 3DSTATE_DRAWING_RECTANGLE alone are not enough.  The LOD sizes must be
    * aligned.
    */
   hz_clear_w = 8;
   hz_clear_h = 4;
   switch (templ->nr_samples) {
   case 0:
   case 1:
   default:
      break;
   case 2:
      hz_clear_w /= 2;
      break;
   case 4:
      hz_clear_w /= 2;
      hz_clear_h /= 2;
      break;
   case 8:
      hz_clear_w /= 4;
      hz_clear_h /= 2;
      break;
   case 16:
      hz_clear_w /= 4;
      hz_clear_h /= 4;
      break;
   }

   /* enable fast clears only for LODs that are multiples of the clear block;
    * stop at the first LOD that is not */
   for (lv = 0; lv <= templ->last_level; lv++) {
      if (u_minify(img->width0, lv) % hz_clear_w ||
          u_minify(img->height0, lv) % hz_clear_h)
         break;
      img->aux.enables |= 1 << lv;
   }

   /* we padded to allow this in img_align() */
   if (templ->last_level == 0 && templ->array_size == 1 && templ->depth0 == 1)
      img->aux.enables |= 0x1;

   /* align to Y-tile */
   img->aux.bo_stride = align(hz_width, 128);
   img->aux.bo_height = align(hz_height, 32);
}
 
/*
 * Compute the size of the MCS (multisample control surface / fast-clear)
 * auxiliary buffer for \p img and mark every LOD as aux-capable.  Results go
 * to img->aux.
 *
 * Fix: removed the unreachable `break;` statements that followed `return;`
 * in the two switch default cases (dead code).
 */
static void
img_calculate_mcs_size(struct ilo_image *img,
                       const struct ilo_image_params *params)
{
   const struct pipe_resource *templ = params->templ;
   int mcs_width, mcs_height, mcs_cpp;
   int downscale_x, downscale_y;

   assert(img->aux.type == ILO_IMAGE_AUX_MCS);

   if (templ->nr_samples > 1) {
      /*
       * From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear
       * rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA.  The
       * need of scale down could be that the clear rectangle is used to clear
       * the MCS instead of the RT.
       *
       * For 8X MSAA, we need 32 bits in MCS for every pixel in the RT.  The
       * 2x2 factor could come from that the hardware writes 128 bits (an
       * OWord) at a time, and the OWord in MCS maps to a 2x2 pixel block in
       * the RT.  For 4X MSAA, we need 8 bits in MCS for every pixel in the
       * RT.  Similarly, we could reason that an OWord in 4X MCS maps to a 8x2
       * pixel block in the RT.
       */
      switch (templ->nr_samples) {
      case 2:
      case 4:
         downscale_x = 8;
         downscale_y = 2;
         mcs_cpp = 1;
         break;
      case 8:
         downscale_x = 2;
         downscale_y = 2;
         mcs_cpp = 4;
         break;
      case 16:
         downscale_x = 2;
         downscale_y = 1;
         mcs_cpp = 8;
         break;
      default:
         assert(!"unsupported sample count");
         return;
      }

      /*
       * It also appears that the 2x2 subspans generated by the scaled-down
       * clear rectangle cannot be masked.  The scale-down clear rectangle
       * thus must be aligned to 2x2, and we need to pad.
       */
      mcs_width = align(img->width0, downscale_x * 2);
      mcs_height = align(img->height0, downscale_y * 2);
   } else {
      /*
       * From the Ivy Bridge PRM, volume 2 part 1, page 327:
       *
       *   "  Pixels  Lines
       *    TiledY RT CL
       *       bpp
       *       32          8        4
       *       64          4        4
       *       128         2        4
       *
       *    TiledX RT CL
       *       bpp
       *       32          16       2
       *       64          8        2
       *       128         4        2"
       *
       * This table and the two following tables define the RT alignments, the
       * clear rectangle alignments, and the clear rectangle scale factors.
       * Viewing the RT alignments as the sizes of 128-byte blocks, we can see
       * that the clear rectangle alignments are 16x32 blocks, and the clear
       * rectangle scale factors are 8x16 blocks.
       *
       * For non-MSAA RT, we need 1 bit in MCS for every 128-byte block in the
       * RT.  Similar to the MSAA cases, we can argue that an OWord maps to
       * 8x16 blocks.
       *
       * One problem with this reasoning is that a Y-tile in MCS has 8x32
       * OWords and maps to 64x512 128-byte blocks.  This differs from i965,
       * which says that a Y-tile maps to 128x256 blocks (\see
       * intel_get_non_msrt_mcs_alignment).  It does not really change
       * anything except for the size of the allocated MCS.  Let's see if we
       * hit out-of-bound access.
       */
      switch (img->tiling) {
      case GEN6_TILING_X:
         downscale_x = 64 / img->block_size;
         downscale_y = 2;
         break;
      case GEN6_TILING_Y:
         downscale_x = 32 / img->block_size;
         downscale_y = 4;
         break;
      default:
         assert(!"unsupported tiling mode");
         return;
      }

      downscale_x *= 8;
      downscale_y *= 16;

      /*
       * From the Haswell PRM, volume 7, page 652:
       *
       *   "Clear rectangle must be aligned to two times the number of
       *    pixels in the table shown below due to 16X16 hashing across the
       *    slice."
       *
       * The scaled-down clear rectangle must be aligned to 4x4 instead of
       * 2x2, and we need to pad.
       */
      mcs_width = align(img->width0, downscale_x * 4) / downscale_x;
      mcs_height = align(img->height0, downscale_y * 4) / downscale_y;
      mcs_cpp = 16; /* an OWord */
   }

   /* every LOD can use the MCS */
   img->aux.enables = (1 << (templ->last_level + 1)) - 1;
   /* align to Y-tile */
   img->aux.bo_stride = align(mcs_width * mcs_cpp, 128);
   img->aux.bo_height = align(mcs_height, 32);
}
 
/*
 * Perform the full image layout: aux decision, size/format, walk type,
 * tiling, alignments, LODs, layer height, padding, and the bo sizes.  Each
 * step depends on the results of the previous ones, so the call order below
 * must not change.
 */
static void
img_init(struct ilo_image *img,
         struct ilo_image_params *params)
{
   img_init_aux(img, params);
   img_init_size_and_format(img, params);
   img_init_walk(img, params);
   img_init_tiling(img, params);
   img_init_alignments(img, params);
   img_init_lods(img, params);
   img_init_layer_height(img, params);

   img_align(img, params);
   img_calculate_bo_size(img, params);

   img->scanout = (params->templ->bind & PIPE_BIND_SCANOUT);

   /* size the auxiliary buffer, if the image has one */
   if (img->aux.type == ILO_IMAGE_AUX_HIZ)
      img_calculate_hiz_size(img, params);
   else if (img->aux.type == ILO_IMAGE_AUX_MCS)
      img_calculate_mcs_size(img, params);
}
 
/**
* The texutre is for transfer only. We can define our own layout to save
* space.
*/
static void
img_init_for_transfer(struct ilo_image *img,
const struct ilo_dev *dev,
const struct pipe_resource *templ)
{
const unsigned num_layers = (templ->target == PIPE_TEXTURE_3D) ?
templ->depth0 : templ->array_size;
unsigned layer_width, layer_height;
 
assert(templ->last_level == 0);
assert(templ->nr_samples <= 1);
 
img->aux.type = ILO_IMAGE_AUX_NONE;
 
img->width0 = templ->width0;
img->height0 = templ->height0;
img->depth0 = templ->depth0;
img->sample_count = 1;
 
img->format = templ->format;
img->block_width = util_format_get_blockwidth(templ->format);
img->block_height = util_format_get_blockheight(templ->format);
img->block_size = util_format_get_blocksize(templ->format);
 
img->walk = ILO_IMAGE_WALK_LOD;
 
img->tiling = GEN6_TILING_NONE;
 
img->align_i = img->block_width;
img->align_j = img->block_height;
 
assert(util_is_power_of_two(img->block_width) &&
util_is_power_of_two(img->block_height));
 
/* use packed layout */
layer_width = align(templ->width0, img->align_i);
layer_height = align(templ->height0, img->align_j);
 
img->lods[0].slice_width = layer_width;
img->lods[0].slice_height = layer_height;
 
img->bo_stride = (layer_width / img->block_width) * img->block_size;
img->bo_stride = align(img->bo_stride, 64);
 
img->bo_height = (layer_height / img->block_height) * num_layers;
}
 
/**
 * Initialize the image.  Callers should zero-initialize \p img first.
 */
void ilo_image_init(struct ilo_image *img,
                    const struct ilo_dev *dev,
                    const struct pipe_resource *templ)
{
   struct ilo_image_params params;

   /*
    * A texture whose bind flags are limited to transfer read/write is never
    * bound to the GPU; give it the compact transfer layout instead.
    */
   if (!(templ->bind & ~(PIPE_BIND_TRANSFER_WRITE |
                         PIPE_BIND_TRANSFER_READ)) &&
       templ->last_level == 0 && templ->nr_samples <= 1) {
      img_init_for_transfer(img, dev, templ);
      return;
   }

   memset(&params, 0, sizeof(params));
   params.dev = dev;
   params.templ = templ;
   params.valid_tilings = IMAGE_TILING_ALL;

   img_init(img, &params);
}
 
/*
 * Initialize the image for an imported bo whose tiling and stride are fixed
 * by the exporter.  Returns false when the imported stride is incompatible
 * with the layout we would compute ourselves.
 */
bool
ilo_image_init_for_imported(struct ilo_image *img,
                            const struct ilo_dev *dev,
                            const struct pipe_resource *templ,
                            enum gen_surface_tiling tiling,
                            unsigned bo_stride)
{
   struct ilo_image_params params;

   /* the stride must be a multiple of the tile row size */
   switch (tiling) {
   case GEN6_TILING_X:
      if (bo_stride % 512)
         return false;
      break;
   case GEN6_TILING_Y:
      if (bo_stride % 128)
         return false;
      break;
   case GEN8_TILING_W:
      if (bo_stride % 64)
         return false;
      break;
   default:
      break;
   }

   memset(&params, 0, sizeof(params));
   params.dev = dev;
   params.templ = templ;
   params.valid_tilings = 1 << tiling;

   img_init(img, &params);

   assert(img->tiling == tiling);
   /* the imported stride must be at least as large as what we need */
   if (img->bo_stride > bo_stride)
      return false;

   img->bo_stride = bo_stride;

   /* assume imported RTs are also scanouts */
   if (!img->scanout)
      img->scanout = (templ->bind & PIPE_BIND_RENDER_TARGET);

   return true;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_image.h
0,0 → 1,341
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2014 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_IMAGE_H
#define ILO_IMAGE_H
 
#include "genhw/genhw.h"
#include "intel_winsys.h"
 
#include "ilo_core.h"
#include "ilo_dev.h"
 
/* kinds of auxiliary surfaces an image may carry */
enum ilo_image_aux_type {
   ILO_IMAGE_AUX_NONE,
   ILO_IMAGE_AUX_HIZ,   /* hierarchical depth buffer */
   ILO_IMAGE_AUX_MCS,   /* multisample control surface / fast-clear buffer */
};

/* how LODs and array layers / 3D slices are arranged in memory */
enum ilo_image_walk_type {
   /*
    * LODs of each array layer are first packed together in MIPLAYOUT_BELOW.
    * Array layers are then stacked together vertically.
    *
    * This can be used for mipmapped 2D textures.
    */
   ILO_IMAGE_WALK_LAYER,

   /*
    * Array layers of each LOD are first stacked together vertically and
    * tightly.  LODs are then packed together in MIPLAYOUT_BELOW with each LOD
    * starting at page boundaries.
    *
    * This is usually used for non-mipmapped 2D textures, as multiple LODs are
    * not supported natively.
    */
   ILO_IMAGE_WALK_LOD,

   /*
    * 3D slices of each LOD are first packed together horizontally and tightly
    * with wrapping.  LODs are then stacked together vertically and tightly.
    *
    * This is used for 3D textures.
    */
   ILO_IMAGE_WALK_3D,
};

/*
 * When the walk type is ILO_IMAGE_WALK_LAYER, there is only a slice in each
 * LOD and this is used to describe LODs in the first array layer.  Otherwise,
 * there can be multiple slices in each LOD and this is used to describe the
 * first slice in each LOD.
 */
struct ilo_image_lod {
   /* physical position in pixels */
   unsigned x;
   unsigned y;

   /* physical size of a slice in pixels */
   unsigned slice_width;
   unsigned slice_height;
};
 
/**
 * Texture layout.
 */
struct ilo_image {
   /* size, format, etc for programming hardware states */
   unsigned width0;
   unsigned height0;
   unsigned depth0;
   unsigned sample_count;
   enum pipe_format format;
   bool separate_stencil;

   /*
    * width, height, and size of pixel blocks for conversion between pixel
    * positions and memory offsets
    */
   unsigned block_width;
   unsigned block_height;
   unsigned block_size;

   /* memory arrangement of LODs and layers; see ilo_image_walk_type */
   enum ilo_image_walk_type walk;
   bool interleaved_samples;

   enum gen_surface_tiling tiling;

   /* physical LOD slice alignments */
   unsigned align_i;
   unsigned align_j;

   /* position and size of each LOD (or its first slice) */
   struct ilo_image_lod lods[PIPE_MAX_TEXTURE_LEVELS];

   /* physical layer height for ILO_IMAGE_WALK_LAYER */
   unsigned walk_layer_height;

   /* distance in bytes between two pixel block rows */
   unsigned bo_stride;
   /* number of pixel block rows */
   unsigned bo_height;

   /* true when the image may be displayed (scanout) */
   bool scanout;

   struct intel_bo *bo;

   /* auxiliary (HiZ or MCS) surface state */
   struct {
      enum ilo_image_aux_type type;

      /* bitmask of levels that can use aux */
      unsigned enables;

      /* LOD offsets for ILO_IMAGE_WALK_LOD */
      unsigned walk_lod_offsets[PIPE_MAX_TEXTURE_LEVELS];

      unsigned walk_layer_height;
      unsigned bo_stride;
      unsigned bo_height;

      struct intel_bo *bo;
   } aux;
};
 
struct pipe_resource;
 
void
ilo_image_init(struct ilo_image *img,
const struct ilo_dev *dev,
const struct pipe_resource *templ);
 
bool
ilo_image_init_for_imported(struct ilo_image *img,
const struct ilo_dev *dev,
const struct pipe_resource *templ,
enum gen_surface_tiling tiling,
unsigned bo_stride);
 
/*
 * Release the references held on the image bo and the aux bo.  The struct
 * itself is not freed.  NOTE(review): assumes intel_bo_unref() accepts NULL,
 * as either bo may be unset — confirm against the winsys.
 */
static inline void
ilo_image_cleanup(struct ilo_image *img)
{
   intel_bo_unref(img->bo);
   intel_bo_unref(img->aux.bo);
}
 
/*
 * Replace the image bo, managing the reference counts.
 *
 * Fix: take the new reference before dropping the old one.  The original
 * unref-then-ref order could free the bo in between when the same bo (at
 * refcount 1) is set again.
 */
static inline void
ilo_image_set_bo(struct ilo_image *img, struct intel_bo *bo)
{
   struct intel_bo *old = img->bo;

   img->bo = intel_bo_ref(bo);
   intel_bo_unref(old);
}
 
/*
 * Replace the aux bo, managing the reference counts.
 *
 * Fix: take the new reference before dropping the old one.  The original
 * unref-then-ref order could free the bo in between when the same bo (at
 * refcount 1) is set again.
 */
static inline void
ilo_image_set_aux_bo(struct ilo_image *img, struct intel_bo *bo)
{
   struct intel_bo *old = img->aux.bo;

   img->aux.bo = intel_bo_ref(bo);
   intel_bo_unref(old);
}
 
/*
 * Return true when the aux buffer may be used for \p level: an aux bo must
 * exist and the level must be in the enables bitmask.
 */
static inline bool
ilo_image_can_enable_aux(const struct ilo_image *img, unsigned level)
{
   if (!img->aux.bo)
      return false;

   return (img->aux.enables & (1 << level)) != 0;
}
 
/**
 * Convert from pixel position to 2D memory offset.  The position must be
 * aligned to pixel block boundaries.
 */
static inline void
ilo_image_pos_to_mem(const struct ilo_image *img,
                     unsigned pos_x, unsigned pos_y,
                     unsigned *mem_x, unsigned *mem_y)
{
   unsigned blk_x, blk_y;

   assert(pos_x % img->block_width == 0);
   assert(pos_y % img->block_height == 0);

   /* convert to block coordinates; x is then scaled to bytes */
   blk_x = pos_x / img->block_width;
   blk_y = pos_y / img->block_height;

   *mem_x = blk_x * img->block_size;
   *mem_y = blk_y;
}
 
/**
 * Convert from 2D memory offset to linear offset.  Valid only for linear
 * (untiled) layouts, where a memory row is bo_stride bytes.
 */
static inline unsigned
ilo_image_mem_to_linear(const struct ilo_image *img,
                        unsigned mem_x, unsigned mem_y)
{
   const unsigned row_offset = img->bo_stride * mem_y;

   return row_offset + mem_x;
}
 
/**
 * Convert from 2D memory offset to raw offset.  The memory offset must be
 * aligned to a tile boundary of the image's tiling mode.
 */
static inline unsigned
ilo_image_mem_to_raw(const struct ilo_image *img,
                     unsigned mem_x, unsigned mem_y)
{
   unsigned tile_w, tile_h;

   /* tile extents in bytes x rows for each tiling mode */
   if (img->tiling == GEN6_TILING_X) {
      tile_w = 512;
      tile_h = 8;
   } else if (img->tiling == GEN6_TILING_Y) {
      tile_w = 128;
      tile_h = 32;
   } else if (img->tiling == GEN8_TILING_W) {
      tile_w = 64;
      tile_h = 64;
   } else {
      /* anything else must be the linear layout */
      assert(img->tiling == GEN6_TILING_NONE);
      tile_w = 1;
      tile_h = 1;
   }

   assert(mem_x % tile_w == 0);
   assert(mem_y % tile_h == 0);

   /* a row of tiles is bo_stride * tile_h bytes */
   return mem_y * img->bo_stride + mem_x * tile_h;
}
 
/**
 * Return the stride, in bytes, between slices within a level.  Only defined
 * for walk types where slices are evenly spaced; asserts otherwise.
 */
static inline unsigned
ilo_image_get_slice_stride(const struct ilo_image *img, unsigned level)
{
   unsigned rows;

   if (img->walk == ILO_IMAGE_WALK_LAYER) {
      rows = img->walk_layer_height;
   } else if (img->walk == ILO_IMAGE_WALK_LOD) {
      rows = img->lods[level].slice_height;
   } else if (img->walk == ILO_IMAGE_WALK_3D && level == 0) {
      /* only LOD 0 of a 3D walk has a uniform slice spacing */
      rows = img->lods[0].slice_height;
   } else {
      assert(!"no single stride to walk across slices");
      rows = 0;
   }

   assert(rows % img->block_height == 0);

   return (rows / img->block_height) * img->bo_stride;
}
 
/**
 * Return the physical size, in bytes, of a slice in a level.
 */
static inline unsigned
ilo_image_get_slice_size(const struct ilo_image *img, unsigned level)
{
   unsigned row_bytes, row_count;

   assert(img->lods[level].slice_width % img->block_width == 0);
   assert(img->lods[level].slice_height % img->block_height == 0);

   /* bytes per block row times number of block rows */
   row_bytes = img->lods[level].slice_width /
      img->block_width * img->block_size;
   row_count = img->lods[level].slice_height / img->block_height;

   return row_bytes * row_count;
}
 
/**
* Return the pixel position of a slice.
*/
static inline void
ilo_image_get_slice_pos(const struct ilo_image *img,
unsigned level, unsigned slice,
unsigned *x, unsigned *y)
{
switch (img->walk) {
case ILO_IMAGE_WALK_LAYER:
*x = img->lods[level].x;
*y = img->lods[level].y + img->walk_layer_height * slice;
break;
case ILO_IMAGE_WALK_LOD:
*x = img->lods[level].x;
*y = img->lods[level].y + img->lods[level].slice_height * slice;
break;
case ILO_IMAGE_WALK_3D:
{
/* slices are packed horizontally with wrapping */
const unsigned sx = slice & ((1 << level) - 1);
const unsigned sy = slice >> level;
 
assert(slice < u_minify(img->depth0, level));
 
*x = img->lods[level].x + img->lods[level].slice_width * sx;
*y = img->lods[level].y + img->lods[level].slice_height * sy;
}
break;
default:
assert(!"unknown img walk type");
*x = 0;
*y = 0;
break;
}
 
/* should not exceed the bo size */
assert(*y + img->lods[level].slice_height <=
img->bo_height * img->block_height);
}
 
#endif /* ILO_IMAGE_H */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_state_3d.h
0,0 → 1,427
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2014 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_STATE_3D_H
#define ILO_STATE_3D_H
 
#include "genhw/genhw.h"
#include "pipe/p_state.h"
 
#include "ilo_core.h"
#include "ilo_dev.h"
 
/**
* \see brw_context.h
*/
#define ILO_MAX_DRAW_BUFFERS 8
#define ILO_MAX_CONST_BUFFERS (1 + 12)
#define ILO_MAX_SAMPLER_VIEWS 16
#define ILO_MAX_SAMPLERS 16
#define ILO_MAX_SO_BINDINGS 64
#define ILO_MAX_SO_BUFFERS 4
#define ILO_MAX_VIEWPORTS 1
 
#define ILO_MAX_SURFACES 256
 
struct intel_bo;
struct ilo_buffer;
struct ilo_image;
struct ilo_shader_state;
 
/* vertex buffer bindings */
struct ilo_vb_state {
   struct pipe_vertex_buffer states[PIPE_MAX_ATTRIBS];
   /* bit i set when slot i holds a valid binding */
   uint32_t enabled_mask;
};

/* index buffer state */
struct ilo_ib_state {
   struct pipe_resource *buffer;
   const void *user_buffer;
   unsigned offset;
   unsigned index_size;

   /* these are not valid until the state is finalized */
   struct pipe_resource *hw_resource;
   unsigned hw_index_size;
   /* an offset to be added to pipe_draw_info::start */
   int64_t draw_start_offset;
};

struct ilo_ve_cso {
   /* VERTEX_ELEMENT_STATE */
   uint32_t payload[2];
};

/* vertex element layout */
struct ilo_ve_state {
   struct ilo_ve_cso cso[PIPE_MAX_ATTRIBS];
   unsigned count;

   unsigned instance_divisors[PIPE_MAX_ATTRIBS];
   /* maps vertex elements to vertex buffer slots */
   unsigned vb_mapping[PIPE_MAX_ATTRIBS];
   unsigned vb_count;

   /* these are not valid until the state is finalized */
   struct ilo_ve_cso edgeflag_cso;
   bool last_cso_edgeflag;

   struct ilo_ve_cso nosrc_cso;
   bool prepend_nosrc_cso;
};

/* stream output targets */
struct ilo_so_state {
   struct pipe_stream_output_target *states[ILO_MAX_SO_BUFFERS];
   unsigned count;
   unsigned append_bitmask;

   bool enabled;
};

struct ilo_viewport_cso {
   /* matrix form */
   float m00, m11, m22, m30, m31, m32;

   /* guardband in NDC space */
   float min_gbx, min_gby, max_gbx, max_gby;

   /* viewport in screen space */
   float min_x, min_y, min_z;
   float max_x, max_y, max_z;
};

struct ilo_viewport_state {
   struct ilo_viewport_cso cso[ILO_MAX_VIEWPORTS];
   unsigned count;

   /* the original viewport 0 as set by the state tracker */
   struct pipe_viewport_state viewport0;
};

struct ilo_scissor_state {
   /* SCISSOR_RECT */
   uint32_t payload[ILO_MAX_VIEWPORTS * 2];

   /* the original scissor 0 as set by the state tracker */
   struct pipe_scissor_state scissor0;
};
 
struct ilo_rasterizer_clip {
   /* 3DSTATE_CLIP */
   uint32_t payload[3];

   uint32_t can_enable_guardband;
};

struct ilo_rasterizer_sf {
   /* 3DSTATE_SF */
   uint32_t payload[3];
   uint32_t dw_msaa;

   /* Global Depth Offset Constant/Scale/Clamp */
   uint32_t dw_depth_offset_const;
   uint32_t dw_depth_offset_scale;
   uint32_t dw_depth_offset_clamp;

   /* Gen8+ 3DSTATE_RASTER */
   uint32_t dw_raster;
};

struct ilo_rasterizer_wm {
   /* 3DSTATE_WM */
   uint32_t payload[2];
   uint32_t dw_msaa_rast;
   uint32_t dw_msaa_disp;
};

/* rasterizer CSO: the Gallium state plus pre-built hardware DWords */
struct ilo_rasterizer_state {
   struct pipe_rasterizer_state state;

   struct ilo_rasterizer_clip clip;
   struct ilo_rasterizer_sf sf;
   struct ilo_rasterizer_wm wm;
};

struct ilo_dsa_state {
   /* DEPTH_STENCIL_STATE or Gen8+ 3DSTATE_WM_DEPTH_STENCIL */
   uint32_t payload[3];

   uint32_t dw_blend_alpha;
   uint32_t dw_ps_blend_alpha;
   ubyte alpha_ref;
};

struct ilo_blend_cso {
   /* BLEND_STATE */
   uint32_t payload[2];

   uint32_t dw_blend;
   uint32_t dw_blend_dst_alpha_forced_one;
};

/* blend CSO: per-RT DWords plus shared bits */
struct ilo_blend_state {
   struct ilo_blend_cso cso[ILO_MAX_DRAW_BUFFERS];

   bool dual_blend;
   bool alpha_to_coverage;

   uint32_t dw_shared;
   uint32_t dw_alpha_mod;
   uint32_t dw_logicop;

   /* a part of 3DSTATE_PS_BLEND */
   uint32_t dw_ps_blend;
   uint32_t dw_ps_blend_dst_alpha_forced_one;
};

struct ilo_sampler_cso {
   /* SAMPLER_STATE and SAMPLER_BORDER_COLOR_STATE */
   uint32_t payload[15];

   uint32_t dw_filter;
   uint32_t dw_filter_aniso;
   uint32_t dw_wrap;
   uint32_t dw_wrap_1d;
   uint32_t dw_wrap_cube;

   bool anisotropic;
   bool saturate_r;
   bool saturate_s;
   bool saturate_t;
};

struct ilo_sampler_state {
   const struct ilo_sampler_cso *cso[ILO_MAX_SAMPLERS];
};

struct ilo_view_surface {
   /* SURFACE_STATE */
   uint32_t payload[13];
   struct intel_bo *bo;

   uint32_t scanout;
};

struct ilo_view_cso {
   struct pipe_sampler_view base;

   struct ilo_view_surface surface;
};

struct ilo_view_state {
   struct pipe_sampler_view *states[ILO_MAX_SAMPLER_VIEWS];
   unsigned count;
};

struct ilo_cbuf_cso {
   struct pipe_resource *resource;
   struct ilo_view_surface surface;

   /*
    * this CSO is not so constant because user buffer needs to be uploaded in
    * finalize_constant_buffers()
    */
   const void *user_buffer;
   unsigned user_buffer_size;
};

struct ilo_cbuf_state {
   struct ilo_cbuf_cso cso[ILO_MAX_CONST_BUFFERS];
   /* bit i set when constant buffer slot i is bound */
   uint32_t enabled_mask;
};

struct ilo_resource_state {
   struct pipe_surface *states[PIPE_MAX_SHADER_RESOURCES];
   unsigned count;
};

/* a bound surface: either a render target or a depth/stencil surface */
struct ilo_surface_cso {
   struct pipe_surface base;

   bool is_rt;
   union {
      struct ilo_view_surface rt;
      struct ilo_zs_surface {
         uint32_t payload[12];
         uint32_t dw_aligned_8x4;

         struct intel_bo *bo;
         struct intel_bo *hiz_bo;
         struct intel_bo *separate_s8_bo;
      } zs;
   } u;
};

struct ilo_fb_state {
   struct pipe_framebuffer_state state;

   /* null surfaces bound when a slot has no RT or ZS */
   struct ilo_view_surface null_rt;
   struct ilo_zs_surface null_zs;

   /* per-RT capabilities derived from the surface formats */
   struct ilo_fb_blend_caps {
      bool can_logicop;
      bool can_blend;
      bool can_alpha_test;
      bool dst_alpha_forced_one;
   } blend_caps[PIPE_MAX_COLOR_BUFS];

   unsigned num_samples;
};

struct ilo_shader_cso {
   uint32_t payload[5];
};
 
/**
 * Translate a pipe texture target to the matching hardware surface type.
 */
static inline int
ilo_gpe_gen6_translate_texture(enum pipe_texture_target target)
{
   switch (target) {
   case PIPE_TEXTURE_1D:
   case PIPE_TEXTURE_1D_ARRAY:
      return GEN6_SURFTYPE_1D;
   case PIPE_TEXTURE_2D:
   case PIPE_TEXTURE_RECT:
   case PIPE_TEXTURE_2D_ARRAY:
      return GEN6_SURFTYPE_2D;
   case PIPE_TEXTURE_3D:
      return GEN6_SURFTYPE_3D;
   case PIPE_TEXTURE_CUBE:
   case PIPE_TEXTURE_CUBE_ARRAY:
      return GEN6_SURFTYPE_CUBE;
   case PIPE_BUFFER:
      return GEN6_SURFTYPE_BUFFER;
   default:
      /* fall back to a harmless surface type */
      assert(!"unknown texture target");
      return GEN6_SURFTYPE_BUFFER;
   }
}
 
void
ilo_gpe_init_ve(const struct ilo_dev *dev,
unsigned num_states,
const struct pipe_vertex_element *states,
struct ilo_ve_state *ve);
 
void
ilo_gpe_set_ve_edgeflag(const struct ilo_dev *dev,
struct ilo_ve_cso *cso);
 
void
ilo_gpe_init_ve_nosrc(const struct ilo_dev *dev,
int comp0, int comp1, int comp2, int comp3,
struct ilo_ve_cso *cso);
 
void
ilo_gpe_set_viewport_cso(const struct ilo_dev *dev,
const struct pipe_viewport_state *state,
struct ilo_viewport_cso *vp);
 
void
ilo_gpe_set_scissor(const struct ilo_dev *dev,
unsigned start_slot,
unsigned num_states,
const struct pipe_scissor_state *states,
struct ilo_scissor_state *scissor);
 
void
ilo_gpe_set_scissor_null(const struct ilo_dev *dev,
struct ilo_scissor_state *scissor);
 
void
ilo_gpe_init_rasterizer(const struct ilo_dev *dev,
const struct pipe_rasterizer_state *state,
struct ilo_rasterizer_state *rasterizer);
void
ilo_gpe_init_dsa(const struct ilo_dev *dev,
const struct pipe_depth_stencil_alpha_state *state,
struct ilo_dsa_state *dsa);
 
void
ilo_gpe_init_blend(const struct ilo_dev *dev,
const struct pipe_blend_state *state,
struct ilo_blend_state *blend);
 
void
ilo_gpe_init_sampler_cso(const struct ilo_dev *dev,
const struct pipe_sampler_state *state,
struct ilo_sampler_cso *sampler);
 
void
ilo_gpe_init_view_surface_null(const struct ilo_dev *dev,
unsigned width, unsigned height,
unsigned depth, unsigned level,
struct ilo_view_surface *surf);
 
void
ilo_gpe_init_view_surface_for_buffer(const struct ilo_dev *dev,
const struct ilo_buffer *buf,
unsigned offset, unsigned size,
unsigned struct_size,
enum pipe_format elem_format,
bool is_rt, bool render_cache_rw,
struct ilo_view_surface *surf);
 
void
ilo_gpe_init_view_surface_for_image(const struct ilo_dev *dev,
const struct ilo_image *img,
enum pipe_texture_target target,
enum pipe_format format,
unsigned first_level,
unsigned num_levels,
unsigned first_layer,
unsigned num_layers,
bool is_rt,
struct ilo_view_surface *surf);
 
void
ilo_gpe_init_zs_surface(const struct ilo_dev *dev,
const struct ilo_image *img,
const struct ilo_image *s8_img,
enum pipe_texture_target target,
enum pipe_format format, unsigned level,
unsigned first_layer, unsigned num_layers,
struct ilo_zs_surface *zs);
 
void
ilo_gpe_init_vs_cso(const struct ilo_dev *dev,
const struct ilo_shader_state *vs,
struct ilo_shader_cso *cso);
 
void
ilo_gpe_init_gs_cso(const struct ilo_dev *dev,
const struct ilo_shader_state *gs,
struct ilo_shader_cso *cso);
 
void
ilo_gpe_init_fs_cso(const struct ilo_dev *dev,
const struct ilo_shader_state *fs,
struct ilo_shader_cso *cso);
 
void
ilo_gpe_set_fb(const struct ilo_dev *dev,
const struct pipe_framebuffer_state *state,
struct ilo_fb_state *fb);
 
#endif /* ILO_STATE_3D_H */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c
0,0 → 1,2222
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2014 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "genhw/genhw.h"
#include "util/u_dual_blend.h"
#include "util/u_framebuffer.h"
#include "util/u_half.h"
 
#include "ilo_format.h"
#include "ilo_image.h"
#include "ilo_state_3d.h"
#include "../ilo_shader.h"
 
/*
 * Build the 3DSTATE_CLIP DWords from the Gallium rasterizer state, and
 * decide whether the guard band test may be enabled.
 */
static void
rasterizer_init_clip(const struct ilo_dev *dev,
                     const struct pipe_rasterizer_state *state,
                     struct ilo_rasterizer_clip *clip)
{
   uint32_t dw1, dw2, dw3;

   ILO_DEV_ASSERT(dev, 6, 8);

   dw1 = GEN6_CLIP_DW1_STATISTICS;

   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
      /*
       * From the Ivy Bridge PRM, volume 2 part 1, page 219:
       *
       *   "Workaround : Due to Hardware issue "EarlyCull" needs to be
       *    enabled only for the cases where the incoming primitive topology
       *    into the clipper guaranteed to be Trilist."
       *
       * What does this mean?
       */
      dw1 |= 0 << 19 |
             GEN7_CLIP_DW1_EARLY_CULL_ENABLE;

      /* on Gen8 winding/culling moved to 3DSTATE_RASTER */
      if (ilo_dev_gen(dev) < ILO_GEN(8)) {
         if (state->front_ccw)
            dw1 |= GEN7_CLIP_DW1_FRONTWINDING_CCW;

         switch (state->cull_face) {
         case PIPE_FACE_NONE:
            dw1 |= GEN7_CLIP_DW1_CULLMODE_NONE;
            break;
         case PIPE_FACE_FRONT:
            dw1 |= GEN7_CLIP_DW1_CULLMODE_FRONT;
            break;
         case PIPE_FACE_BACK:
            dw1 |= GEN7_CLIP_DW1_CULLMODE_BACK;
            break;
         case PIPE_FACE_FRONT_AND_BACK:
            dw1 |= GEN7_CLIP_DW1_CULLMODE_BOTH;
            break;
         }
      }
   }

   dw2 = GEN6_CLIP_DW2_CLIP_ENABLE |
         GEN6_CLIP_DW2_XY_TEST_ENABLE |
         state->clip_plane_enable << GEN6_CLIP_DW2_UCP_CLIP_ENABLES__SHIFT |
         GEN6_CLIP_DW2_CLIPMODE_NORMAL;

   /* [0, 1] (D3D) vs [-1, 1] (OpenGL) depth range */
   if (state->clip_halfz)
      dw2 |= GEN6_CLIP_DW2_APIMODE_D3D;
   else
      dw2 |= GEN6_CLIP_DW2_APIMODE_OGL;

   if (ilo_dev_gen(dev) < ILO_GEN(8) && state->depth_clip)
      dw2 |= GEN6_CLIP_DW2_Z_TEST_ENABLE;

   /* provoking vertex indices for tri/line/trifan */
   if (state->flatshade_first) {
      dw2 |= 0 << GEN6_CLIP_DW2_TRI_PROVOKE__SHIFT |
             0 << GEN6_CLIP_DW2_LINE_PROVOKE__SHIFT |
             1 << GEN6_CLIP_DW2_TRIFAN_PROVOKE__SHIFT;
   }
   else {
      dw2 |= 2 << GEN6_CLIP_DW2_TRI_PROVOKE__SHIFT |
             1 << GEN6_CLIP_DW2_LINE_PROVOKE__SHIFT |
             2 << GEN6_CLIP_DW2_TRIFAN_PROVOKE__SHIFT;
   }

   dw3 = 0x1 << GEN6_CLIP_DW3_MIN_POINT_WIDTH__SHIFT |
         0x7ff << GEN6_CLIP_DW3_MAX_POINT_WIDTH__SHIFT;

   clip->payload[0] = dw1;
   clip->payload[1] = dw2;
   clip->payload[2] = dw3;

   clip->can_enable_guardband = true;

   /*
    * There are several reasons that guard band test should be disabled
    *
    *  - GL wide points (to avoid partially visibie object)
    *  - GL wide or AA lines (to avoid partially visibie object)
    */
   if (state->point_size_per_vertex || state->point_size > 1.0f)
      clip->can_enable_guardband = false;
   if (state->line_smooth || state->line_width > 1.0f)
      clip->can_enable_guardband = false;
}
 
/*
 * Fill in the global depth offset (polygon offset) constant, scale, and
 * clamp DWords of \p sf from the rasterizer state.
 */
static void
rasterizer_init_sf_depth_offset_gen6(const struct ilo_dev *dev,
                                     const struct pipe_rasterizer_state *state,
                                     struct ilo_rasterizer_sf *sf)
{
   ILO_DEV_ASSERT(dev, 6, 8);

   /*
    * Scale the constant term.  The minimum representable value used by the
    * HW is not large enough to be the minimum resolvable difference.
    */
   sf->dw_depth_offset_const = fui(state->offset_units * 2.0f);
   sf->dw_depth_offset_scale = fui(state->offset_scale);
   sf->dw_depth_offset_clamp = fui(state->offset_clamp);
}
 
/*
 * Initialize the 3DSTATE_SF-related portion of the rasterizer state for
 * Gen6 through Gen7.5: payload[0..2] hold SF DW1..DW3, dw_msaa holds the
 * bits to OR in when multisample rasterization is on, and the depth-offset
 * dwords are filled by rasterizer_init_sf_depth_offset_gen6().
 */
static void
rasterizer_init_sf_gen6(const struct ilo_dev *dev,
                        const struct pipe_rasterizer_state *state,
                        struct ilo_rasterizer_sf *sf)
{
   int line_width, point_width;
   uint32_t dw1, dw2, dw3;

   ILO_DEV_ASSERT(dev, 6, 7.5);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 248:
    *
    *     "This bit (Statistics Enable) should be set whenever clipping is
    *      enabled and the Statistics Enable bit is set in CLIP_STATE. It
    *      should be cleared if clipping is disabled or Statistics Enable in
    *      CLIP_STATE is clear."
    */
   dw1 = GEN7_SF_DW1_STATISTICS |
         GEN7_SF_DW1_VIEWPORT_ENABLE;

   /* XXX GEN6 path seems to work fine for GEN7 */
   if (false && ilo_dev_gen(dev) >= ILO_GEN(7)) {
      /*
       * From the Ivy Bridge PRM, volume 2 part 1, page 258:
       *
       *     "This bit (Legacy Global Depth Bias Enable, Global Depth Offset
       *      Enable Solid , Global Depth Offset Enable Wireframe, and Global
       *      Depth Offset Enable Point) should be set whenever non zero depth
       *      bias (Slope, Bias) values are used. Setting this bit may have
       *      some degradation of performance for some workloads."
       */
      if (state->offset_tri || state->offset_line || state->offset_point) {
         /* XXX need to scale offset_const according to the depth format */
         dw1 |= GEN7_SF_DW1_LEGACY_DEPTH_OFFSET;

         dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_SOLID |
                GEN7_SF_DW1_DEPTH_OFFSET_WIREFRAME |
                GEN7_SF_DW1_DEPTH_OFFSET_POINT;
      }
   } else {
      /* enable depth offset only for the fill modes that request it */
      if (state->offset_tri)
         dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_SOLID;
      if (state->offset_line)
         dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_WIREFRAME;
      if (state->offset_point)
         dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_POINT;
   }

   switch (state->fill_front) {
   case PIPE_POLYGON_MODE_FILL:
      dw1 |= GEN7_SF_DW1_FRONTFACE_SOLID;
      break;
   case PIPE_POLYGON_MODE_LINE:
      dw1 |= GEN7_SF_DW1_FRONTFACE_WIREFRAME;
      break;
   case PIPE_POLYGON_MODE_POINT:
      dw1 |= GEN7_SF_DW1_FRONTFACE_POINT;
      break;
   }

   switch (state->fill_back) {
   case PIPE_POLYGON_MODE_FILL:
      dw1 |= GEN7_SF_DW1_BACKFACE_SOLID;
      break;
   case PIPE_POLYGON_MODE_LINE:
      dw1 |= GEN7_SF_DW1_BACKFACE_WIREFRAME;
      break;
   case PIPE_POLYGON_MODE_POINT:
      dw1 |= GEN7_SF_DW1_BACKFACE_POINT;
      break;
   }

   if (state->front_ccw)
      dw1 |= GEN7_SF_DW1_FRONTWINDING_CCW;

   dw2 = 0;

   if (state->line_smooth) {
      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 251:
       *
       *     "This field (Anti-aliasing Enable) must be disabled if any of the
       *      render targets have integer (UINT or SINT) surface format."
       *
       * From the Sandy Bridge PRM, volume 2 part 1, page 317:
       *
       *     "This field (Hierarchical Depth Buffer Enable) must be disabled
       *      if Anti-aliasing Enable in 3DSTATE_SF is enabled.
       *
       * TODO We do not check those yet.
       */
      dw2 |= GEN7_SF_DW2_AA_LINE_ENABLE |
             GEN7_SF_DW2_AA_LINE_CAP_1_0;
   }

   switch (state->cull_face) {
   case PIPE_FACE_NONE:
      dw2 |= GEN7_SF_DW2_CULLMODE_NONE;
      break;
   case PIPE_FACE_FRONT:
      dw2 |= GEN7_SF_DW2_CULLMODE_FRONT;
      break;
   case PIPE_FACE_BACK:
      dw2 |= GEN7_SF_DW2_CULLMODE_BACK;
      break;
   case PIPE_FACE_FRONT_AND_BACK:
      dw2 |= GEN7_SF_DW2_CULLMODE_BOTH;
      break;
   }

   /*
    * Smooth lines should intersect ceil(line_width) or (ceil(line_width) + 1)
    * pixels in the minor direction.  We have to make the lines slightly
    * thicker, 0.5 pixel on both sides, so that all pixels the lines should
    * intersect are counted as part of the lines.
    *
    * Line width is in U3.7 fixed point (hence the * 128.0f).
    */
   line_width = (int)
      ((state->line_width + (float) state->line_smooth) * 128.0f + 0.5f);
   line_width = CLAMP(line_width, 0, 1023);

   /* use GIQ rules: width 0 selects the hardware diamond-exit rule */
   if (line_width == 128 && !state->line_smooth)
      line_width = 0;

   dw2 |= line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT;

   /* the line-stipple enable moved into SF DW2 on Haswell only */
   if (ilo_dev_gen(dev) == ILO_GEN(7.5) && state->line_stipple_enable)
      dw2 |= GEN75_SF_DW2_LINE_STIPPLE_ENABLE;

   if (state->scissor)
      dw2 |= GEN7_SF_DW2_SCISSOR_ENABLE;

   dw3 = GEN7_SF_DW3_TRUE_AA_LINE_DISTANCE |
         GEN7_SF_DW3_SUBPIXEL_8BITS;

   if (state->line_last_pixel)
      dw3 |= GEN7_SF_DW3_LINE_LAST_PIXEL_ENABLE;

   /* provoking vertex selection depends on the flat-shading convention */
   if (state->flatshade_first) {
      dw3 |= 0 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
             0 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
             1 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT;
   } else {
      dw3 |= 2 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
             1 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
             2 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT;
   }

   if (!state->point_size_per_vertex)
      dw3 |= GEN7_SF_DW3_USE_POINT_WIDTH;

   /* point width is in U8.3 fixed point (hence the * 8.0f) */
   point_width = (int) (state->point_size * 8.0f + 0.5f);
   point_width = CLAMP(point_width, 1, 2047);

   dw3 |= point_width;

   STATIC_ASSERT(Elements(sf->payload) >= 3);
   sf->payload[0] = dw1;
   sf->payload[1] = dw2;
   sf->payload[2] = dw3;

   if (state->multisample) {
      sf->dw_msaa = GEN7_SF_DW2_MSRASTMODE_ON_PATTERN;

      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 251:
       *
       *     "Software must not program a value of 0.0 when running in
       *      MSRASTMODE_ON_xxx modes - zero-width lines are not available
       *      when multisampling rasterization is enabled."
       */
      if (!line_width) {
         line_width = 128; /* 1.0f */

         sf->dw_msaa |= line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT;
      }
   } else {
      sf->dw_msaa = 0;
   }

   rasterizer_init_sf_depth_offset_gen6(dev, state, sf);
   /* 3DSTATE_RASTER is Gen8+ only */
   sf->dw_raster = 0;
}
 
static uint32_t
rasterizer_get_sf_raster_gen8(const struct ilo_dev *dev,
const struct pipe_rasterizer_state *state)
{
uint32_t dw = 0;
 
ILO_DEV_ASSERT(dev, 8, 8);
 
if (state->front_ccw)
dw |= GEN8_RASTER_DW1_FRONTWINDING_CCW;
 
switch (state->cull_face) {
case PIPE_FACE_NONE:
dw |= GEN8_RASTER_DW1_CULLMODE_NONE;
break;
case PIPE_FACE_FRONT:
dw |= GEN8_RASTER_DW1_CULLMODE_FRONT;
break;
case PIPE_FACE_BACK:
dw |= GEN8_RASTER_DW1_CULLMODE_BACK;
break;
case PIPE_FACE_FRONT_AND_BACK:
dw |= GEN8_RASTER_DW1_CULLMODE_BOTH;
break;
}
 
if (state->point_smooth)
dw |= GEN8_RASTER_DW1_SMOOTH_POINT_ENABLE;
 
if (state->multisample)
dw |= GEN8_RASTER_DW1_API_MULTISAMPLE_ENABLE;
 
if (state->offset_tri)
dw|= GEN8_RASTER_DW1_DEPTH_OFFSET_SOLID;
if (state->offset_line)
dw|= GEN8_RASTER_DW1_DEPTH_OFFSET_WIREFRAME;
if (state->offset_point)
dw|= GEN8_RASTER_DW1_DEPTH_OFFSET_POINT;
 
switch (state->fill_front) {
case PIPE_POLYGON_MODE_FILL:
dw |= GEN8_RASTER_DW1_FRONTFACE_SOLID;
break;
case PIPE_POLYGON_MODE_LINE:
dw |= GEN8_RASTER_DW1_FRONTFACE_WIREFRAME;
break;
case PIPE_POLYGON_MODE_POINT:
dw |= GEN8_RASTER_DW1_FRONTFACE_POINT;
break;
}
 
switch (state->fill_back) {
case PIPE_POLYGON_MODE_FILL:
dw |= GEN8_RASTER_DW1_BACKFACE_SOLID;
break;
case PIPE_POLYGON_MODE_LINE:
dw |= GEN8_RASTER_DW1_BACKFACE_WIREFRAME;
break;
case PIPE_POLYGON_MODE_POINT:
dw |= GEN8_RASTER_DW1_BACKFACE_POINT;
break;
}
 
if (state->line_smooth)
dw |= GEN8_RASTER_DW1_AA_LINE_ENABLE;
 
if (state->scissor)
dw |= GEN8_RASTER_DW1_SCISSOR_ENABLE;
 
if (state->depth_clip)
dw |= GEN8_RASTER_DW1_Z_TEST_ENABLE;
 
return dw;
}
 
/*
 * Initialize the 3DSTATE_SF-related portion of the rasterizer state for
 * Gen8.  Most rasterization controls moved to 3DSTATE_RASTER on Gen8;
 * those bits are produced by rasterizer_get_sf_raster_gen8() and stored
 * in sf->dw_raster.
 */
static void
rasterizer_init_sf_gen8(const struct ilo_dev *dev,
                        const struct pipe_rasterizer_state *state,
                        struct ilo_rasterizer_sf *sf)
{
   int line_width, point_width;
   uint32_t dw1, dw2, dw3;

   ILO_DEV_ASSERT(dev, 8, 8);

   /*
    * Line width in U3.7 fixed point.  Smooth lines are widened by a pixel
    * (0.5 on each side) so that all pixels they should touch are covered.
    */
   line_width = (int)
      ((state->line_width + (float) state->line_smooth) * 128.0f + 0.5f);
   line_width = CLAMP(line_width, 0, 1023);

   /* use GIQ rules: width 0 selects the hardware diamond-exit rule */
   if (line_width == 128 && !state->line_smooth)
      line_width = 0;

   /* point width in U8.3 fixed point */
   point_width = (int) (state->point_size * 8.0f + 0.5f);
   point_width = CLAMP(point_width, 1, 2047);

   dw1 = GEN7_SF_DW1_STATISTICS |
         GEN7_SF_DW1_VIEWPORT_ENABLE;

   dw2 = line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT;
   if (state->line_smooth)
      dw2 |= GEN7_SF_DW2_AA_LINE_CAP_1_0;

   dw3 = GEN7_SF_DW3_TRUE_AA_LINE_DISTANCE |
         GEN7_SF_DW3_SUBPIXEL_8BITS;

   if (state->line_last_pixel)
      dw3 |= GEN7_SF_DW3_LINE_LAST_PIXEL_ENABLE;

   /* provoking vertex selection depends on the flat-shading convention */
   if (state->flatshade_first) {
      dw3 |= 0 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
             0 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
             1 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT;
   } else {
      dw3 |= 2 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
             1 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
             2 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT;
   }

   if (!state->point_size_per_vertex)
      dw3 |= GEN7_SF_DW3_USE_POINT_WIDTH;

   /*
    * point_width used to be OR'ed into dw3 both here and in the initial
    * dw3 assignment; OR it in exactly once.
    */
   dw3 |= point_width;

   STATIC_ASSERT(Elements(sf->payload) >= 3);
   sf->payload[0] = dw1;
   sf->payload[1] = dw2;
   sf->payload[2] = dw3;

   rasterizer_init_sf_depth_offset_gen6(dev, state, sf);

   /* MSAA rasterization is controlled via 3DSTATE_RASTER on Gen8 */
   sf->dw_msaa = 0;
   sf->dw_raster = rasterizer_get_sf_raster_gen8(dev, state);
}
 
/*
 * Initialize the 3DSTATE_WM fixed-function portion (DW5/DW6) of the
 * rasterizer state for Gen6.
 */
static void
rasterizer_init_wm_gen6(const struct ilo_dev *dev,
                        const struct pipe_rasterizer_state *state,
                        struct ilo_rasterizer_wm *wm)
{
   uint32_t wm5, wm6;

   ILO_DEV_ASSERT(dev, 6, 6);

   /* only the FF unit states are set, as in GEN7 */

   wm5 = GEN6_WM_DW5_AA_LINE_WIDTH_2_0;

   /* the AA line cap must match the value programmed in 3DSTATE_SF */
   if (state->line_smooth)
      wm5 |= GEN6_WM_DW5_AA_LINE_CAP_1_0;
   if (state->poly_stipple_enable)
      wm5 |= GEN6_WM_DW5_POLY_STIPPLE_ENABLE;
   if (state->line_stipple_enable)
      wm5 |= GEN6_WM_DW5_LINE_STIPPLE_ENABLE;

   wm6 = GEN6_WM_DW6_ZW_INTERP_PIXEL;
   if (state->bottom_edge_rule)
      wm6 |= GEN6_WM_DW6_POINT_RASTRULE_UPPER_RIGHT;

   /*
    * dw_msaa_rast and dw_msaa_disp are OR'ed into DW6 at emit time:
    *
    *    dw6 |= wm->dw_msaa_rast | wm->dw_msaa_disp;
    *
    * The assertion guarantees that the "disabled" variants contribute no
    * bits, so the OR is valid.
    */
   STATIC_ASSERT(GEN6_WM_DW6_MSRASTMODE_OFF_PIXEL == 0 &&
                 GEN6_WM_DW6_MSDISPMODE_PERSAMPLE == 0);

   wm->dw_msaa_rast =
      (state->multisample) ? GEN6_WM_DW6_MSRASTMODE_ON_PATTERN : 0;
   wm->dw_msaa_disp = GEN6_WM_DW6_MSDISPMODE_PERPIXEL;

   STATIC_ASSERT(Elements(wm->payload) >= 2);
   wm->payload[0] = wm5;
   wm->payload[1] = wm6;
}
 
/*
 * Initialize the 3DSTATE_WM fixed-function portion (DW1/DW2) of the
 * rasterizer state for Gen7 and Gen7.5.
 */
static void
rasterizer_init_wm_gen7(const struct ilo_dev *dev,
                        const struct pipe_rasterizer_state *state,
                        struct ilo_rasterizer_wm *wm)
{
   uint32_t wm1, wm2;

   ILO_DEV_ASSERT(dev, 7, 7.5);

   /*
    * dw_msaa_rast and dw_msaa_disp are OR'ed into DW1 and DW2 at emit
    * time:
    *
    *    dw1 |= wm->dw_msaa_rast;
    *    dw2 |= wm->dw_msaa_disp;
    *
    * The assertion guarantees that the "disabled" variants contribute no
    * bits, so the ORs are valid.
    */
   STATIC_ASSERT(GEN7_WM_DW1_MSRASTMODE_OFF_PIXEL == 0 &&
                 GEN7_WM_DW2_MSDISPMODE_PERSAMPLE == 0);

   wm1 = GEN7_WM_DW1_ZW_INTERP_PIXEL |
         GEN7_WM_DW1_AA_LINE_WIDTH_2_0;

   /* the AA line cap must match the value programmed in 3DSTATE_SF */
   if (state->line_smooth)
      wm1 |= GEN7_WM_DW1_AA_LINE_CAP_1_0;
   if (state->poly_stipple_enable)
      wm1 |= GEN7_WM_DW1_POLY_STIPPLE_ENABLE;
   if (state->line_stipple_enable)
      wm1 |= GEN7_WM_DW1_LINE_STIPPLE_ENABLE;
   if (state->bottom_edge_rule)
      wm1 |= GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT;

   wm2 = 0;

   wm->dw_msaa_rast =
      (state->multisample) ? GEN7_WM_DW1_MSRASTMODE_ON_PATTERN : 0;
   wm->dw_msaa_disp = GEN7_WM_DW2_MSDISPMODE_PERPIXEL;

   STATIC_ASSERT(Elements(wm->payload) >= 2);
   wm->payload[0] = wm1;
   wm->payload[1] = wm2;
}
 
/*
 * Build the Gen8 3DSTATE_WM DW1 value from the Gallium rasterizer state.
 */
static uint32_t
rasterizer_get_wm_gen8(const struct ilo_dev *dev,
                       const struct pipe_rasterizer_state *state)
{
   uint32_t wm = GEN7_WM_DW1_ZW_INTERP_PIXEL |
                 GEN7_WM_DW1_AA_LINE_WIDTH_2_0;

   ILO_DEV_ASSERT(dev, 8, 8);

   /* the AA line cap must match the value programmed in 3DSTATE_SF */
   if (state->line_smooth)
      wm |= GEN7_WM_DW1_AA_LINE_CAP_1_0;
   if (state->poly_stipple_enable)
      wm |= GEN7_WM_DW1_POLY_STIPPLE_ENABLE;
   if (state->line_stipple_enable)
      wm |= GEN7_WM_DW1_LINE_STIPPLE_ENABLE;
   if (state->bottom_edge_rule)
      wm |= GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT;

   return wm;
}
 
/*
 * Initialize the full rasterizer state (clip, WM, and SF portions),
 * dispatching to the per-generation initializers.
 */
void
ilo_gpe_init_rasterizer(const struct ilo_dev *dev,
                        const struct pipe_rasterizer_state *state,
                        struct ilo_rasterizer_state *rasterizer)
{
   rasterizer_init_clip(dev, state, &rasterizer->clip);

   if (ilo_dev_gen(dev) < ILO_GEN(7)) {
      rasterizer_init_wm_gen6(dev, state, &rasterizer->wm);
      rasterizer_init_sf_gen6(dev, state, &rasterizer->sf);
   } else if (ilo_dev_gen(dev) < ILO_GEN(8)) {
      rasterizer_init_wm_gen7(dev, state, &rasterizer->wm);
      rasterizer_init_sf_gen6(dev, state, &rasterizer->sf);
   } else {
      /* Gen8 packs all WM bits into a single dword */
      memset(&rasterizer->wm, 0, sizeof(rasterizer->wm));
      rasterizer->wm.payload[0] = rasterizer_get_wm_gen8(dev, state);

      rasterizer_init_sf_gen8(dev, state, &rasterizer->sf);
   }
}
 
/*
 * Build the Gen6 3DSTATE_WM payload (DW2, DW4-DW6) for a fragment shader
 * CSO from the compiled kernel's parameters.
 */
static void
fs_init_cso_gen6(const struct ilo_dev *dev,
                 const struct ilo_shader_state *fs,
                 struct ilo_shader_cso *cso)
{
   int start_grf, input_count, sampler_count, interps, max_threads;
   uint32_t dw2, dw4, dw5, dw6;

   ILO_DEV_ASSERT(dev, 6, 6);

   start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
   input_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
   sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT);
   interps = ilo_shader_get_kernel_param(fs,
         ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS);

   /* see brwCreateContext() */
   max_threads = (dev->gt == 2) ? 80 : 40;

   /* IEEE float mode; the ALT branch is intentionally never taken */
   dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
   /* the sampler count field is programmed in groups of four samplers */
   dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;

   dw4 = start_grf << GEN6_WM_DW4_URB_GRF_START0__SHIFT |
         0 << GEN6_WM_DW4_URB_GRF_START1__SHIFT |
         0 << GEN6_WM_DW4_URB_GRF_START2__SHIFT;

   dw5 = (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT;

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 275:
    *
    *     "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
    *      PS kernel or color calculator has the ability to kill (discard)
    *      pixels or samples, other than due to depth or stencil testing.
    *      This bit is required to be ENABLED in the following situations:
    *
    *      The API pixel shader program contains "killpix" or "discard"
    *      instructions, or other code in the pixel shader kernel that can
    *      cause the final pixel mask to differ from the pixel mask received
    *      on dispatch.
    *
    *      A sampler with chroma key enabled with kill pixel mode is used by
    *      the pixel shader.
    *
    *      Any render target has Alpha Test Enable or AlphaToCoverage Enable
    *      enabled.
    *
    *      The pixel shader kernel generates and outputs oMask.
    *
    *      Note: As ClipDistance clipping is fully supported in hardware and
    *      therefore not via PS instructions, there should be no need to
    *      ENABLE this bit due to ClipDistance clipping."
    */
   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
      dw5 |= GEN6_WM_DW5_PS_KILL_PIXEL;

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 275:
    *
    *     "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
    *      field must be set to disabled."
    *
    * TODO This is not checked yet.
    */
   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
      dw5 |= GEN6_WM_DW5_PS_COMPUTE_DEPTH;

   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
      dw5 |= GEN6_WM_DW5_PS_USE_DEPTH;

   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
      dw5 |= GEN6_WM_DW5_PS_USE_W;

   /*
    * TODO set this bit only when
    *
    *  a) fs writes colors and color is not masked, or
    *  b) fs writes depth, or
    *  c) fs or cc kills
    */
   if (true)
      dw5 |= GEN6_WM_DW5_PS_DISPATCH_ENABLE;

   /* only SIMD8 dispatch at kernel offset 0 is supported */
   assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
   dw5 |= GEN6_PS_DISPATCH_8 << GEN6_WM_DW5_PS_DISPATCH_MODE__SHIFT;

   dw6 = input_count << GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT |
         GEN6_WM_DW6_PS_POSOFFSET_NONE |
         interps << GEN6_WM_DW6_BARYCENTRIC_INTERP__SHIFT;

   STATIC_ASSERT(Elements(cso->payload) >= 4);
   cso->payload[0] = dw2;
   cso->payload[1] = dw4;
   cso->payload[2] = dw5;
   cso->payload[3] = dw6;
}
 
/*
 * Build the Gen7/Gen7.5 3DSTATE_WM DW1 value for a fragment shader CSO.
 * Stored as cso->payload[3] by fs_init_cso_gen7().
 */
static uint32_t
fs_get_wm_gen7(const struct ilo_dev *dev,
               const struct ilo_shader_state *fs)
{
   uint32_t dw;

   ILO_DEV_ASSERT(dev, 7, 7.5);

   dw = ilo_shader_get_kernel_param(fs,
         ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) <<
      GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT;

   /*
    * TODO set this bit only when
    *
    *  a) fs writes colors and color is not masked, or
    *  b) fs writes depth, or
    *  c) fs or cc kills
    */
   dw |= GEN7_WM_DW1_PS_DISPATCH_ENABLE;

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 278:
    *
    *     "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that
    *      the PS kernel or color calculator has the ability to kill
    *      (discard) pixels or samples, other than due to depth or stencil
    *      testing. This bit is required to be ENABLED in the following
    *      situations:
    *
    *      - The API pixel shader program contains "killpix" or "discard"
    *        instructions, or other code in the pixel shader kernel that
    *        can cause the final pixel mask to differ from the pixel mask
    *        received on dispatch.
    *
    *      - A sampler with chroma key enabled with kill pixel mode is used
    *        by the pixel shader.
    *
    *      - Any render target has Alpha Test Enable or AlphaToCoverage
    *        Enable enabled.
    *
    *      - The pixel shader kernel generates and outputs oMask.
    *
    *      Note: As ClipDistance clipping is fully supported in hardware
    *      and therefore not via PS instructions, there should be no need
    *      to ENABLE this bit due to ClipDistance clipping."
    */
   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
      dw |= GEN7_WM_DW1_PS_KILL_PIXEL;

   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
      dw |= GEN7_WM_DW1_PSCDEPTH_ON;

   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
      dw |= GEN7_WM_DW1_PS_USE_DEPTH;

   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
      dw |= GEN7_WM_DW1_PS_USE_W;

   return dw;
}
 
/*
 * Build the Gen7/Gen7.5 3DSTATE_PS payload (DW2, DW4, DW5) plus the
 * 3DSTATE_WM dword for a fragment shader CSO.
 */
static void
fs_init_cso_gen7(const struct ilo_dev *dev,
                 const struct ilo_shader_state *fs,
                 struct ilo_shader_cso *cso)
{
   int grf_start, num_samplers, max_threads;
   uint32_t dw2, dw4, dw5;

   ILO_DEV_ASSERT(dev, 7, 7.5);

   grf_start = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
   num_samplers = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT);

   /*
    * IEEE float mode (GEN6_THREADDISP_FP_MODE_ALT is never selected); the
    * sampler count field is programmed in groups of four samplers.
    */
   dw2 = ((num_samplers + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;

   dw4 = GEN7_PS_DW4_POSOFFSET_NONE;

   /* thread counts follow brwCreateContext() */
   if (ilo_dev_gen(dev) == ILO_GEN(7.5)) {
      max_threads = (dev->gt == 3) ? 408 : (dev->gt == 2) ? 204 : 102;
      dw4 |= (max_threads - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT;
      dw4 |= 1 << GEN75_PS_DW4_SAMPLE_MASK__SHIFT;
   } else {
      max_threads = (dev->gt == 2) ? 172 : 48;
      dw4 |= (max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT;
   }

   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_PCB_CBUF0_SIZE))
      dw4 |= GEN7_PS_DW4_PUSH_CONSTANT_ENABLE;
   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT))
      dw4 |= GEN7_PS_DW4_ATTR_ENABLE;

   /* only SIMD8 dispatch at kernel offset 0 is supported */
   assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
   dw4 |= GEN6_PS_DISPATCH_8 << GEN7_PS_DW4_DISPATCH_MODE__SHIFT;

   dw5 = grf_start << GEN7_PS_DW5_URB_GRF_START0__SHIFT |
         0 << GEN7_PS_DW5_URB_GRF_START1__SHIFT |
         0 << GEN7_PS_DW5_URB_GRF_START2__SHIFT;

   STATIC_ASSERT(Elements(cso->payload) >= 4);
   cso->payload[0] = dw2;
   cso->payload[1] = dw4;
   cso->payload[2] = dw5;
   cso->payload[3] = fs_get_wm_gen7(dev, fs);
}
 
/*
 * Build the Gen8 3DSTATE_PS_EXTRA DW1 value for a fragment shader CSO.
 */
static uint32_t
fs_get_psx_gen8(const struct ilo_dev *dev,
                const struct ilo_shader_state *fs)
{
   uint32_t psx = GEN8_PSX_DW1_DISPATCH_ENABLE;

   ILO_DEV_ASSERT(dev, 8, 8);

   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
      psx |= GEN8_PSX_DW1_KILL_PIXEL;

   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
      psx |= GEN8_PSX_DW1_PSCDEPTH_ON;

   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
      psx |= GEN8_PSX_DW1_USE_DEPTH;

   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
      psx |= GEN8_PSX_DW1_USE_W;

   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT))
      psx |= GEN8_PSX_DW1_ATTR_ENABLE;

   return psx;
}
 
/*
 * Build the Gen8 3DSTATE_WM barycentric-interpolation dword for a
 * fragment shader CSO.
 */
static uint32_t
fs_get_wm_gen8(const struct ilo_dev *dev,
               const struct ilo_shader_state *fs)
{
   int interps;

   ILO_DEV_ASSERT(dev, 8, 8);

   interps = ilo_shader_get_kernel_param(fs,
         ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS);

   return interps << GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT;
}
 
/*
 * Build the Gen8 3DSTATE_PS payload (DW3, DW6, DW7) plus the PS_EXTRA and
 * WM dwords for a fragment shader CSO.
 */
static void
fs_init_cso_gen8(const struct ilo_dev *dev,
                 const struct ilo_shader_state *fs,
                 struct ilo_shader_cso *cso)
{
   int grf_start, num_samplers;
   uint32_t dw3, dw6, dw7;

   ILO_DEV_ASSERT(dev, 8, 8);

   grf_start = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
   num_samplers = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT);

   /*
    * IEEE float mode (GEN6_THREADDISP_FP_MODE_ALT is never selected); the
    * sampler count field is programmed in groups of four samplers.
    */
   dw3 = ((num_samplers + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;

   /* maximum thread count appears to always be 64 here -- TODO confirm */
   dw6 = (64 - 2) << GEN8_PS_DW6_MAX_THREADS__SHIFT |
         GEN8_PS_DW6_POSOFFSET_NONE;
   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_PCB_CBUF0_SIZE))
      dw6 |= GEN8_PS_DW6_PUSH_CONSTANT_ENABLE;

   /* only SIMD8 dispatch at kernel offset 0 is supported */
   assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
   dw6 |= GEN6_PS_DISPATCH_8 << GEN8_PS_DW6_DISPATCH_MODE__SHIFT;

   dw7 = grf_start << GEN8_PS_DW7_URB_GRF_START0__SHIFT |
         0 << GEN8_PS_DW7_URB_GRF_START1__SHIFT |
         0 << GEN8_PS_DW7_URB_GRF_START2__SHIFT;

   STATIC_ASSERT(Elements(cso->payload) >= 5);
   cso->payload[0] = dw3;
   cso->payload[1] = dw6;
   cso->payload[2] = dw7;
   cso->payload[3] = fs_get_psx_gen8(dev, fs);
   cso->payload[4] = fs_get_wm_gen8(dev, fs);
}
 
/*
 * Initialize a fragment shader CSO, dispatching to the per-generation
 * builder.
 */
void
ilo_gpe_init_fs_cso(const struct ilo_dev *dev,
                    const struct ilo_shader_state *fs,
                    struct ilo_shader_cso *cso)
{
   if (ilo_dev_gen(dev) < ILO_GEN(7))
      fs_init_cso_gen6(dev, fs, cso);
   else if (ilo_dev_gen(dev) < ILO_GEN(8))
      fs_init_cso_gen7(dev, fs, cso);
   else
      fs_init_cso_gen8(dev, fs, cso);
}
 
/*
 * Intermediate description of a depth/stencil surface, filled by
 * zs_init_info()/zs_init_info_null() and consumed by
 * ilo_gpe_init_zs_surface() when packing the hardware payloads.
 */
struct ilo_zs_surface_info {
   int surface_type;    /* one of GEN6_SURFTYPE_x */
   int format;          /* one of GEN6_ZFORMAT_x */

   /* depth (zs), separate stencil, and HiZ buffers, respectively */
   struct {
      struct intel_bo *bo;
      unsigned stride;              /* bo pitch in bytes */
      unsigned qpitch;              /* walk_layer_height / 4 of the image */
      enum gen_surface_tiling tiling;
      uint32_t offset;              /* byte offset to the LOD; only set on
                                       Gen6 for stencil/HiZ in zs_init_info() */
   } zs, stencil, hiz;

   unsigned width, height, depth;
   unsigned lod, first_layer, num_layers;
};
 
/*
 * Fill \p info with a 1x1x1 null depth/stencil surface.
 */
static void
zs_init_info_null(const struct ilo_dev *dev,
                  struct ilo_zs_surface_info *info)
{
   ILO_DEV_ASSERT(dev, 6, 8);

   memset(info, 0, sizeof(*info));

   /* a minimal null surface with a harmless depth format */
   info->surface_type = GEN6_SURFTYPE_NULL;
   info->format = GEN6_ZFORMAT_D32_FLOAT;
   info->num_layers = 1;
   info->depth = 1;
   info->height = 1;
   info->width = 1;
}
 
/*
 * Fill \p info from the depth image \p img and the optional separate
 * stencil image \p s8_img, for the given target/format/level/layer range.
 * Falls back to a null surface on unsupported formats.
 */
static void
zs_init_info(const struct ilo_dev *dev,
             const struct ilo_image *img,
             const struct ilo_image *s8_img,
             enum pipe_texture_target target,
             enum pipe_format format, unsigned level,
             unsigned first_layer, unsigned num_layers,
             struct ilo_zs_surface_info *info)
{
   bool separate_stencil;

   ILO_DEV_ASSERT(dev, 6, 8);

   memset(info, 0, sizeof(*info));

   info->surface_type = ilo_gpe_gen6_translate_texture(target);

   if (info->surface_type == GEN6_SURFTYPE_CUBE) {
      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 325-326:
       *
       *     "For Other Surfaces (Cube Surfaces):
       *      This field (Minimum Array Element) is ignored."
       *
       *     "For Other Surfaces (Cube Surfaces):
       *      This field (Render Target View Extent) is ignored."
       *
       * As such, we cannot set first_layer and num_layers on cube surfaces.
       * To work around that, treat it as a 2D surface.
       */
      info->surface_type = GEN6_SURFTYPE_2D;
   }

   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
      separate_stencil = true;
   } else {
      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 317:
       *
       *     "This field (Separate Stencil Buffer Enable) must be set to the
       *      same value (enabled or disabled) as Hierarchical Depth Buffer
       *      Enable."
       */
      separate_stencil = ilo_image_can_enable_aux(img, level);
   }

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 317:
    *
    *     "If this field (Hierarchical Depth Buffer Enable) is enabled, the
    *      Surface Format of the depth buffer cannot be
    *      D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT. Use of stencil
    *      requires the separate stencil buffer."
    *
    * From the Ironlake PRM, volume 2 part 1, page 330:
    *
    *     "If this field (Separate Stencil Buffer Enable) is disabled, the
    *      Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT."
    *
    * There is no similar restriction for GEN6.  But when D24_UNORM_X8_UINT
    * is indeed used, the depth values output by the fragment shaders will
    * be different when read back.
    *
    * As for GEN7+, separate_stencil is always true.
    */
   switch (format) {
   case PIPE_FORMAT_Z16_UNORM:
      info->format = GEN6_ZFORMAT_D16_UNORM;
      break;
   case PIPE_FORMAT_Z32_FLOAT:
      info->format = GEN6_ZFORMAT_D32_FLOAT;
      break;
   case PIPE_FORMAT_Z24X8_UNORM:
   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
      info->format = (separate_stencil) ?
         GEN6_ZFORMAT_D24_UNORM_X8_UINT :
         GEN6_ZFORMAT_D24_UNORM_S8_UINT;
      break;
   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
      info->format = (separate_stencil) ?
         GEN6_ZFORMAT_D32_FLOAT :
         GEN6_ZFORMAT_D32_FLOAT_S8X24_UINT;
      break;
   case PIPE_FORMAT_S8_UINT:
      if (separate_stencil) {
         info->format = GEN6_ZFORMAT_D32_FLOAT;
         break;
      }
      /* fall through */
   default:
      assert(!"unsupported depth/stencil format");
      zs_init_info_null(dev, info);
      return;
      break;   /* unreachable: the return above always exits */
   }

   if (format != PIPE_FORMAT_S8_UINT) {
      info->zs.bo = img->bo;
      info->zs.stride = img->bo_stride;

      assert(img->walk_layer_height % 4 == 0);
      info->zs.qpitch = img->walk_layer_height / 4;

      info->zs.tiling = img->tiling;
      info->zs.offset = 0;
   }

   if (s8_img || format == PIPE_FORMAT_S8_UINT) {
      /*
       * NOTE(review): s8_img is dereferenced unconditionally below.  This
       * assumes callers always pass a non-NULL s8_img whenever format is
       * PIPE_FORMAT_S8_UINT -- TODO confirm against the callers.
       */
      info->stencil.bo = s8_img->bo;

      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 329:
       *
       *     "The pitch must be set to 2x the value computed based on width,
       *      as the stencil buffer is stored with two rows interleaved."
       *
       * For GEN7, we still double the stride because we did not double the
       * slice widths when initializing the layout.
       */
      info->stencil.stride = s8_img->bo_stride * 2;

      assert(s8_img->walk_layer_height % 4 == 0);
      info->stencil.qpitch = s8_img->walk_layer_height / 4;

      info->stencil.tiling = s8_img->tiling;

      if (ilo_dev_gen(dev) == ILO_GEN(6)) {
         unsigned x, y;

         assert(s8_img->walk == ILO_IMAGE_WALK_LOD);

         /* offset to the level */
         ilo_image_get_slice_pos(s8_img, level, 0, &x, &y);
         ilo_image_pos_to_mem(s8_img, x, y, &x, &y);
         info->stencil.offset = ilo_image_mem_to_raw(s8_img, x, y);
      }
   }

   if (ilo_image_can_enable_aux(img, level)) {
      info->hiz.bo = img->aux.bo;
      info->hiz.stride = img->aux.bo_stride;

      assert(img->aux.walk_layer_height % 4 == 0);
      info->hiz.qpitch = img->aux.walk_layer_height / 4;

      /* HiZ buffers are always Y-tiled */
      info->hiz.tiling = GEN6_TILING_Y;

      /* offset to the level */
      if (ilo_dev_gen(dev) == ILO_GEN(6))
         info->hiz.offset = img->aux.walk_lod_offsets[level];
   }

   info->width = img->width0;
   info->height = img->height0;
   info->depth = (target == PIPE_TEXTURE_3D) ? img->depth0 : num_layers;

   info->lod = level;
   info->first_layer = first_layer;
   info->num_layers = num_layers;
}
 
/*
 * Pack the 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER, and
 * 3DSTATE_HIER_DEPTH_BUFFER payloads into \p zs.  When \p img is NULL a
 * null depth surface is emitted.  BO pointers are stored without taking a
 * reference; the caller owns the lifetimes.
 */
void
ilo_gpe_init_zs_surface(const struct ilo_dev *dev,
                        const struct ilo_image *img,
                        const struct ilo_image *s8_img,
                        enum pipe_texture_target target,
                        enum pipe_format format, unsigned level,
                        unsigned first_layer, unsigned num_layers,
                        struct ilo_zs_surface *zs)
{
   /* hardware dimension limits grew on Gen7 */
   const int max_2d_size = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 16384 : 8192;
   const int max_array_size = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 2048 : 512;
   struct ilo_zs_surface_info info;
   uint32_t dw1, dw2, dw3, dw4, dw5, dw6;
   int align_w = 8, align_h = 4;

   ILO_DEV_ASSERT(dev, 6, 8);

   if (img) {
      zs_init_info(dev, img, s8_img, target, format,
            level, first_layer, num_layers, &info);

      /* the 8x4 alignment shrinks with higher sample counts */
      switch (img->sample_count) {
      case 2:
         align_w /= 2;
         break;
      case 4:
         align_w /= 2;
         align_h /= 2;
         break;
      case 8:
         align_w /= 4;
         align_h /= 2;
         break;
      case 16:
         align_w /= 4;
         align_h /= 4;
         break;
      default:
         break;
      }
   } else {
      zs_init_info_null(dev, &info);
   }

   /*
    * Sanity-check the dimensions against the per-type hardware limits.
    * NOTE(review): "first_layer < max_array_size - 1" looks like it may be
    * off by one compared with "num_layers <= max_array_size" -- confirm
    * against the PRM.
    */
   switch (info.surface_type) {
   case GEN6_SURFTYPE_NULL:
      break;
   case GEN6_SURFTYPE_1D:
      assert(info.width <= max_2d_size && info.height == 1 &&
             info.depth <= max_array_size);
      assert(info.first_layer < max_array_size - 1 &&
             info.num_layers <= max_array_size);
      break;
   case GEN6_SURFTYPE_2D:
      assert(info.width <= max_2d_size && info.height <= max_2d_size &&
             info.depth <= max_array_size);
      assert(info.first_layer < max_array_size - 1 &&
             info.num_layers <= max_array_size);
      break;
   case GEN6_SURFTYPE_3D:
      assert(info.width <= 2048 && info.height <= 2048 && info.depth <= 2048);
      assert(info.first_layer < 2048 && info.num_layers <= max_array_size);
      break;
   case GEN6_SURFTYPE_CUBE:
      assert(info.width <= max_2d_size && info.height <= max_2d_size &&
             info.depth == 1);
      assert(info.first_layer == 0 && info.num_layers == 1);
      assert(info.width == info.height);
      break;
   default:
      assert(!"unexpected depth surface type");
      break;
   }

   dw1 = info.surface_type << GEN6_DEPTH_DW1_TYPE__SHIFT |
         info.format << GEN6_DEPTH_DW1_FORMAT__SHIFT;

   if (info.zs.bo) {
      /* required for GEN6+ */
      assert(info.zs.tiling == GEN6_TILING_Y);
      assert(info.zs.stride > 0 && info.zs.stride < 128 * 1024 &&
             info.zs.stride % 128 == 0);
      assert(info.width <= info.zs.stride);

      /* the pitch field is programmed as (pitch in bytes) - 1 */
      dw1 |= (info.zs.stride - 1);
      dw2 = info.zs.offset;
   } else {
      dw2 = 0;
   }

   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
      if (info.zs.bo)
         dw1 |= GEN7_DEPTH_DW1_DEPTH_WRITE_ENABLE;

      if (info.stencil.bo)
         dw1 |= GEN7_DEPTH_DW1_STENCIL_WRITE_ENABLE;

      if (info.hiz.bo)
         dw1 |= GEN7_DEPTH_DW1_HIZ_ENABLE;

      dw3 = (info.height - 1) << GEN7_DEPTH_DW3_HEIGHT__SHIFT |
            (info.width - 1) << GEN7_DEPTH_DW3_WIDTH__SHIFT |
            info.lod << GEN7_DEPTH_DW3_LOD__SHIFT;

      /* an alternate DW3 with dimensions padded to the 8x4 alignment */
      zs->dw_aligned_8x4 =
         (align(info.height, align_h) - 1) << GEN7_DEPTH_DW3_HEIGHT__SHIFT |
         (align(info.width, align_w) - 1) << GEN7_DEPTH_DW3_WIDTH__SHIFT |
         info.lod << GEN7_DEPTH_DW3_LOD__SHIFT;

      dw4 = (info.depth - 1) << GEN7_DEPTH_DW4_DEPTH__SHIFT |
            info.first_layer << GEN7_DEPTH_DW4_MIN_ARRAY_ELEMENT__SHIFT;

      dw5 = 0;

      dw6 = (info.num_layers - 1) << GEN7_DEPTH_DW6_RT_VIEW_EXTENT__SHIFT;

      if (ilo_dev_gen(dev) >= ILO_GEN(8))
         dw6 |= info.zs.qpitch;
   } else {
      /* always Y-tiled */
      dw1 |= GEN6_TILING_Y << GEN6_DEPTH_DW1_TILING__SHIFT;

      if (info.hiz.bo) {
         dw1 |= GEN6_DEPTH_DW1_HIZ_ENABLE |
                GEN6_DEPTH_DW1_SEPARATE_STENCIL;
      }

      dw3 = (info.height - 1) << GEN6_DEPTH_DW3_HEIGHT__SHIFT |
            (info.width - 1) << GEN6_DEPTH_DW3_WIDTH__SHIFT |
            info.lod << GEN6_DEPTH_DW3_LOD__SHIFT |
            GEN6_DEPTH_DW3_MIPLAYOUT_BELOW;

      /* an alternate DW3 with dimensions padded to the 8x4 alignment */
      zs->dw_aligned_8x4 =
         (align(info.height, align_h) - 1) << GEN6_DEPTH_DW3_HEIGHT__SHIFT |
         (align(info.width, align_w) - 1) << GEN6_DEPTH_DW3_WIDTH__SHIFT |
         info.lod << GEN6_DEPTH_DW3_LOD__SHIFT |
         GEN6_DEPTH_DW3_MIPLAYOUT_BELOW;

      dw4 = (info.depth - 1) << GEN6_DEPTH_DW4_DEPTH__SHIFT |
            info.first_layer << GEN6_DEPTH_DW4_MIN_ARRAY_ELEMENT__SHIFT |
            (info.num_layers - 1) << GEN6_DEPTH_DW4_RT_VIEW_EXTENT__SHIFT;

      dw5 = 0;

      dw6 = 0;
   }

   STATIC_ASSERT(Elements(zs->payload) >= 12);

   /* payload[0..5]: 3DSTATE_DEPTH_BUFFER */
   zs->payload[0] = dw1;
   zs->payload[1] = dw2;
   zs->payload[2] = dw3;
   zs->payload[3] = dw4;
   zs->payload[4] = dw5;
   zs->payload[5] = dw6;

   /* do not increment reference count */
   zs->bo = info.zs.bo;

   /* payload[6..8]: 3DSTATE_STENCIL_BUFFER (separate stencil) */
   if (info.stencil.bo) {
      assert(info.stencil.stride > 0 && info.stencil.stride < 128 * 1024 &&
             info.stencil.stride % 128 == 0);

      dw1 = (info.stencil.stride - 1) << GEN6_STENCIL_DW1_PITCH__SHIFT;
      if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
         dw1 |= GEN75_STENCIL_DW1_STENCIL_BUFFER_ENABLE;

      dw2 = info.stencil.offset;
      dw4 = info.stencil.qpitch;
   } else {
      dw1 = 0;
      dw2 = 0;
      dw4 = 0;
   }

   zs->payload[6] = dw1;
   zs->payload[7] = dw2;
   zs->payload[8] = dw4;
   /* do not increment reference count */
   zs->separate_s8_bo = info.stencil.bo;

   /* payload[9..11]: 3DSTATE_HIER_DEPTH_BUFFER (HiZ) */
   if (info.hiz.bo) {
      dw1 = (info.hiz.stride - 1) << GEN6_HIZ_DW1_PITCH__SHIFT;
      dw2 = info.hiz.offset;
      dw4 = info.hiz.qpitch;
   } else {
      dw1 = 0;
      dw2 = 0;
      dw4 = 0;
   }

   zs->payload[9] = dw1;
   zs->payload[10] = dw2;
   zs->payload[11] = dw4;
   /* do not increment reference count */
   zs->hiz_bo = info.hiz.bo;
}
 
/*
 * Compute the guardband for a viewport centered at (center_x, center_y)
 * in screen space.  The extents are returned through the four out
 * parameters.
 */
static void
viewport_get_guardband(const struct ilo_dev *dev,
                       int center_x, int center_y,
                       int *min_gbx, int *max_gbx,
                       int *min_gby, int *max_gby)
{
   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 234:
    *
    *     "Per-Device Guardband Extents
    *
    *       - Supported X,Y ScreenSpace "Guardband" Extent: [-16K,16K-1]
    *       - Maximum Post-Clamp Delta (X or Y): 16K"
    *
    *     "In addition, in order to be correctly rendered, objects must have a
    *      screenspace bounding box not exceeding 8K in the X or Y direction.
    *      This additional restriction must also be comprehended by software,
    *      i.e., enforced by use of clipping."
    *
    * From the Ivy Bridge PRM, volume 2 part 1, page 248:
    *
    *     "Per-Device Guardband Extents
    *
    *       - Supported X,Y ScreenSpace "Guardband" Extent: [-32K,32K-1]
    *       - Maximum Post-Clamp Delta (X or Y): N/A"
    *
    *     "In addition, in order to be correctly rendered, objects must have a
    *      screenspace bounding box not exceeding 8K in the X or Y direction.
    *      This additional restriction must also be comprehended by software,
    *      i.e., enforced by use of clipping."
    *
    * Combined, the bounding box of any object can not exceed 8K in both
    * width and height.
    *
    * Below we set the guardband as a square of length 8K, centered at where
    * the viewport is.  This makes sure all objects passing the GB test are
    * valid to the renderer, and those failing the XY clipping have a
    * better chance of passing the GB test.
    */
   const int max_extent = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 32768 : 16384;
   const int half_len = 8192 / 2;

   /* make sure the guardband is within the valid range */
   if (center_x - half_len < -max_extent)
      center_x = -max_extent + half_len;
   else if (center_x + half_len > max_extent - 1)
      center_x = max_extent - half_len;

   if (center_y - half_len < -max_extent)
      center_y = -max_extent + half_len;
   else if (center_y + half_len > max_extent - 1)
      center_y = max_extent - half_len;

   /*
    * These are integer results stored through int pointers; the previous
    * casts to float were spurious and have been dropped.
    */
   *min_gbx = center_x - half_len;
   *max_gbx = center_x + half_len;
   *min_gby = center_y - half_len;
   *max_gby = center_y + half_len;
}
 
/**
 * Fill \p vp from the gallium viewport state: the raw scale/translate
 * matrix entries, the guardband converted to NDC space, and the viewport
 * extents in screen space.
 *
 * Uses fabsf() (not the double-precision fabs() the original used) since
 * the operands are floats; the results are identical.
 */
void
ilo_gpe_set_viewport_cso(const struct ilo_dev *dev,
                         const struct pipe_viewport_state *state,
                         struct ilo_viewport_cso *vp)
{
   /* NOTE(review): a zero scale would divide by zero below — presumably
    * callers never pass one; confirm against the state tracker. */
   const float scale_x = fabsf(state->scale[0]);
   const float scale_y = fabsf(state->scale[1]);
   const float scale_z = fabsf(state->scale[2]);
   int min_gbx, max_gbx, min_gby, max_gby;

   ILO_DEV_ASSERT(dev, 6, 8);

   viewport_get_guardband(dev,
         (int) state->translate[0],
         (int) state->translate[1],
         &min_gbx, &max_gbx, &min_gby, &max_gby);

   /* matrix form */
   vp->m00 = state->scale[0];
   vp->m11 = state->scale[1];
   vp->m22 = state->scale[2];
   vp->m30 = state->translate[0];
   vp->m31 = state->translate[1];
   vp->m32 = state->translate[2];

   /* guardband in NDC space: undo the viewport transform */
   vp->min_gbx = ((float) min_gbx - state->translate[0]) / scale_x;
   vp->max_gbx = ((float) max_gbx - state->translate[0]) / scale_x;
   vp->min_gby = ((float) min_gby - state->translate[1]) / scale_y;
   vp->max_gby = ((float) max_gby - state->translate[1]) / scale_y;

   /* viewport in screen space: image of the [-1, 1] cube */
   vp->min_x = scale_x * -1.0f + state->translate[0];
   vp->max_x = scale_x * 1.0f + state->translate[0];
   vp->min_y = scale_y * -1.0f + state->translate[1];
   vp->max_y = scale_y * 1.0f + state->translate[1];
   vp->min_z = scale_z * -1.0f + state->translate[2];
   vp->max_z = scale_z * 1.0f + state->translate[2];
}
 
/**
 * Translate a pipe logicop to the matching hardware logicop.
 *
 * Pure one-to-one mapping; falls back to GEN6_LOGICOP_CLEAR (with an
 * assertion in debug builds) for unknown values.
 */
static int
gen6_translate_pipe_logicop(unsigned logicop)
{
   switch (logicop) {
   case PIPE_LOGICOP_CLEAR:         return GEN6_LOGICOP_CLEAR;
   case PIPE_LOGICOP_NOR:           return GEN6_LOGICOP_NOR;
   case PIPE_LOGICOP_AND_INVERTED:  return GEN6_LOGICOP_AND_INVERTED;
   case PIPE_LOGICOP_COPY_INVERTED: return GEN6_LOGICOP_COPY_INVERTED;
   case PIPE_LOGICOP_AND_REVERSE:   return GEN6_LOGICOP_AND_REVERSE;
   case PIPE_LOGICOP_INVERT:        return GEN6_LOGICOP_INVERT;
   case PIPE_LOGICOP_XOR:           return GEN6_LOGICOP_XOR;
   case PIPE_LOGICOP_NAND:          return GEN6_LOGICOP_NAND;
   case PIPE_LOGICOP_AND:           return GEN6_LOGICOP_AND;
   case PIPE_LOGICOP_EQUIV:         return GEN6_LOGICOP_EQUIV;
   case PIPE_LOGICOP_NOOP:          return GEN6_LOGICOP_NOOP;
   case PIPE_LOGICOP_OR_INVERTED:   return GEN6_LOGICOP_OR_INVERTED;
   case PIPE_LOGICOP_COPY:          return GEN6_LOGICOP_COPY;
   case PIPE_LOGICOP_OR_REVERSE:    return GEN6_LOGICOP_OR_REVERSE;
   case PIPE_LOGICOP_OR:            return GEN6_LOGICOP_OR;
   case PIPE_LOGICOP_SET:           return GEN6_LOGICOP_SET;
   default:
      assert(!"unknown logicop function");
      return GEN6_LOGICOP_CLEAR;
   }
}
 
/**
 * Translate a pipe blend function to the matching hardware blend function.
 *
 * Falls back to GEN6_BLENDFUNCTION_ADD (with an assertion in debug builds)
 * for unknown values.  The stray ';' that followed the switch's closing
 * brace has been removed.
 */
static int
gen6_translate_pipe_blend(unsigned blend)
{
   switch (blend) {
   case PIPE_BLEND_ADD:              return GEN6_BLENDFUNCTION_ADD;
   case PIPE_BLEND_SUBTRACT:         return GEN6_BLENDFUNCTION_SUBTRACT;
   case PIPE_BLEND_REVERSE_SUBTRACT: return GEN6_BLENDFUNCTION_REVERSE_SUBTRACT;
   case PIPE_BLEND_MIN:              return GEN6_BLENDFUNCTION_MIN;
   case PIPE_BLEND_MAX:              return GEN6_BLENDFUNCTION_MAX;
   default:
      assert(!"unknown blend function");
      return GEN6_BLENDFUNCTION_ADD;
   }
}
 
/**
 * Translate a pipe blend factor to the matching hardware blend factor.
 *
 * Falls back to GEN6_BLENDFACTOR_ONE (with an assertion in debug builds)
 * for unknown values.  The stray ';' that followed the switch's closing
 * brace has been removed.
 */
static int
gen6_translate_pipe_blendfactor(unsigned blendfactor)
{
   switch (blendfactor) {
   case PIPE_BLENDFACTOR_ONE:                return GEN6_BLENDFACTOR_ONE;
   case PIPE_BLENDFACTOR_SRC_COLOR:          return GEN6_BLENDFACTOR_SRC_COLOR;
   case PIPE_BLENDFACTOR_SRC_ALPHA:          return GEN6_BLENDFACTOR_SRC_ALPHA;
   case PIPE_BLENDFACTOR_DST_ALPHA:          return GEN6_BLENDFACTOR_DST_ALPHA;
   case PIPE_BLENDFACTOR_DST_COLOR:          return GEN6_BLENDFACTOR_DST_COLOR;
   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE;
   case PIPE_BLENDFACTOR_CONST_COLOR:        return GEN6_BLENDFACTOR_CONST_COLOR;
   case PIPE_BLENDFACTOR_CONST_ALPHA:        return GEN6_BLENDFACTOR_CONST_ALPHA;
   case PIPE_BLENDFACTOR_SRC1_COLOR:         return GEN6_BLENDFACTOR_SRC1_COLOR;
   case PIPE_BLENDFACTOR_SRC1_ALPHA:         return GEN6_BLENDFACTOR_SRC1_ALPHA;
   case PIPE_BLENDFACTOR_ZERO:               return GEN6_BLENDFACTOR_ZERO;
   case PIPE_BLENDFACTOR_INV_SRC_COLOR:      return GEN6_BLENDFACTOR_INV_SRC_COLOR;
   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:      return GEN6_BLENDFACTOR_INV_SRC_ALPHA;
   case PIPE_BLENDFACTOR_INV_DST_ALPHA:      return GEN6_BLENDFACTOR_INV_DST_ALPHA;
   case PIPE_BLENDFACTOR_INV_DST_COLOR:      return GEN6_BLENDFACTOR_INV_DST_COLOR;
   case PIPE_BLENDFACTOR_INV_CONST_COLOR:    return GEN6_BLENDFACTOR_INV_CONST_COLOR;
   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:    return GEN6_BLENDFACTOR_INV_CONST_ALPHA;
   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:     return GEN6_BLENDFACTOR_INV_SRC1_COLOR;
   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:     return GEN6_BLENDFACTOR_INV_SRC1_ALPHA;
   default:
      assert(!"unknown blend factor");
      return GEN6_BLENDFACTOR_ONE;
   }
}
 
/**
 * Translate a pipe stencil op to the matching hardware stencil op.
 *
 * Note the naming mismatch: gallium's INCR/DECR saturate while the
 * hardware's INCR/DECR wrap, so PIPE_STENCIL_OP_INCR maps to
 * GEN6_STENCILOP_INCRSAT and PIPE_STENCIL_OP_INCR_WRAP maps to
 * GEN6_STENCILOP_INCR (likewise for DECR).
 */
static int
gen6_translate_pipe_stencil_op(unsigned stencil_op)
{
   switch (stencil_op) {
   case PIPE_STENCIL_OP_KEEP:      return GEN6_STENCILOP_KEEP;
   case PIPE_STENCIL_OP_ZERO:      return GEN6_STENCILOP_ZERO;
   case PIPE_STENCIL_OP_REPLACE:   return GEN6_STENCILOP_REPLACE;
   case PIPE_STENCIL_OP_INCR:      return GEN6_STENCILOP_INCRSAT;
   case PIPE_STENCIL_OP_DECR:      return GEN6_STENCILOP_DECRSAT;
   case PIPE_STENCIL_OP_INCR_WRAP: return GEN6_STENCILOP_INCR;
   case PIPE_STENCIL_OP_DECR_WRAP: return GEN6_STENCILOP_DECR;
   case PIPE_STENCIL_OP_INVERT:    return GEN6_STENCILOP_INVERT;
   default:
      assert(!"unknown stencil op");
      return GEN6_STENCILOP_KEEP;
   }
}
 
/**
 * Rewrite a hardware blend factor as if the destination alpha were a
 * constant 1.0: DST_ALPHA becomes ONE, while INV_DST_ALPHA and
 * SRC_ALPHA_SATURATE become ZERO.  Other factors pass through unchanged.
 */
static int
gen6_blend_factor_dst_alpha_forced_one(int factor)
{
   if (factor == GEN6_BLENDFACTOR_DST_ALPHA)
      return GEN6_BLENDFACTOR_ONE;

   if (factor == GEN6_BLENDFACTOR_INV_DST_ALPHA ||
       factor == GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE)
      return GEN6_BLENDFACTOR_ZERO;

   return factor;
}
 
/**
 * Build the blend-enable portion of BLEND_STATE DW0 for one render target
 * on Gen6-7.5.  Returns 0 when blending is disabled for \p rt.
 *
 * When \p dst_alpha_forced_one is set, factors reading the destination
 * alpha are rewritten as if dst alpha were 1.0 (used when the render
 * format has no real alpha channel; see fb_set_blend_caps()).
 */
static uint32_t
blend_get_rt_blend_enable_gen6(const struct ilo_dev *dev,
                               const struct pipe_rt_blend_state *rt,
                               bool dst_alpha_forced_one)
{
   int rgb_src, rgb_dst, a_src, a_dst;
   uint32_t dw;

   ILO_DEV_ASSERT(dev, 6, 7.5);

   if (!rt->blend_enable)
      return 0;

   rgb_src = gen6_translate_pipe_blendfactor(rt->rgb_src_factor);
   rgb_dst = gen6_translate_pipe_blendfactor(rt->rgb_dst_factor);
   a_src = gen6_translate_pipe_blendfactor(rt->alpha_src_factor);
   a_dst = gen6_translate_pipe_blendfactor(rt->alpha_dst_factor);

   if (dst_alpha_forced_one) {
      rgb_src = gen6_blend_factor_dst_alpha_forced_one(rgb_src);
      rgb_dst = gen6_blend_factor_dst_alpha_forced_one(rgb_dst);
      a_src = gen6_blend_factor_dst_alpha_forced_one(a_src);
      a_dst = gen6_blend_factor_dst_alpha_forced_one(a_dst);
   }

   /* pack functions and factors into their BLEND_STATE DW0 bitfields */
   dw = GEN6_RT_DW0_BLEND_ENABLE |
        gen6_translate_pipe_blend(rt->alpha_func) << 26 |
        a_src << 20 |
        a_dst << 15 |
        gen6_translate_pipe_blend(rt->rgb_func) << 11 |
        rgb_src << 5 |
        rgb_dst;

   /* the hardware needs this bit whenever alpha blending differs from rgb */
   if (rt->rgb_func != rt->alpha_func ||
       rgb_src != a_src || rgb_dst != a_dst)
      dw |= GEN6_RT_DW0_INDEPENDENT_ALPHA_ENABLE;

   return dw;
}
 
/**
 * Build the blend-enable portion of BLEND_STATE DW0 for one render target
 * on Gen8.  Returns 0 (and clears \p independent_alpha) when blending is
 * disabled for \p rt.
 *
 * Unlike the Gen6 variant, the independent-alpha condition is reported to
 * the caller through \p independent_alpha, as Gen8 keeps that control in a
 * shared dword rather than per-RT.
 */
static uint32_t
blend_get_rt_blend_enable_gen8(const struct ilo_dev *dev,
                               const struct pipe_rt_blend_state *rt,
                               bool dst_alpha_forced_one,
                               bool *independent_alpha)
{
   int rgb_src, rgb_dst, a_src, a_dst;
   uint32_t dw;

   ILO_DEV_ASSERT(dev, 8, 8);

   if (!rt->blend_enable) {
      *independent_alpha = false;
      return 0;
   }

   rgb_src = gen6_translate_pipe_blendfactor(rt->rgb_src_factor);
   rgb_dst = gen6_translate_pipe_blendfactor(rt->rgb_dst_factor);
   a_src = gen6_translate_pipe_blendfactor(rt->alpha_src_factor);
   a_dst = gen6_translate_pipe_blendfactor(rt->alpha_dst_factor);

   /* rewrite dst-alpha-reading factors when the RT has no real alpha */
   if (dst_alpha_forced_one) {
      rgb_src = gen6_blend_factor_dst_alpha_forced_one(rgb_src);
      rgb_dst = gen6_blend_factor_dst_alpha_forced_one(rgb_dst);
      a_src = gen6_blend_factor_dst_alpha_forced_one(a_src);
      a_dst = gen6_blend_factor_dst_alpha_forced_one(a_dst);
   }

   /* pack functions and factors into their Gen8 BLEND_STATE DW0 bitfields */
   dw = GEN8_RT_DW0_BLEND_ENABLE |
        rgb_src << 26 |
        rgb_dst << 21 |
        gen6_translate_pipe_blend(rt->rgb_func) << 18 |
        a_src << 13 |
        a_dst << 8 |
        gen6_translate_pipe_blend(rt->alpha_func) << 5;

   *independent_alpha = (rt->rgb_func != rt->alpha_func ||
                         rgb_src != a_src ||
                         rgb_dst != a_dst);

   return dw;
}
 
/**
 * Initialize the per-RT blend CSO at \p index from the gallium blend state
 * for Gen6-7.5.
 *
 * Two precomputed blend dwords are stored: the normal one and one with
 * destination alpha forced to 1.0; the emitter picks between them based on
 * the bound render-target format.
 */
static void
blend_init_cso_gen6(const struct ilo_dev *dev,
                    const struct pipe_blend_state *state,
                    struct ilo_blend_state *blend,
                    unsigned index)
{
   const struct pipe_rt_blend_state *rt = &state->rt[index];
   struct ilo_blend_cso *cso = &blend->cso[index];

   ILO_DEV_ASSERT(dev, 6, 7.5);

   cso->payload[0] = 0;
   cso->payload[1] = GEN6_RT_DW1_COLORCLAMP_RTFORMAT |
                     GEN6_RT_DW1_PRE_BLEND_CLAMP |
                     GEN6_RT_DW1_POST_BLEND_CLAMP;

   /* translate the colormask into per-channel write-disable bits */
   if (!(rt->colormask & PIPE_MASK_A))
      cso->payload[1] |= GEN6_RT_DW1_WRITE_DISABLE_A;
   if (!(rt->colormask & PIPE_MASK_R))
      cso->payload[1] |= GEN6_RT_DW1_WRITE_DISABLE_R;
   if (!(rt->colormask & PIPE_MASK_G))
      cso->payload[1] |= GEN6_RT_DW1_WRITE_DISABLE_G;
   if (!(rt->colormask & PIPE_MASK_B))
      cso->payload[1] |= GEN6_RT_DW1_WRITE_DISABLE_B;

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 365:
    *
    *     "Color Buffer Blending and Logic Ops must not be enabled
    *      simultaneously, or behavior is UNDEFINED."
    *
    * Since state->logicop_enable takes precedence over rt->blend_enable,
    * no special care is needed.
    */
   if (state->logicop_enable) {
      cso->dw_blend = 0;
      cso->dw_blend_dst_alpha_forced_one = 0;
   } else {
      cso->dw_blend = blend_get_rt_blend_enable_gen6(dev, rt, false);
      cso->dw_blend_dst_alpha_forced_one =
         blend_get_rt_blend_enable_gen6(dev, rt, true);
   }
}
 
/**
 * Initialize the per-RT blend CSO at \p index from the gallium blend state
 * for Gen8.  Returns whether the RT requires independent alpha blending
 * (in either the normal or the dst-alpha-forced-one variant).
 */
static bool
blend_init_cso_gen8(const struct ilo_dev *dev,
                    const struct pipe_blend_state *state,
                    struct ilo_blend_state *blend,
                    unsigned index)
{
   const struct pipe_rt_blend_state *rt = &state->rt[index];
   struct ilo_blend_cso *cso = &blend->cso[index];
   bool independent_alpha = false;

   ILO_DEV_ASSERT(dev, 8, 8);

   /* DW0 carries the write-disable bits; DW1 the clamp controls */
   cso->payload[0] = 0;
   cso->payload[1] = GEN8_RT_DW1_COLORCLAMP_RTFORMAT |
                     GEN8_RT_DW1_PRE_BLEND_CLAMP |
                     GEN8_RT_DW1_POST_BLEND_CLAMP;

   if (!(rt->colormask & PIPE_MASK_A))
      cso->payload[0] |= GEN8_RT_DW0_WRITE_DISABLE_A;
   if (!(rt->colormask & PIPE_MASK_R))
      cso->payload[0] |= GEN8_RT_DW0_WRITE_DISABLE_R;
   if (!(rt->colormask & PIPE_MASK_G))
      cso->payload[0] |= GEN8_RT_DW0_WRITE_DISABLE_G;
   if (!(rt->colormask & PIPE_MASK_B))
      cso->payload[0] |= GEN8_RT_DW0_WRITE_DISABLE_B;

   if (state->logicop_enable) {
      /* logicop takes precedence over blending */
      cso->dw_blend = 0;
      cso->dw_blend_dst_alpha_forced_one = 0;
   } else {
      bool indep_normal, indep_forced;

      cso->dw_blend =
         blend_get_rt_blend_enable_gen8(dev, rt, false, &indep_normal);
      cso->dw_blend_dst_alpha_forced_one =
         blend_get_rt_blend_enable_gen8(dev, rt, true, &indep_forced);

      independent_alpha = indep_normal || indep_forced;
   }

   return independent_alpha;
}
 
/**
 * Build the logicop portion of BLEND_STATE DW1 for Gen6-7.5, or 0 when
 * logic ops are disabled.
 */
static uint32_t
blend_get_logicop_enable_gen6(const struct ilo_dev *dev,
                              const struct pipe_blend_state *state)
{
   ILO_DEV_ASSERT(dev, 6, 7.5);

   return (state->logicop_enable) ?
      GEN6_RT_DW1_LOGICOP_ENABLE |
      gen6_translate_pipe_logicop(state->logicop_func) << 18 : 0;
}
 
/**
 * Build the logicop portion of BLEND_STATE DW1 for Gen8, or 0 when logic
 * ops are disabled.
 */
static uint32_t
blend_get_logicop_enable_gen8(const struct ilo_dev *dev,
                              const struct pipe_blend_state *state)
{
   ILO_DEV_ASSERT(dev, 8, 8);

   return (state->logicop_enable) ?
      GEN8_RT_DW1_LOGICOP_ENABLE |
      gen6_translate_pipe_logicop(state->logicop_func) << 27 : 0;
}
 
/**
 * Build the alpha-to-coverage/alpha-to-one bits for Gen6-7.5.
 */
static uint32_t
blend_get_alpha_mod_gen6(const struct ilo_dev *dev,
                         const struct pipe_blend_state *state,
                         bool dual_blend)
{
   uint32_t dw = 0;

   ILO_DEV_ASSERT(dev, 6, 7.5);

   if (state->alpha_to_coverage) {
      dw |= GEN6_RT_DW1_ALPHA_TO_COVERAGE;
      /* the dither control only exists on Gen7+ */
      if (ilo_dev_gen(dev) >= ILO_GEN(7))
         dw |= GEN6_RT_DW1_ALPHA_TO_COVERAGE_DITHER;
   }
   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 378:
    *
    *     "If Dual Source Blending is enabled, this bit (AlphaToOne Enable)
    *      must be disabled."
    */
   if (state->alpha_to_one && !dual_blend)
      dw |= GEN6_RT_DW1_ALPHA_TO_ONE;

   return dw;
}
 
/**
 * Build the alpha-to-coverage/alpha-to-one bits for Gen8.
 */
static uint32_t
blend_get_alpha_mod_gen8(const struct ilo_dev *dev,
                         const struct pipe_blend_state *state,
                         bool dual_blend)
{
   uint32_t dw = 0;

   ILO_DEV_ASSERT(dev, 8, 8);

   if (state->alpha_to_coverage)
      dw |= GEN8_BLEND_DW0_ALPHA_TO_COVERAGE |
            GEN8_BLEND_DW0_ALPHA_TO_COVERAGE_DITHER;

   /* alpha-to-one must stay off with dual-source blending */
   if (!dual_blend && state->alpha_to_one)
      dw |= GEN8_BLEND_DW0_ALPHA_TO_ONE;

   return dw;
}
 
/**
 * Derive 3DSTATE_PS_BLEND DW1 from a render target's BLEND_STATE DW0 on
 * Gen8, or 0 when that dword has blending disabled.
 */
static uint32_t
blend_get_ps_blend_gen8(const struct ilo_dev *dev, uint32_t rt_dw0)
{
   int src_a, dst_a, src_rgb, dst_rgb;
   uint32_t dw;

   ILO_DEV_ASSERT(dev, 8, 8);

   if (!(rt_dw0 & GEN8_RT_DW0_BLEND_ENABLE))
      return 0;

   /* pull the four blend factors back out of the BLEND_STATE dword */
   src_a = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_SRC_ALPHA_FACTOR);
   dst_a = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_DST_ALPHA_FACTOR);
   src_rgb = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_SRC_COLOR_FACTOR);
   dst_rgb = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_DST_COLOR_FACTOR);

   /* repack them at the 3DSTATE_PS_BLEND offsets */
   dw = GEN8_PS_BLEND_DW1_BLEND_ENABLE |
        GEN_SHIFT32(src_a, GEN8_PS_BLEND_DW1_SRC_ALPHA_FACTOR) |
        GEN_SHIFT32(dst_a, GEN8_PS_BLEND_DW1_DST_ALPHA_FACTOR) |
        GEN_SHIFT32(src_rgb, GEN8_PS_BLEND_DW1_SRC_COLOR_FACTOR) |
        GEN_SHIFT32(dst_rgb, GEN8_PS_BLEND_DW1_DST_COLOR_FACTOR);

   if (src_a != src_rgb || dst_a != dst_rgb)
      dw |= GEN8_PS_BLEND_DW1_INDEPENDENT_ALPHA_ENABLE;

   return dw;
}
 
/**
 * Initialize \p blend from the gallium blend state.
 *
 * Precomputes the per-RT blend CSOs plus the shared dwords (dither,
 * independent-alpha, logicop, alpha modifiers) that the state emitters OR
 * together later.  When independent per-RT blending is off, RT 0's CSO is
 * replicated to all slots.
 */
void
ilo_gpe_init_blend(const struct ilo_dev *dev,
                   const struct pipe_blend_state *state,
                   struct ilo_blend_state *blend)
{
   unsigned i;

   ILO_DEV_ASSERT(dev, 6, 8);

   /* dual-source blending is only meaningful with RT 0 blending enabled
    * and logicop off (logicop takes precedence over blending) */
   blend->dual_blend = (util_blend_state_is_dual(state, 0) &&
                        state->rt[0].blend_enable &&
                        !state->logicop_enable);
   blend->alpha_to_coverage = state->alpha_to_coverage;

   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
      bool independent_alpha;

      blend->dw_alpha_mod =
         blend_get_alpha_mod_gen8(dev, state, blend->dual_blend);
      blend->dw_logicop = blend_get_logicop_enable_gen8(dev, state);
      blend->dw_shared = (state->dither) ? GEN8_BLEND_DW0_DITHER_ENABLE : 0;

      /* on Gen8 the independent-alpha control lives in the shared dword */
      independent_alpha = blend_init_cso_gen8(dev, state, blend, 0);
      if (independent_alpha)
         blend->dw_shared |= GEN8_BLEND_DW0_INDEPENDENT_ALPHA_ENABLE;

      blend->dw_ps_blend = blend_get_ps_blend_gen8(dev,
            blend->cso[0].dw_blend);
      blend->dw_ps_blend_dst_alpha_forced_one = blend_get_ps_blend_gen8(dev,
            blend->cso[0].dw_blend_dst_alpha_forced_one);

      if (state->independent_blend_enable) {
         for (i = 1; i < Elements(blend->cso); i++) {
            independent_alpha = blend_init_cso_gen8(dev, state, blend, i);
            if (independent_alpha)
               blend->dw_shared |= GEN8_BLEND_DW0_INDEPENDENT_ALPHA_ENABLE;
         }
      } else {
         for (i = 1; i < Elements(blend->cso); i++)
            blend->cso[i] = blend->cso[0];
      }
   } else {
      blend->dw_alpha_mod =
         blend_get_alpha_mod_gen6(dev, state, blend->dual_blend);
      blend->dw_logicop = blend_get_logicop_enable_gen6(dev, state);
      blend->dw_shared = (state->dither) ? GEN6_RT_DW1_DITHER_ENABLE : 0;

      /* 3DSTATE_PS_BLEND does not exist before Gen8 */
      blend->dw_ps_blend = 0;
      blend->dw_ps_blend_dst_alpha_forced_one = 0;

      blend_init_cso_gen6(dev, state, blend, 0);
      if (state->independent_blend_enable) {
         for (i = 1; i < Elements(blend->cso); i++)
            blend_init_cso_gen6(dev, state, blend, i);
      } else {
         for (i = 1; i < Elements(blend->cso); i++)
            blend->cso[i] = blend->cso[0];
      }
   }
}
 
/**
 * Translate a pipe DSA test function to the matching hardware compare
 * function.
 *
 * Shared by the depth, stencil, and alpha test paths.  Falls back to
 * GEN6_COMPAREFUNCTION_NEVER (with an assertion in debug builds) for
 * unknown values.
 */
static int
gen6_translate_dsa_func(unsigned func)
{
   switch (func) {
   case PIPE_FUNC_NEVER:    return GEN6_COMPAREFUNCTION_NEVER;
   case PIPE_FUNC_LESS:     return GEN6_COMPAREFUNCTION_LESS;
   case PIPE_FUNC_EQUAL:    return GEN6_COMPAREFUNCTION_EQUAL;
   case PIPE_FUNC_LEQUAL:   return GEN6_COMPAREFUNCTION_LEQUAL;
   case PIPE_FUNC_GREATER:  return GEN6_COMPAREFUNCTION_GREATER;
   case PIPE_FUNC_NOTEQUAL: return GEN6_COMPAREFUNCTION_NOTEQUAL;
   case PIPE_FUNC_GEQUAL:   return GEN6_COMPAREFUNCTION_GEQUAL;
   case PIPE_FUNC_ALWAYS:   return GEN6_COMPAREFUNCTION_ALWAYS;
   default:
      assert(!"unknown depth/stencil/alpha test function");
      return GEN6_COMPAREFUNCTION_NEVER;
   }
}
 
/**
 * Build DEPTH_STENCIL_STATE DW0 (the stencil controls) for Gen6-7.5 from
 * the front (\p stencil0) and back (\p stencil1) stencil states.
 * Returns 0 when stencil testing is disabled.
 */
static uint32_t
dsa_get_stencil_enable_gen6(const struct ilo_dev *dev,
                            const struct pipe_stencil_state *stencil0,
                            const struct pipe_stencil_state *stencil1)
{
   uint32_t dw;

   ILO_DEV_ASSERT(dev, 6, 7.5);

   if (!stencil0->enabled)
      return 0;

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 359:
    *
    *     "If the Depth Buffer is either undefined or does not have a surface
    *      format of D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT and separate
    *      stencil buffer is disabled, Stencil Test Enable must be DISABLED"
    *
    * From the Sandy Bridge PRM, volume 2 part 1, page 370:
    *
    *     "This field (Stencil Test Enable) cannot be enabled if
    *      Surface Format in 3DSTATE_DEPTH_BUFFER is set to D16_UNORM."
    *
    * TODO We do not check these yet.
    */
   /* front-face function and ops */
   dw = GEN6_ZS_DW0_STENCIL_TEST_ENABLE |
        gen6_translate_dsa_func(stencil0->func) << 28 |
        gen6_translate_pipe_stencil_op(stencil0->fail_op) << 25 |
        gen6_translate_pipe_stencil_op(stencil0->zfail_op) << 22 |
        gen6_translate_pipe_stencil_op(stencil0->zpass_op) << 19;
   if (stencil0->writemask)
      dw |= GEN6_ZS_DW0_STENCIL_WRITE_ENABLE;

   /* back-face function and ops */
   if (stencil1->enabled) {
      dw |= GEN6_ZS_DW0_STENCIL1_ENABLE |
            gen6_translate_dsa_func(stencil1->func) << 12 |
            gen6_translate_pipe_stencil_op(stencil1->fail_op) << 9 |
            gen6_translate_pipe_stencil_op(stencil1->zfail_op) << 6 |
            gen6_translate_pipe_stencil_op(stencil1->zpass_op) << 3;
      if (stencil1->writemask)
         dw |= GEN6_ZS_DW0_STENCIL_WRITE_ENABLE;
   }

   return dw;
}
 
/**
 * Build 3DSTATE_WM_DEPTH_STENCIL DW1 (the stencil controls) for Gen8 from
 * the front (\p stencil0) and back (\p stencil1) stencil states.
 * Returns 0 when stencil testing is disabled.
 */
static uint32_t
dsa_get_stencil_enable_gen8(const struct ilo_dev *dev,
                            const struct pipe_stencil_state *stencil0,
                            const struct pipe_stencil_state *stencil1)
{
   uint32_t dw;

   ILO_DEV_ASSERT(dev, 8, 8);

   if (!stencil0->enabled)
      return 0;

   /* front-face ops and function */
   dw = GEN8_ZS_DW1_STENCIL_TEST_ENABLE;
   dw |= gen6_translate_pipe_stencil_op(stencil0->fail_op) << 29;
   dw |= gen6_translate_pipe_stencil_op(stencil0->zfail_op) << 26;
   dw |= gen6_translate_pipe_stencil_op(stencil0->zpass_op) << 23;
   dw |= gen6_translate_dsa_func(stencil0->func) << 8;
   if (stencil0->writemask)
      dw |= GEN8_ZS_DW1_STENCIL_WRITE_ENABLE;

   /* back-face ops and function */
   if (stencil1->enabled) {
      dw |= GEN8_ZS_DW1_STENCIL1_ENABLE;
      dw |= gen6_translate_dsa_func(stencil1->func) << 20;
      dw |= gen6_translate_pipe_stencil_op(stencil1->fail_op) << 17;
      dw |= gen6_translate_pipe_stencil_op(stencil1->zfail_op) << 14;
      dw |= gen6_translate_pipe_stencil_op(stencil1->zpass_op) << 11;
      if (stencil1->writemask)
         dw |= GEN8_ZS_DW1_STENCIL_WRITE_ENABLE;
   }

   return dw;
}
 
/**
 * Build DEPTH_STENCIL_STATE DW2 (the depth controls) for Gen6-7.5.  With
 * the test disabled, the compare function is still programmed to ALWAYS.
 *
 * Per the Sandy Bridge PRM (vol 2 part 1, pages 360 and 375), enabling the
 * depth test or depth writes without a depth buffer is UNDEFINED; we do
 * not check for that yet.
 */
static uint32_t
dsa_get_depth_enable_gen6(const struct ilo_dev *dev,
                          const struct pipe_depth_state *state)
{
   uint32_t dw;

   ILO_DEV_ASSERT(dev, 6, 7.5);

   dw = (state->enabled) ?
      GEN6_ZS_DW2_DEPTH_TEST_ENABLE |
      gen6_translate_dsa_func(state->func) << 27 :
      GEN6_COMPAREFUNCTION_ALWAYS << 27;

   if (state->writemask)
      dw |= GEN6_ZS_DW2_DEPTH_WRITE_ENABLE;

   return dw;
}
 
/**
 * Build 3DSTATE_WM_DEPTH_STENCIL DW1 (the depth controls) for Gen8.  With
 * the test disabled, the compare function is still programmed to ALWAYS.
 */
static uint32_t
dsa_get_depth_enable_gen8(const struct ilo_dev *dev,
                          const struct pipe_depth_state *state)
{
   uint32_t dw;

   ILO_DEV_ASSERT(dev, 8, 8);

   dw = (state->enabled) ?
      GEN8_ZS_DW1_DEPTH_TEST_ENABLE |
      gen6_translate_dsa_func(state->func) << 5 :
      GEN6_COMPAREFUNCTION_ALWAYS << 5;

   if (state->writemask)
      dw |= GEN8_ZS_DW1_DEPTH_WRITE_ENABLE;

   return dw;
}
 
/**
 * Build the alpha-test bits for Gen6-7.5, or 0 when the alpha test is
 * disabled.  The caller ORs the result into BLEND_STATE.
 */
static uint32_t
dsa_get_alpha_enable_gen6(const struct ilo_dev *dev,
                          const struct pipe_alpha_state *state)
{
   ILO_DEV_ASSERT(dev, 6, 7.5);

   return (state->enabled) ?
      GEN6_RT_DW1_ALPHA_TEST_ENABLE |
      gen6_translate_dsa_func(state->func) << 13 : 0;
}
 
/**
 * Build the alpha-test bits for Gen8, or 0 when the alpha test is
 * disabled.  The caller ORs the result into BLEND_STATE.
 */
static uint32_t
dsa_get_alpha_enable_gen8(const struct ilo_dev *dev,
                          const struct pipe_alpha_state *state)
{
   ILO_DEV_ASSERT(dev, 8, 8);

   return (state->enabled) ?
      GEN8_BLEND_DW0_ALPHA_TEST_ENABLE |
      gen6_translate_dsa_func(state->func) << 24 : 0;
}
 
/**
 * Initialize \p dsa from the gallium depth/stencil/alpha state.
 *
 * On Gen8, stencil and depth controls share payload[0]; on Gen6-7.5,
 * payload[0] holds the stencil dword and payload[2] the depth dword.
 * payload[1] packs the stencil test/write masks for both faces in either
 * case.  The alpha-test bits are kept separately for the blend emitters.
 */
void
ilo_gpe_init_dsa(const struct ilo_dev *dev,
                 const struct pipe_depth_stencil_alpha_state *state,
                 struct ilo_dsa_state *dsa)
{
   ILO_DEV_ASSERT(dev, 6, 8);

   STATIC_ASSERT(Elements(dsa->payload) >= 3);

   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
      const uint32_t dw_stencil = dsa_get_stencil_enable_gen8(dev,
            &state->stencil[0], &state->stencil[1]);
      const uint32_t dw_depth = dsa_get_depth_enable_gen8(dev, &state->depth);

      /* the stencil and depth bitfields must not overlap */
      assert(!(dw_stencil & dw_depth));
      dsa->payload[0] = dw_stencil | dw_depth;

      dsa->dw_blend_alpha = dsa_get_alpha_enable_gen8(dev, &state->alpha);
      dsa->dw_ps_blend_alpha = (state->alpha.enabled) ?
         GEN8_PS_BLEND_DW1_ALPHA_TEST_ENABLE : 0;
   } else {
      dsa->payload[0] = dsa_get_stencil_enable_gen6(dev,
            &state->stencil[0], &state->stencil[1]);
      dsa->payload[2] = dsa_get_depth_enable_gen6(dev, &state->depth);

      dsa->dw_blend_alpha = dsa_get_alpha_enable_gen6(dev, &state->alpha);
      dsa->dw_ps_blend_alpha = 0;
   }

   /* stencil test/write masks: front face in the high half, back face low */
   dsa->payload[1] = state->stencil[0].valuemask << 24 |
                     state->stencil[0].writemask << 16 |
                     state->stencil[1].valuemask << 8 |
                     state->stencil[1].writemask;

   dsa->alpha_ref = float_to_ubyte(state->alpha.ref_value);
}
 
/**
 * Write \p num_states scissor rectangles into \p scissor starting at
 * \p start_slot, packing each as two SCISSOR_RECT dwords (min then max).
 * An empty scissor is encoded by making min greater than max.
 */
void
ilo_gpe_set_scissor(const struct ilo_dev *dev,
                    unsigned start_slot,
                    unsigned num_states,
                    const struct pipe_scissor_state *states,
                    struct ilo_scissor_state *scissor)
{
   unsigned slot;

   ILO_DEV_ASSERT(dev, 6, 8);

   for (slot = 0; slot < num_states; slot++) {
      const struct pipe_scissor_state *s = &states[slot];
      uint16_t min_x, min_y, max_x, max_y;

      /* both bounds are inclusive in SCISSOR_RECT */
      if (s->minx >= s->maxx || s->miny >= s->maxy) {
         /* empty: encode min > max so nothing passes */
         min_x = 1;
         min_y = 1;
         max_x = 0;
         max_y = 0;
      } else {
         min_x = s->minx;
         min_y = s->miny;
         max_x = s->maxx - 1;
         max_y = s->maxy - 1;
      }

      scissor->payload[(start_slot + slot) * 2 + 0] = min_y << 16 | min_x;
      scissor->payload[(start_slot + slot) * 2 + 1] = max_y << 16 | max_x;
   }

   /* keep a copy of scissor 0 when it was just written */
   if (num_states && !start_slot)
      scissor->scissor0 = states[0];
}
 
/**
 * Fill every scissor slot with an empty rectangle (min (1, 1) greater
 * than max (0, 0)) so that all rendering is scissored away.
 */
void
ilo_gpe_set_scissor_null(const struct ilo_dev *dev,
                         struct ilo_scissor_state *scissor)
{
   unsigned i = 0;

   while (i < Elements(scissor->payload)) {
      scissor->payload[i++] = 1 << 16 | 1;
      scissor->payload[i++] = 0;
   }
}
 
/**
 * Derive the blending capabilities of a render-target format: whether
 * logic ops, blending, and alpha test may be enabled, and whether the
 * destination alpha must be treated as a constant 1.0 (when the format is
 * rendered with an alpha-carrying hardware format, e.g. B8G8R8X8 as
 * B8G8R8A8).
 */
static void
fb_set_blend_caps(const struct ilo_dev *dev,
                  enum pipe_format format,
                  struct ilo_fb_blend_caps *caps)
{
   const struct util_format_description *desc =
      util_format_description(format);
   const int ch = util_format_get_first_non_void_channel(format);

   memset(caps, 0, sizeof(*caps));

   if (format == PIPE_FORMAT_NONE || desc->is_mixed)
      return;

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 365:
    *
    *     "Logic Ops are only supported on *_UNORM surfaces (excluding _SRGB
    *      variants), otherwise Logic Ops must be DISABLED."
    *
    * According to the classic driver, this is lifted on Gen8+.
    */
   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
      caps->can_logicop = true;
   } else {
      /* UNORM check based on the first non-void channel */
      caps->can_logicop = (ch >= 0 && desc->channel[ch].normalized &&
            desc->channel[ch].type == UTIL_FORMAT_TYPE_UNSIGNED &&
            desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB);
   }

   /* no blending for pure integer formats */
   caps->can_blend = !util_format_is_pure_integer(format);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 382:
    *
    *     "Alpha Test can only be enabled if Pixel Shader outputs a float
    *      alpha value."
    */
   caps->can_alpha_test = !util_format_is_pure_integer(format);

   /* true when the render format differs from the sampling format */
   caps->dst_alpha_forced_one =
      (ilo_format_translate_render(dev, format) !=
       ilo_format_translate_color(dev, format));

   /* sanity check */
   if (caps->dst_alpha_forced_one) {
      enum pipe_format render_format;

      switch (format) {
      case PIPE_FORMAT_B8G8R8X8_UNORM:
         render_format = PIPE_FORMAT_B8G8R8A8_UNORM;
         break;
      default:
         render_format = PIPE_FORMAT_NONE;
         break;
      }

      assert(ilo_format_translate_render(dev, format) ==
             ilo_format_translate_color(dev, render_format));
   }
}
 
/**
 * Set the framebuffer state.  Besides copying \p state, this derives the
 * per-RT blend caps, a null render-target view sized to the framebuffer
 * (for unbound color buffers), and the effective sample count.
 */
void
ilo_gpe_set_fb(const struct ilo_dev *dev,
               const struct pipe_framebuffer_state *state,
               struct ilo_fb_state *fb)
{
   const struct pipe_surface *first_surf = NULL;
   int i;

   ILO_DEV_ASSERT(dev, 6, 8);

   util_copy_framebuffer_state(&fb->state, state);

   /* a 1x1 null view when the framebuffer has no dimensions */
   ilo_gpe_init_view_surface_null(dev,
         (state->width) ? state->width : 1,
         (state->height) ? state->height : 1,
         1, 0, &fb->null_rt);

   for (i = 0; i < state->nr_cbufs; i++) {
      if (state->cbufs[i]) {
         fb_set_blend_caps(dev, state->cbufs[i]->format, &fb->blend_caps[i]);

         if (!first_surf)
            first_surf = state->cbufs[i];
      } else {
         fb_set_blend_caps(dev, PIPE_FORMAT_NONE, &fb->blend_caps[i]);
      }
   }

   if (!first_surf && state->zsbuf)
      first_surf = state->zsbuf;

   /* sample count of the first bound surface; 0 also means single-sampled */
   fb->num_samples = (first_surf) ? first_surf->texture->nr_samples : 1;
   if (!fb->num_samples)
      fb->num_samples = 1;

   /*
    * The PRMs list several restrictions when the framebuffer has more than
    * one surface.  It seems they are actually lifted on GEN6+.
    */
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_state_3d_top.c
0,0 → 1,1716
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2014 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "genhw/genhw.h"
#include "util/u_dual_blend.h"
#include "util/u_framebuffer.h"
#include "util/u_half.h"
#include "util/u_resource.h"
 
#include "ilo_buffer.h"
#include "ilo_format.h"
#include "ilo_image.h"
#include "ilo_state_3d.h"
#include "../ilo_shader.h"
 
/**
 * Initialize a vertex element CSO: choose the component controls from the
 * source format's component count and pack VERTEX_ELEMENT_STATE DW0/DW1.
 */
static void
ve_init_cso(const struct ilo_dev *dev,
            const struct pipe_vertex_element *state,
            unsigned vb_index,
            struct ilo_ve_cso *cso)
{
   /* default: store all four components from the source */
   int comp[4] = {
      GEN6_VFCOMP_STORE_SRC,
      GEN6_VFCOMP_STORE_SRC,
      GEN6_VFCOMP_STORE_SRC,
      GEN6_VFCOMP_STORE_SRC,
   };
   int format;

   ILO_DEV_ASSERT(dev, 6, 8);

   /*
    * Components missing from the source are stored as constants: zero for
    * y/z, and one (integer or float flavor, depending on the format) for
    * w.  Each case intentionally falls through to fill the remaining
    * components.
    */
   switch (util_format_get_nr_components(state->src_format)) {
   case 1: comp[1] = GEN6_VFCOMP_STORE_0;   /* fall through */
   case 2: comp[2] = GEN6_VFCOMP_STORE_0;   /* fall through */
   case 3: comp[3] = (util_format_is_pure_integer(state->src_format)) ?
                     GEN6_VFCOMP_STORE_1_INT :
                     GEN6_VFCOMP_STORE_1_FP;
   }

   format = ilo_format_translate_vertex(dev, state->src_format);

   STATIC_ASSERT(Elements(cso->payload) >= 2);
   cso->payload[0] =
      vb_index << GEN6_VE_DW0_VB_INDEX__SHIFT |
      GEN6_VE_DW0_VALID |
      format << GEN6_VE_DW0_FORMAT__SHIFT |
      state->src_offset << GEN6_VE_DW0_VB_OFFSET__SHIFT;

   cso->payload[1] =
      comp[0] << GEN6_VE_DW1_COMP0__SHIFT |
      comp[1] << GEN6_VE_DW1_COMP1__SHIFT |
      comp[2] << GEN6_VE_DW1_COMP2__SHIFT |
      comp[3] << GEN6_VE_DW1_COMP3__SHIFT;
}
 
/**
 * Initialize \p ve from the gallium vertex elements, compacting the pipe
 * vertex buffer bindings into hardware vertex buffers.
 *
 * The hardware ties the instance divisor to the vertex buffer rather than
 * to the element, so two elements reading the same pipe vertex buffer with
 * different divisors must be mapped to distinct hardware vertex buffers.
 */
void
ilo_gpe_init_ve(const struct ilo_dev *dev,
                unsigned num_states,
                const struct pipe_vertex_element *states,
                struct ilo_ve_state *ve)
{
   unsigned i;

   ILO_DEV_ASSERT(dev, 6, 8);

   ve->count = num_states;
   ve->vb_count = 0;

   for (i = 0; i < num_states; i++) {
      const unsigned pipe_idx = states[i].vertex_buffer_index;
      const unsigned instance_divisor = states[i].instance_divisor;
      unsigned hw_idx;

      /*
       * map the pipe vb to the hardware vb, which has a fixed instance
       * divisor
       */
      for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
         if (ve->vb_mapping[hw_idx] == pipe_idx &&
             ve->instance_divisors[hw_idx] == instance_divisor)
            break;
      }

      /* create one if there is no matching hardware vb */
      if (hw_idx >= ve->vb_count) {
         hw_idx = ve->vb_count++;

         ve->vb_mapping[hw_idx] = pipe_idx;
         ve->instance_divisors[hw_idx] = instance_divisor;
      }

      ve_init_cso(dev, &states[i], hw_idx, &ve->cso[i]);
   }
}
 
/**
 * Convert \p cso into an edge-flag vertex element in place: set the
 * edge-flag-enable bit, switch the source format to the equivalent UINT
 * format, and store only component 0.
 */
void
ilo_gpe_set_ve_edgeflag(const struct ilo_dev *dev,
                        struct ilo_ve_cso *cso)
{
   int format;

   ILO_DEV_ASSERT(dev, 6, 8);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 94:
    *
    *    "- This bit (Edge Flag Enable) must only be ENABLED on the last
    *       valid VERTEX_ELEMENT structure.
    *
    *     - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
    *       and Component 1-3 Control must be set to VFCOMP_NOSTORE.
    *
    *     - The Source Element Format must be set to the UINT format.
    *
    *     - [DevSNB]: Edge Flags are not supported for QUADLIST
    *       primitives.  Software may elect to convert QUADLIST primitives
    *       to some set of corresponding edge-flag-supported primitive
    *       types (e.g., POLYGONs) prior to submission to the 3D pipeline."
    */
   cso->payload[0] |= GEN6_VE_DW0_EDGE_FLAG_ENABLE;

   /*
    * Edge flags have format GEN6_FORMAT_R8_USCALED when defined via
    * glEdgeFlagPointer(), and format GEN6_FORMAT_R32_FLOAT when defined
    * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
    *
    * Since all the hardware cares about is whether the flags are zero or not,
    * we can treat them as the corresponding _UINT formats.
    */
   format = GEN_EXTRACT(cso->payload[0], GEN6_VE_DW0_FORMAT);
   cso->payload[0] &= ~GEN6_VE_DW0_FORMAT__MASK;

   switch (format) {
   case GEN6_FORMAT_R32_FLOAT:
      format = GEN6_FORMAT_R32_UINT;
      break;
   case GEN6_FORMAT_R8_USCALED:
      format = GEN6_FORMAT_R8_UINT;
      break;
   default:
      break;
   }

   cso->payload[0] |= GEN_SHIFT32(format, GEN6_VE_DW0_FORMAT);

   /* store only component 0, as required when the edge flag is enabled */
   cso->payload[1] =
      GEN6_VFCOMP_STORE_SRC << GEN6_VE_DW1_COMP0__SHIFT |
      GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP1__SHIFT |
      GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP2__SHIFT |
      GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP3__SHIFT;
}
 
/**
 * Initialize a vertex element that reads nothing from any vertex buffer;
 * all four components must be constant (STORE_0/STORE_1/NOSTORE) controls.
 */
void
ilo_gpe_init_ve_nosrc(const struct ilo_dev *dev,
                      int comp0, int comp1, int comp2, int comp3,
                      struct ilo_ve_cso *cso)
{
   const int comps[4] = { comp0, comp1, comp2, comp3 };
   int i;

   ILO_DEV_ASSERT(dev, 6, 8);

   STATIC_ASSERT(Elements(cso->payload) >= 2);

   /* a sourceless element may not store from the (nonexistent) source */
   for (i = 0; i < 4; i++)
      assert(comps[i] != GEN6_VFCOMP_STORE_SRC);

   cso->payload[0] = GEN6_VE_DW0_VALID;
   cso->payload[1] =
      comp0 << GEN6_VE_DW1_COMP0__SHIFT |
      comp1 << GEN6_VE_DW1_COMP1__SHIFT |
      comp2 << GEN6_VE_DW1_COMP2__SHIFT |
      comp3 << GEN6_VE_DW1_COMP3__SHIFT;
}
 
/**
 * Initialize the VS shader CSO: the thread-dispatch dword (DW2), the URB
 * dword (DW4), and the control dword (DW5) of 3DSTATE_VS.
 */
void
ilo_gpe_init_vs_cso(const struct ilo_dev *dev,
                    const struct ilo_shader_state *vs,
                    struct ilo_shader_cso *cso)
{
   int start_grf, vue_read_len, sampler_count, max_threads;
   uint32_t dw2, dw4, dw5;

   ILO_DEV_ASSERT(dev, 6, 8);

   start_grf = ilo_shader_get_kernel_param(vs, ILO_KERNEL_URB_DATA_START_REG);
   vue_read_len = ilo_shader_get_kernel_param(vs, ILO_KERNEL_INPUT_COUNT);
   sampler_count = ilo_shader_get_kernel_param(vs, ILO_KERNEL_SAMPLER_COUNT);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 135:
    *
    *     "(Vertex URB Entry Read Length) Specifies the number of pairs of
    *      128-bit vertex elements to be passed into the payload for each
    *      vertex."
    *
    *     "It is UNDEFINED to set this field to 0 indicating no Vertex URB
    *      data to be read and passed to the thread."
    */
   vue_read_len = (vue_read_len + 1) / 2;
   if (!vue_read_len)
      vue_read_len = 1;

   max_threads = dev->thread_count;
   /* NOTE(review): doubles the VS thread count on Gen7.5 GT2 —
    * presumably matching the hardware's thread budget; confirm against
    * the PRM thread-count tables */
   if (ilo_dev_gen(dev) == ILO_GEN(7.5) && dev->gt == 2)
      max_threads *= 2;

   /*
    * Always use IEEE floating-point mode; the original dead ternary
    * "(true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT" made explicit that ALT
    * mode is never selected.
    */
   dw2 = ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;

   dw4 = start_grf << GEN6_VS_DW4_URB_GRF_START__SHIFT |
         vue_read_len << GEN6_VS_DW4_URB_READ_LEN__SHIFT |
         0 << GEN6_VS_DW4_URB_READ_OFFSET__SHIFT;

   dw5 = GEN6_VS_DW5_STATISTICS |
         GEN6_VS_DW5_VS_ENABLE;

   /* the max-thread field moved on Gen7.5+ */
   if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
      dw5 |= (max_threads - 1) << GEN75_VS_DW5_MAX_THREADS__SHIFT;
   else
      dw5 |= (max_threads - 1) << GEN6_VS_DW5_MAX_THREADS__SHIFT;

   STATIC_ASSERT(Elements(cso->payload) >= 3);
   cso->payload[0] = dw2;
   cso->payload[1] = dw4;
   cso->payload[2] = dw5;
}
 
/*
 * Fill cso->payload[0..3] with the kernel-dependent dwords (DW2, DW4, DW5,
 * and DW6) of the GEN6 3DSTATE_GS command.  The kernel may be a real
 * geometry shader, or the VS-generated stream-output kernel (selected by
 * the ILO_KERNEL_VS_GEN6_SO* params below).
 */
static void
gs_init_cso_gen6(const struct ilo_dev *dev,
                 const struct ilo_shader_state *gs,
                 struct ilo_shader_cso *cso)
{
   int start_grf, vue_read_len, max_threads;
   uint32_t dw2, dw4, dw5, dw6;

   ILO_DEV_ASSERT(dev, 6, 6);

   /* a real GS, or the VS-based stream-output kernel otherwise */
   if (ilo_shader_get_type(gs) == PIPE_SHADER_GEOMETRY) {
      start_grf = ilo_shader_get_kernel_param(gs,
            ILO_KERNEL_URB_DATA_START_REG);

      vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);
   }
   else {
      start_grf = ilo_shader_get_kernel_param(gs,
            ILO_KERNEL_VS_GEN6_SO_START_REG);

      vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_OUTPUT_COUNT);
   }

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 153:
    *
    *     "Specifies the amount of URB data read and passed in the thread
    *      payload for each Vertex URB entry, in 256-bit register increments.
    *
    *      It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
    *      0 indicating no Vertex URB data to be read and passed to the
    *      thread."
    */
   /* convert to 256-bit (pair) units and clamp to the minimum of one */
   vue_read_len = (vue_read_len + 1) / 2;
   if (!vue_read_len)
      vue_read_len = 1;

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 154:
    *
    *     "Maximum Number of Threads valid range is [0,27] when Rendering
    *      Enabled bit is set."
    *
    * From the Sandy Bridge PRM, volume 2 part 1, page 173:
    *
    *     "Programming Note: If the GS stage is enabled, software must always
    *      allocate at least one GS URB Entry.  This is true even if the GS
    *      thread never needs to output vertices to the pipeline, e.g., when
    *      only performing stream output.  This is an artifact of the need to
    *      pass the GS thread an initial destination URB handle."
    *
    * As such, we always enable rendering, and limit the number of threads.
    */
   if (dev->gt == 2) {
      /* maximum is 60, but limited to 28 */
      max_threads = 28;
   }
   else {
      /* maximum is 24, but limited to 21 (see brwCreateContext()) */
      max_threads = 21;
   }

   /* dispatch in single-program-flow (SPF) mode */
   dw2 = GEN6_THREADDISP_SPF;

   dw4 = vue_read_len << GEN6_GS_DW4_URB_READ_LEN__SHIFT |
         0 << GEN6_GS_DW4_URB_READ_OFFSET__SHIFT |
         start_grf << GEN6_GS_DW4_URB_GRF_START__SHIFT;

   /* rendering always enabled; see the max-thread note above */
   dw5 = (max_threads - 1) << GEN6_GS_DW5_MAX_THREADS__SHIFT |
         GEN6_GS_DW5_STATISTICS |
         GEN6_GS_DW5_SO_STATISTICS |
         GEN6_GS_DW5_RENDER_ENABLE;

   /*
    * we cannot make use of GEN6_GS_REORDER because it will reorder
    * triangle strips according to D3D rules (triangle 2N+1 uses vertices
    * (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices
    * (2N+2, 2N+1, 2N+3)).
    */
   dw6 = GEN6_GS_DW6_GS_ENABLE;

   if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_DISCARD_ADJACENCY))
      dw6 |= GEN6_GS_DW6_DISCARD_ADJACENCY;

   /* stream output on GEN6 goes through the SVBI counters */
   if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_VS_GEN6_SO)) {
      const uint32_t svbi_post_inc =
         ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_GEN6_SVBI_POST_INC);

      dw6 |= GEN6_GS_DW6_SVBI_PAYLOAD_ENABLE;
      if (svbi_post_inc) {
         dw6 |= GEN6_GS_DW6_SVBI_POST_INC_ENABLE |
                svbi_post_inc << GEN6_GS_DW6_SVBI_POST_INC_VAL__SHIFT;
      }
   }

   STATIC_ASSERT(Elements(cso->payload) >= 4);
   cso->payload[0] = dw2;
   cso->payload[1] = dw4;
   cso->payload[2] = dw5;
   cso->payload[3] = dw6;
}
 
static void
gs_init_cso_gen7(const struct ilo_dev *dev,
                 const struct ilo_shader_state *gs,
                 struct ilo_shader_cso *cso)
{
   int grf_start, urb_read_len, num_samplers, thread_max;
   uint32_t dw2, dw4, dw5;

   ILO_DEV_ASSERT(dev, 7, 7.5);

   grf_start = ilo_shader_get_kernel_param(gs, ILO_KERNEL_URB_DATA_START_REG);
   urb_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);
   num_samplers = ilo_shader_get_kernel_param(gs, ILO_KERNEL_SAMPLER_COUNT);

   /* the hardware counts the URB read length in pairs */
   urb_read_len = (urb_read_len + 1) / 2;

   if (ilo_dev_gen(dev) == ILO_GEN(7.5))
      thread_max = (dev->gt >= 2) ? 256 : 70;
   else if (ilo_dev_gen(dev) == ILO_GEN(7))
      thread_max = (dev->gt == 2) ? 128 : 36;
   else
      thread_max = 1;

   /* IEEE FP mode; the sampler count is programmed in groups of four */
   dw2 = ((num_samplers + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;

   dw4 = urb_read_len << GEN7_GS_DW4_URB_READ_LEN__SHIFT |
         GEN7_GS_DW4_INCLUDE_VERTEX_HANDLES |
         0 << GEN7_GS_DW4_URB_READ_OFFSET__SHIFT |
         grf_start << GEN7_GS_DW4_URB_GRF_START__SHIFT;

   dw5 = (thread_max - 1) << GEN7_GS_DW5_MAX_THREADS__SHIFT |
         GEN7_GS_DW5_STATISTICS |
         GEN7_GS_DW5_GS_ENABLE;

   STATIC_ASSERT(Elements(cso->payload) >= 3);
   cso->payload[0] = dw2;
   cso->payload[1] = dw4;
   cso->payload[2] = dw5;
}
 
void
ilo_gpe_init_gs_cso(const struct ilo_dev *dev,
                    const struct ilo_shader_state *gs,
                    struct ilo_shader_cso *cso)
{
   /* pick the packing routine for the hardware generation */
   if (ilo_dev_gen(dev) < ILO_GEN(7))
      gs_init_cso_gen6(dev, gs, cso);
   else
      gs_init_cso_gen7(dev, gs, cso);
}
 
static void
view_init_null_gen6(const struct ilo_dev *dev,
                    unsigned width, unsigned height,
                    unsigned depth, unsigned level,
                    struct ilo_view_surface *surf)
{
   uint32_t *dw;

   ILO_DEV_ASSERT(dev, 6, 6);

   assert(width >= 1 && height >= 1 && depth >= 1);

   /*
    * A null surface: writes to it are dropped and reads from it return
    * zeros.  Per the Sandy Bridge PRM (volume 4 part 1, pages 71 and 82),
    * the Width, Height, Depth, and LOD fields must still match the depth
    * buffer when used as a render target, the format must be programmed as
    * B8G8R8A8_UNORM, and the Tiled Surface bit must be set.
    */

   STATIC_ASSERT(Elements(surf->payload) >= 6);
   dw = surf->payload;

   dw[5] = 0;
   dw[4] = 0;

   /* depth plus the mandatory tiling bit */
   dw[3] = (depth - 1) << GEN6_SURFACE_DW3_DEPTH__SHIFT |
           GEN6_TILING_X;

   /* dimensions must mirror the depth buffer when used as an RT */
   dw[2] = (height - 1) << GEN6_SURFACE_DW2_HEIGHT__SHIFT |
           (width - 1) << GEN6_SURFACE_DW2_WIDTH__SHIFT |
           level << GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT;

   dw[1] = 0;

   dw[0] = GEN6_SURFTYPE_NULL << GEN6_SURFACE_DW0_TYPE__SHIFT |
           GEN6_FORMAT_B8G8R8A8_UNORM << GEN6_SURFACE_DW0_FORMAT__SHIFT;
}
 
/*
 * Fill surf->payload with a GEN6 SURFACE_STATE for a SURFTYPE_BUFFER view.
 * offset/size select the viewed byte range of buf, struct_size becomes the
 * surface pitch (size of one structure), and elem_format describes the
 * element type within a structure.
 */
static void
view_init_for_buffer_gen6(const struct ilo_dev *dev,
                          const struct ilo_buffer *buf,
                          unsigned offset, unsigned size,
                          unsigned struct_size,
                          enum pipe_format elem_format,
                          bool is_rt, bool render_cache_rw,
                          struct ilo_view_surface *surf)
{
   const int elem_size = util_format_get_blocksize(elem_format);
   int width, height, depth, pitch;
   int surface_format, num_entries;
   uint32_t *dw;

   ILO_DEV_ASSERT(dev, 6, 6);

   /*
    * For SURFTYPE_BUFFER, a SURFACE_STATE specifies an element of a
    * structure in a buffer.
    */

   surface_format = ilo_format_translate_color(dev, elem_format);

   num_entries = size / struct_size;
   /* see if there is enough space to fit another element */
   if (size % struct_size >= elem_size)
      num_entries++;

   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 76:
    *
    *     "For SURFTYPE_BUFFER render targets, this field (Surface Base
    *      Address) specifies the base address of first element of the
    *      surface.  The surface is interpreted as a simple array of that
    *      single element type.  The address must be naturally-aligned to the
    *      element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
    *      must be 16-byte aligned).
    *
    *      For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
    *      the base address of the first element of the surface, computed in
    *      software by adding the surface base address to the byte offset of
    *      the element in the buffer."
    */
   if (is_rt)
      assert(offset % elem_size == 0);

   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 77:
    *
    *     "For buffer surfaces, the number of entries in the buffer ranges
    *      from 1 to 2^27."
    */
   assert(num_entries >= 1 && num_entries <= 1 << 27);

   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 81:
    *
    *     "For surfaces of type SURFTYPE_BUFFER, this field (Surface Pitch)
    *      indicates the size of the structure."
    */
   pitch = struct_size;

   /* the fields below are all programmed with "value - 1" */
   pitch--;
   num_entries--;
   /* bits [6:0] */
   width  = (num_entries & 0x0000007f);
   /* bits [19:7] */
   height = (num_entries & 0x000fff80) >> 7;
   /* bits [26:20] */
   depth  = (num_entries & 0x07f00000) >> 20;

   STATIC_ASSERT(Elements(surf->payload) >= 6);
   dw = surf->payload;

   dw[0] = GEN6_SURFTYPE_BUFFER << GEN6_SURFACE_DW0_TYPE__SHIFT |
           surface_format << GEN6_SURFACE_DW0_FORMAT__SHIFT;
   if (render_cache_rw)
      dw[0] |= GEN6_SURFACE_DW0_RENDER_CACHE_RW;

   /*
    * NOTE(review): the byte offset is programmed directly as DW1; presumably
    * it is combined with the BO base address via relocation at emit time --
    * confirm against the command emission code.
    */
   dw[1] = offset;

   dw[2] = height << GEN6_SURFACE_DW2_HEIGHT__SHIFT |
           width << GEN6_SURFACE_DW2_WIDTH__SHIFT;

   dw[3] = depth << GEN6_SURFACE_DW3_DEPTH__SHIFT |
           pitch << GEN6_SURFACE_DW3_PITCH__SHIFT;

   dw[4] = 0;
   dw[5] = 0;
}
 
/*
 * Fill surf->payload with a GEN6 SURFACE_STATE for a texture or render
 * target view of img.  first_level/num_levels and first_layer/num_layers
 * select the viewed mip levels and array layers; is_rt selects between
 * render target and sampler semantics.
 */
static void
view_init_for_image_gen6(const struct ilo_dev *dev,
                         const struct ilo_image *img,
                         enum pipe_texture_target target,
                         enum pipe_format format,
                         unsigned first_level,
                         unsigned num_levels,
                         unsigned first_layer,
                         unsigned num_layers,
                         bool is_rt,
                         struct ilo_view_surface *surf)
{
   int surface_type, surface_format;
   int width, height, depth, pitch, lod;
   uint32_t *dw;

   ILO_DEV_ASSERT(dev, 6, 6);

   surface_type = ilo_gpe_gen6_translate_texture(target);
   assert(surface_type != GEN6_SURFTYPE_BUFFER);

   /* separate stencil means only the depth part is viewed here */
   if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && img->separate_stencil)
      format = PIPE_FORMAT_Z32_FLOAT;

   if (is_rt)
      surface_format = ilo_format_translate_render(dev, format);
   else
      surface_format = ilo_format_translate_texture(dev, format);
   assert(surface_format >= 0);

   width = img->width0;
   height = img->height0;
   depth = (target == PIPE_TEXTURE_3D) ? img->depth0 : num_layers;
   pitch = img->bo_stride;

   if (surface_type == GEN6_SURFTYPE_CUBE) {
      /*
       * From the Sandy Bridge PRM, volume 4 part 1, page 81:
       *
       *     "For SURFTYPE_CUBE: [DevSNB+]: for Sampling Engine Surfaces, the
       *      range of this field (Depth) is [0,84], indicating the number of
       *      cube array elements (equal to the number of underlying 2D array
       *      elements divided by 6).  For other surfaces, this field must be
       *      zero."
       *
       * When is_rt is true, we treat the texture as a 2D one to avoid the
       * restriction.
       */
      if (is_rt) {
         surface_type = GEN6_SURFTYPE_2D;
      }
      else {
         assert(num_layers % 6 == 0);
         depth = num_layers / 6;
      }
   }

   /* sanity check the size against the per-type hardware limits */
   assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
   switch (surface_type) {
   case GEN6_SURFTYPE_1D:
      assert(width <= 8192 && height == 1 && depth <= 512);
      assert(first_layer < 512 && num_layers <= 512);
      break;
   case GEN6_SURFTYPE_2D:
      assert(width <= 8192 && height <= 8192 && depth <= 512);
      assert(first_layer < 512 && num_layers <= 512);
      break;
   case GEN6_SURFTYPE_3D:
      assert(width <= 2048 && height <= 2048 && depth <= 2048);
      assert(first_layer < 2048 && num_layers <= 512);
      if (!is_rt)
         assert(first_layer == 0);
      break;
   case GEN6_SURFTYPE_CUBE:
      assert(width <= 8192 && height <= 8192 && depth <= 85);
      assert(width == height);
      assert(first_layer < 512 && num_layers <= 512);
      if (is_rt)
         assert(first_layer == 0);
      break;
   default:
      assert(!"unexpected surface type");
      break;
   }

   /* non-full array spacing is supported only on GEN7+ */
   assert(img->walk != ILO_IMAGE_WALK_LOD);
   /* non-interleaved samples are supported only on GEN7+ */
   if (img->sample_count > 1)
      assert(img->interleaved_samples);

   /* RTs select a single level via LOD; samplers program the mip count */
   if (is_rt) {
      assert(num_levels == 1);
      lod = first_level;
   }
   else {
      lod = num_levels - 1;
   }

   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 76:
    *
    *     "Linear render target surface base addresses must be element-size
    *      aligned, for non-YUV surface formats, or a multiple of 2
    *      element-sizes for YUV surface formats.  Other linear surfaces have
    *      no alignment requirements (byte alignment is sufficient.)"
    *
    * From the Sandy Bridge PRM, volume 4 part 1, page 81:
    *
    *     "For linear render target surfaces, the pitch must be a multiple
    *      of the element size for non-YUV surface formats.  Pitch must be a
    *      multiple of 2 * element size for YUV surface formats."
    *
    * From the Sandy Bridge PRM, volume 4 part 1, page 86:
    *
    *     "For linear surfaces, this field (X Offset) must be zero"
    */
   if (img->tiling == GEN6_TILING_NONE) {
      if (is_rt) {
         const int elem_size = util_format_get_blocksize(format);
         assert(pitch % elem_size == 0);
      }
   }

   STATIC_ASSERT(Elements(surf->payload) >= 6);
   dw = surf->payload;

   dw[0] = surface_type << GEN6_SURFACE_DW0_TYPE__SHIFT |
           surface_format << GEN6_SURFACE_DW0_FORMAT__SHIFT |
           GEN6_SURFACE_DW0_MIPLAYOUT_BELOW;

   if (surface_type == GEN6_SURFTYPE_CUBE && !is_rt) {
      /*
       * NOTE(review): bit 9 is set only for sampled cube surfaces; the PRM
       * name of this bit is not visible here -- verify against the
       * SURFACE_STATE DW0 field layout.
       */
      dw[0] |= 1 << 9 |
               GEN6_SURFACE_DW0_CUBE_FACE_ENABLES__MASK;
   }

   if (is_rt)
      dw[0] |= GEN6_SURFACE_DW0_RENDER_CACHE_RW;

   dw[1] = 0;

   dw[2] = (height - 1) << GEN6_SURFACE_DW2_HEIGHT__SHIFT |
           (width - 1) << GEN6_SURFACE_DW2_WIDTH__SHIFT |
           lod << GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT;

   assert(img->tiling != GEN8_TILING_W);
   dw[3] = (depth - 1) << GEN6_SURFACE_DW3_DEPTH__SHIFT |
           (pitch - 1) << GEN6_SURFACE_DW3_PITCH__SHIFT |
           img->tiling;

   /*
    * NOTE(review): the raw shifts 17 and 8 presumably correspond to the
    * Minimum Array Element and Render Target View Extent / Depth fields of
    * DW4 -- confirm against the PRM.  GEN6 programs only 1x or 4x
    * multisampling here.
    */
   dw[4] = first_level << GEN6_SURFACE_DW4_MIN_LOD__SHIFT |
           first_layer << 17 |
           (num_layers - 1) << 8 |
           ((img->sample_count > 1) ? GEN6_SURFACE_DW4_MULTISAMPLECOUNT_4 :
                                      GEN6_SURFACE_DW4_MULTISAMPLECOUNT_1);

   dw[5] = 0;

   /* vertical alignment of 2 or 4 rows */
   assert(img->align_j == 2 || img->align_j == 4);
   if (img->align_j == 4)
      dw[5] |= GEN6_SURFACE_DW5_VALIGN_4;
}
 
static void
view_init_null_gen7(const struct ilo_dev *dev,
                    unsigned width, unsigned height,
                    unsigned depth, unsigned level,
                    struct ilo_view_surface *surf)
{
   uint32_t *dw;

   ILO_DEV_ASSERT(dev, 7, 8);

   assert(width >= 1 && height >= 1 && depth >= 1);

   /*
    * A null surface: writes to it are dropped and reads from it return
    * zeros.  The Ivy Bridge PRM (volume 4 part 1, pages 62 and 65) still
    * requires Width, Height, Depth, LOD, and Render Target View Extent to
    * match the depth buffer's state when used as a render target, and the
    * Tiled Surface bit to be set.
    */

   STATIC_ASSERT(Elements(surf->payload) >= 13);
   dw = surf->payload;

   /* the tiling field moved on GEN8 */
   dw[0] = GEN6_SURFTYPE_NULL << GEN7_SURFACE_DW0_TYPE__SHIFT |
           GEN6_FORMAT_B8G8R8A8_UNORM << GEN7_SURFACE_DW0_FORMAT__SHIFT |
           ((ilo_dev_gen(dev) >= ILO_GEN(8)) ?
            GEN6_TILING_X << GEN8_SURFACE_DW0_TILING__SHIFT :
            GEN6_TILING_X << GEN7_SURFACE_DW0_TILING__SHIFT);

   dw[1] = 0;

   /* dimensions must mirror the depth buffer when used as an RT */
   dw[2] = GEN_SHIFT32(height - 1, GEN7_SURFACE_DW2_HEIGHT) |
           GEN_SHIFT32(width - 1, GEN7_SURFACE_DW2_WIDTH);

   dw[3] = GEN_SHIFT32(depth - 1, GEN7_SURFACE_DW3_DEPTH);

   dw[4] = 0;
   dw[5] = level;
   dw[6] = 0;
   dw[7] = 0;

   if (ilo_dev_gen(dev) >= ILO_GEN(8))
      memset(&dw[8], 0, sizeof(*dw) * (13 - 8));
}
 
/*
 * Fill surf->payload with a GEN7+ SURFACE_STATE for a buffer view.  The
 * view is one of:
 *
 *  - a typed buffer (elem_format != PIPE_FORMAT_NONE),
 *  - a structured buffer (raw format with struct_size > 1), or
 *  - a raw buffer (raw format, struct_size == 1).
 *
 * offset/size select the viewed byte range of buf and struct_size becomes
 * the surface pitch.
 *
 * Fix: on the GEN8 path, DW1 was never written (only DW8 and DW9..DW12
 * were), leaving one dword of the payload uninitialized; it is now zeroed.
 */
static void
view_init_for_buffer_gen7(const struct ilo_dev *dev,
                          const struct ilo_buffer *buf,
                          unsigned offset, unsigned size,
                          unsigned struct_size,
                          enum pipe_format elem_format,
                          bool is_rt, bool render_cache_rw,
                          struct ilo_view_surface *surf)
{
   const bool typed = (elem_format != PIPE_FORMAT_NONE);
   const bool structured = (!typed && struct_size > 1);
   const int elem_size = (typed) ?
      util_format_get_blocksize(elem_format) : 1;
   int width, height, depth, pitch;
   int surface_type, surface_format, num_entries;
   uint32_t *dw;

   ILO_DEV_ASSERT(dev, 7, 8);

   surface_type = (structured) ? GEN7_SURFTYPE_STRBUF : GEN6_SURFTYPE_BUFFER;

   surface_format = (typed) ?
      ilo_format_translate_color(dev, elem_format) : GEN6_FORMAT_RAW;

   num_entries = size / struct_size;
   /* see if there is enough space to fit another element */
   if (size % struct_size >= elem_size && !structured)
      num_entries++;

   /*
    * From the Ivy Bridge PRM, volume 4 part 1, page 67:
    *
    *     "For SURFTYPE_BUFFER render targets, this field (Surface Base
    *      Address) specifies the base address of first element of the
    *      surface.  The surface is interpreted as a simple array of that
    *      single element type.  The address must be naturally-aligned to the
    *      element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
    *      must be 16-byte aligned)
    *
    *      For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
    *      the base address of the first element of the surface, computed in
    *      software by adding the surface base address to the byte offset of
    *      the element in the buffer."
    */
   if (is_rt)
      assert(offset % elem_size == 0);

   /*
    * From the Ivy Bridge PRM, volume 4 part 1, page 68:
    *
    *     "For typed buffer and structured buffer surfaces, the number of
    *      entries in the buffer ranges from 1 to 2^27.  For raw buffer
    *      surfaces, the number of entries in the buffer is the number of
    *      bytes which can range from 1 to 2^30."
    */
   assert(num_entries >= 1 &&
          num_entries <= 1 << ((typed || structured) ? 27 : 30));

   /*
    * From the Ivy Bridge PRM, volume 4 part 1, page 69:
    *
    *     "For SURFTYPE_BUFFER: The low two bits of this field (Width) must be
    *      11 if the Surface Format is RAW (the size of the buffer must be a
    *      multiple of 4 bytes)."
    *
    * From the Ivy Bridge PRM, volume 4 part 1, page 70:
    *
    *     "For surfaces of type SURFTYPE_BUFFER and SURFTYPE_STRBUF, this
    *      field (Surface Pitch) indicates the size of the structure."
    *
    *     "For linear surfaces with Surface Type of SURFTYPE_STRBUF, the pitch
    *      must be a multiple of 4 bytes."
    */
   if (structured)
      assert(struct_size % 4 == 0);
   else if (!typed)
      assert(num_entries % 4 == 0);

   pitch = struct_size;

   /* the fields below are all programmed with "value - 1" */
   pitch--;
   num_entries--;
   /* bits [6:0] */
   width  = (num_entries & 0x0000007f);
   /* bits [20:7] */
   height = (num_entries & 0x001fff80) >> 7;
   /* bits [30:21] */
   depth  = (num_entries & 0x7fe00000) >> 21;
   /* limit to [26:21] */
   if (typed || structured)
      depth &= 0x3f;

   STATIC_ASSERT(Elements(surf->payload) >= 13);
   dw = surf->payload;

   dw[0] = surface_type << GEN7_SURFACE_DW0_TYPE__SHIFT |
           surface_format << GEN7_SURFACE_DW0_FORMAT__SHIFT;
   if (render_cache_rw)
      dw[0] |= GEN7_SURFACE_DW0_RENDER_CACHE_RW;

   /* the offset moved to DW8 on GEN8; DW1 must still be defined */
   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
      dw[1] = 0;
      dw[8] = offset;
      memset(&dw[9], 0, sizeof(*dw) * (13 - 9));
   } else {
      dw[1] = offset;
   }

   dw[2] = GEN_SHIFT32(height, GEN7_SURFACE_DW2_HEIGHT) |
           GEN_SHIFT32(width, GEN7_SURFACE_DW2_WIDTH);

   dw[3] = GEN_SHIFT32(depth, GEN7_SURFACE_DW3_DEPTH) |
           pitch;

   dw[4] = 0;
   dw[5] = 0;

   dw[6] = 0;
   dw[7] = 0;

   /* identity channel selects, available since Haswell */
   if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
      dw[7] |= GEN_SHIFT32(GEN75_SCS_RED,   GEN75_SURFACE_DW7_SCS_R) |
               GEN_SHIFT32(GEN75_SCS_GREEN, GEN75_SURFACE_DW7_SCS_G) |
               GEN_SHIFT32(GEN75_SCS_BLUE,  GEN75_SURFACE_DW7_SCS_B) |
               GEN_SHIFT32(GEN75_SCS_ALPHA, GEN75_SURFACE_DW7_SCS_A);
   }
}
 
/*
 * Fill surf->payload with a GEN7+ SURFACE_STATE for a texture or render
 * target view of img.  first_level/num_levels and first_layer/num_layers
 * select the viewed mip levels and array layers; is_rt selects between
 * render target and sampler semantics.
 */
static void
view_init_for_image_gen7(const struct ilo_dev *dev,
                         const struct ilo_image *img,
                         enum pipe_texture_target target,
                         enum pipe_format format,
                         unsigned first_level,
                         unsigned num_levels,
                         unsigned first_layer,
                         unsigned num_layers,
                         bool is_rt,
                         struct ilo_view_surface *surf)
{
   int surface_type, surface_format;
   int width, height, depth, pitch, lod;
   uint32_t *dw;

   ILO_DEV_ASSERT(dev, 7, 8);

   surface_type = ilo_gpe_gen6_translate_texture(target);
   assert(surface_type != GEN6_SURFTYPE_BUFFER);

   /* separate stencil means only the depth part is viewed here */
   if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && img->separate_stencil)
      format = PIPE_FORMAT_Z32_FLOAT;

   if (is_rt)
      surface_format = ilo_format_translate_render(dev, format);
   else
      surface_format = ilo_format_translate_texture(dev, format);
   assert(surface_format >= 0);

   width = img->width0;
   height = img->height0;
   depth = (target == PIPE_TEXTURE_3D) ? img->depth0 : num_layers;
   pitch = img->bo_stride;

   if (surface_type == GEN6_SURFTYPE_CUBE) {
      /*
       * From the Ivy Bridge PRM, volume 4 part 1, page 70:
       *
       *     "For SURFTYPE_CUBE:For Sampling Engine Surfaces, the range of
       *      this field is [0,340], indicating the number of cube array
       *      elements (equal to the number of underlying 2D array elements
       *      divided by 6).  For other surfaces, this field must be zero."
       *
       * When is_rt is true, we treat the texture as a 2D one to avoid the
       * restriction.
       */
      if (is_rt) {
         surface_type = GEN6_SURFTYPE_2D;
      }
      else {
         assert(num_layers % 6 == 0);
         depth = num_layers / 6;
      }
   }

   /* sanity check the size against the per-type hardware limits */
   assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
   assert(first_layer < 2048 && num_layers <= 2048);
   switch (surface_type) {
   case GEN6_SURFTYPE_1D:
      assert(width <= 16384 && height == 1 && depth <= 2048);
      break;
   case GEN6_SURFTYPE_2D:
      assert(width <= 16384 && height <= 16384 && depth <= 2048);
      break;
   case GEN6_SURFTYPE_3D:
      assert(width <= 2048 && height <= 2048 && depth <= 2048);
      if (!is_rt)
         assert(first_layer == 0);
      break;
   case GEN6_SURFTYPE_CUBE:
      assert(width <= 16384 && height <= 16384 && depth <= 86);
      assert(width == height);
      if (is_rt)
         assert(first_layer == 0);
      break;
   default:
      assert(!"unexpected surface type");
      break;
   }

   /* RTs select a single level via LOD; samplers program the mip count */
   if (is_rt) {
      assert(num_levels == 1);
      lod = first_level;
   }
   else {
      lod = num_levels - 1;
   }

   /*
    * From the Ivy Bridge PRM, volume 4 part 1, page 68:
    *
    *     "The Base Address for linear render target surfaces and surfaces
    *      accessed with the typed surface read/write data port messages must
    *      be element-size aligned, for non-YUV surface formats, or a multiple
    *      of 2 element-sizes for YUV surface formats.  Other linear surfaces
    *      have no alignment requirements (byte alignment is sufficient)."
    *
    * From the Ivy Bridge PRM, volume 4 part 1, page 70:
    *
    *     "For linear render target surfaces and surfaces accessed with the
    *      typed data port messages, the pitch must be a multiple of the
    *      element size for non-YUV surface formats.  Pitch must be a multiple
    *      of 2 * element size for YUV surface formats.  For linear surfaces
    *      with Surface Type of SURFTYPE_STRBUF, the pitch must be a multiple
    *      of 4 bytes.For other linear surfaces, the pitch can be any multiple
    *      of bytes."
    *
    * From the Ivy Bridge PRM, volume 4 part 1, page 74:
    *
    *     "For linear surfaces, this field (X Offset) must be zero."
    */
   if (img->tiling == GEN6_TILING_NONE) {
      if (is_rt) {
         const int elem_size = util_format_get_blocksize(format);
         assert(pitch % elem_size == 0);
      }
   }

   STATIC_ASSERT(Elements(surf->payload) >= 13);
   dw = surf->payload;

   dw[0] = surface_type << GEN7_SURFACE_DW0_TYPE__SHIFT |
           surface_format << GEN7_SURFACE_DW0_FORMAT__SHIFT;

   /*
    * From the Ivy Bridge PRM, volume 4 part 1, page 63:
    *
    *     "If this field (Surface Array) is enabled, the Surface Type must be
    *      SURFTYPE_1D, SURFTYPE_2D, or SURFTYPE_CUBE.  If this field is
    *      disabled and Surface Type is SURFTYPE_1D, SURFTYPE_2D, or
    *      SURFTYPE_CUBE, the Depth field must be set to zero."
    *
    * For non-3D sampler surfaces, resinfo (the sampler message) always
    * returns zero for the number of layers when this field is not set.
    */
   if (surface_type != GEN6_SURFTYPE_3D) {
      switch (target) {
      case PIPE_TEXTURE_1D_ARRAY:
      case PIPE_TEXTURE_2D_ARRAY:
      case PIPE_TEXTURE_CUBE_ARRAY:
         dw[0] |= GEN7_SURFACE_DW0_IS_ARRAY;
         break;
      default:
         assert(depth == 1);
         break;
      }
   }

   /* GEN8 widened the alignment choices and moved the tiling field */
   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
      switch (img->align_j) {
      case 4:
         dw[0] |= GEN7_SURFACE_DW0_VALIGN_4;
         break;
      case 8:
         dw[0] |= GEN8_SURFACE_DW0_VALIGN_8;
         break;
      case 16:
         dw[0] |= GEN8_SURFACE_DW0_VALIGN_16;
         break;
      default:
         assert(!"unsupported valign");
         break;
      }

      switch (img->align_i) {
      case 4:
         dw[0] |= GEN8_SURFACE_DW0_HALIGN_4;
         break;
      case 8:
         dw[0] |= GEN8_SURFACE_DW0_HALIGN_8;
         break;
      case 16:
         dw[0] |= GEN8_SURFACE_DW0_HALIGN_16;
         break;
      default:
         assert(!"unsupported halign");
         break;
      }

      dw[0] |= img->tiling << GEN8_SURFACE_DW0_TILING__SHIFT;
   } else {
      assert(img->align_i == 4 || img->align_i == 8);
      assert(img->align_j == 2 || img->align_j == 4);

      if (img->align_j == 4)
         dw[0] |= GEN7_SURFACE_DW0_VALIGN_4;

      if (img->align_i == 8)
         dw[0] |= GEN7_SURFACE_DW0_HALIGN_8;

      assert(img->tiling != GEN8_TILING_W);
      dw[0] |= img->tiling << GEN7_SURFACE_DW0_TILING__SHIFT;

      if (img->walk == ILO_IMAGE_WALK_LOD)
         dw[0] |= GEN7_SURFACE_DW0_ARYSPC_LOD0;
      else
         dw[0] |= GEN7_SURFACE_DW0_ARYSPC_FULL;
   }

   if (is_rt)
      dw[0] |= GEN7_SURFACE_DW0_RENDER_CACHE_RW;

   if (surface_type == GEN6_SURFTYPE_CUBE && !is_rt)
      dw[0] |= GEN7_SURFACE_DW0_CUBE_FACE_ENABLES__MASK;

   /*
    * NOTE(review): on GEN8, DW1 is programmed with walk_layer_height / 4;
    * presumably this is the QPitch (per-layer distance) field -- confirm
    * against the BDW SURFACE_STATE layout.
    */
   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
      assert(img->walk_layer_height % 4 == 0);
      dw[1] = img->walk_layer_height / 4;
   } else {
      dw[1] = 0;
   }

   dw[2] = GEN_SHIFT32(height - 1, GEN7_SURFACE_DW2_HEIGHT) |
           GEN_SHIFT32(width - 1, GEN7_SURFACE_DW2_WIDTH);

   dw[3] = GEN_SHIFT32(depth - 1, GEN7_SURFACE_DW3_DEPTH) |
           (pitch - 1);

   /*
    * NOTE(review): the raw shifts 18 and 7 presumably correspond to the
    * Minimum Array Element and Render Target View Extent / Depth fields of
    * DW4 -- confirm against the PRM.
    */
   dw[4] = first_layer << 18 |
           (num_layers - 1) << 7;

   /*
    * MSFMT_MSS means the samples are not interleaved and MSFMT_DEPTH_STENCIL
    * means the samples are interleaved.  The layouts are the same when the
    * number of samples is 1.
    */
   if (img->interleaved_samples && img->sample_count > 1) {
      assert(!is_rt);
      dw[4] |= GEN7_SURFACE_DW4_MSFMT_DEPTH_STENCIL;
   }
   else {
      dw[4] |= GEN7_SURFACE_DW4_MSFMT_MSS;
   }

   switch (img->sample_count) {
   case 0:
   case 1:
   default:
      dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_1;
      break;
   case 2:
      dw[4] |= GEN8_SURFACE_DW4_MULTISAMPLECOUNT_2;
      break;
   case 4:
      dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_4;
      break;
   case 8:
      dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_8;
      break;
   case 16:
      dw[4] |= GEN8_SURFACE_DW4_MULTISAMPLECOUNT_16;
      break;
   }

   dw[5] = GEN_SHIFT32(first_level, GEN7_SURFACE_DW5_MIN_LOD) |
           lod;

   dw[6] = 0;
   dw[7] = 0;

   /* identity channel selects, available since Haswell */
   if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
      dw[7] |= GEN_SHIFT32(GEN75_SCS_RED, GEN75_SURFACE_DW7_SCS_R) |
               GEN_SHIFT32(GEN75_SCS_GREEN, GEN75_SURFACE_DW7_SCS_G) |
               GEN_SHIFT32(GEN75_SCS_BLUE, GEN75_SURFACE_DW7_SCS_B) |
               GEN_SHIFT32(GEN75_SCS_ALPHA, GEN75_SURFACE_DW7_SCS_A);
   }

   if (ilo_dev_gen(dev) >= ILO_GEN(8))
      memset(&dw[8], 0, sizeof(*dw) * (13 - 8));
}
 
void
ilo_gpe_init_view_surface_null(const struct ilo_dev *dev,
                               unsigned width, unsigned height,
                               unsigned depth, unsigned level,
                               struct ilo_view_surface *surf)
{
   /* pick the packing routine for the hardware generation */
   if (ilo_dev_gen(dev) < ILO_GEN(7))
      view_init_null_gen6(dev, width, height, depth, level, surf);
   else
      view_init_null_gen7(dev, width, height, depth, level, surf);

   /* a null surface references no buffer object */
   surf->bo = NULL;
   surf->scanout = false;
}
 
void
ilo_gpe_init_view_surface_for_buffer(const struct ilo_dev *dev,
                                     const struct ilo_buffer *buf,
                                     unsigned offset, unsigned size,
                                     unsigned struct_size,
                                     enum pipe_format elem_format,
                                     bool is_rt, bool render_cache_rw,
                                     struct ilo_view_surface *surf)
{
   /* pick the packing routine for the hardware generation */
   if (ilo_dev_gen(dev) < ILO_GEN(7)) {
      view_init_for_buffer_gen6(dev, buf, offset, size,
            struct_size, elem_format, is_rt, render_cache_rw, surf);
   } else {
      view_init_for_buffer_gen7(dev, buf, offset, size,
            struct_size, elem_format, is_rt, render_cache_rw, surf);
   }

   /* do not increment reference count */
   surf->bo = buf->bo;
   surf->scanout = false;
}
 
void
ilo_gpe_init_view_surface_for_image(const struct ilo_dev *dev,
                                    const struct ilo_image *img,
                                    enum pipe_texture_target target,
                                    enum pipe_format format,
                                    unsigned first_level,
                                    unsigned num_levels,
                                    unsigned first_layer,
                                    unsigned num_layers,
                                    bool is_rt,
                                    struct ilo_view_surface *surf)
{
   /* pick the packing routine for the hardware generation */
   if (ilo_dev_gen(dev) < ILO_GEN(7)) {
      view_init_for_image_gen6(dev, img, target, format,
            first_level, num_levels, first_layer, num_layers,
            is_rt, surf);
   } else {
      view_init_for_image_gen7(dev, img, target, format,
            first_level, num_levels, first_layer, num_layers,
            is_rt, surf);
   }

   /* do not increment reference count */
   surf->bo = img->bo;
   surf->scanout = img->scanout;
}
 
static void
sampler_init_border_color_gen6(const struct ilo_dev *dev,
                               const union pipe_color_union *color,
                               uint32_t *dw, int num_dwords)
{
   float c[4] = {
      color->f[0], color->f[1], color->f[2], color->f[3],
   };

   ILO_DEV_ASSERT(dev, 6, 6);

   assert(num_dwords >= 12);

   /*
    * The layout of this state is documented in the Ironlake PRM rather than
    * the Sandy Bridge PRM; the SNORM8 variant lives in DW11 instead of DW1.
    *
    * The channels are progressively clamped below, so the encoding stages
    * must stay in this order: float/half first, then signed-normalized,
    * then unsigned-normalized.
    */

   /* IEEE_FP in DW1..DW4 */
   dw[1] = fui(c[0]);
   dw[2] = fui(c[1]);
   dw[3] = fui(c[2]);
   dw[4] = fui(c[3]);

   /* FLOAT_16 pairs in DW5..DW6 */
   dw[5] = util_float_to_half(c[0]) |
           util_float_to_half(c[1]) << 16;
   dw[6] = util_float_to_half(c[2]) |
           util_float_to_half(c[3]) << 16;

   /* signed-normalized encodings need the channels in [-1.0f, 1.0f] */
   c[0] = CLAMP(c[0], -1.0f, 1.0f);
   c[1] = CLAMP(c[1], -1.0f, 1.0f);
   c[2] = CLAMP(c[2], -1.0f, 1.0f);
   c[3] = CLAMP(c[3], -1.0f, 1.0f);

   /* SNORM16 pairs in DW9..DW10 */
   dw[9] = (int16_t) util_iround(c[0] * 32767.0f) |
           (int16_t) util_iround(c[1] * 32767.0f) << 16;
   dw[10] = (int16_t) util_iround(c[2] * 32767.0f) |
            (int16_t) util_iround(c[3] * 32767.0f) << 16;

   /* SNORM8 in DW11 */
   dw[11] = (int8_t) util_iround(c[0] * 127.0f) |
            (int8_t) util_iround(c[1] * 127.0f) << 8 |
            (int8_t) util_iround(c[2] * 127.0f) << 16 |
            (int8_t) util_iround(c[3] * 127.0f) << 24;

   /* unsigned-normalized encodings need the channels in [0.0f, 1.0f] */
   c[0] = CLAMP(c[0], 0.0f, 1.0f);
   c[1] = CLAMP(c[1], 0.0f, 1.0f);
   c[2] = CLAMP(c[2], 0.0f, 1.0f);
   c[3] = CLAMP(c[3], 0.0f, 1.0f);

   /* UNORM8 in DW0 */
   dw[0] = (uint8_t) util_iround(c[0] * 255.0f) |
           (uint8_t) util_iround(c[1] * 255.0f) << 8 |
           (uint8_t) util_iround(c[2] * 255.0f) << 16 |
           (uint8_t) util_iround(c[3] * 255.0f) << 24;

   /* UNORM16 pairs in DW7..DW8 */
   dw[7] = (uint16_t) util_iround(c[0] * 65535.0f) |
           (uint16_t) util_iround(c[1] * 65535.0f) << 16;
   dw[8] = (uint16_t) util_iround(c[2] * 65535.0f) |
           (uint16_t) util_iround(c[3] * 65535.0f) << 16;
}
 
/**
 * Translate a pipe texture mipfilter to the matching hardware mipfilter.
 */
static int
gen6_translate_tex_mipfilter(unsigned filter)
{
   if (filter == PIPE_TEX_MIPFILTER_NEAREST)
      return GEN6_MIPFILTER_NEAREST;
   else if (filter == PIPE_TEX_MIPFILTER_LINEAR)
      return GEN6_MIPFILTER_LINEAR;
   else if (filter == PIPE_TEX_MIPFILTER_NONE)
      return GEN6_MIPFILTER_NONE;

   assert(!"unknown mipfilter");
   return GEN6_MIPFILTER_NONE;
}
 
/**
 * Translate a pipe texture filter to the matching hardware mapfilter.
 */
static int
gen6_translate_tex_filter(unsigned filter)
{
   if (filter == PIPE_TEX_FILTER_NEAREST)
      return GEN6_MAPFILTER_NEAREST;
   else if (filter == PIPE_TEX_FILTER_LINEAR)
      return GEN6_MAPFILTER_LINEAR;

   assert(!"unknown sampler filter");
   return GEN6_MAPFILTER_NEAREST;
}
 
/**
 * Translate a pipe texture coordinate wrapping mode to the matching hardware
 * wrapping mode.
 */
static int
gen6_translate_tex_wrap(unsigned wrap)
{
   if (wrap == PIPE_TEX_WRAP_REPEAT)
      return GEN6_TEXCOORDMODE_WRAP;
   else if (wrap == PIPE_TEX_WRAP_CLAMP)
      return GEN8_TEXCOORDMODE_HALF_BORDER;
   else if (wrap == PIPE_TEX_WRAP_CLAMP_TO_EDGE)
      return GEN6_TEXCOORDMODE_CLAMP;
   else if (wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER)
      return GEN6_TEXCOORDMODE_CLAMP_BORDER;
   else if (wrap == PIPE_TEX_WRAP_MIRROR_REPEAT)
      return GEN6_TEXCOORDMODE_MIRROR;

   /* the mirror-clamp variants fall through to the error path */
   assert(!"unknown sampler wrap mode");
   return GEN6_TEXCOORDMODE_WRAP;
}
 
/**
 * Translate a pipe shadow compare function to the matching hardware shadow
 * function.
 */
static int
gen6_translate_shadow_func(unsigned func)
{
   /*
    * For PIPE_FUNC_x, the reference value is on the left-hand side of the
    * comparison, and 1.0 is returned when the comparison is true.
    *
    * For GEN6_COMPAREFUNCTION_x, the reference value is on the right-hand
    * side of the comparison, and 0.0 is returned when the comparison is
    * true.  Hence each function maps to the negation of its mirror image.
    */
   if (func == PIPE_FUNC_NEVER)
      return GEN6_COMPAREFUNCTION_ALWAYS;
   else if (func == PIPE_FUNC_LESS)
      return GEN6_COMPAREFUNCTION_LEQUAL;
   else if (func == PIPE_FUNC_EQUAL)
      return GEN6_COMPAREFUNCTION_NOTEQUAL;
   else if (func == PIPE_FUNC_LEQUAL)
      return GEN6_COMPAREFUNCTION_LESS;
   else if (func == PIPE_FUNC_GREATER)
      return GEN6_COMPAREFUNCTION_GEQUAL;
   else if (func == PIPE_FUNC_NOTEQUAL)
      return GEN6_COMPAREFUNCTION_EQUAL;
   else if (func == PIPE_FUNC_GEQUAL)
      return GEN6_COMPAREFUNCTION_GREATER;
   else if (func == PIPE_FUNC_ALWAYS)
      return GEN6_COMPAREFUNCTION_NEVER;

   assert(!"unknown shadow compare function");
   return GEN6_COMPAREFUNCTION_NEVER;
}
 
/**
 * Translate a Gallium sampler state into the hardware SAMPLER_STATE payload
 * stored in \p sampler, for Gen6 through Gen8.  Fields whose values depend
 * on the paired surface (filter choice, wrap modes) are kept in separate
 * dw_* members to be combined at emit time.
 */
void
ilo_gpe_init_sampler_cso(const struct ilo_dev *dev,
                         const struct pipe_sampler_state *state,
                         struct ilo_sampler_cso *sampler)
{
   int mip_filter, min_filter, mag_filter, max_aniso;
   int lod_bias, max_lod, min_lod;
   int wrap_s, wrap_t, wrap_r, wrap_cube;
   uint32_t dw0, dw1, dw3;

   ILO_DEV_ASSERT(dev, 6, 8);

   memset(sampler, 0, sizeof(*sampler));

   mip_filter = gen6_translate_tex_mipfilter(state->min_mip_filter);
   min_filter = gen6_translate_tex_filter(state->min_img_filter);
   mag_filter = gen6_translate_tex_filter(state->mag_img_filter);

   sampler->anisotropic = state->max_anisotropy;

   /* the aniso ratio field encodes (ratio / 2) - 1, clamped to RATIO_16 */
   if (state->max_anisotropy >= 2 && state->max_anisotropy <= 16)
      max_aniso = state->max_anisotropy / 2 - 1;
   else if (state->max_anisotropy > 16)
      max_aniso = GEN6_ANISORATIO_16;
   else
      max_aniso = GEN6_ANISORATIO_2;

   /*
    *
    * Here is how the hardware calculates per-pixel LOD, from my reading of
    * the PRMs:
    *
    *  1) LOD is set to log2(ratio of texels to pixels) if not specified in
    *     other ways.  The number of texels is measured using level
    *     SurfMinLod.
    *  2) Bias is added to LOD.
    *  3) LOD is clamped to [MinLod, MaxLod], and the clamped value is
    *     compared with Base to determine whether magnification or
    *     minification is needed.  (if preclamp is disabled, LOD is compared
    *     with Base before clamping)
    *  4) If magnification is needed, or no mipmapping is requested, LOD is
    *     set to floor(MinLod).
    *  5) LOD is clamped to [0, MIPCnt], and SurfMinLod is added to LOD.
    *
    * With Gallium interface, Base is always zero and
    * pipe_sampler_view::u.tex.first_level specifies SurfMinLod.
    */
   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
      /* Gen7+ uses 8 fractional bits for LOD fields */
      const float scale = 256.0f;

      /* [-16.0, 16.0) in S4.8 */
      lod_bias = (int)
         (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale);
      lod_bias &= 0x1fff;

      /* [0.0, 14.0] in U4.8 */
      max_lod = (int) (CLAMP(state->max_lod, 0.0f, 14.0f) * scale);
      min_lod = (int) (CLAMP(state->min_lod, 0.0f, 14.0f) * scale);
   }
   else {
      /* Gen6 uses 6 fractional bits for LOD fields */
      const float scale = 64.0f;

      /* [-16.0, 16.0) in S4.6 */
      lod_bias = (int)
         (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale);
      lod_bias &= 0x7ff;

      /* [0.0, 13.0] in U4.6 */
      max_lod = (int) (CLAMP(state->max_lod, 0.0f, 13.0f) * scale);
      min_lod = (int) (CLAMP(state->min_lod, 0.0f, 13.0f) * scale);
   }

   /*
    * We want LOD to be clamped to determine magnification/minification, and
    * get set to zero when it is magnification or when mipmapping is
    * disabled.  The hardware would set LOD to floor(MinLod) and that is a
    * problem when MinLod is greater than or equal to 1.0f.
    *
    * With Base being zero, it is always minification when MinLod is
    * non-zero.  To achieve our goal, we just need to set MinLod to zero and
    * set MagFilter to MinFilter when mipmapping is disabled.
    */
   if (state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && min_lod) {
      min_lod = 0;
      mag_filter = min_filter;
   }

   /* determine wrap s/t/r */
   wrap_s = gen6_translate_tex_wrap(state->wrap_s);
   wrap_t = gen6_translate_tex_wrap(state->wrap_t);
   wrap_r = gen6_translate_tex_wrap(state->wrap_r);
   if (ilo_dev_gen(dev) < ILO_GEN(8)) {
      /*
       * For nearest filtering, PIPE_TEX_WRAP_CLAMP means
       * PIPE_TEX_WRAP_CLAMP_TO_EDGE;  for linear filtering,
       * PIPE_TEX_WRAP_CLAMP means PIPE_TEX_WRAP_CLAMP_TO_BORDER while
       * additionally clamping the texture coordinates to [0.0, 1.0].
       *
       * PIPE_TEX_WRAP_CLAMP is not supported natively until Gen8.  The
       * clamping has to be taken care of in the shaders.  There are two
       * filters here, but let the minification one have a say.
       */
      const bool clamp_is_to_edge =
         (state->min_img_filter == PIPE_TEX_FILTER_NEAREST);

      if (clamp_is_to_edge) {
         if (wrap_s == GEN8_TEXCOORDMODE_HALF_BORDER)
            wrap_s = GEN6_TEXCOORDMODE_CLAMP;
         if (wrap_t == GEN8_TEXCOORDMODE_HALF_BORDER)
            wrap_t = GEN6_TEXCOORDMODE_CLAMP;
         if (wrap_r == GEN8_TEXCOORDMODE_HALF_BORDER)
            wrap_r = GEN6_TEXCOORDMODE_CLAMP;
      } else {
         /* shader-side saturation of the coordinate is requested per axis */
         if (wrap_s == GEN8_TEXCOORDMODE_HALF_BORDER) {
            wrap_s = GEN6_TEXCOORDMODE_CLAMP_BORDER;
            sampler->saturate_s = true;
         }
         if (wrap_t == GEN8_TEXCOORDMODE_HALF_BORDER) {
            wrap_t = GEN6_TEXCOORDMODE_CLAMP_BORDER;
            sampler->saturate_t = true;
         }
         if (wrap_r == GEN8_TEXCOORDMODE_HALF_BORDER) {
            wrap_r = GEN6_TEXCOORDMODE_CLAMP_BORDER;
            sampler->saturate_r = true;
         }
      }
   }

   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 107:
    *
    *   "When using cube map texture coordinates, only TEXCOORDMODE_CLAMP
    *    and TEXCOORDMODE_CUBE settings are valid, and each TC component
    *    must have the same Address Control mode."
    *
    * From the Ivy Bridge PRM, volume 4 part 1, page 96:
    *
    *   "This field (Cube Surface Control Mode) must be set to
    *    CUBECTRLMODE_PROGRAMMED"
    *
    * Therefore, we cannot use "Cube Surface Control Mode" for seamless cube
    * map filtering.
    */
   if (state->seamless_cube_map &&
       (state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
        state->mag_img_filter != PIPE_TEX_FILTER_NEAREST)) {
      wrap_cube = GEN6_TEXCOORDMODE_CUBE;
   }
   else {
      wrap_cube = GEN6_TEXCOORDMODE_CLAMP;
   }

   if (!state->normalized_coords) {
      /*
       * From the Ivy Bridge PRM, volume 4 part 1, page 98:
       *
       *   "The following state must be set as indicated if this field
       *    (Non-normalized Coordinate Enable) is enabled:
       *
       *    - TCX/Y/Z Address Control Mode must be TEXCOORDMODE_CLAMP,
       *      TEXCOORDMODE_HALF_BORDER, or TEXCOORDMODE_CLAMP_BORDER.
       *    - Surface Type must be SURFTYPE_2D or SURFTYPE_3D.
       *    - Mag Mode Filter must be MAPFILTER_NEAREST or
       *      MAPFILTER_LINEAR.
       *    - Min Mode Filter must be MAPFILTER_NEAREST or
       *      MAPFILTER_LINEAR.
       *    - Mip Mode Filter must be MIPFILTER_NONE.
       *    - Min LOD must be 0.
       *    - Max LOD must be 0.
       *    - MIP Count must be 0.
       *    - Surface Min LOD must be 0.
       *    - Texture LOD Bias must be 0."
       */
      assert(wrap_s == GEN6_TEXCOORDMODE_CLAMP ||
             wrap_s == GEN6_TEXCOORDMODE_CLAMP_BORDER);
      assert(wrap_t == GEN6_TEXCOORDMODE_CLAMP ||
             wrap_t == GEN6_TEXCOORDMODE_CLAMP_BORDER);
      assert(wrap_r == GEN6_TEXCOORDMODE_CLAMP ||
             wrap_r == GEN6_TEXCOORDMODE_CLAMP_BORDER);

      assert(mag_filter == GEN6_MAPFILTER_NEAREST ||
             mag_filter == GEN6_MAPFILTER_LINEAR);
      assert(min_filter == GEN6_MAPFILTER_NEAREST ||
             min_filter == GEN6_MAPFILTER_LINEAR);

      /* work around a bug in util_blitter */
      mip_filter = GEN6_MIPFILTER_NONE;

      assert(mip_filter == GEN6_MIPFILTER_NONE);
   }

   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
      /* Gen7+ SAMPLER_STATE layout */
      dw0 = 1 << 28 |
            mip_filter << 20 |
            lod_bias << 1;

      sampler->dw_filter = mag_filter << 17 |
                           min_filter << 14;

      sampler->dw_filter_aniso = GEN6_MAPFILTER_ANISOTROPIC << 17 |
                                 GEN6_MAPFILTER_ANISOTROPIC << 14 |
                                 1;

      dw1 = min_lod << 20 |
            max_lod << 8;

      if (state->compare_mode != PIPE_TEX_COMPARE_NONE)
         dw1 |= gen6_translate_shadow_func(state->compare_func) << 1;

      dw3 = max_aniso << 19;

      /* round the coordinates for linear filtering */
      if (min_filter != GEN6_MAPFILTER_NEAREST) {
         dw3 |= (GEN6_SAMPLER_DW3_U_MIN_ROUND |
                 GEN6_SAMPLER_DW3_V_MIN_ROUND |
                 GEN6_SAMPLER_DW3_R_MIN_ROUND);
      }
      if (mag_filter != GEN6_MAPFILTER_NEAREST) {
         dw3 |= (GEN6_SAMPLER_DW3_U_MAG_ROUND |
                 GEN6_SAMPLER_DW3_V_MAG_ROUND |
                 GEN6_SAMPLER_DW3_R_MAG_ROUND);
      }

      /* Non-normalized Coordinate Enable */
      if (!state->normalized_coords)
         dw3 |= 1 << 10;

      sampler->dw_wrap = wrap_s << 6 |
                         wrap_t << 3 |
                         wrap_r;

      /*
       * As noted in the classic i965 driver, the HW may still reference
       * wrap_t and wrap_r for 1D textures.  We need to set them to a safe
       * mode
       */
      sampler->dw_wrap_1d = wrap_s << 6 |
                            GEN6_TEXCOORDMODE_WRAP << 3 |
                            GEN6_TEXCOORDMODE_WRAP;

      sampler->dw_wrap_cube = wrap_cube << 6 |
                              wrap_cube << 3 |
                              wrap_cube;

      STATIC_ASSERT(Elements(sampler->payload) >= 7);

      sampler->payload[0] = dw0;
      sampler->payload[1] = dw1;
      sampler->payload[2] = dw3;

      /* Gen7+ stores the border color inline after the three dwords */
      memcpy(&sampler->payload[3],
             state->border_color.ui, sizeof(state->border_color.ui));
   }
   else {
      /* Gen6 SAMPLER_STATE layout */
      dw0 = 1 << 28 |
            mip_filter << 20 |
            lod_bias << 3;

      if (state->compare_mode != PIPE_TEX_COMPARE_NONE)
         dw0 |= gen6_translate_shadow_func(state->compare_func);

      sampler->dw_filter = (min_filter != mag_filter) << 27 |
                           mag_filter << 17 |
                           min_filter << 14;

      sampler->dw_filter_aniso = GEN6_MAPFILTER_ANISOTROPIC << 17 |
                                 GEN6_MAPFILTER_ANISOTROPIC << 14;

      dw1 = min_lod << 22 |
            max_lod << 12;

      sampler->dw_wrap = wrap_s << 6 |
                         wrap_t << 3 |
                         wrap_r;

      /* see the wrap_1d note in the Gen7+ branch of this function */
      sampler->dw_wrap_1d = wrap_s << 6 |
                            GEN6_TEXCOORDMODE_WRAP << 3 |
                            GEN6_TEXCOORDMODE_WRAP;

      sampler->dw_wrap_cube = wrap_cube << 6 |
                              wrap_cube << 3 |
                              wrap_cube;

      dw3 = max_aniso << 19;

      /* round the coordinates for linear filtering */
      if (min_filter != GEN6_MAPFILTER_NEAREST) {
         dw3 |= (GEN6_SAMPLER_DW3_U_MIN_ROUND |
                 GEN6_SAMPLER_DW3_V_MIN_ROUND |
                 GEN6_SAMPLER_DW3_R_MIN_ROUND);
      }
      if (mag_filter != GEN6_MAPFILTER_NEAREST) {
         dw3 |= (GEN6_SAMPLER_DW3_U_MAG_ROUND |
                 GEN6_SAMPLER_DW3_V_MAG_ROUND |
                 GEN6_SAMPLER_DW3_R_MAG_ROUND);
      }

      /* Non-normalized Coordinate Enable */
      if (!state->normalized_coords)
         dw3 |= 1;

      STATIC_ASSERT(Elements(sampler->payload) >= 15);

      sampler->payload[0] = dw0;
      sampler->payload[1] = dw1;
      sampler->payload[2] = dw3;

      /* Gen6 needs a separately formatted border color (12 dwords) */
      sampler_init_border_color_gen6(dev,
            &state->border_color, &sampler->payload[3], 12);
   }
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/intel_winsys.h
0,0 → 1,329
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2014 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef INTEL_WINSYS_H
#define INTEL_WINSYS_H
 
#include "pipe/p_compiler.h"
 
/*
 * Hardware rings a bo can be submitted to; see intel_winsys_submit_bo().
 * This is compatible with i915_drm.h's definitions.
 */
enum intel_ring_type {
   INTEL_RING_RENDER = 1,
   INTEL_RING_BSD = 2,
   INTEL_RING_BLT = 3,
   INTEL_RING_VEBOX = 4,
};
 
/*
 * Flags accepted by intel_winsys_submit_bo(); this is compatible with
 * i915_drm.h's definitions.
 */
enum intel_exec_flag {
   INTEL_EXEC_GEN7_SOL_RESET = 1 << 8,
};
 
/*
 * Flags accepted by intel_bo_add_reloc(); this is compatible with
 * i915_drm.h's definitions.
 */
enum intel_reloc_flag {
   INTEL_RELOC_FENCE = 1 << 0,
   INTEL_RELOC_GGTT = 1 << 1,
   INTEL_RELOC_WRITE = 1 << 2,
};
 
/*
 * Bo tiling modes; see intel_bo_set_tiling().  This is compatible with
 * i915_drm.h's definitions.
 */
enum intel_tiling_mode {
   INTEL_TILING_NONE = 0,
   INTEL_TILING_X = 1,
   INTEL_TILING_Y = 2,
};
 
struct winsys_handle;
struct intel_winsys;
struct intel_context;
struct intel_bo;
 
/**
 * Static capabilities of the winsys/device, returned by
 * intel_winsys_get_info().
 */
struct intel_winsys_info {
   /* PCI device id */
   int devid;

   /* the sizes of the aperture in bytes */
   size_t aperture_total;
   size_t aperture_mappable;

   /* CPU and GPU share a last-level cache */
   bool has_llc;
   /* NOTE(review): presumably bit-6 address swizzling of tiled bos -- confirm */
   bool has_address_swizzling;
   /* logical contexts are supported; see intel_winsys_create_context() */
   bool has_logical_context;
   /* NOTE(review): presumably per-process GTT support -- confirm */
   bool has_ppgtt;

   /* valid registers for intel_winsys_read_reg() */
   bool has_timestamp;

   /* valid flags for intel_winsys_submit_bo() */
   bool has_gen7_sol_reset;
};
 
/**
 * Destroy \p winsys and release its resources.
 */
void
intel_winsys_destroy(struct intel_winsys *winsys);

/**
 * Return the capability info of \p winsys.  The returned pointer is owned
 * by \p winsys.
 */
const struct intel_winsys_info *
intel_winsys_get_info(const struct intel_winsys *winsys);
 
/**
* Create a logical context for use with the render ring.
*/
struct intel_context *
intel_winsys_create_context(struct intel_winsys *winsys);
 
/**
* Destroy a logical context.
*/
void
intel_winsys_destroy_context(struct intel_winsys *winsys,
struct intel_context *ctx);
 
/**
* Read a register. Only registers that are considered safe, such as
*
* TIMESTAMP (0x2358)
*
* can be read.
*/
int
intel_winsys_read_reg(struct intel_winsys *winsys,
uint32_t reg, uint64_t *val);
 
/**
 * Return the numbers of submissions lost due to GPU reset.
 *
 * \param active_lost      Number of lost active/guilty submissions
 * \param pending_lost     Number of lost pending/innocent submissions
 */
int
intel_winsys_get_reset_stats(struct intel_winsys *winsys,
                             struct intel_context *ctx,
                             uint32_t *active_lost,
                             uint32_t *pending_lost);

/**
 * Allocate a buffer object.
 *
 * \param name             Informative description of the bo.
 * \param size             Size of the bo.
 * \param cpu_init         Will be initialized by CPU.
 */
struct intel_bo *
intel_winsys_alloc_bo(struct intel_winsys *winsys,
                      const char *name,
                      unsigned long size,
                      bool cpu_init);
 
/**
* Create a bo from a user memory pointer. Both \p userptr and \p size must
* be page aligned.
*/
struct intel_bo *
intel_winsys_import_userptr(struct intel_winsys *winsys,
const char *name,
void *userptr,
unsigned long size,
unsigned long flags);
 
/**
* Create a bo from a winsys handle.
*/
struct intel_bo *
intel_winsys_import_handle(struct intel_winsys *winsys,
const char *name,
const struct winsys_handle *handle,
unsigned long height,
enum intel_tiling_mode *tiling,
unsigned long *pitch);
 
/**
* Export \p bo as a winsys handle for inter-process sharing. \p tiling and
* \p pitch must match those set by \p intel_bo_set_tiling().
*/
int
intel_winsys_export_handle(struct intel_winsys *winsys,
struct intel_bo *bo,
enum intel_tiling_mode tiling,
unsigned long pitch,
unsigned long height,
struct winsys_handle *handle);
 
/**
* Return true when buffer objects directly specified in \p bo_array, and
* those indirectly referenced by them, can fit in the aperture space.
*/
bool
intel_winsys_can_submit_bo(struct intel_winsys *winsys,
struct intel_bo **bo_array,
int count);
 
/**
* Submit \p bo for execution.
*
* \p bo and all bos referenced by \p bo will be considered busy until all
* commands are parsed and executed. \p ctx is ignored when the bo is not
* submitted to the render ring.
*/
int
intel_winsys_submit_bo(struct intel_winsys *winsys,
enum intel_ring_type ring,
struct intel_bo *bo, int used,
struct intel_context *ctx,
unsigned long flags);
 
/**
* Decode the commands contained in \p bo. For debugging.
*
* \param bo Batch buffer to decode.
* \param used Size of the commands in bytes.
*/
void
intel_winsys_decode_bo(struct intel_winsys *winsys,
struct intel_bo *bo, int used);
 
/**
* Increase the reference count of \p bo. No-op when \p bo is NULL.
*/
struct intel_bo *
intel_bo_ref(struct intel_bo *bo);
 
/**
* Decrease the reference count of \p bo. When the reference count reaches
* zero, \p bo is destroyed. No-op when \p bo is NULL.
*/
void
intel_bo_unref(struct intel_bo *bo);
 
/**
* Set the tiling of \p bo. The info is used by GTT mapping and bo export.
*/
int
intel_bo_set_tiling(struct intel_bo *bo,
enum intel_tiling_mode tiling,
unsigned long pitch);
 
/**
* Map \p bo for CPU access. Recursive mapping is allowed.
*
* map() maps the backing store into CPU address space, cached. It will block
* if the bo is busy. This variant allows fastest random reads and writes,
* but the caller needs to handle tiling or swizzling manually if the bo is
* tiled or swizzled. If write is enabled and there is no shared last-level
* cache (LLC), the CPU cache will be flushed, which is expensive.
*
* map_gtt() maps the bo for MMIO access, uncached but write-combined. It
* will block if the bo is busy. This variant promises a reasonable speed for
* sequential writes, but reads would be very slow. Callers always have a
* linear view of the bo.
*
* map_async() and map_gtt_async() work similar to map() and map_gtt()
* respectively, except that they do not block.
*/
void *
intel_bo_map(struct intel_bo *bo, bool write_enable);
 
void *
intel_bo_map_async(struct intel_bo *bo);
 
void *
intel_bo_map_gtt(struct intel_bo *bo);
 
void *
intel_bo_map_gtt_async(struct intel_bo *bo);
 
/**
* Unmap \p bo.
*/
void
intel_bo_unmap(struct intel_bo *bo);
 
/**
* Write data to \p bo.
*/
int
intel_bo_pwrite(struct intel_bo *bo, unsigned long offset,
unsigned long size, const void *data);
 
/**
* Read data from the bo.
*/
int
intel_bo_pread(struct intel_bo *bo, unsigned long offset,
unsigned long size, void *data);
 
/**
* Add \p target_bo to the relocation list.
*
* When \p bo is submitted for execution, and if \p target_bo has moved,
* the kernel will patch \p bo at \p offset to \p target_bo->offset plus
* \p target_offset.
*
* \p presumed_offset should be written to \p bo at \p offset.
*/
int
intel_bo_add_reloc(struct intel_bo *bo, uint32_t offset,
struct intel_bo *target_bo, uint32_t target_offset,
uint32_t flags, uint64_t *presumed_offset);
 
/**
* Return the current number of relocations.
*/
int
intel_bo_get_reloc_count(struct intel_bo *bo);
 
/**
* Truncate all relocations except the first \p start ones.
*
* Combined with \p intel_bo_get_reloc_count(), they can be used to undo the
* \p intel_bo_add_reloc() calls that were just made.
*/
void
intel_bo_truncate_relocs(struct intel_bo *bo, int start);
 
/**
* Return true if \p target_bo is on the relocation list of \p bo, or on
* the relocation list of some bo that is referenced by \p bo.
*/
bool
intel_bo_has_reloc(struct intel_bo *bo, struct intel_bo *target_bo);
 
/**
 * Wait until \p bo is idle, or \p timeout nanoseconds have passed.  A
 * negative timeout means to wait indefinitely.
 *
 * \return 0 only when \p bo is idle
 */
int
intel_bo_wait(struct intel_bo *bo, int64_t timeout);
 
/**
 * Return true if \p bo is busy, i.e., a zero-timeout wait on it fails.
 */
static inline bool
intel_bo_is_busy(struct intel_bo *bo)
{
   return intel_bo_wait(bo, 0) != 0;
}
 
#endif /* INTEL_WINSYS_H */