/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/core/ilo_state_3d_top.c
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2014 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "genhw/genhw.h"
#include "util/u_dual_blend.h"
#include "util/u_framebuffer.h"
#include "util/u_half.h"
#include "util/u_resource.h"
 
#include "ilo_buffer.h"
#include "ilo_format.h"
#include "ilo_image.h"
#include "ilo_state_3d.h"
#include "../ilo_shader.h"
 
static void
ve_init_cso(const struct ilo_dev *dev,
const struct pipe_vertex_element *state,
unsigned vb_index,
struct ilo_ve_cso *cso)
{
int comp[4] = {
GEN6_VFCOMP_STORE_SRC,
GEN6_VFCOMP_STORE_SRC,
GEN6_VFCOMP_STORE_SRC,
GEN6_VFCOMP_STORE_SRC,
};
int format;
 
ILO_DEV_ASSERT(dev, 6, 8);
 
switch (util_format_get_nr_components(state->src_format)) {
case 1: comp[1] = GEN6_VFCOMP_STORE_0; /* fall through */
case 2: comp[2] = GEN6_VFCOMP_STORE_0; /* fall through */
case 3: comp[3] = (util_format_is_pure_integer(state->src_format)) ?
GEN6_VFCOMP_STORE_1_INT :
GEN6_VFCOMP_STORE_1_FP;
}
 
format = ilo_format_translate_vertex(dev, state->src_format);
 
STATIC_ASSERT(Elements(cso->payload) >= 2);
cso->payload[0] =
vb_index << GEN6_VE_DW0_VB_INDEX__SHIFT |
GEN6_VE_DW0_VALID |
format << GEN6_VE_DW0_FORMAT__SHIFT |
state->src_offset << GEN6_VE_DW0_VB_OFFSET__SHIFT;
 
cso->payload[1] =
comp[0] << GEN6_VE_DW1_COMP0__SHIFT |
comp[1] << GEN6_VE_DW1_COMP1__SHIFT |
comp[2] << GEN6_VE_DW1_COMP2__SHIFT |
comp[3] << GEN6_VE_DW1_COMP3__SHIFT;
}
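
/*
 * Illustrative example (not part of the original source): for an element
 * with src_format = PIPE_FORMAT_R32G32B32_FLOAT, only case 3 of the switch
 * above applies, so comp[] ends up as { STORE_SRC, STORE_SRC, STORE_SRC,
 * STORE_1_FP } and the missing W component is synthesized as 1.0f, which
 * matches the GL default of (0, 0, 0, 1) for absent attribute components.
 */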
 
void
ilo_gpe_init_ve(const struct ilo_dev *dev,
unsigned num_states,
const struct pipe_vertex_element *states,
struct ilo_ve_state *ve)
{
unsigned i;
 
ILO_DEV_ASSERT(dev, 6, 8);
 
ve->count = num_states;
ve->vb_count = 0;
 
for (i = 0; i < num_states; i++) {
const unsigned pipe_idx = states[i].vertex_buffer_index;
const unsigned instance_divisor = states[i].instance_divisor;
unsigned hw_idx;
 
/*
* map the pipe vb to the hardware vb, which has a fixed instance
* divisor
*/
for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
if (ve->vb_mapping[hw_idx] == pipe_idx &&
ve->instance_divisors[hw_idx] == instance_divisor)
break;
}
 
/* create one if there is no matching hardware vb */
if (hw_idx >= ve->vb_count) {
hw_idx = ve->vb_count++;
 
ve->vb_mapping[hw_idx] = pipe_idx;
ve->instance_divisors[hw_idx] = instance_divisor;
}
 
ve_init_cso(dev, &states[i], hw_idx, &ve->cso[i]);
}
}
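
/*
 * Illustrative example (not part of the original source): with two pipe
 * vertex elements that both read from pipe vertex buffer 0 but use
 * instance divisors 0 and 1, the loop above cannot share a hardware VB
 * and produces
 *
 *   ve->vb_count = 2;
 *   ve->vb_mapping[0] = 0;  ve->instance_divisors[0] = 0;
 *   ve->vb_mapping[1] = 0;  ve->instance_divisors[1] = 1;
 *
 * with ve->cso[0] and ve->cso[1] referring to hardware VB 0 and 1.
 */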
 
void
ilo_gpe_set_ve_edgeflag(const struct ilo_dev *dev,
struct ilo_ve_cso *cso)
{
int format;
 
ILO_DEV_ASSERT(dev, 6, 8);
 
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 94:
*
* "- This bit (Edge Flag Enable) must only be ENABLED on the last
* valid VERTEX_ELEMENT structure.
*
* - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
* and Component 1-3 Control must be set to VFCOMP_NOSTORE.
*
* - The Source Element Format must be set to the UINT format.
*
* - [DevSNB]: Edge Flags are not supported for QUADLIST
* primitives. Software may elect to convert QUADLIST primitives
* to some set of corresponding edge-flag-supported primitive
* types (e.g., POLYGONs) prior to submission to the 3D pipeline."
*/
cso->payload[0] |= GEN6_VE_DW0_EDGE_FLAG_ENABLE;
 
/*
* Edge flags have format GEN6_FORMAT_R8_USCALED when defined via
* glEdgeFlagPointer(), and format GEN6_FORMAT_R32_FLOAT when defined
* via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
*
* Since all the hardware cares about is whether the flags are zero or not,
* we can treat them as the corresponding _UINT formats.
*/
format = GEN_EXTRACT(cso->payload[0], GEN6_VE_DW0_FORMAT);
cso->payload[0] &= ~GEN6_VE_DW0_FORMAT__MASK;
 
switch (format) {
case GEN6_FORMAT_R32_FLOAT:
format = GEN6_FORMAT_R32_UINT;
break;
case GEN6_FORMAT_R8_USCALED:
format = GEN6_FORMAT_R8_UINT;
break;
default:
break;
}
 
cso->payload[0] |= GEN_SHIFT32(format, GEN6_VE_DW0_FORMAT);
 
cso->payload[1] =
GEN6_VFCOMP_STORE_SRC << GEN6_VE_DW1_COMP0__SHIFT |
GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP1__SHIFT |
GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP2__SHIFT |
GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP3__SHIFT;
}
 
void
ilo_gpe_init_ve_nosrc(const struct ilo_dev *dev,
int comp0, int comp1, int comp2, int comp3,
struct ilo_ve_cso *cso)
{
ILO_DEV_ASSERT(dev, 6, 8);
 
STATIC_ASSERT(Elements(cso->payload) >= 2);
 
assert(comp0 != GEN6_VFCOMP_STORE_SRC &&
comp1 != GEN6_VFCOMP_STORE_SRC &&
comp2 != GEN6_VFCOMP_STORE_SRC &&
comp3 != GEN6_VFCOMP_STORE_SRC);
 
cso->payload[0] = GEN6_VE_DW0_VALID;
cso->payload[1] =
comp0 << GEN6_VE_DW1_COMP0__SHIFT |
comp1 << GEN6_VE_DW1_COMP1__SHIFT |
comp2 << GEN6_VE_DW1_COMP2__SHIFT |
comp3 << GEN6_VE_DW1_COMP3__SHIFT;
}
 
void
ilo_gpe_init_vs_cso(const struct ilo_dev *dev,
const struct ilo_shader_state *vs,
struct ilo_shader_cso *cso)
{
int start_grf, vue_read_len, sampler_count, max_threads;
uint32_t dw2, dw4, dw5;
 
ILO_DEV_ASSERT(dev, 6, 8);
 
start_grf = ilo_shader_get_kernel_param(vs, ILO_KERNEL_URB_DATA_START_REG);
vue_read_len = ilo_shader_get_kernel_param(vs, ILO_KERNEL_INPUT_COUNT);
sampler_count = ilo_shader_get_kernel_param(vs, ILO_KERNEL_SAMPLER_COUNT);
 
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 135:
*
* "(Vertex URB Entry Read Length) Specifies the number of pairs of
* 128-bit vertex elements to be passed into the payload for each
* vertex."
*
* "It is UNDEFINED to set this field to 0 indicating no Vertex URB
* data to be read and passed to the thread."
*/
vue_read_len = (vue_read_len + 1) / 2;
if (!vue_read_len)
vue_read_len = 1;
 
max_threads = dev->thread_count;
if (ilo_dev_gen(dev) == ILO_GEN(7.5) && dev->gt == 2)
max_threads *= 2;
 
dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
 
dw4 = start_grf << GEN6_VS_DW4_URB_GRF_START__SHIFT |
vue_read_len << GEN6_VS_DW4_URB_READ_LEN__SHIFT |
0 << GEN6_VS_DW4_URB_READ_OFFSET__SHIFT;
 
dw5 = GEN6_VS_DW5_STATISTICS |
GEN6_VS_DW5_VS_ENABLE;
 
if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
dw5 |= (max_threads - 1) << GEN75_VS_DW5_MAX_THREADS__SHIFT;
else
dw5 |= (max_threads - 1) << GEN6_VS_DW5_MAX_THREADS__SHIFT;
 
STATIC_ASSERT(Elements(cso->payload) >= 3);
cso->payload[0] = dw2;
cso->payload[1] = dw4;
cso->payload[2] = dw5;
}
 
static void
gs_init_cso_gen6(const struct ilo_dev *dev,
const struct ilo_shader_state *gs,
struct ilo_shader_cso *cso)
{
int start_grf, vue_read_len, max_threads;
uint32_t dw2, dw4, dw5, dw6;
 
ILO_DEV_ASSERT(dev, 6, 6);
 
if (ilo_shader_get_type(gs) == PIPE_SHADER_GEOMETRY) {
start_grf = ilo_shader_get_kernel_param(gs,
ILO_KERNEL_URB_DATA_START_REG);
 
vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);
}
else {
start_grf = ilo_shader_get_kernel_param(gs,
ILO_KERNEL_VS_GEN6_SO_START_REG);
 
vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_OUTPUT_COUNT);
}
 
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 153:
*
* "Specifies the amount of URB data read and passed in the thread
* payload for each Vertex URB entry, in 256-bit register increments.
*
* It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
* 0 indicating no Vertex URB data to be read and passed to the
* thread."
*/
vue_read_len = (vue_read_len + 1) / 2;
if (!vue_read_len)
vue_read_len = 1;
 
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 154:
*
* "Maximum Number of Threads valid range is [0,27] when Rendering
* Enabled bit is set."
*
* From the Sandy Bridge PRM, volume 2 part 1, page 173:
*
* "Programming Note: If the GS stage is enabled, software must always
* allocate at least one GS URB Entry. This is true even if the GS
* thread never needs to output vertices to the pipeline, e.g., when
* only performing stream output. This is an artifact of the need to
* pass the GS thread an initial destination URB handle."
*
* As such, we always enable rendering, and limit the number of threads.
*/
if (dev->gt == 2) {
/* maximum is 60, but limited to 28 */
max_threads = 28;
}
else {
/* maximum is 24, but limited to 21 (see brwCreateContext()) */
max_threads = 21;
}
 
dw2 = GEN6_THREADDISP_SPF;
 
dw4 = vue_read_len << GEN6_GS_DW4_URB_READ_LEN__SHIFT |
0 << GEN6_GS_DW4_URB_READ_OFFSET__SHIFT |
start_grf << GEN6_GS_DW4_URB_GRF_START__SHIFT;
 
dw5 = (max_threads - 1) << GEN6_GS_DW5_MAX_THREADS__SHIFT |
GEN6_GS_DW5_STATISTICS |
GEN6_GS_DW5_SO_STATISTICS |
GEN6_GS_DW5_RENDER_ENABLE;
 
/*
* we cannot make use of GEN6_GS_REORDER because it will reorder
* triangle strips according to D3D rules (triangle 2N+1 uses vertices
* (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices
* (2N+2, 2N+1, 2N+3)).
*/
dw6 = GEN6_GS_DW6_GS_ENABLE;
 
if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_DISCARD_ADJACENCY))
dw6 |= GEN6_GS_DW6_DISCARD_ADJACENCY;
 
if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_VS_GEN6_SO)) {
const uint32_t svbi_post_inc =
ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_GEN6_SVBI_POST_INC);
 
dw6 |= GEN6_GS_DW6_SVBI_PAYLOAD_ENABLE;
if (svbi_post_inc) {
dw6 |= GEN6_GS_DW6_SVBI_POST_INC_ENABLE |
svbi_post_inc << GEN6_GS_DW6_SVBI_POST_INC_VAL__SHIFT;
}
}
 
STATIC_ASSERT(Elements(cso->payload) >= 4);
cso->payload[0] = dw2;
cso->payload[1] = dw4;
cso->payload[2] = dw5;
cso->payload[3] = dw6;
}
 
static void
gs_init_cso_gen7(const struct ilo_dev *dev,
const struct ilo_shader_state *gs,
struct ilo_shader_cso *cso)
{
int start_grf, vue_read_len, sampler_count, max_threads;
uint32_t dw2, dw4, dw5;
 
ILO_DEV_ASSERT(dev, 7, 7.5);
 
start_grf = ilo_shader_get_kernel_param(gs, ILO_KERNEL_URB_DATA_START_REG);
vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);
sampler_count = ilo_shader_get_kernel_param(gs, ILO_KERNEL_SAMPLER_COUNT);
 
/* in pairs */
vue_read_len = (vue_read_len + 1) / 2;
 
switch (ilo_dev_gen(dev)) {
case ILO_GEN(7.5):
max_threads = (dev->gt >= 2) ? 256 : 70;
break;
case ILO_GEN(7):
max_threads = (dev->gt == 2) ? 128 : 36;
break;
default:
max_threads = 1;
break;
}
 
dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
 
dw4 = vue_read_len << GEN7_GS_DW4_URB_READ_LEN__SHIFT |
GEN7_GS_DW4_INCLUDE_VERTEX_HANDLES |
0 << GEN7_GS_DW4_URB_READ_OFFSET__SHIFT |
start_grf << GEN7_GS_DW4_URB_GRF_START__SHIFT;
 
dw5 = (max_threads - 1) << GEN7_GS_DW5_MAX_THREADS__SHIFT |
GEN7_GS_DW5_STATISTICS |
GEN7_GS_DW5_GS_ENABLE;
 
STATIC_ASSERT(Elements(cso->payload) >= 3);
cso->payload[0] = dw2;
cso->payload[1] = dw4;
cso->payload[2] = dw5;
}
 
void
ilo_gpe_init_gs_cso(const struct ilo_dev *dev,
const struct ilo_shader_state *gs,
struct ilo_shader_cso *cso)
{
if (ilo_dev_gen(dev) >= ILO_GEN(7))
gs_init_cso_gen7(dev, gs, cso);
else
gs_init_cso_gen6(dev, gs, cso);
}
 
static void
view_init_null_gen6(const struct ilo_dev *dev,
unsigned width, unsigned height,
unsigned depth, unsigned level,
struct ilo_view_surface *surf)
{
uint32_t *dw;
 
ILO_DEV_ASSERT(dev, 6, 6);
 
assert(width >= 1 && height >= 1 && depth >= 1);
 
/*
* From the Sandy Bridge PRM, volume 4 part 1, page 71:
*
* "A null surface will be used in instances where an actual surface is
* not bound. When a write message is generated to a null surface, no
* actual surface is written to. When a read message (including any
* sampling engine message) is generated to a null surface, the result
* is all zeros. Note that a null surface type is allowed to be used
* with all messages, even if it is not specifically indicated as
* supported. All of the remaining fields in surface state are ignored
* for null surfaces, with the following exceptions:
*
* * [DevSNB+]: Width, Height, Depth, and LOD fields must match the
* depth buffer's corresponding state for all render target
* surfaces, including null.
* * Surface Format must be R8G8B8A8_UNORM."
*
* From the Sandy Bridge PRM, volume 4 part 1, page 82:
*
* "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
* true"
*/
 
STATIC_ASSERT(Elements(surf->payload) >= 6);
dw = surf->payload;
 
dw[0] = GEN6_SURFTYPE_NULL << GEN6_SURFACE_DW0_TYPE__SHIFT |
GEN6_FORMAT_B8G8R8A8_UNORM << GEN6_SURFACE_DW0_FORMAT__SHIFT;
 
dw[1] = 0;
 
dw[2] = (height - 1) << GEN6_SURFACE_DW2_HEIGHT__SHIFT |
(width - 1) << GEN6_SURFACE_DW2_WIDTH__SHIFT |
level << GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT;
 
dw[3] = (depth - 1) << GEN6_SURFACE_DW3_DEPTH__SHIFT |
GEN6_TILING_X;
 
dw[4] = 0;
dw[5] = 0;
}
 
static void
view_init_for_buffer_gen6(const struct ilo_dev *dev,
const struct ilo_buffer *buf,
unsigned offset, unsigned size,
unsigned struct_size,
enum pipe_format elem_format,
bool is_rt, bool render_cache_rw,
struct ilo_view_surface *surf)
{
const int elem_size = util_format_get_blocksize(elem_format);
int width, height, depth, pitch;
int surface_format, num_entries;
uint32_t *dw;
 
ILO_DEV_ASSERT(dev, 6, 6);
 
/*
* For SURFTYPE_BUFFER, a SURFACE_STATE specifies an element of a
* structure in a buffer.
*/
 
surface_format = ilo_format_translate_color(dev, elem_format);
 
num_entries = size / struct_size;
/* see if there is enough space to fit another element */
if (size % struct_size >= elem_size)
num_entries++;
 
/*
* From the Sandy Bridge PRM, volume 4 part 1, page 76:
*
* "For SURFTYPE_BUFFER render targets, this field (Surface Base
* Address) specifies the base address of first element of the
* surface. The surface is interpreted as a simple array of that
* single element type. The address must be naturally-aligned to the
* element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
* must be 16-byte aligned).
*
* For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
* the base address of the first element of the surface, computed in
* software by adding the surface base address to the byte offset of
* the element in the buffer."
*/
if (is_rt)
assert(offset % elem_size == 0);
 
/*
* From the Sandy Bridge PRM, volume 4 part 1, page 77:
*
* "For buffer surfaces, the number of entries in the buffer ranges
* from 1 to 2^27."
*/
assert(num_entries >= 1 && num_entries <= 1 << 27);
 
/*
* From the Sandy Bridge PRM, volume 4 part 1, page 81:
*
* "For surfaces of type SURFTYPE_BUFFER, this field (Surface Pitch)
* indicates the size of the structure."
*/
pitch = struct_size;
 
pitch--;
num_entries--;
/* bits [6:0] */
width = (num_entries & 0x0000007f);
/* bits [19:7] */
height = (num_entries & 0x000fff80) >> 7;
/* bits [26:20] */
depth = (num_entries & 0x07f00000) >> 20;
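
/*
 * Worked example (illustrative, numbers made up): size = 4000 and
 * struct_size = 16 give num_entries = 250.  After the decrements above,
 * num_entries = 249 = 0x0f9, which splits into
 *
 *   width  = 249 & 0x7f               = 121  (bits  6:0)
 *   height = (249 & 0x000fff80) >> 7  =   1  (bits 19:7)
 *   depth  = (249 & 0x07f00000) >> 20 =   0  (bits 26:20)
 *
 * i.e. (depth << 20 | height << 7 | width) + 1 = 250 entries.
 */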
 
STATIC_ASSERT(Elements(surf->payload) >= 6);
dw = surf->payload;
 
dw[0] = GEN6_SURFTYPE_BUFFER << GEN6_SURFACE_DW0_TYPE__SHIFT |
surface_format << GEN6_SURFACE_DW0_FORMAT__SHIFT;
if (render_cache_rw)
dw[0] |= GEN6_SURFACE_DW0_RENDER_CACHE_RW;
 
dw[1] = offset;
 
dw[2] = height << GEN6_SURFACE_DW2_HEIGHT__SHIFT |
width << GEN6_SURFACE_DW2_WIDTH__SHIFT;
 
dw[3] = depth << GEN6_SURFACE_DW3_DEPTH__SHIFT |
pitch << GEN6_SURFACE_DW3_PITCH__SHIFT;
 
dw[4] = 0;
dw[5] = 0;
}
 
static void
view_init_for_image_gen6(const struct ilo_dev *dev,
const struct ilo_image *img,
enum pipe_texture_target target,
enum pipe_format format,
unsigned first_level,
unsigned num_levels,
unsigned first_layer,
unsigned num_layers,
bool is_rt,
struct ilo_view_surface *surf)
{
int surface_type, surface_format;
int width, height, depth, pitch, lod;
uint32_t *dw;
 
ILO_DEV_ASSERT(dev, 6, 6);
 
surface_type = ilo_gpe_gen6_translate_texture(target);
assert(surface_type != GEN6_SURFTYPE_BUFFER);
 
if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && img->separate_stencil)
format = PIPE_FORMAT_Z32_FLOAT;
 
if (is_rt)
surface_format = ilo_format_translate_render(dev, format);
else
surface_format = ilo_format_translate_texture(dev, format);
assert(surface_format >= 0);
 
width = img->width0;
height = img->height0;
depth = (target == PIPE_TEXTURE_3D) ? img->depth0 : num_layers;
pitch = img->bo_stride;
 
if (surface_type == GEN6_SURFTYPE_CUBE) {
/*
* From the Sandy Bridge PRM, volume 4 part 1, page 81:
*
* "For SURFTYPE_CUBE: [DevSNB+]: for Sampling Engine Surfaces, the
* range of this field (Depth) is [0,84], indicating the number of
* cube array elements (equal to the number of underlying 2D array
* elements divided by 6). For other surfaces, this field must be
* zero."
*
* When is_rt is true, we treat the texture as a 2D one to avoid the
* restriction.
*/
if (is_rt) {
surface_type = GEN6_SURFTYPE_2D;
}
else {
assert(num_layers % 6 == 0);
depth = num_layers / 6;
}
}
 
/* sanity check the size */
assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
switch (surface_type) {
case GEN6_SURFTYPE_1D:
assert(width <= 8192 && height == 1 && depth <= 512);
assert(first_layer < 512 && num_layers <= 512);
break;
case GEN6_SURFTYPE_2D:
assert(width <= 8192 && height <= 8192 && depth <= 512);
assert(first_layer < 512 && num_layers <= 512);
break;
case GEN6_SURFTYPE_3D:
assert(width <= 2048 && height <= 2048 && depth <= 2048);
assert(first_layer < 2048 && num_layers <= 512);
if (!is_rt)
assert(first_layer == 0);
break;
case GEN6_SURFTYPE_CUBE:
assert(width <= 8192 && height <= 8192 && depth <= 85);
assert(width == height);
assert(first_layer < 512 && num_layers <= 512);
if (is_rt)
assert(first_layer == 0);
break;
default:
assert(!"unexpected surface type");
break;
}
 
/* non-full array spacing is supported only on GEN7+ */
assert(img->walk != ILO_IMAGE_WALK_LOD);
/* non-interleaved samples are supported only on GEN7+ */
if (img->sample_count > 1)
assert(img->interleaved_samples);
 
if (is_rt) {
assert(num_levels == 1);
lod = first_level;
}
else {
lod = num_levels - 1;
}
 
/*
* From the Sandy Bridge PRM, volume 4 part 1, page 76:
*
* "Linear render target surface base addresses must be element-size
* aligned, for non-YUV surface formats, or a multiple of 2
* element-sizes for YUV surface formats. Other linear surfaces have
* no alignment requirements (byte alignment is sufficient.)"
*
* From the Sandy Bridge PRM, volume 4 part 1, page 81:
*
* "For linear render target surfaces, the pitch must be a multiple
* of the element size for non-YUV surface formats. Pitch must be a
* multiple of 2 * element size for YUV surface formats."
*
* From the Sandy Bridge PRM, volume 4 part 1, page 86:
*
* "For linear surfaces, this field (X Offset) must be zero"
*/
if (img->tiling == GEN6_TILING_NONE) {
if (is_rt) {
const int elem_size = util_format_get_blocksize(format);
assert(pitch % elem_size == 0);
}
}
 
STATIC_ASSERT(Elements(surf->payload) >= 6);
dw = surf->payload;
 
dw[0] = surface_type << GEN6_SURFACE_DW0_TYPE__SHIFT |
surface_format << GEN6_SURFACE_DW0_FORMAT__SHIFT |
GEN6_SURFACE_DW0_MIPLAYOUT_BELOW;
 
if (surface_type == GEN6_SURFTYPE_CUBE && !is_rt) {
dw[0] |= 1 << 9 |
GEN6_SURFACE_DW0_CUBE_FACE_ENABLES__MASK;
}
 
if (is_rt)
dw[0] |= GEN6_SURFACE_DW0_RENDER_CACHE_RW;
 
dw[1] = 0;
 
dw[2] = (height - 1) << GEN6_SURFACE_DW2_HEIGHT__SHIFT |
(width - 1) << GEN6_SURFACE_DW2_WIDTH__SHIFT |
lod << GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT;
 
assert(img->tiling != GEN8_TILING_W);
dw[3] = (depth - 1) << GEN6_SURFACE_DW3_DEPTH__SHIFT |
(pitch - 1) << GEN6_SURFACE_DW3_PITCH__SHIFT |
img->tiling;
 
dw[4] = first_level << GEN6_SURFACE_DW4_MIN_LOD__SHIFT |
first_layer << 17 |
(num_layers - 1) << 8 |
((img->sample_count > 1) ? GEN6_SURFACE_DW4_MULTISAMPLECOUNT_4 :
GEN6_SURFACE_DW4_MULTISAMPLECOUNT_1);
 
dw[5] = 0;
 
assert(img->align_j == 2 || img->align_j == 4);
if (img->align_j == 4)
dw[5] |= GEN6_SURFACE_DW5_VALIGN_4;
}
 
static void
view_init_null_gen7(const struct ilo_dev *dev,
unsigned width, unsigned height,
unsigned depth, unsigned level,
struct ilo_view_surface *surf)
{
uint32_t *dw;
 
ILO_DEV_ASSERT(dev, 7, 8);
 
assert(width >= 1 && height >= 1 && depth >= 1);
 
/*
* From the Ivy Bridge PRM, volume 4 part 1, page 62:
*
* "A null surface is used in instances where an actual surface is not
* bound. When a write message is generated to a null surface, no
* actual surface is written to. When a read message (including any
* sampling engine message) is generated to a null surface, the result
* is all zeros. Note that a null surface type is allowed to be used
* with all messages, even if it is not specifically indicated as
* supported. All of the remaining fields in surface state are ignored
* for null surfaces, with the following exceptions:
*
* * Width, Height, Depth, LOD, and Render Target View Extent fields
* must match the depth buffer's corresponding state for all render
* target surfaces, including null.
* * All sampling engine and data port messages support null surfaces
* with the above behavior, even if not mentioned as specifically
* supported, except for the following:
* * Data Port Media Block Read/Write messages.
* * The Surface Type of a surface used as a render target (accessed
* via the Data Port's Render Target Write message) must be the same
* as the Surface Type of all other render targets and of the depth
* buffer (defined in 3DSTATE_DEPTH_BUFFER), unless either the depth
* buffer or render targets are SURFTYPE_NULL."
*
* From the Ivy Bridge PRM, volume 4 part 1, page 65:
*
* "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
* true"
*/
 
STATIC_ASSERT(Elements(surf->payload) >= 13);
dw = surf->payload;
 
dw[0] = GEN6_SURFTYPE_NULL << GEN7_SURFACE_DW0_TYPE__SHIFT |
GEN6_FORMAT_B8G8R8A8_UNORM << GEN7_SURFACE_DW0_FORMAT__SHIFT;
 
if (ilo_dev_gen(dev) >= ILO_GEN(8))
dw[0] |= GEN6_TILING_X << GEN8_SURFACE_DW0_TILING__SHIFT;
else
dw[0] |= GEN6_TILING_X << GEN7_SURFACE_DW0_TILING__SHIFT;
 
dw[1] = 0;
 
dw[2] = GEN_SHIFT32(height - 1, GEN7_SURFACE_DW2_HEIGHT) |
GEN_SHIFT32(width - 1, GEN7_SURFACE_DW2_WIDTH);
 
dw[3] = GEN_SHIFT32(depth - 1, GEN7_SURFACE_DW3_DEPTH);
 
dw[4] = 0;
dw[5] = level;
 
dw[6] = 0;
dw[7] = 0;
 
if (ilo_dev_gen(dev) >= ILO_GEN(8))
memset(&dw[8], 0, sizeof(*dw) * (13 - 8));
}
 
static void
view_init_for_buffer_gen7(const struct ilo_dev *dev,
const struct ilo_buffer *buf,
unsigned offset, unsigned size,
unsigned struct_size,
enum pipe_format elem_format,
bool is_rt, bool render_cache_rw,
struct ilo_view_surface *surf)
{
const bool typed = (elem_format != PIPE_FORMAT_NONE);
const bool structured = (!typed && struct_size > 1);
const int elem_size = (typed) ?
util_format_get_blocksize(elem_format) : 1;
int width, height, depth, pitch;
int surface_type, surface_format, num_entries;
uint32_t *dw;
 
ILO_DEV_ASSERT(dev, 7, 8);
 
surface_type = (structured) ? GEN7_SURFTYPE_STRBUF : GEN6_SURFTYPE_BUFFER;
 
surface_format = (typed) ?
ilo_format_translate_color(dev, elem_format) : GEN6_FORMAT_RAW;
 
num_entries = size / struct_size;
/* see if there is enough space to fit another element */
if (size % struct_size >= elem_size && !structured)
num_entries++;
 
/*
* From the Ivy Bridge PRM, volume 4 part 1, page 67:
*
* "For SURFTYPE_BUFFER render targets, this field (Surface Base
* Address) specifies the base address of first element of the
* surface. The surface is interpreted as a simple array of that
* single element type. The address must be naturally-aligned to the
* element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
* must be 16-byte aligned)
*
* For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
* the base address of the first element of the surface, computed in
* software by adding the surface base address to the byte offset of
* the element in the buffer."
*/
if (is_rt)
assert(offset % elem_size == 0);
 
/*
* From the Ivy Bridge PRM, volume 4 part 1, page 68:
*
* "For typed buffer and structured buffer surfaces, the number of
* entries in the buffer ranges from 1 to 2^27. For raw buffer
* surfaces, the number of entries in the buffer is the number of
* bytes which can range from 1 to 2^30."
*/
assert(num_entries >= 1 &&
num_entries <= 1 << ((typed || structured) ? 27 : 30));
 
/*
* From the Ivy Bridge PRM, volume 4 part 1, page 69:
*
* "For SURFTYPE_BUFFER: The low two bits of this field (Width) must be
* 11 if the Surface Format is RAW (the size of the buffer must be a
* multiple of 4 bytes)."
*
* From the Ivy Bridge PRM, volume 4 part 1, page 70:
*
* "For surfaces of type SURFTYPE_BUFFER and SURFTYPE_STRBUF, this
* field (Surface Pitch) indicates the size of the structure."
*
* "For linear surfaces with Surface Type of SURFTYPE_STRBUF, the pitch
* must be a multiple of 4 bytes."
*/
if (structured)
assert(struct_size % 4 == 0);
else if (!typed)
assert(num_entries % 4 == 0);
 
pitch = struct_size;
 
pitch--;
num_entries--;
/* bits [6:0] */
width = (num_entries & 0x0000007f);
/* bits [20:7] */
height = (num_entries & 0x001fff80) >> 7;
/* bits [30:21] */
depth = (num_entries & 0x7fe00000) >> 21;
/* limit to [26:21] */
if (typed || structured)
depth &= 0x3f;
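
/*
 * Illustrative note (not part of the original source): for a RAW buffer
 * (elem_format == PIPE_FORMAT_NONE, struct_size == 1), num_entries is the
 * size in bytes and may use all of bits [30:0]; e.g. size = 1 << 28 packs
 * as width = 0x7f, height = 0x3fff, depth = 0x7f after the decrement.
 * Typed and structured buffers are limited to bits [26:0], hence the
 * "depth &= 0x3f" above.
 */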
 
STATIC_ASSERT(Elements(surf->payload) >= 13);
dw = surf->payload;
 
dw[0] = surface_type << GEN7_SURFACE_DW0_TYPE__SHIFT |
surface_format << GEN7_SURFACE_DW0_FORMAT__SHIFT;
if (render_cache_rw)
dw[0] |= GEN7_SURFACE_DW0_RENDER_CACHE_RW;
 
if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
dw[8] = offset;
memset(&dw[9], 0, sizeof(*dw) * (13 - 9));
} else {
dw[1] = offset;
}
 
dw[2] = GEN_SHIFT32(height, GEN7_SURFACE_DW2_HEIGHT) |
GEN_SHIFT32(width, GEN7_SURFACE_DW2_WIDTH);
 
dw[3] = GEN_SHIFT32(depth, GEN7_SURFACE_DW3_DEPTH) |
pitch;
 
dw[4] = 0;
dw[5] = 0;
 
dw[6] = 0;
dw[7] = 0;
 
if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
dw[7] |= GEN_SHIFT32(GEN75_SCS_RED, GEN75_SURFACE_DW7_SCS_R) |
GEN_SHIFT32(GEN75_SCS_GREEN, GEN75_SURFACE_DW7_SCS_G) |
GEN_SHIFT32(GEN75_SCS_BLUE, GEN75_SURFACE_DW7_SCS_B) |
GEN_SHIFT32(GEN75_SCS_ALPHA, GEN75_SURFACE_DW7_SCS_A);
}
}
 
static void
view_init_for_image_gen7(const struct ilo_dev *dev,
const struct ilo_image *img,
enum pipe_texture_target target,
enum pipe_format format,
unsigned first_level,
unsigned num_levels,
unsigned first_layer,
unsigned num_layers,
bool is_rt,
struct ilo_view_surface *surf)
{
int surface_type, surface_format;
int width, height, depth, pitch, lod;
uint32_t *dw;
 
ILO_DEV_ASSERT(dev, 7, 8);
 
surface_type = ilo_gpe_gen6_translate_texture(target);
assert(surface_type != GEN6_SURFTYPE_BUFFER);
 
if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && img->separate_stencil)
format = PIPE_FORMAT_Z32_FLOAT;
 
if (is_rt)
surface_format = ilo_format_translate_render(dev, format);
else
surface_format = ilo_format_translate_texture(dev, format);
assert(surface_format >= 0);
 
width = img->width0;
height = img->height0;
depth = (target == PIPE_TEXTURE_3D) ? img->depth0 : num_layers;
pitch = img->bo_stride;
 
if (surface_type == GEN6_SURFTYPE_CUBE) {
/*
* From the Ivy Bridge PRM, volume 4 part 1, page 70:
*
* "For SURFTYPE_CUBE:For Sampling Engine Surfaces, the range of
* this field is [0,340], indicating the number of cube array
* elements (equal to the number of underlying 2D array elements
* divided by 6). For other surfaces, this field must be zero."
*
* When is_rt is true, we treat the texture as a 2D one to avoid the
* restriction.
*/
if (is_rt) {
surface_type = GEN6_SURFTYPE_2D;
}
else {
assert(num_layers % 6 == 0);
depth = num_layers / 6;
}
}
 
/* sanity check the size */
assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
assert(first_layer < 2048 && num_layers <= 2048);
switch (surface_type) {
case GEN6_SURFTYPE_1D:
assert(width <= 16384 && height == 1 && depth <= 2048);
break;
case GEN6_SURFTYPE_2D:
assert(width <= 16384 && height <= 16384 && depth <= 2048);
break;
case GEN6_SURFTYPE_3D:
assert(width <= 2048 && height <= 2048 && depth <= 2048);
if (!is_rt)
assert(first_layer == 0);
break;
case GEN6_SURFTYPE_CUBE:
assert(width <= 16384 && height <= 16384 && depth <= 86);
assert(width == height);
if (is_rt)
assert(first_layer == 0);
break;
default:
assert(!"unexpected surface type");
break;
}
 
if (is_rt) {
assert(num_levels == 1);
lod = first_level;
}
else {
lod = num_levels - 1;
}
 
/*
* From the Ivy Bridge PRM, volume 4 part 1, page 68:
*
* "The Base Address for linear render target surfaces and surfaces
* accessed with the typed surface read/write data port messages must
* be element-size aligned, for non-YUV surface formats, or a multiple
* of 2 element-sizes for YUV surface formats. Other linear surfaces
* have no alignment requirements (byte alignment is sufficient)."
*
* From the Ivy Bridge PRM, volume 4 part 1, page 70:
*
* "For linear render target surfaces and surfaces accessed with the
* typed data port messages, the pitch must be a multiple of the
* element size for non-YUV surface formats. Pitch must be a multiple
* of 2 * element size for YUV surface formats. For linear surfaces
* with Surface Type of SURFTYPE_STRBUF, the pitch must be a multiple
* of 4 bytes. For other linear surfaces, the pitch can be any multiple
* of bytes."
*
* From the Ivy Bridge PRM, volume 4 part 1, page 74:
*
* "For linear surfaces, this field (X Offset) must be zero."
*/
if (img->tiling == GEN6_TILING_NONE) {
if (is_rt) {
const int elem_size = util_format_get_blocksize(format);
assert(pitch % elem_size == 0);
}
}
 
STATIC_ASSERT(Elements(surf->payload) >= 13);
dw = surf->payload;
 
dw[0] = surface_type << GEN7_SURFACE_DW0_TYPE__SHIFT |
surface_format << GEN7_SURFACE_DW0_FORMAT__SHIFT;
 
/*
* From the Ivy Bridge PRM, volume 4 part 1, page 63:
*
* "If this field (Surface Array) is enabled, the Surface Type must be
* SURFTYPE_1D, SURFTYPE_2D, or SURFTYPE_CUBE. If this field is
* disabled and Surface Type is SURFTYPE_1D, SURFTYPE_2D, or
* SURFTYPE_CUBE, the Depth field must be set to zero."
*
* For non-3D sampler surfaces, resinfo (the sampler message) always
* returns zero for the number of layers when this field is not set.
*/
if (surface_type != GEN6_SURFTYPE_3D) {
switch (target) {
case PIPE_TEXTURE_1D_ARRAY:
case PIPE_TEXTURE_2D_ARRAY:
case PIPE_TEXTURE_CUBE_ARRAY:
dw[0] |= GEN7_SURFACE_DW0_IS_ARRAY;
break;
default:
assert(depth == 1);
break;
}
}
 
if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
switch (img->align_j) {
case 4:
dw[0] |= GEN7_SURFACE_DW0_VALIGN_4;
break;
case 8:
dw[0] |= GEN8_SURFACE_DW0_VALIGN_8;
break;
case 16:
dw[0] |= GEN8_SURFACE_DW0_VALIGN_16;
break;
default:
assert(!"unsupported valign");
break;
}
 
switch (img->align_i) {
case 4:
dw[0] |= GEN8_SURFACE_DW0_HALIGN_4;
break;
case 8:
dw[0] |= GEN8_SURFACE_DW0_HALIGN_8;
break;
case 16:
dw[0] |= GEN8_SURFACE_DW0_HALIGN_16;
break;
default:
assert(!"unsupported halign");
break;
}
 
dw[0] |= img->tiling << GEN8_SURFACE_DW0_TILING__SHIFT;
} else {
assert(img->align_i == 4 || img->align_i == 8);
assert(img->align_j == 2 || img->align_j == 4);
 
if (img->align_j == 4)
dw[0] |= GEN7_SURFACE_DW0_VALIGN_4;
 
if (img->align_i == 8)
dw[0] |= GEN7_SURFACE_DW0_HALIGN_8;
 
assert(img->tiling != GEN8_TILING_W);
dw[0] |= img->tiling << GEN7_SURFACE_DW0_TILING__SHIFT;
 
if (img->walk == ILO_IMAGE_WALK_LOD)
dw[0] |= GEN7_SURFACE_DW0_ARYSPC_LOD0;
else
dw[0] |= GEN7_SURFACE_DW0_ARYSPC_FULL;
}
 
if (is_rt)
dw[0] |= GEN7_SURFACE_DW0_RENDER_CACHE_RW;
 
if (surface_type == GEN6_SURFTYPE_CUBE && !is_rt)
dw[0] |= GEN7_SURFACE_DW0_CUBE_FACE_ENABLES__MASK;
 
if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
assert(img->walk_layer_height % 4 == 0);
dw[1] = img->walk_layer_height / 4;
} else {
dw[1] = 0;
}
 
dw[2] = GEN_SHIFT32(height - 1, GEN7_SURFACE_DW2_HEIGHT) |
GEN_SHIFT32(width - 1, GEN7_SURFACE_DW2_WIDTH);
 
dw[3] = GEN_SHIFT32(depth - 1, GEN7_SURFACE_DW3_DEPTH) |
(pitch - 1);
 
dw[4] = first_layer << 18 |
(num_layers - 1) << 7;
 
/*
* MSFMT_MSS means the samples are not interleaved and MSFMT_DEPTH_STENCIL
* means the samples are interleaved. The layouts are the same when the
* number of samples is 1.
*/
if (img->interleaved_samples && img->sample_count > 1) {
assert(!is_rt);
dw[4] |= GEN7_SURFACE_DW4_MSFMT_DEPTH_STENCIL;
}
else {
dw[4] |= GEN7_SURFACE_DW4_MSFMT_MSS;
}
 
switch (img->sample_count) {
case 0:
case 1:
default:
dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_1;
break;
case 2:
dw[4] |= GEN8_SURFACE_DW4_MULTISAMPLECOUNT_2;
break;
case 4:
dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_4;
break;
case 8:
dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_8;
break;
case 16:
dw[4] |= GEN8_SURFACE_DW4_MULTISAMPLECOUNT_16;
break;
}
 
dw[5] = GEN_SHIFT32(first_level, GEN7_SURFACE_DW5_MIN_LOD) |
lod;
 
dw[6] = 0;
dw[7] = 0;
 
if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
dw[7] |= GEN_SHIFT32(GEN75_SCS_RED, GEN75_SURFACE_DW7_SCS_R) |
GEN_SHIFT32(GEN75_SCS_GREEN, GEN75_SURFACE_DW7_SCS_G) |
GEN_SHIFT32(GEN75_SCS_BLUE, GEN75_SURFACE_DW7_SCS_B) |
GEN_SHIFT32(GEN75_SCS_ALPHA, GEN75_SURFACE_DW7_SCS_A);
}
 
if (ilo_dev_gen(dev) >= ILO_GEN(8))
memset(&dw[8], 0, sizeof(*dw) * (13 - 8));
}
 
void
ilo_gpe_init_view_surface_null(const struct ilo_dev *dev,
unsigned width, unsigned height,
unsigned depth, unsigned level,
struct ilo_view_surface *surf)
{
if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
view_init_null_gen7(dev,
width, height, depth, level, surf);
} else {
view_init_null_gen6(dev,
width, height, depth, level, surf);
}
 
surf->bo = NULL;
surf->scanout = false;
}
 
void
ilo_gpe_init_view_surface_for_buffer(const struct ilo_dev *dev,
const struct ilo_buffer *buf,
unsigned offset, unsigned size,
unsigned struct_size,
enum pipe_format elem_format,
bool is_rt, bool render_cache_rw,
struct ilo_view_surface *surf)
{
if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
view_init_for_buffer_gen7(dev, buf, offset, size,
struct_size, elem_format, is_rt, render_cache_rw, surf);
} else {
view_init_for_buffer_gen6(dev, buf, offset, size,
struct_size, elem_format, is_rt, render_cache_rw, surf);
}
 
/* do not increment reference count */
surf->bo = buf->bo;
surf->scanout = false;
}
 
void
ilo_gpe_init_view_surface_for_image(const struct ilo_dev *dev,
const struct ilo_image *img,
enum pipe_texture_target target,
enum pipe_format format,
unsigned first_level,
unsigned num_levels,
unsigned first_layer,
unsigned num_layers,
bool is_rt,
struct ilo_view_surface *surf)
{
if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
view_init_for_image_gen7(dev, img, target, format,
first_level, num_levels, first_layer, num_layers,
is_rt, surf);
} else {
view_init_for_image_gen6(dev, img, target, format,
first_level, num_levels, first_layer, num_layers,
is_rt, surf);
}
 
surf->scanout = img->scanout;
/* do not increment reference count */
surf->bo = img->bo;
}
 
static void
sampler_init_border_color_gen6(const struct ilo_dev *dev,
const union pipe_color_union *color,
uint32_t *dw, int num_dwords)
{
float rgba[4] = {
color->f[0], color->f[1], color->f[2], color->f[3],
};
 
ILO_DEV_ASSERT(dev, 6, 6);
 
assert(num_dwords >= 12);
 
/*
* This state is not documented in the Sandy Bridge PRM, but in the
* Ironlake PRM. SNORM8 seems to be in DW11 instead of DW1.
*/
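
/*
 * DWord layout used below (summary for reference):
 *
 *   dw[0]          UNORM8   RGBA
 *   dw[1] - dw[4]  IEEE_FP  R, G, B, A
 *   dw[5] - dw[6]  FLOAT_16 RG, BA
 *   dw[7] - dw[8]  UNORM16  RG, BA
 *   dw[9] - dw[10] SNORM16  RG, BA
 *   dw[11]         SNORM8   RGBA
 */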
 
/* IEEE_FP */
dw[1] = fui(rgba[0]);
dw[2] = fui(rgba[1]);
dw[3] = fui(rgba[2]);
dw[4] = fui(rgba[3]);
 
/* FLOAT_16 */
dw[5] = util_float_to_half(rgba[0]) |
util_float_to_half(rgba[1]) << 16;
dw[6] = util_float_to_half(rgba[2]) |
util_float_to_half(rgba[3]) << 16;
 
/* clamp to [-1.0f, 1.0f] */
rgba[0] = CLAMP(rgba[0], -1.0f, 1.0f);
rgba[1] = CLAMP(rgba[1], -1.0f, 1.0f);
rgba[2] = CLAMP(rgba[2], -1.0f, 1.0f);
rgba[3] = CLAMP(rgba[3], -1.0f, 1.0f);
 
/* SNORM16 */
dw[9] = (int16_t) util_iround(rgba[0] * 32767.0f) |
(int16_t) util_iround(rgba[1] * 32767.0f) << 16;
dw[10] = (int16_t) util_iround(rgba[2] * 32767.0f) |
(int16_t) util_iround(rgba[3] * 32767.0f) << 16;
 
/* SNORM8 */
dw[11] = (int8_t) util_iround(rgba[0] * 127.0f) |
(int8_t) util_iround(rgba[1] * 127.0f) << 8 |
(int8_t) util_iround(rgba[2] * 127.0f) << 16 |
(int8_t) util_iround(rgba[3] * 127.0f) << 24;
 
/* clamp to [0.0f, 1.0f] */
rgba[0] = CLAMP(rgba[0], 0.0f, 1.0f);
rgba[1] = CLAMP(rgba[1], 0.0f, 1.0f);
rgba[2] = CLAMP(rgba[2], 0.0f, 1.0f);
rgba[3] = CLAMP(rgba[3], 0.0f, 1.0f);
 
/* UNORM8 */
dw[0] = (uint8_t) util_iround(rgba[0] * 255.0f) |
(uint8_t) util_iround(rgba[1] * 255.0f) << 8 |
(uint8_t) util_iround(rgba[2] * 255.0f) << 16 |
(uint8_t) util_iround(rgba[3] * 255.0f) << 24;
 
/* UNORM16 */
dw[7] = (uint16_t) util_iround(rgba[0] * 65535.0f) |
(uint16_t) util_iround(rgba[1] * 65535.0f) << 16;
dw[8] = (uint16_t) util_iround(rgba[2] * 65535.0f) |
(uint16_t) util_iround(rgba[3] * 65535.0f) << 16;
}
 
/**
* Translate a pipe texture mipfilter to the matching hardware mipfilter.
*/
static int
gen6_translate_tex_mipfilter(unsigned filter)
{
switch (filter) {
case PIPE_TEX_MIPFILTER_NEAREST: return GEN6_MIPFILTER_NEAREST;
case PIPE_TEX_MIPFILTER_LINEAR: return GEN6_MIPFILTER_LINEAR;
case PIPE_TEX_MIPFILTER_NONE: return GEN6_MIPFILTER_NONE;
default:
assert(!"unknown mipfilter");
return GEN6_MIPFILTER_NONE;
}
}
 
/**
* Translate a pipe texture filter to the matching hardware mapfilter.
*/
static int
gen6_translate_tex_filter(unsigned filter)
{
switch (filter) {
case PIPE_TEX_FILTER_NEAREST: return GEN6_MAPFILTER_NEAREST;
case PIPE_TEX_FILTER_LINEAR: return GEN6_MAPFILTER_LINEAR;
default:
assert(!"unknown sampler filter");
return GEN6_MAPFILTER_NEAREST;
}
}
 
/**
* Translate a pipe texture coordinate wrapping mode to the matching hardware
* wrapping mode.
*/
static int
gen6_translate_tex_wrap(unsigned wrap)
{
switch (wrap) {
case PIPE_TEX_WRAP_CLAMP: return GEN8_TEXCOORDMODE_HALF_BORDER;
case PIPE_TEX_WRAP_REPEAT: return GEN6_TEXCOORDMODE_WRAP;
case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return GEN6_TEXCOORDMODE_CLAMP;
case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return GEN6_TEXCOORDMODE_CLAMP_BORDER;
case PIPE_TEX_WRAP_MIRROR_REPEAT: return GEN6_TEXCOORDMODE_MIRROR;
case PIPE_TEX_WRAP_MIRROR_CLAMP:
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
default:
assert(!"unknown sampler wrap mode");
return GEN6_TEXCOORDMODE_WRAP;
}
}
 
/**
* Translate a pipe shadow compare function to the matching hardware shadow
* function.
*/
static int
gen6_translate_shadow_func(unsigned func)
{
/*
* For PIPE_FUNC_x, the reference value is on the left-hand side of the
* comparison, and 1.0 is returned when the comparison is true.
*
* For GEN6_COMPAREFUNCTION_x, the reference value is on the right-hand side of
* the comparison, and 0.0 is returned when the comparison is true.
*/
switch (func) {
case PIPE_FUNC_NEVER: return GEN6_COMPAREFUNCTION_ALWAYS;
case PIPE_FUNC_LESS: return GEN6_COMPAREFUNCTION_LEQUAL;
case PIPE_FUNC_EQUAL: return GEN6_COMPAREFUNCTION_NOTEQUAL;
case PIPE_FUNC_LEQUAL: return GEN6_COMPAREFUNCTION_LESS;
case PIPE_FUNC_GREATER: return GEN6_COMPAREFUNCTION_GEQUAL;
case PIPE_FUNC_NOTEQUAL: return GEN6_COMPAREFUNCTION_EQUAL;
case PIPE_FUNC_GEQUAL: return GEN6_COMPAREFUNCTION_GREATER;
case PIPE_FUNC_ALWAYS: return GEN6_COMPAREFUNCTION_NEVER;
default:
assert(!"unknown shadow compare function");
return GEN6_COMPAREFUNCTION_NEVER;
}
}
 
void
ilo_gpe_init_sampler_cso(const struct ilo_dev *dev,
const struct pipe_sampler_state *state,
struct ilo_sampler_cso *sampler)
{
int mip_filter, min_filter, mag_filter, max_aniso;
int lod_bias, max_lod, min_lod;
int wrap_s, wrap_t, wrap_r, wrap_cube;
uint32_t dw0, dw1, dw3;
 
ILO_DEV_ASSERT(dev, 6, 8);
 
memset(sampler, 0, sizeof(*sampler));
 
mip_filter = gen6_translate_tex_mipfilter(state->min_mip_filter);
min_filter = gen6_translate_tex_filter(state->min_img_filter);
mag_filter = gen6_translate_tex_filter(state->mag_img_filter);
 
sampler->anisotropic = state->max_anisotropy;
 
if (state->max_anisotropy >= 2 && state->max_anisotropy <= 16)
max_aniso = state->max_anisotropy / 2 - 1;
else if (state->max_anisotropy > 16)
max_aniso = GEN6_ANISORATIO_16;
else
max_aniso = GEN6_ANISORATIO_2;
 
/*
*
* Here is how the hardware calculates per-pixel LOD, from my reading of the
* PRMs:
*
* 1) LOD is set to log2(ratio of texels to pixels) if not specified in
* other ways. The number of texels is measured using level
* SurfMinLod.
* 2) Bias is added to LOD.
* 3) LOD is clamped to [MinLod, MaxLod], and the clamped value is
* compared with Base to determine whether magnification or
* minification is needed. (if preclamp is disabled, LOD is compared
* with Base before clamping)
* 4) If magnification is needed, or no mipmapping is requested, LOD is
* set to floor(MinLod).
* 5) LOD is clamped to [0, MIPCnt], and SurfMinLod is added to LOD.
*
* With the Gallium interface, Base is always zero and
* pipe_sampler_view::u.tex.first_level specifies SurfMinLod.
*/
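
/*
 * Illustrative walk-through (numbers made up): suppose the texel-to-pixel
 * ratio yields LOD = 2.3 and Bias = 0.  With MinLod = 0 and MaxLod = 14,
 * step 3 keeps LOD = 2.3, which is greater than Base = 0, so this is
 * minification and step 4 does not apply.  Step 5 then clamps against
 * MIPCnt and adds SurfMinLod (first_level) to select the physical mip.
 */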
if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
const float scale = 256.0f;
 
/* [-16.0, 16.0) in S4.8 */
lod_bias = (int)
(CLAMP(state->lod_bias, -16.0f, 15.9f) * scale);
lod_bias &= 0x1fff;
 
/* [0.0, 14.0] in U4.8 */
max_lod = (int) (CLAMP(state->max_lod, 0.0f, 14.0f) * scale);
min_lod = (int) (CLAMP(state->min_lod, 0.0f, 14.0f) * scale);
}
else {
const float scale = 64.0f;
 
/* [-16.0, 16.0) in S4.6 */
lod_bias = (int)
(CLAMP(state->lod_bias, -16.0f, 15.9f) * scale);
lod_bias &= 0x7ff;
 
/* [0.0, 13.0] in U4.6 */
max_lod = (int) (CLAMP(state->max_lod, 0.0f, 13.0f) * scale);
min_lod = (int) (CLAMP(state->min_lod, 0.0f, 13.0f) * scale);
}
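
/*
 * Encoding example (illustrative): on Gen7+, lod_bias = -1.0f becomes
 * (int)(-1.0f * 256.0f) = -256; masking with 0x1fff yields 0x1f00, the
 * 13-bit two's-complement S4.8 encoding of -1.0.  max_lod = 5.0f becomes
 * 5 * 256 = 0x500 in U4.8.
 */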
 
/*
* We want LOD to be clamped to determine magnification/minification, and
* get set to zero when it is magnification or when mipmapping is disabled.
* The hardware would set LOD to floor(MinLod) and that is a problem when
* MinLod is greater than or equal to 1.0f.
*
* With Base being zero, it is always minification when MinLod is non-zero.
* To achieve our goal, we just need to set MinLod to zero and set
* MagFilter to MinFilter when mipmapping is disabled.
*/
if (state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && min_lod) {
min_lod = 0;
mag_filter = min_filter;
}
 
/* determine wrap s/t/r */
wrap_s = gen6_translate_tex_wrap(state->wrap_s);
wrap_t = gen6_translate_tex_wrap(state->wrap_t);
wrap_r = gen6_translate_tex_wrap(state->wrap_r);
if (ilo_dev_gen(dev) < ILO_GEN(8)) {
/*
* For nearest filtering, PIPE_TEX_WRAP_CLAMP means
* PIPE_TEX_WRAP_CLAMP_TO_EDGE; for linear filtering,
* PIPE_TEX_WRAP_CLAMP means PIPE_TEX_WRAP_CLAMP_TO_BORDER while
* additionally clamping the texture coordinates to [0.0, 1.0].
*
* PIPE_TEX_WRAP_CLAMP is not supported natively until Gen8. The
* clamping has to be taken care of in the shaders. There are two
* filters here, but let the minification one have a say.
*/
const bool clamp_is_to_edge =
(state->min_img_filter == PIPE_TEX_FILTER_NEAREST);
 
if (clamp_is_to_edge) {
if (wrap_s == GEN8_TEXCOORDMODE_HALF_BORDER)
wrap_s = GEN6_TEXCOORDMODE_CLAMP;
if (wrap_t == GEN8_TEXCOORDMODE_HALF_BORDER)
wrap_t = GEN6_TEXCOORDMODE_CLAMP;
if (wrap_r == GEN8_TEXCOORDMODE_HALF_BORDER)
wrap_r = GEN6_TEXCOORDMODE_CLAMP;
} else {
if (wrap_s == GEN8_TEXCOORDMODE_HALF_BORDER) {
wrap_s = GEN6_TEXCOORDMODE_CLAMP_BORDER;
sampler->saturate_s = true;
}
if (wrap_t == GEN8_TEXCOORDMODE_HALF_BORDER) {
wrap_t = GEN6_TEXCOORDMODE_CLAMP_BORDER;
sampler->saturate_t = true;
}
if (wrap_r == GEN8_TEXCOORDMODE_HALF_BORDER) {
wrap_r = GEN6_TEXCOORDMODE_CLAMP_BORDER;
sampler->saturate_r = true;
}
}
}
 
/*
* From the Sandy Bridge PRM, volume 4 part 1, page 107:
*
* "When using cube map texture coordinates, only TEXCOORDMODE_CLAMP
* and TEXCOORDMODE_CUBE settings are valid, and each TC component
* must have the same Address Control mode."
*
* From the Ivy Bridge PRM, volume 4 part 1, page 96:
*
* "This field (Cube Surface Control Mode) must be set to
* CUBECTRLMODE_PROGRAMMED"
*
* Therefore, we cannot use "Cube Surface Control Mode" for seamless cube
* map filtering.
*/
if (state->seamless_cube_map &&
(state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
state->mag_img_filter != PIPE_TEX_FILTER_NEAREST)) {
wrap_cube = GEN6_TEXCOORDMODE_CUBE;
}
else {
wrap_cube = GEN6_TEXCOORDMODE_CLAMP;
}
 
if (!state->normalized_coords) {
/*
* From the Ivy Bridge PRM, volume 4 part 1, page 98:
*
* "The following state must be set as indicated if this field
* (Non-normalized Coordinate Enable) is enabled:
*
* - TCX/Y/Z Address Control Mode must be TEXCOORDMODE_CLAMP,
* TEXCOORDMODE_HALF_BORDER, or TEXCOORDMODE_CLAMP_BORDER.
* - Surface Type must be SURFTYPE_2D or SURFTYPE_3D.
* - Mag Mode Filter must be MAPFILTER_NEAREST or
* MAPFILTER_LINEAR.
* - Min Mode Filter must be MAPFILTER_NEAREST or
* MAPFILTER_LINEAR.
* - Mip Mode Filter must be MIPFILTER_NONE.
* - Min LOD must be 0.
* - Max LOD must be 0.
* - MIP Count must be 0.
* - Surface Min LOD must be 0.
* - Texture LOD Bias must be 0."
*/
assert(wrap_s == GEN6_TEXCOORDMODE_CLAMP ||
wrap_s == GEN6_TEXCOORDMODE_CLAMP_BORDER);
assert(wrap_t == GEN6_TEXCOORDMODE_CLAMP ||
wrap_t == GEN6_TEXCOORDMODE_CLAMP_BORDER);
assert(wrap_r == GEN6_TEXCOORDMODE_CLAMP ||
wrap_r == GEN6_TEXCOORDMODE_CLAMP_BORDER);
 
assert(mag_filter == GEN6_MAPFILTER_NEAREST ||
mag_filter == GEN6_MAPFILTER_LINEAR);
assert(min_filter == GEN6_MAPFILTER_NEAREST ||
min_filter == GEN6_MAPFILTER_LINEAR);
 
/* work around a bug in util_blitter */
mip_filter = GEN6_MIPFILTER_NONE;
 
assert(mip_filter == GEN6_MIPFILTER_NONE);
}
 
if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
dw0 = 1 << 28 |
mip_filter << 20 |
lod_bias << 1;
 
sampler->dw_filter = mag_filter << 17 |
min_filter << 14;
 
sampler->dw_filter_aniso = GEN6_MAPFILTER_ANISOTROPIC << 17 |
GEN6_MAPFILTER_ANISOTROPIC << 14 |
1;
 
dw1 = min_lod << 20 |
max_lod << 8;
 
if (state->compare_mode != PIPE_TEX_COMPARE_NONE)
dw1 |= gen6_translate_shadow_func(state->compare_func) << 1;
 
dw3 = max_aniso << 19;
 
/* round the coordinates for linear filtering */
if (min_filter != GEN6_MAPFILTER_NEAREST) {
dw3 |= (GEN6_SAMPLER_DW3_U_MIN_ROUND |
GEN6_SAMPLER_DW3_V_MIN_ROUND |
GEN6_SAMPLER_DW3_R_MIN_ROUND);
}
if (mag_filter != GEN6_MAPFILTER_NEAREST) {
dw3 |= (GEN6_SAMPLER_DW3_U_MAG_ROUND |
GEN6_SAMPLER_DW3_V_MAG_ROUND |
GEN6_SAMPLER_DW3_R_MAG_ROUND);
}
 
if (!state->normalized_coords)
dw3 |= 1 << 10;
 
sampler->dw_wrap = wrap_s << 6 |
wrap_t << 3 |
wrap_r;
 
/*
* As noted in the classic i965 driver, the HW may still reference
* wrap_t and wrap_r for 1D textures. We need to set them to a safe
* mode.
*/
sampler->dw_wrap_1d = wrap_s << 6 |
GEN6_TEXCOORDMODE_WRAP << 3 |
GEN6_TEXCOORDMODE_WRAP;
 
sampler->dw_wrap_cube = wrap_cube << 6 |
wrap_cube << 3 |
wrap_cube;
 
STATIC_ASSERT(Elements(sampler->payload) >= 7);
 
sampler->payload[0] = dw0;
sampler->payload[1] = dw1;
sampler->payload[2] = dw3;
 
memcpy(&sampler->payload[3],
state->border_color.ui, sizeof(state->border_color.ui));
}
else {
dw0 = 1 << 28 |
mip_filter << 20 |
lod_bias << 3;
 
if (state->compare_mode != PIPE_TEX_COMPARE_NONE)
dw0 |= gen6_translate_shadow_func(state->compare_func);
 
sampler->dw_filter = (min_filter != mag_filter) << 27 |
mag_filter << 17 |
min_filter << 14;
 
sampler->dw_filter_aniso = GEN6_MAPFILTER_ANISOTROPIC << 17 |
GEN6_MAPFILTER_ANISOTROPIC << 14;
 
dw1 = min_lod << 22 |
max_lod << 12;
 
sampler->dw_wrap = wrap_s << 6 |
wrap_t << 3 |
wrap_r;
 
sampler->dw_wrap_1d = wrap_s << 6 |
GEN6_TEXCOORDMODE_WRAP << 3 |
GEN6_TEXCOORDMODE_WRAP;
 
sampler->dw_wrap_cube = wrap_cube << 6 |
wrap_cube << 3 |
wrap_cube;
 
dw3 = max_aniso << 19;
 
/* round the coordinates for linear filtering */
if (min_filter != GEN6_MAPFILTER_NEAREST) {
dw3 |= (GEN6_SAMPLER_DW3_U_MIN_ROUND |
GEN6_SAMPLER_DW3_V_MIN_ROUND |
GEN6_SAMPLER_DW3_R_MIN_ROUND);
}
if (mag_filter != GEN6_MAPFILTER_NEAREST) {
dw3 |= (GEN6_SAMPLER_DW3_U_MAG_ROUND |
GEN6_SAMPLER_DW3_V_MAG_ROUND |
GEN6_SAMPLER_DW3_R_MAG_ROUND);
}
 
if (!state->normalized_coords)
dw3 |= 1;
 
STATIC_ASSERT(Elements(sampler->payload) >= 15);
 
sampler->payload[0] = dw0;
sampler->payload[1] = dw1;
sampler->payload[2] = dw3;
 
sampler_init_border_color_gen6(dev,
&state->border_color, &sampler->payload[3], 12);
}
}