0,0 → 1,535 |
/************************************************************************** |
* |
* Copyright 2009 VMware, Inc. |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sub license, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial portions |
* of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR |
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
**************************************************************************/ |
|
|
#include "pipe/p_defines.h" |
|
#include "util/u_format.h" |
#include "util/u_memory.h" |
#include "util/u_string.h" |
|
#include "lp_bld_type.h" |
#include "lp_bld_const.h" |
#include "lp_bld_conv.h" |
#include "lp_bld_swizzle.h" |
#include "lp_bld_gather.h" |
#include "lp_bld_debug.h" |
#include "lp_bld_format.h" |
|
|
void |
lp_build_format_swizzle_soa(const struct util_format_description *format_desc, |
struct lp_build_context *bld, |
const LLVMValueRef *unswizzled, |
LLVMValueRef swizzled_out[4]) |
{ |
assert(UTIL_FORMAT_SWIZZLE_0 == PIPE_SWIZZLE_ZERO); |
assert(UTIL_FORMAT_SWIZZLE_1 == PIPE_SWIZZLE_ONE); |
|
if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { |
enum util_format_swizzle swizzle; |
LLVMValueRef depth_or_stencil; |
|
if (util_format_has_stencil(format_desc) && |
!util_format_has_depth(format_desc)) { |
assert(!bld->type.floating); |
swizzle = format_desc->swizzle[1]; |
} |
else { |
assert(bld->type.floating); |
swizzle = format_desc->swizzle[0]; |
} |
/* |
* Return zzz1 or sss1 for depth-stencil formats here. |
* Correct swizzling will be handled by apply_sampler_swizzle() later. |
*/ |
depth_or_stencil = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle); |
|
swizzled_out[2] = swizzled_out[1] = swizzled_out[0] = depth_or_stencil; |
swizzled_out[3] = bld->one; |
} |
else { |
unsigned chan; |
for (chan = 0; chan < 4; ++chan) { |
enum util_format_swizzle swizzle = format_desc->swizzle[chan]; |
swizzled_out[chan] = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle); |
} |
} |
} |
|
|
/** |
* Unpack several pixels in SoA. |
* |
* It takes a vector of packed pixels: |
* |
* packed = {P0, P1, P2, P3, ..., Pn} |
* |
* And will produce four vectors: |
* |
* red = {R0, R1, R2, R3, ..., Rn} |
* green = {G0, G1, G2, G3, ..., Gn} |
* blue = {B0, B1, B2, B3, ..., Bn} |
* alpha = {A0, A1, A2, A3, ..., An} |
* |
* It requires that a packed pixel fits into an element of the output |
* channels. The common case is when converting pixel with a depth of 32 bit or |
* less into floats. |
* |
* \param format_desc the format of the 'packed' incoming pixel vector |
* \param type the desired type for rgba_out (type.length = n, above) |
* \param packed the incoming vector of packed pixels |
* \param rgba_out returns the SoA R,G,B,A vectors |
*/ |
void |
lp_build_unpack_rgba_soa(struct gallivm_state *gallivm, |
const struct util_format_description *format_desc, |
struct lp_type type, |
LLVMValueRef packed, |
LLVMValueRef rgba_out[4]) |
{ |
LLVMBuilderRef builder = gallivm->builder; |
struct lp_build_context bld; |
LLVMValueRef inputs[4]; |
unsigned chan; |
|
assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); |
assert(format_desc->block.width == 1); |
assert(format_desc->block.height == 1); |
assert(format_desc->block.bits <= type.width); |
/* FIXME: Support more output types */ |
assert(type.width == 32); |
|
lp_build_context_init(&bld, gallivm, type); |
|
/* Decode the input vector components */ |
for (chan = 0; chan < format_desc->nr_channels; ++chan) { |
const unsigned width = format_desc->channel[chan].size; |
const unsigned start = format_desc->channel[chan].shift; |
const unsigned stop = start + width; |
LLVMValueRef input; |
|
input = packed; |
|
switch(format_desc->channel[chan].type) { |
case UTIL_FORMAT_TYPE_VOID: |
input = lp_build_undef(gallivm, type); |
break; |
|
case UTIL_FORMAT_TYPE_UNSIGNED: |
/* |
* Align the LSB |
*/ |
|
if (start) { |
input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(gallivm, type, start), ""); |
} |
|
/* |
* Zero the MSBs |
*/ |
|
if (stop < format_desc->block.bits) { |
unsigned mask = ((unsigned long long)1 << width) - 1; |
input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(gallivm, type, mask), ""); |
} |
|
/* |
* Type conversion |
*/ |
|
if (type.floating) { |
if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) { |
assert(width == 8); |
if (format_desc->swizzle[3] == chan) { |
input = lp_build_unsigned_norm_to_float(gallivm, width, type, input); |
} |
else { |
struct lp_type conv_type = lp_uint_type(type); |
input = lp_build_srgb_to_linear(gallivm, conv_type, input); |
} |
} |
else { |
if(format_desc->channel[chan].normalized) |
input = lp_build_unsigned_norm_to_float(gallivm, width, type, input); |
else |
input = LLVMBuildSIToFP(builder, input, |
lp_build_vec_type(gallivm, type), ""); |
} |
} |
else if (format_desc->channel[chan].pure_integer) { |
/* Nothing to do */ |
} else { |
/* FIXME */ |
assert(0); |
} |
|
break; |
|
case UTIL_FORMAT_TYPE_SIGNED: |
/* |
* Align the sign bit first. |
*/ |
|
if (stop < type.width) { |
unsigned bits = type.width - stop; |
LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits); |
input = LLVMBuildShl(builder, input, bits_val, ""); |
} |
|
/* |
* Align the LSB (with an arithmetic shift to preserve the sign) |
*/ |
|
if (format_desc->channel[chan].size < type.width) { |
unsigned bits = type.width - format_desc->channel[chan].size; |
LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits); |
input = LLVMBuildAShr(builder, input, bits_val, ""); |
} |
|
/* |
* Type conversion |
*/ |
|
if (type.floating) { |
input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), ""); |
if (format_desc->channel[chan].normalized) { |
double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1); |
LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale); |
input = LLVMBuildFMul(builder, input, scale_val, ""); |
} |
} |
else if (format_desc->channel[chan].pure_integer) { |
/* Nothing to do */ |
} else { |
/* FIXME */ |
assert(0); |
} |
|
break; |
|
case UTIL_FORMAT_TYPE_FLOAT: |
if (type.floating) { |
assert(start == 0); |
assert(stop == 32); |
assert(type.width == 32); |
input = LLVMBuildBitCast(builder, input, lp_build_vec_type(gallivm, type), ""); |
} |
else { |
/* FIXME */ |
assert(0); |
input = lp_build_undef(gallivm, type); |
} |
break; |
|
case UTIL_FORMAT_TYPE_FIXED: |
if (type.floating) { |
double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1); |
LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale); |
input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), ""); |
input = LLVMBuildFMul(builder, input, scale_val, ""); |
} |
else { |
/* FIXME */ |
assert(0); |
input = lp_build_undef(gallivm, type); |
} |
break; |
|
default: |
assert(0); |
input = lp_build_undef(gallivm, type); |
break; |
} |
|
inputs[chan] = input; |
} |
|
lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out); |
} |
|
|
/** |
* Convert a vector of rgba8 values into 32bit wide SoA vectors. |
* |
* \param dst_type The desired return type. For pure integer formats |
* this should be a 32bit wide int or uint vector type, |
* otherwise a float vector type. |
* |
* \param packed The rgba8 values to pack. |
* |
* \param rgba The 4 SoA return vectors. |
*/ |
void |
lp_build_rgba8_to_fi32_soa(struct gallivm_state *gallivm, |
struct lp_type dst_type, |
LLVMValueRef packed, |
LLVMValueRef *rgba) |
{ |
LLVMBuilderRef builder = gallivm->builder; |
LLVMValueRef mask = lp_build_const_int_vec(gallivm, dst_type, 0xff); |
unsigned chan; |
|
/* XXX technically shouldn't use that for uint dst_type */ |
packed = LLVMBuildBitCast(builder, packed, |
lp_build_int_vec_type(gallivm, dst_type), ""); |
|
/* Decode the input vector components */ |
for (chan = 0; chan < 4; ++chan) { |
#ifdef PIPE_ARCH_LITTLE_ENDIAN |
unsigned start = chan*8; |
#else |
unsigned start = (3-chan)*8; |
#endif |
unsigned stop = start + 8; |
LLVMValueRef input; |
|
input = packed; |
|
if (start) |
input = LLVMBuildLShr(builder, input, |
lp_build_const_int_vec(gallivm, dst_type, start), ""); |
|
if (stop < 32) |
input = LLVMBuildAnd(builder, input, mask, ""); |
|
if (dst_type.floating) |
input = lp_build_unsigned_norm_to_float(gallivm, 8, dst_type, input); |
|
rgba[chan] = input; |
} |
} |
|
|
|
/** |
* Fetch a texels from a texture, returning them in SoA layout. |
* |
* \param type the desired return type for 'rgba'. The vector length |
* is the number of texels to fetch |
* |
* \param base_ptr points to the base of the texture mip tree. |
* \param offset offset to start of the texture image block. For non- |
* compressed formats, this simply is an offset to the texel. |
* For compressed formats, it is an offset to the start of the |
* compressed data block. |
* |
* \param i, j the sub-block pixel coordinates. For non-compressed formats |
* these will always be (0,0). For compressed formats, i will |
* be in [0, block_width-1] and j will be in [0, block_height-1]. |
*/ |
void |
lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, |
const struct util_format_description *format_desc, |
struct lp_type type, |
LLVMValueRef base_ptr, |
LLVMValueRef offset, |
LLVMValueRef i, |
LLVMValueRef j, |
LLVMValueRef rgba_out[4]) |
{ |
LLVMBuilderRef builder = gallivm->builder; |
|
if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && |
(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || |
format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB || |
format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) && |
format_desc->block.width == 1 && |
format_desc->block.height == 1 && |
format_desc->block.bits <= type.width && |
(format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT || |
format_desc->channel[0].size == 32)) |
{ |
/* |
* The packed pixel fits into an element of the destination format. Put |
* the packed pixels into a vector and extract each component for all |
* vector elements in parallel. |
*/ |
|
LLVMValueRef packed; |
|
/* |
* gather the texels from the texture |
* Ex: packed = {XYZW, XYZW, XYZW, XYZW} |
*/ |
assert(format_desc->block.bits <= type.width); |
packed = lp_build_gather(gallivm, |
type.length, |
format_desc->block.bits, |
type.width, |
base_ptr, offset, FALSE); |
|
/* |
* convert texels to float rgba |
*/ |
lp_build_unpack_rgba_soa(gallivm, |
format_desc, |
type, |
packed, rgba_out); |
return; |
} |
|
if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT || |
format_desc->format == PIPE_FORMAT_R9G9B9E5_FLOAT) { |
/* |
* similar conceptually to above but requiring special |
* AoS packed -> SoA float conversion code. |
*/ |
LLVMValueRef packed; |
|
assert(type.floating); |
assert(type.width == 32); |
|
packed = lp_build_gather(gallivm, type.length, |
format_desc->block.bits, |
type.width, base_ptr, offset, |
FALSE); |
if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT) { |
lp_build_r11g11b10_to_float(gallivm, packed, rgba_out); |
} |
else { |
lp_build_rgb9e5_to_float(gallivm, packed, rgba_out); |
} |
return; |
} |
|
if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS && |
format_desc->block.bits == 64) { |
/* |
* special case the format is 64 bits but we only require |
* 32bit (or 8bit) from each block. |
*/ |
LLVMValueRef packed; |
|
if (format_desc->format == PIPE_FORMAT_X32_S8X24_UINT) { |
/* |
* for stencil simply fix up offsets - could in fact change |
* base_ptr instead even outside the shader. |
*/ |
unsigned mask = (1 << 8) - 1; |
LLVMValueRef s_offset = lp_build_const_int_vec(gallivm, type, 4); |
offset = LLVMBuildAdd(builder, offset, s_offset, ""); |
packed = lp_build_gather(gallivm, type.length, |
32, type.width, base_ptr, offset, FALSE); |
packed = LLVMBuildAnd(builder, packed, |
lp_build_const_int_vec(gallivm, type, mask), ""); |
} |
else { |
assert (format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT); |
packed = lp_build_gather(gallivm, type.length, |
32, type.width, base_ptr, offset, TRUE); |
packed = LLVMBuildBitCast(builder, packed, |
lp_build_vec_type(gallivm, type), ""); |
} |
/* for consistency with lp_build_unpack_rgba_soa() return sss1 or zzz1 */ |
rgba_out[0] = rgba_out[1] = rgba_out[2] = packed; |
rgba_out[3] = lp_build_const_vec(gallivm, type, 1.0f); |
return; |
} |
|
/* |
* Try calling lp_build_fetch_rgba_aos for all pixels. |
*/ |
|
if (util_format_fits_8unorm(format_desc) && |
type.floating && type.width == 32 && |
(type.length == 1 || (type.length % 4 == 0))) { |
struct lp_type tmp_type; |
LLVMValueRef tmp; |
|
memset(&tmp_type, 0, sizeof tmp_type); |
tmp_type.width = 8; |
tmp_type.length = type.length * 4; |
tmp_type.norm = TRUE; |
|
tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type, |
base_ptr, offset, i, j); |
|
lp_build_rgba8_to_fi32_soa(gallivm, |
type, |
tmp, |
rgba_out); |
|
return; |
} |
|
/* |
* Fallback to calling lp_build_fetch_rgba_aos for each pixel. |
* |
* This is not the most efficient way of fetching pixels, as we |
* miss some opportunities to do vectorization, but this is |
* convenient for formats or scenarios for which there was no |
* opportunity or incentive to optimize. |
*/ |
|
{ |
unsigned k, chan; |
struct lp_type tmp_type; |
|
if (gallivm_debug & GALLIVM_DEBUG_PERF) { |
debug_printf("%s: scalar unpacking of %s\n", |
__FUNCTION__, format_desc->short_name); |
} |
|
tmp_type = type; |
tmp_type.length = 4; |
|
for (chan = 0; chan < 4; ++chan) { |
rgba_out[chan] = lp_build_undef(gallivm, type); |
} |
|
/* loop over number of pixels */ |
for(k = 0; k < type.length; ++k) { |
LLVMValueRef index = lp_build_const_int32(gallivm, k); |
LLVMValueRef offset_elem; |
LLVMValueRef i_elem, j_elem; |
LLVMValueRef tmp; |
|
offset_elem = LLVMBuildExtractElement(builder, offset, |
index, ""); |
|
i_elem = LLVMBuildExtractElement(builder, i, index, ""); |
j_elem = LLVMBuildExtractElement(builder, j, index, ""); |
|
/* Get a single float[4]={R,G,B,A} pixel */ |
tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type, |
base_ptr, offset_elem, |
i_elem, j_elem); |
|
/* |
* Insert the AoS tmp value channels into the SoA result vectors at |
* position = 'index'. |
*/ |
for (chan = 0; chan < 4; ++chan) { |
LLVMValueRef chan_val = lp_build_const_int32(gallivm, chan), |
tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, ""); |
rgba_out[chan] = LLVMBuildInsertElement(builder, rgba_out[chan], |
tmp_chan, index, ""); |
} |
} |
} |
} |