Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4358 | Serge | 1 | /************************************************************************** |
2 | * |
||
3 | * Copyright 2010 VMware, Inc. |
||
4 | * All Rights Reserved. |
||
5 | * |
||
6 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
7 | * copy of this software and associated documentation files (the |
||
8 | * "Software"), to deal in the Software without restriction, including |
||
9 | * without limitation the rights to use, copy, modify, merge, publish, |
||
10 | * distribute, sub license, and/or sell copies of the Software, and to |
||
11 | * permit persons to whom the Software is furnished to do so, subject to |
||
12 | * the following conditions: |
||
13 | * |
||
14 | * The above copyright notice and this permission notice (including the |
||
15 | * next paragraph) shall be included in all copies or substantial portions |
||
16 | * of the Software. |
||
17 | * |
||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
||
21 | * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR |
||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
25 | * |
||
26 | **************************************************************************/ |
||
27 | |||
28 | /** |
||
29 | * @file |
||
30 | * Texture sampling -- AoS. |
||
31 | * |
||
32 | * @author Jose Fonseca |
||
33 | * @author Brian Paul |
||
34 | */ |
||
35 | |||
36 | #include "pipe/p_defines.h" |
||
37 | #include "pipe/p_state.h" |
||
38 | #include "util/u_debug.h" |
||
39 | #include "util/u_dump.h" |
||
40 | #include "util/u_memory.h" |
||
41 | #include "util/u_math.h" |
||
42 | #include "util/u_format.h" |
||
43 | #include "util/u_cpu_detect.h" |
||
44 | #include "lp_bld_debug.h" |
||
45 | #include "lp_bld_type.h" |
||
46 | #include "lp_bld_const.h" |
||
47 | #include "lp_bld_conv.h" |
||
48 | #include "lp_bld_arit.h" |
||
49 | #include "lp_bld_bitarit.h" |
||
50 | #include "lp_bld_logic.h" |
||
51 | #include "lp_bld_swizzle.h" |
||
52 | #include "lp_bld_pack.h" |
||
53 | #include "lp_bld_flow.h" |
||
54 | #include "lp_bld_gather.h" |
||
55 | #include "lp_bld_format.h" |
||
56 | #include "lp_bld_init.h" |
||
57 | #include "lp_bld_sample.h" |
||
58 | #include "lp_bld_sample_aos.h" |
||
59 | #include "lp_bld_quad.h" |
||
60 | |||
61 | |||
62 | /** |
||
63 | * Build LLVM code for texture coord wrapping, for nearest filtering, |
||
64 | * for scaled integer texcoords. |
||
65 | * \param block_length is the length of the pixel block along the |
||
66 | * coordinate axis |
||
67 | * \param coord the incoming texcoord (s,t or r) scaled to the texture size |
||
68 | * \param coord_f the incoming texcoord (s,t or r) as float vec |
||
69 | * \param length the texture size along one dimension |
||
70 | * \param stride pixel stride along the coordinate axis (in bytes) |
||
71 | * \param offset the texel offset along the coord axis |
||
72 | * \param is_pot if TRUE, length is a power of two |
||
73 | * \param wrap_mode one of PIPE_TEX_WRAP_x |
||
74 | * \param out_offset byte offset for the wrapped coordinate |
||
75 | * \param out_i resulting sub-block pixel coordinate for coord0 |
||
76 | */ |
||
77 | static void |
||
78 | lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld, |
||
79 | unsigned block_length, |
||
80 | LLVMValueRef coord, |
||
81 | LLVMValueRef coord_f, |
||
82 | LLVMValueRef length, |
||
83 | LLVMValueRef stride, |
||
84 | LLVMValueRef offset, |
||
85 | boolean is_pot, |
||
86 | unsigned wrap_mode, |
||
87 | LLVMValueRef *out_offset, |
||
88 | LLVMValueRef *out_i) |
||
89 | { |
||
90 | struct lp_build_context *int_coord_bld = &bld->int_coord_bld; |
||
91 | LLVMBuilderRef builder = bld->gallivm->builder; |
||
92 | LLVMValueRef length_minus_one; |
||
93 | |||
94 | length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); |
||
95 | |||
96 | switch(wrap_mode) { |
||
97 | case PIPE_TEX_WRAP_REPEAT: |
||
98 | if(is_pot) |
||
99 | coord = LLVMBuildAnd(builder, coord, length_minus_one, ""); |
||
100 | else { |
||
101 | struct lp_build_context *coord_bld = &bld->coord_bld; |
||
102 | LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length); |
||
103 | if (offset) { |
||
104 | offset = lp_build_int_to_float(coord_bld, offset); |
||
105 | offset = lp_build_div(coord_bld, offset, length_f); |
||
106 | coord_f = lp_build_add(coord_bld, coord_f, offset); |
||
107 | } |
||
108 | coord = lp_build_fract_safe(coord_bld, coord_f); |
||
109 | coord = lp_build_mul(coord_bld, coord, length_f); |
||
110 | coord = lp_build_itrunc(coord_bld, coord); |
||
111 | } |
||
112 | break; |
||
113 | |||
114 | case PIPE_TEX_WRAP_CLAMP_TO_EDGE: |
||
115 | coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero); |
||
116 | coord = lp_build_min(int_coord_bld, coord, length_minus_one); |
||
117 | break; |
||
118 | |||
119 | case PIPE_TEX_WRAP_CLAMP: |
||
120 | case PIPE_TEX_WRAP_CLAMP_TO_BORDER: |
||
121 | case PIPE_TEX_WRAP_MIRROR_REPEAT: |
||
122 | case PIPE_TEX_WRAP_MIRROR_CLAMP: |
||
123 | case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: |
||
124 | case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: |
||
125 | default: |
||
126 | assert(0); |
||
127 | } |
||
128 | |||
129 | lp_build_sample_partial_offset(int_coord_bld, block_length, coord, stride, |
||
130 | out_offset, out_i); |
||
131 | } |
||
132 | |||
133 | |||
134 | /** |
||
135 | * Build LLVM code for texture coord wrapping, for nearest filtering, |
||
136 | * for float texcoords. |
||
137 | * \param coord the incoming texcoord (s,t or r) |
||
138 | * \param length the texture size along one dimension |
||
139 | * \param offset the texel offset along the coord axis |
||
140 | * \param is_pot if TRUE, length is a power of two |
||
141 | * \param wrap_mode one of PIPE_TEX_WRAP_x |
||
142 | * \param icoord the texcoord after wrapping, as int |
||
143 | */ |
||
144 | static void |
||
145 | lp_build_sample_wrap_nearest_float(struct lp_build_sample_context *bld, |
||
146 | LLVMValueRef coord, |
||
147 | LLVMValueRef length, |
||
148 | LLVMValueRef offset, |
||
149 | boolean is_pot, |
||
150 | unsigned wrap_mode, |
||
151 | LLVMValueRef *icoord) |
||
152 | { |
||
153 | struct lp_build_context *coord_bld = &bld->coord_bld; |
||
154 | LLVMValueRef length_minus_one; |
||
155 | |||
156 | switch(wrap_mode) { |
||
157 | case PIPE_TEX_WRAP_REPEAT: |
||
158 | if (offset) { |
||
159 | /* this is definitely not ideal for POT case */ |
||
160 | offset = lp_build_int_to_float(coord_bld, offset); |
||
161 | offset = lp_build_div(coord_bld, offset, length); |
||
162 | coord = lp_build_add(coord_bld, coord, offset); |
||
163 | } |
||
164 | /* take fraction, unnormalize */ |
||
165 | coord = lp_build_fract_safe(coord_bld, coord); |
||
166 | coord = lp_build_mul(coord_bld, coord, length); |
||
167 | *icoord = lp_build_itrunc(coord_bld, coord); |
||
168 | break; |
||
169 | case PIPE_TEX_WRAP_CLAMP_TO_EDGE: |
||
170 | length_minus_one = lp_build_sub(coord_bld, length, coord_bld->one); |
||
171 | if (bld->static_sampler_state->normalized_coords) { |
||
172 | /* scale coord to length */ |
||
173 | coord = lp_build_mul(coord_bld, coord, length); |
||
174 | } |
||
175 | if (offset) { |
||
176 | offset = lp_build_int_to_float(coord_bld, offset); |
||
177 | coord = lp_build_add(coord_bld, coord, offset); |
||
178 | } |
||
179 | coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, |
||
180 | length_minus_one); |
||
181 | *icoord = lp_build_itrunc(coord_bld, coord); |
||
182 | break; |
||
183 | |||
184 | case PIPE_TEX_WRAP_CLAMP: |
||
185 | case PIPE_TEX_WRAP_CLAMP_TO_BORDER: |
||
186 | case PIPE_TEX_WRAP_MIRROR_REPEAT: |
||
187 | case PIPE_TEX_WRAP_MIRROR_CLAMP: |
||
188 | case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: |
||
189 | case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: |
||
190 | default: |
||
191 | assert(0); |
||
192 | } |
||
193 | } |
||
194 | |||
195 | |||
/**
 * Build LLVM code for texture coord wrapping, for linear filtering,
 * for scaled integer texcoords.
 *
 * Only PIPE_TEX_WRAP_REPEAT and PIPE_TEX_WRAP_CLAMP_TO_EDGE are
 * implemented by this AoS path; all other wrap modes assert.
 *
 * \param block_length is the length of the pixel block along the
 *                     coordinate axis
 * \param coord0   the incoming texcoord (s,t or r) scaled to the texture size
 * \param weight_i returns the 8-bit fixed-point lerp weight (NPOT repeat only)
 * \param coord_f  the incoming texcoord (s,t or r) as float vec
 * \param length   the texture size along one dimension
 * \param stride   pixel stride along the coordinate axis (in bytes)
 * \param offset   the texel offset along the coord axis (may be NULL)
 * \param is_pot   if TRUE, length is a power of two
 * \param wrap_mode one of PIPE_TEX_WRAP_x
 * \param offset0  resulting relative offset for coord0
 * \param offset1  resulting relative offset for coord0 + 1
 * \param i0       resulting sub-block pixel coordinate for coord0
 * \param i1       resulting sub-block pixel coordinate for coord0 + 1
 */
static void
lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
                                unsigned block_length,
                                LLVMValueRef coord0,
                                LLVMValueRef *weight_i,
                                LLVMValueRef coord_f,
                                LLVMValueRef length,
                                LLVMValueRef stride,
                                LLVMValueRef offset,
                                boolean is_pot,
                                unsigned wrap_mode,
                                LLVMValueRef *offset0,
                                LLVMValueRef *offset1,
                                LLVMValueRef *i0,
                                LLVMValueRef *i1)
{
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef length_minus_one;
   LLVMValueRef lmask, umask, mask;

   /*
    * If the pixel block covers more than one pixel then there is no easy
    * way to calculate offset1 relative to offset0. Instead, compute them
    * independently. Otherwise, try to compute offset0 and offset1 with
    * a single stride multiplication.
    */

   length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);

   if (block_length != 1) {
      /* Multi-pixel block: wrap both coords, then compute each
       * offset/subcoord pair independently.
       */
      LLVMValueRef coord1;
      switch(wrap_mode) {
      case PIPE_TEX_WRAP_REPEAT:
         if (is_pot) {
            /* POT: wrap both coords with a bitwise AND against size-1 */
            coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
            coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
            coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
         }
         else {
            /* NPOT: wrap in float space; helper returns the int coord
             * plus the float lerp weight.
             */
            LLVMValueRef mask;
            LLVMValueRef weight;
            LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length);
            if (offset) {
               /* apply texel offset as a normalized-coord adjustment */
               offset = lp_build_int_to_float(&bld->coord_bld, offset);
               offset = lp_build_div(&bld->coord_bld, offset, length_f);
               coord_f = lp_build_add(&bld->coord_bld, coord_f, offset);
            }
            lp_build_coord_repeat_npot_linear(bld, coord_f,
                                              length, length_f,
                                              &coord0, &weight);
            /* coord1 = coord0 + 1, except wrap back to 0 when coord0 is
             * the last texel; the AND with the != mask zeroes it there.
             */
            mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
                                    PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
            coord1 = LLVMBuildAnd(builder,
                                  lp_build_add(int_coord_bld, coord0,
                                               int_coord_bld->one),
                                  mask, "");
            /* convert weight to 8-bit fixed point */
            weight = lp_build_mul_imm(&bld->coord_bld, weight, 256);
            *weight_i = lp_build_itrunc(&bld->coord_bld, weight);
         }
         break;

      case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
         coord0 = lp_build_clamp(int_coord_bld, coord0, int_coord_bld->zero,
                                 length_minus_one);
         coord1 = lp_build_clamp(int_coord_bld, coord1, int_coord_bld->zero,
                                 length_minus_one);
         break;

      case PIPE_TEX_WRAP_CLAMP:
      case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
      case PIPE_TEX_WRAP_MIRROR_REPEAT:
      case PIPE_TEX_WRAP_MIRROR_CLAMP:
      case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
      case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
      default:
         /* unsupported in the AoS linear path */
         assert(0);
         coord0 = int_coord_bld->zero;
         coord1 = int_coord_bld->zero;
         break;
      }
      lp_build_sample_partial_offset(int_coord_bld, block_length, coord0, stride,
                                     offset0, i0);
      lp_build_sample_partial_offset(int_coord_bld, block_length, coord1, stride,
                                     offset1, i1);
      return;
   }

   /* Single-pixel block: sub-block coords are always zero and offset1
    * can be derived from offset0 with one stride add.
    */
   *i0 = int_coord_bld->zero;
   *i1 = int_coord_bld->zero;

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot) {
         coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
      }
      else {
         LLVMValueRef weight;
         LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length);
         if (offset) {
            /* apply texel offset as a normalized-coord adjustment */
            offset = lp_build_int_to_float(&bld->coord_bld, offset);
            offset = lp_build_div(&bld->coord_bld, offset, length_f);
            coord_f = lp_build_add(&bld->coord_bld, coord_f, offset);
         }
         lp_build_coord_repeat_npot_linear(bld, coord_f,
                                           length, length_f,
                                           &coord0, &weight);
         /* convert weight to 8-bit fixed point */
         weight = lp_build_mul_imm(&bld->coord_bld, weight, 256);
         *weight_i = lp_build_itrunc(&bld->coord_bld, weight);
      }

      /* offset1 = offset0 + stride, except wrap back to 0 when coord0
       * is the last texel (mask is all-zeros there).
       */
      mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
                              PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);

      *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
      *offset1 = LLVMBuildAnd(builder,
                              lp_build_add(int_coord_bld, *offset0, stride),
                              mask, "");
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      /* XXX this might be slower than the separate path
       * on some newer cpus. With sse41 this is 8 instructions vs. 7
       * - at least on SNB this is almost certainly slower since
       * min/max are cheaper than selects, and the muls aren't bad.
       */
      lmask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
                               PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
      umask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
                               PIPE_FUNC_LESS, coord0, length_minus_one);

      /* clamp coord0 to [0, length - 1] via selects */
      coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
      coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);

      /* offset1 only advances by stride when coord0 was strictly inside */
      mask = LLVMBuildAnd(builder, lmask, umask, "");

      *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
      *offset1 = lp_build_add(int_coord_bld,
                              *offset0,
                              LLVMBuildAnd(builder, stride, mask, ""));
      break;

   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
   case PIPE_TEX_WRAP_MIRROR_REPEAT:
   case PIPE_TEX_WRAP_MIRROR_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
   default:
      /* unsupported in the AoS linear path */
      assert(0);
      *offset0 = int_coord_bld->zero;
      *offset1 = int_coord_bld->zero;
      break;
   }
}
||
369 | |||
370 | |||
/**
 * Build LLVM code for texture coord wrapping, for linear filtering,
 * for float texcoords.
 *
 * Only PIPE_TEX_WRAP_REPEAT and PIPE_TEX_WRAP_CLAMP_TO_EDGE are
 * implemented by this AoS path; all other wrap modes assert.
 *
 * \param block_length is the length of the pixel block along the
 *                     coordinate axis
 *                     NOTE(review): not referenced in the body — confirm
 *                     it is intentionally unused.
 * \param coord     the incoming texcoord (s,t or r)
 * \param length    the texture size along one dimension, as float vec
 * \param offset    the texel offset along the coord axis (may be NULL)
 * \param is_pot    if TRUE, length is a power of two
 * \param wrap_mode one of PIPE_TEX_WRAP_x
 * \param coord0    the first texcoord after wrapping, as int
 * \param coord1    the second texcoord after wrapping, as int
 * \param weight    the filter weight as int (0-255)
 * \param force_nearest if this coord actually uses nearest filtering
 *                      (skips the -0.5 texel-center adjustment)
 */
static void
lp_build_sample_wrap_linear_float(struct lp_build_sample_context *bld,
                                  unsigned block_length,
                                  LLVMValueRef coord,
                                  LLVMValueRef length,
                                  LLVMValueRef offset,
                                  boolean is_pot,
                                  unsigned wrap_mode,
                                  LLVMValueRef *coord0,
                                  LLVMValueRef *coord1,
                                  LLVMValueRef *weight,
                                  unsigned force_nearest)
{
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   struct lp_build_context *coord_bld = &bld->coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
   LLVMValueRef length_minus_one = lp_build_sub(coord_bld, length, coord_bld->one);

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot) {
         /* mul by size and subtract 0.5 */
         coord = lp_build_mul(coord_bld, coord, length);
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         if (!force_nearest)
            coord = lp_build_sub(coord_bld, coord, half);
         *coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
         /* convert to int, compute lerp weight */
         lp_build_ifloor_fract(coord_bld, coord, coord0, weight);
         *coord1 = lp_build_ifloor(coord_bld, *coord1);
         /* repeat wrap: AND both int coords with size-1 */
         length_minus_one = lp_build_itrunc(coord_bld, length_minus_one);
         *coord0 = LLVMBuildAnd(builder, *coord0, length_minus_one, "");
         *coord1 = LLVMBuildAnd(builder, *coord1, length_minus_one, "");
      }
      else {
         LLVMValueRef mask;
         if (offset) {
            /* apply texel offset as a normalized-coord adjustment */
            offset = lp_build_int_to_float(coord_bld, offset);
            offset = lp_build_div(coord_bld, offset, length);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         /* wrap with normalized floats is just fract */
         coord = lp_build_fract(coord_bld, coord);
         /* unnormalize */
         coord = lp_build_mul(coord_bld, coord, length);
         /*
          * we avoided the 0.5/length division, have to fix up wrong
          * edge cases with selects
          */
         *coord1 = lp_build_add(coord_bld, coord, half);
         coord = lp_build_sub(coord_bld, coord, half);
         *weight = lp_build_fract(coord_bld, coord);
         /* coord0: if coord went negative, wrap to the last texel */
         mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
                                 PIPE_FUNC_LESS, coord, coord_bld->zero);
         *coord0 = lp_build_select(coord_bld, mask, length_minus_one, coord);
         *coord0 = lp_build_itrunc(coord_bld, *coord0);
         /* coord1: if it ran past the end, wrap back to texel 0 */
         mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
                                 PIPE_FUNC_LESS, *coord1, length);
         *coord1 = lp_build_select(coord_bld, mask, *coord1, coord_bld->zero);
         *coord1 = lp_build_itrunc(coord_bld, *coord1);
      }
      break;
   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      if (bld->static_sampler_state->normalized_coords) {
         /* mul by tex size */
         coord = lp_build_mul(coord_bld, coord, length);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* subtract 0.5 */
      if (!force_nearest) {
         coord = lp_build_sub(coord_bld, coord, half);
      }
      /* clamp to [0, length - 1] */
      coord = lp_build_min(coord_bld, coord, length_minus_one);
      coord = lp_build_max(coord_bld, coord, coord_bld->zero);
      *coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, coord0, weight);
      /* coord1 = min(coord1, length-1) */
      *coord1 = lp_build_min(coord_bld, *coord1, length_minus_one);
      *coord1 = lp_build_itrunc(coord_bld, *coord1);
      break;
   default:
      /* unsupported wrap modes in the AoS linear path */
      assert(0);
      *coord0 = int_coord_bld->zero;
      *coord1 = int_coord_bld->zero;
      *weight = coord_bld->zero;
      break;
   }
   /* convert float weight to 8-bit fixed point (0..255) */
   *weight = lp_build_mul_imm(coord_bld, *weight, 256);
   *weight = lp_build_itrunc(coord_bld, *weight);
   return;
}
||
487 | |||
488 | |||
489 | /** |
||
490 | * Fetch texels for image with nearest sampling. |
||
491 | * Return filtered color as two vectors of 16-bit fixed point values. |
||
492 | */ |
||
493 | static void |
||
494 | lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld, |
||
495 | LLVMValueRef data_ptr, |
||
496 | LLVMValueRef offset, |
||
497 | LLVMValueRef x_subcoord, |
||
498 | LLVMValueRef y_subcoord, |
||
499 | LLVMValueRef *colors) |
||
500 | { |
||
501 | /* |
||
502 | * Fetch the pixels as 4 x 32bit (rgba order might differ): |
||
503 | * |
||
504 | * rgba0 rgba1 rgba2 rgba3 |
||
505 | * |
||
506 | * bit cast them into 16 x u8 |
||
507 | * |
||
508 | * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3 |
||
509 | * |
||
510 | * unpack them into two 8 x i16: |
||
511 | * |
||
512 | * r0 g0 b0 a0 r1 g1 b1 a1 |
||
513 | * r2 g2 b2 a2 r3 g3 b3 a3 |
||
514 | * |
||
515 | * The higher 8 bits of the resulting elements will be zero. |
||
516 | */ |
||
517 | LLVMBuilderRef builder = bld->gallivm->builder; |
||
518 | LLVMValueRef rgba8; |
||
519 | struct lp_build_context u8n; |
||
520 | LLVMTypeRef u8n_vec_type; |
||
521 | |||
522 | lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width)); |
||
523 | u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type); |
||
524 | |||
525 | if (util_format_is_rgba8_variant(bld->format_desc)) { |
||
526 | /* |
||
527 | * Given the format is a rgba8, just read the pixels as is, |
||
528 | * without any swizzling. Swizzling will be done later. |
||
529 | */ |
||
530 | rgba8 = lp_build_gather(bld->gallivm, |
||
531 | bld->texel_type.length, |
||
532 | bld->format_desc->block.bits, |
||
533 | bld->texel_type.width, |
||
534 | data_ptr, offset, TRUE); |
||
535 | |||
536 | rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, ""); |
||
537 | } |
||
538 | else { |
||
539 | rgba8 = lp_build_fetch_rgba_aos(bld->gallivm, |
||
540 | bld->format_desc, |
||
541 | u8n.type, |
||
542 | data_ptr, offset, |
||
543 | x_subcoord, |
||
544 | y_subcoord); |
||
545 | } |
||
546 | |||
547 | *colors = rgba8; |
||
548 | } |
||
549 | |||
550 | |||
/**
 * Sample a single texture image with nearest sampling.
 * If sampling a cube texture, r = cube face in [0,5].
 * Return filtered color as two vectors of 16-bit fixed point values.
 *
 * Coordinates are handled in 8.8 fixed point (scaled by 256).
 *
 * \param int_size        int vec of texture width/height/depth
 * \param row_stride_vec  row stride in bytes, as int vec
 * \param img_stride_vec  image/layer stride in bytes, as int vec
 * \param data_ptr        pointer to the texture image data
 * \param mipoffsets      optional per-pixel mip level byte offsets
 * \param s, t, r         the incoming texcoords as float vecs
 * \param offsets         optional texel offsets per axis (offsets[0] NULL
 *                        means no offsets at all)
 * \param colors          returns the fetched texels
 */
static void
lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
                              LLVMValueRef int_size,
                              LLVMValueRef row_stride_vec,
                              LLVMValueRef img_stride_vec,
                              LLVMValueRef data_ptr,
                              LLVMValueRef mipoffsets,
                              LLVMValueRef s,
                              LLVMValueRef t,
                              LLVMValueRef r,
                              const LLVMValueRef *offsets,
                              LLVMValueRef *colors)
{
   const unsigned dims = bld->dims;
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context i32;
   LLVMTypeRef i32_vec_type;
   LLVMValueRef i32_c8;
   LLVMValueRef width_vec, height_vec, depth_vec;
   LLVMValueRef s_ipart, t_ipart = NULL, r_ipart = NULL;
   LLVMValueRef s_float, t_float = NULL, r_float = NULL;
   LLVMValueRef x_stride;
   LLVMValueRef x_offset, offset;
   LLVMValueRef x_subcoord, y_subcoord, z_subcoord;

   lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width));

   i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type);

   lp_build_extract_image_sizes(bld,
                                &bld->int_size_bld,
                                bld->int_coord_type,
                                int_size,
                                &width_vec,
                                &height_vec,
                                &depth_vec);

   /* keep the original float coords around for the NPOT wrap paths */
   s_float = s; t_float = t; r_float = r;

   if (bld->static_sampler_state->normalized_coords) {
      LLVMValueRef scaled_size;
      LLVMValueRef flt_size;

      /* scale size by 256 (8 fractional bits) */
      scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);

      flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);

      lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
   }
   else {
      /* scale coords by 256 (8 fractional bits) */
      s = lp_build_mul_imm(&bld->coord_bld, s, 256);
      if (dims >= 2)
         t = lp_build_mul_imm(&bld->coord_bld, t, 256);
      if (dims >= 3)
         r = lp_build_mul_imm(&bld->coord_bld, r, 256);
   }

   /* convert float to int */
   s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
   if (dims >= 2)
      t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
   if (dims >= 3)
      r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");

   /* compute floor (shift right 8) */
   i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8);
   s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
   if (dims >= 2)
      t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
   if (dims >= 3)
      r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");

   /* add texel offsets (offsets[0] non-NULL implies all used axes present) */
   /* NOTE(review): offsets are both added to the ipart here and also
    * passed down to lp_build_sample_wrap_nearest_int below — confirm the
    * wrap helper only consumes them on the float (NPOT) path so they are
    * not applied twice.
    */
   if (offsets[0]) {
      s_ipart = lp_build_add(&i32, s_ipart, offsets[0]);
      if (dims >= 2) {
         t_ipart = lp_build_add(&i32, t_ipart, offsets[1]);
         if (dims >= 3) {
            r_ipart = lp_build_add(&i32, r_ipart, offsets[2]);
         }
      }
   }

   /* get pixel, row, image strides */
   x_stride = lp_build_const_vec(bld->gallivm,
                                 bld->int_coord_bld.type,
                                 bld->format_desc->block.bits/8);

   /* Do texcoord wrapping, compute texel offset */
   lp_build_sample_wrap_nearest_int(bld,
                                    bld->format_desc->block.width,
                                    s_ipart, s_float,
                                    width_vec, x_stride, offsets[0],
                                    bld->static_texture_state->pot_width,
                                    bld->static_sampler_state->wrap_s,
                                    &x_offset, &x_subcoord);
   offset = x_offset;
   if (dims >= 2) {
      LLVMValueRef y_offset;
      lp_build_sample_wrap_nearest_int(bld,
                                       bld->format_desc->block.height,
                                       t_ipart, t_float,
                                       height_vec, row_stride_vec, offsets[1],
                                       bld->static_texture_state->pot_height,
                                       bld->static_sampler_state->wrap_t,
                                       &y_offset, &y_subcoord);
      offset = lp_build_add(&bld->int_coord_bld, offset, y_offset);
      if (dims >= 3) {
         LLVMValueRef z_offset;
         lp_build_sample_wrap_nearest_int(bld,
                                          1, /* block length (depth) */
                                          r_ipart, r_float,
                                          depth_vec, img_stride_vec, offsets[2],
                                          bld->static_texture_state->pot_depth,
                                          bld->static_sampler_state->wrap_r,
                                          &z_offset, &z_subcoord);
         offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
      }
   }
   if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
       bld->static_texture_state->target == PIPE_TEXTURE_1D_ARRAY ||
       bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY) {
      LLVMValueRef z_offset;
      /* The r coord is the cube face in [0,5] or array layer */
      z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
      offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
   }
   if (mipoffsets) {
      /* add the selected mip level's byte offset */
      offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
   }

   lp_build_sample_fetch_image_nearest(bld, data_ptr, offset,
                                       x_subcoord, y_subcoord,
                                       colors);
}
||
693 | |||
694 | |||
/**
 * Sample a single texture image with nearest sampling.
 * If sampling a cube texture, r = cube face in [0,5].
 * Return filtered color as two vectors of 16-bit fixed point values.
 * Does address calcs (except offsets) with floats.
 * Useful for AVX which has support for 8x32 floats but not 8x32 ints.
 *
 * \param int_size        int vec of texture width/height/depth
 * \param row_stride_vec  row stride in bytes, as int vec
 * \param img_stride_vec  image/layer stride in bytes, as int vec
 * \param data_ptr        pointer to the texture image data
 * \param mipoffsets      optional per-pixel mip level byte offsets
 * \param s, t, r         the incoming texcoords as float vecs
 * \param offsets         optional texel offsets per axis
 * \param colors          returns the fetched texels
 */
static void
lp_build_sample_image_nearest_afloat(struct lp_build_sample_context *bld,
                                     LLVMValueRef int_size,
                                     LLVMValueRef row_stride_vec,
                                     LLVMValueRef img_stride_vec,
                                     LLVMValueRef data_ptr,
                                     LLVMValueRef mipoffsets,
                                     LLVMValueRef s,
                                     LLVMValueRef t,
                                     LLVMValueRef r,
                                     const LLVMValueRef *offsets,
                                     LLVMValueRef *colors)
{
   const unsigned dims = bld->dims;
   LLVMValueRef width_vec, height_vec, depth_vec;
   LLVMValueRef offset;
   LLVMValueRef x_subcoord, y_subcoord;
   LLVMValueRef x_icoord = NULL, y_icoord = NULL, z_icoord = NULL;
   LLVMValueRef flt_size;

   /* image sizes as floats, so the wrap math stays in float space */
   flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);

   lp_build_extract_image_sizes(bld,
                                &bld->float_size_bld,
                                bld->coord_type,
                                flt_size,
                                &width_vec,
                                &height_vec,
                                &depth_vec);

   /* Do texcoord wrapping */
   lp_build_sample_wrap_nearest_float(bld,
                                      s, width_vec, offsets[0],
                                      bld->static_texture_state->pot_width,
                                      bld->static_sampler_state->wrap_s,
                                      &x_icoord);

   if (dims >= 2) {
      lp_build_sample_wrap_nearest_float(bld,
                                         t, height_vec, offsets[1],
                                         bld->static_texture_state->pot_height,
                                         bld->static_sampler_state->wrap_t,
                                         &y_icoord);

      if (dims >= 3) {
         lp_build_sample_wrap_nearest_float(bld,
                                            r, depth_vec, offsets[2],
                                            bld->static_texture_state->pot_depth,
                                            bld->static_sampler_state->wrap_r,
                                            &z_icoord);
      }
   }
   if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
       bld->static_texture_state->target == PIPE_TEXTURE_1D_ARRAY ||
       bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY) {
      /* r is the cube face / array layer, used unwrapped */
      z_icoord = r;
   }

   /*
    * From here on we deal with ints, and we should split up the 256bit
    * vectors manually for better generated code.
    */

   /*
    * compute texel offsets -
    * cannot do offset calc with floats, difficult for block-based formats,
    * and not enough precision anyway.
    */
   lp_build_sample_offset(&bld->int_coord_bld,
                          bld->format_desc,
                          x_icoord, y_icoord,
                          z_icoord,
                          row_stride_vec, img_stride_vec,
                          &offset,
                          &x_subcoord, &y_subcoord);
   if (mipoffsets) {
      /* add the selected mip level's byte offset */
      offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
   }

   lp_build_sample_fetch_image_nearest(bld, data_ptr, offset,
                                       x_subcoord, y_subcoord,
                                       colors);
}
||
785 | |||
786 | |||
/**
 * Fetch texels for image with linear sampling.
 * Return filtered color as two vectors of 16-bit fixed point values.
 *
 * \param data_ptr    base pointer of the texture image data
 * \param offset      per-neighbor texel byte offsets, indexed [z][y][x]
 * \param x_subcoord  sub-block x pixel coords for the two x neighbors
 *                    (used only by the non-rgba8 fetch path)
 * \param y_subcoord  sub-block y pixel coords for the two y neighbors
 * \param s_fpart     8-bit fractional weights for the s axis (one per pixel)
 * \param t_fpart     8-bit fractional weights for the t axis (NULL if dims < 2)
 * \param r_fpart     8-bit fractional weights for the r axis (NULL if dims < 3)
 * \param colors      returns the packed, filtered 8-bit unorm colors
 */
static void
lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld,
                                   LLVMValueRef data_ptr,
                                   LLVMValueRef offset[2][2][2],
                                   LLVMValueRef x_subcoord[2],
                                   LLVMValueRef y_subcoord[2],
                                   LLVMValueRef s_fpart,
                                   LLVMValueRef t_fpart,
                                   LLVMValueRef r_fpart,
                                   LLVMValueRef *colors)
{
   const unsigned dims = bld->dims;
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context u8n;
   LLVMTypeRef u8n_vec_type;
   LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
   LLVMValueRef shuffle;
   LLVMValueRef neighbors[2][2][2]; /* [z][y][x] */
   LLVMValueRef packed;
   unsigned i, j, k;
   unsigned numj, numk;

   /* 8-bit unorm build context: all lerp math below is done on packed u8 */
   lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width));
   u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);

   /*
    * Transform 4 x i32 in
    *
    *   s_fpart = {s0, s1, s2, s3}
    *
    * where each value is between 0 and 0xff,
    *
    * into one 16 x u8
    *
    *   s_fpart = {s0, s0, s0, s0, s1, s1, s1, s1, s2, s2, s2, s2, s3, s3, s3, s3}
    *
    * and likewise for t_fpart. There is no risk of losing precision here
    * since the fractional parts only use the lower 8bits.
    */
   s_fpart = LLVMBuildBitCast(builder, s_fpart, u8n_vec_type, "");
   if (dims >= 2)
      t_fpart = LLVMBuildBitCast(builder, t_fpart, u8n_vec_type, "");
   if (dims >= 3)
      r_fpart = LLVMBuildBitCast(builder, r_fpart, u8n_vec_type, "");

   /*
    * Build the shuffle mask which replicates, for each group of 4 bytes,
    * the byte holding the weight into all 4 lanes.  On big endian the
    * meaningful low byte of each i32 sits at sub-index 3 instead of 0.
    */
   for (j = 0; j < u8n.type.length; j += 4) {
#ifdef PIPE_ARCH_LITTLE_ENDIAN
      unsigned subindex = 0;
#else
      unsigned subindex = 3;
#endif
      LLVMValueRef index;

      index = LLVMConstInt(elem_type, j + subindex, 0);
      for (i = 0; i < 4; ++i)
         shuffles[j + i] = index;
   }

   shuffle = LLVMConstVector(shuffles, u8n.type.length);

   s_fpart = LLVMBuildShuffleVector(builder, s_fpart, u8n.undef,
                                    shuffle, "");
   if (dims >= 2) {
      t_fpart = LLVMBuildShuffleVector(builder, t_fpart, u8n.undef,
                                       shuffle, "");
   }
   if (dims >= 3) {
      r_fpart = LLVMBuildShuffleVector(builder, r_fpart, u8n.undef,
                                       shuffle, "");
   }

   /*
    * Fetch the pixels as 4 x 32bit (rgba order might differ):
    *
    *   rgba0 rgba1 rgba2 rgba3
    *
    * bit cast them into 16 x u8
    *
    *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
    *
    * unpack them into two 8 x i16:
    *
    *   r0 g0 b0 a0 r1 g1 b1 a1
    *   r2 g2 b2 a2 r3 g3 b3 a3
    *
    * The higher 8 bits of the resulting elements will be zero.
    */
   /* number of y/z neighbor pairs actually needed for this dimensionality */
   numj = 1 + (dims >= 2);
   numk = 1 + (dims >= 3);

   /* gather all 2/4/8 neighboring texels (1D/2D/3D respectively) */
   for (k = 0; k < numk; k++) {
      for (j = 0; j < numj; j++) {
         for (i = 0; i < 2; i++) {
            LLVMValueRef rgba8;

            if (util_format_is_rgba8_variant(bld->format_desc)) {
               /*
                * Given the format is a rgba8, just read the pixels as is,
                * without any swizzling. Swizzling will be done later.
                */
               rgba8 = lp_build_gather(bld->gallivm,
                                       bld->texel_type.length,
                                       bld->format_desc->block.bits,
                                       bld->texel_type.width,
                                       data_ptr, offset[k][j][i], TRUE);

               rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
            }
            else {
               /* generic path: decode arbitrary formats to 8-bit unorm rgba */
               rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
                                               bld->format_desc,
                                               u8n.type,
                                               data_ptr, offset[k][j][i],
                                               x_subcoord[i],
                                               y_subcoord[j]);
            }

            neighbors[k][j][i] = rgba8;
         }
      }
   }

   /*
    * Linear interpolation with 8.8 fixed point.
    */
   if (bld->static_sampler_state->force_nearest_s) {
      /* special case 1-D lerp: s axis forced to nearest, lerp along t only */
      packed = lp_build_lerp(&u8n,
                             t_fpart,
                             neighbors[0][0][0],
                             neighbors[0][0][1],
                             LP_BLD_LERP_PRESCALED_WEIGHTS);
   }
   else if (bld->static_sampler_state->force_nearest_t) {
      /* special case 1-D lerp: t axis forced to nearest, lerp along s only */
      packed = lp_build_lerp(&u8n,
                             s_fpart,
                             neighbors[0][0][0],
                             neighbors[0][0][1],
                             LP_BLD_LERP_PRESCALED_WEIGHTS);
   }
   else {
      /* general 1/2/3-D lerping */
      if (dims == 1) {
         packed = lp_build_lerp(&u8n,
                                s_fpart,
                                neighbors[0][0][0],
                                neighbors[0][0][1],
                                LP_BLD_LERP_PRESCALED_WEIGHTS);
      } else if (dims == 2) {
         /* 2-D lerp */
         packed = lp_build_lerp_2d(&u8n,
                                   s_fpart, t_fpart,
                                   neighbors[0][0][0],
                                   neighbors[0][0][1],
                                   neighbors[0][1][0],
                                   neighbors[0][1][1],
                                   LP_BLD_LERP_PRESCALED_WEIGHTS);
      } else {
         /* 3-D lerp */
         assert(dims == 3);
         packed = lp_build_lerp_3d(&u8n,
                                   s_fpart, t_fpart, r_fpart,
                                   neighbors[0][0][0],
                                   neighbors[0][0][1],
                                   neighbors[0][1][0],
                                   neighbors[0][1][1],
                                   neighbors[1][0][0],
                                   neighbors[1][0][1],
                                   neighbors[1][1][0],
                                   neighbors[1][1][1],
                                   LP_BLD_LERP_PRESCALED_WEIGHTS);
      }
   }

   *colors = packed;
}
||
969 | |||
/**
 * Sample a single texture image with (bi-)(tri-)linear sampling.
 * Return filtered color as two vectors of 16-bit fixed point values.
 *
 * Coordinates are converted to 8.8 fixed point integers before wrapping
 * and offset computation (integer address-calc path, vs. the float path
 * in lp_build_sample_image_linear_afloat).
 *
 * \param int_size        level size as an int vector (w, h, d)
 * \param row_stride_vec  row stride in bytes (NULL for 1D)
 * \param img_stride_vec  image/slice stride in bytes (NULL for 1D/2D)
 * \param data_ptr        base pointer of the mip level (or texture base
 *                        when mipoffsets is non-NULL)
 * \param mipoffsets      optional per-pixel mip level byte offsets
 * \param s, t, r         texture coordinates (r is face/layer for
 *                        cube/array targets)
 * \param offsets         optional texel offsets per axis (may contain NULLs)
 * \param colors          returns the packed filtered colors
 */
static void
lp_build_sample_image_linear(struct lp_build_sample_context *bld,
                             LLVMValueRef int_size,
                             LLVMValueRef row_stride_vec,
                             LLVMValueRef img_stride_vec,
                             LLVMValueRef data_ptr,
                             LLVMValueRef mipoffsets,
                             LLVMValueRef s,
                             LLVMValueRef t,
                             LLVMValueRef r,
                             const LLVMValueRef *offsets,
                             LLVMValueRef *colors)
{
   const unsigned dims = bld->dims;
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context i32;
   LLVMTypeRef i32_vec_type;
   LLVMValueRef i32_c8, i32_c128, i32_c255;
   LLVMValueRef width_vec, height_vec, depth_vec;
   LLVMValueRef s_ipart, s_fpart, s_float;
   LLVMValueRef t_ipart = NULL, t_fpart = NULL, t_float = NULL;
   LLVMValueRef r_ipart = NULL, r_fpart = NULL, r_float = NULL;
   LLVMValueRef x_stride, y_stride, z_stride;
   LLVMValueRef x_offset0, x_offset1;
   LLVMValueRef y_offset0, y_offset1;
   LLVMValueRef z_offset0, z_offset1;
   LLVMValueRef offset[2][2][2]; /* [z][y][x] */
   LLVMValueRef x_subcoord[2], y_subcoord[2], z_subcoord[2];
   unsigned x, y, z;

   lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width));

   i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type);

   lp_build_extract_image_sizes(bld,
                                &bld->int_size_bld,
                                bld->int_coord_type,
                                int_size,
                                &width_vec,
                                &height_vec,
                                &depth_vec);

   /* keep the original float coords; some wrap modes need them later */
   s_float = s; t_float = t; r_float = r;

   if (bld->static_sampler_state->normalized_coords) {
      LLVMValueRef scaled_size;
      LLVMValueRef flt_size;

      /* scale size by 256 (8 fractional bits) */
      scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);

      flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);

      /* one mul gets us both unnormalization and the 8.8 fixed-point scale */
      lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
   }
   else {
      /* scale coords by 256 (8 fractional bits) */
      s = lp_build_mul_imm(&bld->coord_bld, s, 256);
      if (dims >= 2)
         t = lp_build_mul_imm(&bld->coord_bld, t, 256);
      if (dims >= 3)
         r = lp_build_mul_imm(&bld->coord_bld, r, 256);
   }

   /* convert float to int */
   s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
   if (dims >= 2)
      t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
   if (dims >= 3)
      r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");

   /* subtract 0.5 (add -128) to center the filter footprint,
    * skipped for axes forced to nearest filtering */
   i32_c128 = lp_build_const_int_vec(bld->gallivm, i32.type, -128);
   if (!bld->static_sampler_state->force_nearest_s) {
      s = LLVMBuildAdd(builder, s, i32_c128, "");
   }
   if (dims >= 2 && !bld->static_sampler_state->force_nearest_t) {
      t = LLVMBuildAdd(builder, t, i32_c128, "");
   }
   if (dims >= 3) {
      r = LLVMBuildAdd(builder, r, i32_c128, "");
   }

   /* compute floor (shift right 8) */
   i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8);
   s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
   if (dims >= 2)
      t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
   if (dims >= 3)
      r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");

   /* add texel offsets */
   if (offsets[0]) {
      s_ipart = lp_build_add(&i32, s_ipart, offsets[0]);
      if (dims >= 2) {
         t_ipart = lp_build_add(&i32, t_ipart, offsets[1]);
         if (dims >= 3) {
            r_ipart = lp_build_add(&i32, r_ipart, offsets[2]);
         }
      }
   }

   /* compute fractional part (AND with 0xff) */
   i32_c255 = lp_build_const_int_vec(bld->gallivm, i32.type, 255);
   s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
   if (dims >= 2)
      t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
   if (dims >= 3)
      r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");

   /* get pixel, row and image strides */
   x_stride = lp_build_const_vec(bld->gallivm, bld->int_coord_bld.type,
                                 bld->format_desc->block.bits/8);
   y_stride = row_stride_vec;
   z_stride = img_stride_vec;

   /* do texcoord wrapping and compute texel offsets */
   lp_build_sample_wrap_linear_int(bld,
                                   bld->format_desc->block.width,
                                   s_ipart, &s_fpart, s_float,
                                   width_vec, x_stride, offsets[0],
                                   bld->static_texture_state->pot_width,
                                   bld->static_sampler_state->wrap_s,
                                   &x_offset0, &x_offset1,
                                   &x_subcoord[0], &x_subcoord[1]);

   /* add potential cube/array/mip offsets now as they are constant per pixel */
   if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
       bld->static_texture_state->target == PIPE_TEXTURE_1D_ARRAY ||
       bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY) {
      LLVMValueRef z_offset;
      z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
      /* The r coord is the cube face in [0,5] or array layer */
      x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, z_offset);
      x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, z_offset);
   }
   if (mipoffsets) {
      x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, mipoffsets);
      x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, mipoffsets);
   }

   /* seed all 8 neighbor offsets with the two x offsets;
    * y/z contributions are accumulated below */
   for (z = 0; z < 2; z++) {
      for (y = 0; y < 2; y++) {
         offset[z][y][0] = x_offset0;
         offset[z][y][1] = x_offset1;
      }
   }

   if (dims >= 2) {
      lp_build_sample_wrap_linear_int(bld,
                                      bld->format_desc->block.height,
                                      t_ipart, &t_fpart, t_float,
                                      height_vec, y_stride, offsets[1],
                                      bld->static_texture_state->pot_height,
                                      bld->static_sampler_state->wrap_t,
                                      &y_offset0, &y_offset1,
                                      &y_subcoord[0], &y_subcoord[1]);

      for (z = 0; z < 2; z++) {
         for (x = 0; x < 2; x++) {
            offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[z][0][x], y_offset0);
            offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[z][1][x], y_offset1);
         }
      }
   }

   if (dims >= 3) {
      /* NOTE(review): block.height is passed as the block size for the
       * depth axis here (there is no block.depth field used) — presumably
       * fine since 3D-capable formats here have 1x1 blocks; confirm. */
      lp_build_sample_wrap_linear_int(bld,
                                      bld->format_desc->block.height,
                                      r_ipart, &r_fpart, r_float,
                                      depth_vec, z_stride, offsets[2],
                                      bld->static_texture_state->pot_depth,
                                      bld->static_sampler_state->wrap_r,
                                      &z_offset0, &z_offset1,
                                      &z_subcoord[0], &z_subcoord[1]);
      for (y = 0; y < 2; y++) {
         for (x = 0; x < 2; x++) {
            offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[0][y][x], z_offset0);
            offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[1][y][x], z_offset1);
         }
      }
   }

   lp_build_sample_fetch_image_linear(bld, data_ptr, offset,
                                      x_subcoord, y_subcoord,
                                      s_fpart, t_fpart, r_fpart,
                                      colors);
}
||
1166 | |||
1167 | |||
/**
 * Sample a single texture image with (bi-)(tri-)linear sampling.
 * Return filtered color as two vectors of 16-bit fixed point values.
 * Does address calcs (except offsets) with floats.
 * Useful for AVX which has support for 8x32 floats but not 8x32 ints.
 *
 * \param int_size        level size as an int vector (w, h, d)
 * \param row_stride_vec  row stride in bytes (NULL for 1D)
 * \param img_stride_vec  image/slice stride in bytes (NULL for 1D/2D)
 * \param data_ptr        base pointer of the mip level (or texture base
 *                        when mipoffsets is non-NULL)
 * \param mipoffsets      optional per-pixel mip level byte offsets
 * \param s, t, r         texture coordinates (r is face/layer for
 *                        cube/array targets)
 * \param offsets         optional texel offsets per axis (may contain NULLs)
 * \param colors          returns the packed filtered colors
 */
static void
lp_build_sample_image_linear_afloat(struct lp_build_sample_context *bld,
                                    LLVMValueRef int_size,
                                    LLVMValueRef row_stride_vec,
                                    LLVMValueRef img_stride_vec,
                                    LLVMValueRef data_ptr,
                                    LLVMValueRef mipoffsets,
                                    LLVMValueRef s,
                                    LLVMValueRef t,
                                    LLVMValueRef r,
                                    const LLVMValueRef *offsets,
                                    LLVMValueRef *colors)
{
   const unsigned dims = bld->dims;
   LLVMValueRef width_vec, height_vec, depth_vec;
   LLVMValueRef s_fpart;
   LLVMValueRef t_fpart = NULL;
   LLVMValueRef r_fpart = NULL;
   LLVMValueRef x_stride, y_stride, z_stride;
   LLVMValueRef x_offset0, x_offset1;
   LLVMValueRef y_offset0, y_offset1;
   LLVMValueRef z_offset0, z_offset1;
   LLVMValueRef offset[2][2][2]; /* [z][y][x] */
   LLVMValueRef x_subcoord[2], y_subcoord[2];
   LLVMValueRef flt_size;
   LLVMValueRef x_icoord0, x_icoord1;
   LLVMValueRef y_icoord0, y_icoord1;
   LLVMValueRef z_icoord0, z_icoord1;
   unsigned x, y, z;

   flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);

   lp_build_extract_image_sizes(bld,
                                &bld->float_size_bld,
                                bld->coord_type,
                                flt_size,
                                &width_vec,
                                &height_vec,
                                &depth_vec);

   /* do texcoord wrapping and compute texel offsets */
   lp_build_sample_wrap_linear_float(bld,
                                     bld->format_desc->block.width,
                                     s, width_vec, offsets[0],
                                     bld->static_texture_state->pot_width,
                                     bld->static_sampler_state->wrap_s,
                                     &x_icoord0, &x_icoord1,
                                     &s_fpart,
                                     bld->static_sampler_state->force_nearest_s);

   if (dims >= 2) {
      lp_build_sample_wrap_linear_float(bld,
                                        bld->format_desc->block.height,
                                        t, height_vec, offsets[1],
                                        bld->static_texture_state->pot_height,
                                        bld->static_sampler_state->wrap_t,
                                        &y_icoord0, &y_icoord1,
                                        &t_fpart,
                                        bld->static_sampler_state->force_nearest_t);

      if (dims >= 3) {
         /* NOTE(review): block.height is reused as the block size for the
          * depth axis — presumably ok since formats reaching this path
          * have 1x1 blocks; confirm against lp_build_sample_wrap_linear_float.
          * force_nearest is hardwired to 0 for r. */
         lp_build_sample_wrap_linear_float(bld,
                                           bld->format_desc->block.height,
                                           r, depth_vec, offsets[2],
                                           bld->static_texture_state->pot_depth,
                                           bld->static_sampler_state->wrap_r,
                                           &z_icoord0, &z_icoord1,
                                           &r_fpart, 0);
      }
   }

   /*
    * From here on we deal with ints, and we should split up the 256bit
    * vectors manually for better generated code.
    */

   /* get pixel, row and image strides */
   x_stride = lp_build_const_vec(bld->gallivm,
                                 bld->int_coord_bld.type,
                                 bld->format_desc->block.bits/8);
   y_stride = row_stride_vec;
   z_stride = img_stride_vec;

   /*
    * compute texel offset -
    * cannot do offset calc with floats, difficult for block-based formats,
    * and not enough precision anyway.
    */
   lp_build_sample_partial_offset(&bld->int_coord_bld,
                                  bld->format_desc->block.width,
                                  x_icoord0, x_stride,
                                  &x_offset0, &x_subcoord[0]);
   lp_build_sample_partial_offset(&bld->int_coord_bld,
                                  bld->format_desc->block.width,
                                  x_icoord1, x_stride,
                                  &x_offset1, &x_subcoord[1]);

   /* add potential cube/array/mip offsets now as they are constant per pixel */
   if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
       bld->static_texture_state->target == PIPE_TEXTURE_1D_ARRAY ||
       bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY) {
      LLVMValueRef z_offset;
      z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
      /* The r coord is the cube face in [0,5] or array layer */
      x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, z_offset);
      x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, z_offset);
   }
   if (mipoffsets) {
      x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, mipoffsets);
      x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, mipoffsets);
   }

   /* seed all 8 neighbor offsets with the two x offsets;
    * y/z contributions are accumulated below */
   for (z = 0; z < 2; z++) {
      for (y = 0; y < 2; y++) {
         offset[z][y][0] = x_offset0;
         offset[z][y][1] = x_offset1;
      }
   }

   if (dims >= 2) {
      lp_build_sample_partial_offset(&bld->int_coord_bld,
                                     bld->format_desc->block.height,
                                     y_icoord0, y_stride,
                                     &y_offset0, &y_subcoord[0]);
      lp_build_sample_partial_offset(&bld->int_coord_bld,
                                     bld->format_desc->block.height,
                                     y_icoord1, y_stride,
                                     &y_offset1, &y_subcoord[1]);
      for (z = 0; z < 2; z++) {
         for (x = 0; x < 2; x++) {
            offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[z][0][x], y_offset0);
            offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[z][1][x], y_offset1);
         }
      }
   }

   if (dims >= 3) {
      /* no blocking in z: block size 1, sub-coordinate is unused */
      LLVMValueRef z_subcoord[2];
      lp_build_sample_partial_offset(&bld->int_coord_bld,
                                     1,
                                     z_icoord0, z_stride,
                                     &z_offset0, &z_subcoord[0]);
      lp_build_sample_partial_offset(&bld->int_coord_bld,
                                     1,
                                     z_icoord1, z_stride,
                                     &z_offset1, &z_subcoord[1]);
      for (y = 0; y < 2; y++) {
         for (x = 0; x < 2; x++) {
            offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[0][y][x], z_offset0);
            offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[1][y][x], z_offset1);
         }
      }
   }

   lp_build_sample_fetch_image_linear(bld, data_ptr, offset,
                                      x_subcoord, y_subcoord,
                                      s_fpart, t_fpart, r_fpart,
                                      colors);
}
||
1337 | |||
1338 | |||
/**
 * Sample the texture/mipmap using given image filter and mip filter.
 * data0_ptr and data1_ptr point to the two mipmap levels to sample
 * from. width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
 * If we're using nearest miplevel sampling the '1' values will be null/unused.
 *
 * \param img_filter  PIPE_TEX_FILTER_NEAREST or PIPE_TEX_FILTER_LINEAR
 * \param mip_filter  PIPE_TEX_MIPFILTER_x; only LINEAR triggers the
 *                    second-level fetch and lerp below
 * \param s, t, r     texture coordinates
 * \param offsets     optional texel offsets per axis
 * \param ilevel0     first (integer) mip level
 * \param ilevel1     second mip level (used only for mipfilter LINEAR)
 * \param lod_fpart   float fraction between the two levels (LINEAR only)
 * \param colors_var  alloca'd variable the packed result is stored into
 */
static void
lp_build_sample_mipmap(struct lp_build_sample_context *bld,
                       unsigned img_filter,
                       unsigned mip_filter,
                       LLVMValueRef s,
                       LLVMValueRef t,
                       LLVMValueRef r,
                       const LLVMValueRef *offsets,
                       LLVMValueRef ilevel0,
                       LLVMValueRef ilevel1,
                       LLVMValueRef lod_fpart,
                       LLVMValueRef colors_var)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef size0;
   LLVMValueRef size1;
   LLVMValueRef row_stride0_vec = NULL;
   LLVMValueRef row_stride1_vec = NULL;
   LLVMValueRef img_stride0_vec = NULL;
   LLVMValueRef img_stride1_vec = NULL;
   LLVMValueRef data_ptr0;
   LLVMValueRef data_ptr1;
   LLVMValueRef mipoff0 = NULL;
   LLVMValueRef mipoff1 = NULL;
   LLVMValueRef colors0;
   LLVMValueRef colors1;

   /* sample the first mipmap level */
   lp_build_mipmap_level_sizes(bld, ilevel0,
                               &size0,
                               &row_stride0_vec, &img_stride0_vec);
   if (bld->num_lods == 1) {
      /* single lod: use the level's base pointer directly */
      data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
   }
   else {
      /* This path should work for num_lods 1 too but slightly less efficient */
      data_ptr0 = bld->base_ptr;
      mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
   }

   /* AVX with >4-wide coords: use the float address-calc variants */
   if (util_cpu_caps.has_avx && bld->coord_type.length > 4) {
      if (img_filter == PIPE_TEX_FILTER_NEAREST) {
         lp_build_sample_image_nearest_afloat(bld,
                                              size0,
                                              row_stride0_vec, img_stride0_vec,
                                              data_ptr0, mipoff0, s, t, r, offsets,
                                              &colors0);
      }
      else {
         assert(img_filter == PIPE_TEX_FILTER_LINEAR);
         lp_build_sample_image_linear_afloat(bld,
                                             size0,
                                             row_stride0_vec, img_stride0_vec,
                                             data_ptr0, mipoff0, s, t, r, offsets,
                                             &colors0);
      }
   }
   else {
      if (img_filter == PIPE_TEX_FILTER_NEAREST) {
         lp_build_sample_image_nearest(bld,
                                       size0,
                                       row_stride0_vec, img_stride0_vec,
                                       data_ptr0, mipoff0, s, t, r, offsets,
                                       &colors0);
      }
      else {
         assert(img_filter == PIPE_TEX_FILTER_LINEAR);
         lp_build_sample_image_linear(bld,
                                      size0,
                                      row_stride0_vec, img_stride0_vec,
                                      data_ptr0, mipoff0, s, t, r, offsets,
                                      &colors0);
      }
   }

   /* Store the first level's colors in the output variables */
   LLVMBuildStore(builder, colors0, colors_var);

   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      /* convert the float lod fraction to 8-bit fixed point (x256) */
      LLVMValueRef h16vec_scale = lp_build_const_vec(bld->gallivm,
                                                     bld->levelf_bld.type, 256.0);
      LLVMTypeRef i32vec_type = bld->leveli_bld.vec_type;
      struct lp_build_if_state if_ctx;
      LLVMValueRef need_lerp;
      unsigned num_quads = bld->coord_bld.type.length / 4;
      unsigned i;

      lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16vec_scale, "");
      lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32vec_type, "lod_fpart.fixed16");

      /* need_lerp = lod_fpart > 0 */
      if (bld->num_lods == 1) {
         need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
                                   lod_fpart, bld->leveli_bld.zero,
                                   "need_lerp");
      }
      else {
         /*
          * We'll do mip filtering if any of the quads need it.
          * It might be better to split the vectors here and only fetch/filter
          * quads which need it.
          */
         /*
          * We need to clamp lod_fpart here since we can get negative
          * values which would screw up filtering if not all
          * lod_fpart values have same sign.
          * We can however then skip the greater than comparison.
          */
         lod_fpart = lp_build_max(&bld->leveli_bld, lod_fpart,
                                  bld->leveli_bld.zero);
         need_lerp = lp_build_any_true_range(&bld->leveli_bld, bld->num_lods, lod_fpart);
      }

      /* emit a runtime branch: skip the second-level fetch entirely when
       * the fraction is zero for all quads */
      lp_build_if(&if_ctx, bld->gallivm, need_lerp);
      {
         struct lp_build_context u8n_bld;

         lp_build_context_init(&u8n_bld, bld->gallivm, lp_type_unorm(8, bld->vector_width));

         /* sample the second mipmap level */
         lp_build_mipmap_level_sizes(bld, ilevel1,
                                     &size1,
                                     &row_stride1_vec, &img_stride1_vec);
         if (bld->num_lods == 1) {
            data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
         }
         else {
            data_ptr1 = bld->base_ptr;
            mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
         }

         if (util_cpu_caps.has_avx && bld->coord_type.length > 4) {
            if (img_filter == PIPE_TEX_FILTER_NEAREST) {
               lp_build_sample_image_nearest_afloat(bld,
                                                    size1,
                                                    row_stride1_vec, img_stride1_vec,
                                                    data_ptr1, mipoff1, s, t, r, offsets,
                                                    &colors1);
            }
            else {
               lp_build_sample_image_linear_afloat(bld,
                                                   size1,
                                                   row_stride1_vec, img_stride1_vec,
                                                   data_ptr1, mipoff1, s, t, r, offsets,
                                                   &colors1);
            }
         }
         else {
            if (img_filter == PIPE_TEX_FILTER_NEAREST) {
               lp_build_sample_image_nearest(bld,
                                             size1,
                                             row_stride1_vec, img_stride1_vec,
                                             data_ptr1, mipoff1, s, t, r, offsets,
                                             &colors1);
            }
            else {
               lp_build_sample_image_linear(bld,
                                            size1,
                                            row_stride1_vec, img_stride1_vec,
                                            data_ptr1, mipoff1, s, t, r, offsets,
                                            &colors1);
            }
         }

         /* interpolate samples from the two mipmap levels */

         if (num_quads == 1 && bld->num_lods == 1) {
            /* one scalar weight: truncate to u8 and splat across the vector */
            lod_fpart = LLVMBuildTrunc(builder, lod_fpart, u8n_bld.elem_type, "");
            lod_fpart = lp_build_broadcast_scalar(&u8n_bld, lod_fpart);

#if HAVE_LLVM == 0x208
            /* This was a work-around for a bug in LLVM 2.8.
             * Evidently, something goes wrong in the construction of the
             * lod_fpart short[8] vector. Adding this no-effect shuffle seems
             * to force the vector to be properly constructed.
             * Tested with mesa-demos/src/tests/mipmap_limits.c (press t, f).
             */
#error Unsupported
#endif
         }
         else {
            unsigned num_chans_per_lod = 4 * bld->coord_type.length / bld->num_lods;
            LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type, bld->leveli_bld.type.length);
            LLVMValueRef shuffle[LP_MAX_VECTOR_LENGTH];

            /* Take the LSB of lod_fpart */
            lod_fpart = LLVMBuildTrunc(builder, lod_fpart, tmp_vec_type, "");

            /* Broadcast each lod weight into their respective channels */
            for (i = 0; i < u8n_bld.type.length; ++i) {
               shuffle[i] = lp_build_const_int32(bld->gallivm, i / num_chans_per_lod);
            }
            lod_fpart = LLVMBuildShuffleVector(builder, lod_fpart, LLVMGetUndef(tmp_vec_type),
                                               LLVMConstVector(shuffle, u8n_bld.type.length), "");
         }

         colors0 = lp_build_lerp(&u8n_bld, lod_fpart,
                                 colors0, colors1,
                                 LP_BLD_LERP_PRESCALED_WEIGHTS);

         /* overwrite the first level's colors with the lerped result */
         LLVMBuildStore(builder, colors0, colors_var);
      }
      lp_build_endif(&if_ctx);
   }
}
||
1550 | |||
1551 | |||
1552 | |||
1553 | /** |
||
1554 | * Texture sampling in AoS format. Used when sampling common 32-bit/texel |
||
1555 | * formats. 1D/2D/3D/cube texture supported. All mipmap sampling modes |
||
1556 | * but only limited texture coord wrap modes. |
||
1557 | */ |
||
1558 | void |
||
1559 | lp_build_sample_aos(struct lp_build_sample_context *bld, |
||
1560 | unsigned sampler_unit, |
||
1561 | LLVMValueRef s, |
||
1562 | LLVMValueRef t, |
||
1563 | LLVMValueRef r, |
||
1564 | const LLVMValueRef *offsets, |
||
1565 | LLVMValueRef lod_ipart, |
||
1566 | LLVMValueRef lod_fpart, |
||
1567 | LLVMValueRef ilevel0, |
||
1568 | LLVMValueRef ilevel1, |
||
1569 | LLVMValueRef texel_out[4]) |
||
1570 | { |
||
1571 | struct lp_build_context *int_bld = &bld->int_bld; |
||
1572 | LLVMBuilderRef builder = bld->gallivm->builder; |
||
1573 | const unsigned mip_filter = bld->static_sampler_state->min_mip_filter; |
||
1574 | const unsigned min_filter = bld->static_sampler_state->min_img_filter; |
||
1575 | const unsigned mag_filter = bld->static_sampler_state->mag_img_filter; |
||
1576 | const unsigned dims = bld->dims; |
||
1577 | LLVMValueRef packed_var, packed; |
||
1578 | LLVMValueRef unswizzled[4]; |
||
1579 | struct lp_build_context u8n_bld; |
||
1580 | |||
1581 | /* we only support the common/simple wrap modes at this time */ |
||
1582 | assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_s)); |
||
1583 | if (dims >= 2) |
||
1584 | assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_t)); |
||
1585 | if (dims >= 3) |
||
1586 | assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_r)); |
||
1587 | |||
1588 | |||
1589 | /* make 8-bit unorm builder context */ |
||
1590 | lp_build_context_init(&u8n_bld, bld->gallivm, lp_type_unorm(8, bld->vector_width)); |
||
1591 | |||
1592 | /* |
||
1593 | * Get/interpolate texture colors. |
||
1594 | */ |
||
1595 | |||
1596 | packed_var = lp_build_alloca(bld->gallivm, u8n_bld.vec_type, "packed_var"); |
||
1597 | |||
1598 | if (min_filter == mag_filter) { |
||
1599 | /* no need to distinguish between minification and magnification */ |
||
1600 | lp_build_sample_mipmap(bld, |
||
1601 | min_filter, mip_filter, |
||
1602 | s, t, r, offsets, |
||
1603 | ilevel0, ilevel1, lod_fpart, |
||
1604 | packed_var); |
||
1605 | } |
||
1606 | else { |
||
1607 | /* Emit conditional to choose min image filter or mag image filter |
||
1608 | * depending on the lod being > 0 or <= 0, respectively. |
||
1609 | */ |
||
1610 | struct lp_build_if_state if_ctx; |
||
1611 | LLVMValueRef minify; |
||
1612 | |||
1613 | /* |
||
1614 | * XXX this should to all lods into account, if some are min |
||
1615 | * some max probably could hack up the coords/weights in the linear |
||
1616 | * path with selects to work for nearest. |
||
1617 | * If that's just two quads sitting next to each other it seems |
||
1618 | * quite ok to do the same filtering method on both though, at |
||
1619 | * least unless we have explicit lod (and who uses different |
||
1620 | * min/mag filter with that?) |
||
1621 | */ |
||
1622 | if (bld->num_lods > 1) |
||
1623 | lod_ipart = LLVMBuildExtractElement(builder, lod_ipart, |
||
1624 | lp_build_const_int32(bld->gallivm, 0), ""); |
||
1625 | |||
1626 | /* minify = lod >= 0.0 */ |
||
1627 | minify = LLVMBuildICmp(builder, LLVMIntSGE, |
||
1628 | lod_ipart, int_bld->zero, ""); |
||
1629 | |||
1630 | lp_build_if(&if_ctx, bld->gallivm, minify); |
||
1631 | { |
||
1632 | /* Use the minification filter */ |
||
1633 | lp_build_sample_mipmap(bld, |
||
1634 | min_filter, mip_filter, |
||
1635 | s, t, r, offsets, |
||
1636 | ilevel0, ilevel1, lod_fpart, |
||
1637 | packed_var); |
||
1638 | } |
||
1639 | lp_build_else(&if_ctx); |
||
1640 | { |
||
1641 | /* Use the magnification filter */ |
||
1642 | lp_build_sample_mipmap(bld, |
||
1643 | mag_filter, PIPE_TEX_MIPFILTER_NONE, |
||
1644 | s, t, r, offsets, |
||
1645 | ilevel0, NULL, NULL, |
||
1646 | packed_var); |
||
1647 | } |
||
1648 | lp_build_endif(&if_ctx); |
||
1649 | } |
||
1650 | |||
1651 | packed = LLVMBuildLoad(builder, packed_var, ""); |
||
1652 | |||
1653 | /* |
||
1654 | * Convert to SoA and swizzle. |
||
1655 | */ |
||
1656 | lp_build_rgba8_to_fi32_soa(bld->gallivm, |
||
1657 | bld->texel_type, |
||
1658 | packed, unswizzled); |
||
1659 | |||
1660 | if (util_format_is_rgba8_variant(bld->format_desc)) { |
||
1661 | lp_build_format_swizzle_soa(bld->format_desc, |
||
1662 | &bld->texel_bld, |
||
1663 | unswizzled, texel_out); |
||
1664 | } |
||
1665 | else { |
||
1666 | texel_out[0] = unswizzled[0]; |
||
1667 | texel_out[1] = unswizzled[1]; |
||
1668 | texel_out[2] = unswizzled[2]; |
||
1669 | texel_out[3] = unswizzled[3]; |
||
1670 | } |
||
1671 | }=>>>>>>>>>>>>>>>>>>> |